Implement CSV headers support.

Some CSV files are given with a header line containing the list of
their column names; use that when given the option "csv header".

Note that when both "skip header" and "csv header" options are used,
pgloader first skips the required number of lines and then uses the
next line as the CSV header.

Because of temporary failure to install the `ronn` documentation tool,
this patch only commits the changes to the source docs and omits to
update the man page (pgloader.1). A follow-up patch is intended to be
pushed that fixes that.

See #236 which is using shell tricks to retrieve the field list from the
CSV file itself and motivated this patch to finally get written.
This commit is contained in:
Dimitri Fontaine 2015-05-21 12:54:33 +02:00
parent dfb4cc2049
commit abbc105c41
6 changed files with 64 additions and 2 deletions

View File

@ -841,6 +841,12 @@ The `csv` format command accepts the following clauses and options:
Takes a numeric value as argument. Instruct pgloader to skip that
many lines at the beginning of the input file.
- *csv header*
Use the first line read after *skip header* as the list of csv field
names to be found in the CSV file, using the same CSV parameters as
for the CSV data.
- *trim unquoted blanks*
When reading unquoted values in the `CSV` file, remove the blanks

View File

@ -49,6 +49,9 @@
(bind (((_ _ _ digits) osh))
(cons :skip-lines (parse-integer (text digits))))))
;; Grammar rule for the "csv header" WITH option: when present, the
;; first data line (after any "skip header" lines) names the CSV fields.
(defrule option-csv-header (and kw-csv kw-header)
  (:lambda (matched)
    (declare (ignore matched))
    ;; same option cons cell as the :constant form would produce
    (cons :csv-header t)))
(defrule option-fields-enclosed-by
(and kw-fields (? kw-optionally) kw-enclosed kw-by separator)
(:lambda (enc)
@ -95,6 +98,7 @@
option-truncate
option-disable-triggers
option-skip-header
option-csv-header
option-lines-terminated-by
option-fields-not-enclosed
option-fields-enclosed-by

View File

@ -39,7 +39,10 @@
:initarg :source-type) ; or :filename
(encoding :accessor encoding ; file encoding
:initarg :encoding) ;
(skip-lines :accessor skip-lines ; CSV headers
(csv-header :accessor csv-header ; CSV headers are col names
:initarg :csv-header
:initform nil) ;
  (skip-lines :accessor skip-lines ; CSV skip first N lines
:initarg :skip-lines ;
:initform 0) ;
(separator :accessor csv-separator ; CSV separator
@ -78,6 +81,20 @@
;;;
;;; Read a file format in CSV format, and call given function on each line.
;;;
(defun parse-csv-header (csv header)
  "Parse HEADER, a single CSV line, using the same CSV parameters as the
   data of the CSV source object, and return a field list suitable for
   (fields csv): each entry is a list whose first element is the field
   name, with no extra per-field options."
  (let ((rows (cl-csv:read-csv header
                               :separator (csv-separator csv)
                               :quote (csv-quote csv)
                               :escape (csv-escape csv)
                               :unquoted-empty-string-is-nil t
                               :quoted-empty-string-is-nil nil
                               :trim-outer-whitespace (csv-trim-blanks csv)
                               :newline (csv-newline csv))))
    ;; read-csv returns a list of rows; HEADER is one line, so take the
    ;; first row and wrap each column name into its own (name) list.
    (mapcar #'list (first rows))))
(defmethod map-rows ((csv copy-csv) &key process-row-fn)
"Load data from a text file in CSV format, with support for advanced
projecting capabilities. See `project-fields' for details.
@ -114,6 +131,13 @@
;; about skipping the first line
(loop repeat (skip-lines csv) do (read-line input nil nil))
;; we might now have to read the CSV fields from the header line
(when (csv-header csv)
(setf (fields csv)
(parse-csv-header csv (read-line input nil nil)))
(log-message :debug "Parsed header columns ~s" (fields csv)))
;; read in the text file, split it into columns, process NULL
;; columns the way postmodern expects them, and call
;; PROCESS-ROW-FN on them
@ -153,7 +177,7 @@
(with-stats-collection ((target csv)
:dbname (db-name (target-db csv))
:state *state* :summary summary)
(lp:task-handler-bind ((error #'lp:invoke-transfer-error))
(lp:task-handler-bind () ;; ((error #'lp:invoke-transfer-error))
(log-message :notice "COPY ~a" (target csv))
(lp:submit-task channel #'copy-to-queue csv queue)

View File

@ -10,6 +10,7 @@ REGRESS= allcols.load \
csv-parse-date.load \
csv-error.load \
csv-filename-pattern.load \
csv-header.load \
csv-keep-extra-blanks.load \
csv-nulls.load \
csv-trim-extra-blanks.load \

25
test/csv-header.load Normal file
View File

@ -0,0 +1,25 @@
LOAD CSV
FROM INLINE
INTO postgresql://dim@localhost/pgloader?header
WITH truncate,
fields terminated by ',',
csv header
BEFORE LOAD DO
$$ drop table if exists header; $$,
$$ CREATE TABLE header
(
somefields text,
rekplcode text,
"repl$grpid" text,
"repl$id" text,
another text,
fields text
)
$$;
somefields,rekplcode,repl$grpid,repl$id,another,fields
a,b,c,d,e,f
foo,bar,baz,quux,foobar,fizzbuzz

View File

@ -0,0 +1,2 @@
a b c d e f
foo bar baz quux foobar fizzbuzz