diff --git a/pgloader.1.md b/pgloader.1.md index ba722fd..4238bf6 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -841,6 +841,12 @@ The `csv` format command accepts the following clauses and options: Takes a numeric value as argument. Instruct pgloader to skip that many lines at the beginning of the input file. + - *csv header* + + Use the first line read after *skip header* as the list of csv field + names to be found in the CSV file, using the same CSV parameters as + for the CSV data. + - *trim unquoted blanks* When reading unquoted values in the `CSV` file, remove the blanks diff --git a/src/parsers/command-csv.lisp b/src/parsers/command-csv.lisp index cad8fde..1162700 100644 --- a/src/parsers/command-csv.lisp +++ b/src/parsers/command-csv.lisp @@ -49,6 +49,9 @@ (bind (((_ _ _ digits) osh)) (cons :skip-lines (parse-integer (text digits)))))) +(defrule option-csv-header (and kw-csv kw-header) + (:constant (cons :csv-header t))) + (defrule option-fields-enclosed-by (and kw-fields (? kw-optionally) kw-enclosed kw-by separator) (:lambda (enc) @@ -95,6 +98,7 @@ option-truncate option-disable-triggers option-skip-header + option-csv-header option-lines-terminated-by option-fields-not-enclosed option-fields-enclosed-by diff --git a/src/sources/csv/csv.lisp b/src/sources/csv/csv.lisp index dcd8f69..8ad9c77 100644 --- a/src/sources/csv/csv.lisp +++ b/src/sources/csv/csv.lisp @@ -39,7 +39,10 @@ :initarg :source-type) ; or :filename (encoding :accessor encoding ; file encoding :initarg :encoding) ; - (skip-lines :accessor skip-lines ; CSV headers + (csv-header :accessor csv-header ; CSV headers are col names + :initarg :csv-header + :initform nil) ; + (skip-lines :accessor skip-lines ; CSV skip first N lines :initarg :skip-lines ; :initform 0) ; (separator :accessor csv-separator ; CSV separator @@ -78,6 +81,20 @@ ;;; ;;; Read a file format in CSV format, and call given function on each line. 
;;; +(defun parse-csv-header (csv header) + "Parse the header line given csv setup." + ;; a field entry is a list of field name and options + (mapcar #'list + (car ; parsing a single line + (cl-csv:read-csv header + :separator (csv-separator csv) + :quote (csv-quote csv) + :escape (csv-escape csv) + :unquoted-empty-string-is-nil t + :quoted-empty-string-is-nil nil + :trim-outer-whitespace (csv-trim-blanks csv) + :newline (csv-newline csv))))) + (defmethod map-rows ((csv copy-csv) &key process-row-fn) "Load data from a text file in CSV format, with support for advanced projecting capabilities. See `project-fields' for details. @@ -114,6 +131,13 @@ ;; about skipping the first line (loop repeat (skip-lines csv) do (read-line input nil nil)) + ;; we might now have to read the CSV fields from the header line + (when (csv-header csv) + (setf (fields csv) + (parse-csv-header csv (read-line input nil nil))) + + (log-message :debug "Parsed header columns ~s" (fields csv))) + ;; read in the text file, split it into columns, process NULL ;; columns the way postmodern expects them, and call ;; PROCESS-ROW-FN on them diff --git a/test/Makefile b/test/Makefile index 36d5f8d..a4cd15c 100644 --- a/test/Makefile +++ b/test/Makefile @@ -10,6 +10,7 @@ REGRESS= allcols.load \ csv-parse-date.load \ csv-error.load \ csv-filename-pattern.load \ + csv-header.load \ csv-keep-extra-blanks.load \ csv-nulls.load \ csv-trim-extra-blanks.load \ diff --git a/test/csv-header.load b/test/csv-header.load new file mode 100644 index 0000000..ccd72d2 --- /dev/null +++ b/test/csv-header.load @@ -0,0 +1,25 @@ +LOAD CSV + FROM INLINE + INTO 
postgresql://dim@localhost/pgloader?header + + WITH truncate, + fields terminated by ',', + csv header + + BEFORE LOAD DO + $$ drop table if exists header; $$, + $$ CREATE TABLE header + ( + somefields text, + rekplcode text, + "repl$grpid" text, + "repl$id" text, + another text, + fields text + ) + $$; + + +somefields,rekplcode,repl$grpid,repl$id,another,fields +a,b,c,d,e,f +foo,bar,baz,quux,foobar,fizzbuzz diff --git a/test/regress/expected/csv-header.out b/test/regress/expected/csv-header.out new file mode 100644 index 0000000..512042d --- /dev/null +++ b/test/regress/expected/csv-header.out @@ -0,0 +1,2 @@ +a b c d e f +foo bar baz quux foobar fizzbuzz