Allow fields/columns projections when parsing header.

When using a CSV header, we might find fields in a different order than the
target table columns, and some of the fields might not be read at all.
Take into account the header we actually read rather than expecting it to
match the target table definition.

Fix #888.
This commit is contained in:
Dimitri Fontaine 2019-01-15 22:39:08 +01:00
parent 1306b4c953
commit dae5dec03c
6 changed files with 33 additions and 25 deletions

View File

@ -98,6 +98,8 @@
(loop :for path-spec :in path-list (loop :for path-spec :in path-list
:count t :count t
:do (let ((table-source (clone-copy-for copy path-spec))) :do (let ((table-source (clone-copy-for copy path-spec)))
(when (and (header table-source) (null (fields table-source)))
(parse-header table-source))
(incf task-count (incf task-count
(copy-from table-source (copy-from table-source
:concurrency concurrency :concurrency concurrency

View File

@ -95,7 +95,7 @@
:initform nil)) ; :initform nil)) ;
(:documentation "pgloader Multiple Files Data Source (csv, fixed, copy).")) (:documentation "pgloader Multiple Files Data Source (csv, fixed, copy)."))
;; Side-by-side diff: the previous signature (left) received the header line
;; as a second argument; the new signature (right) takes only the md-copy
;; object, and the copy-csv method reads the header line from the source itself.
(defgeneric parse-header (md-copy header) (defgeneric parse-header (md-copy)
(:documentation "Parse the file header and return a list of fields.")) (:documentation "Parse the file header and return a list of fields."))
(defgeneric process-rows (md-copy stream process-fn) (defgeneric process-rows (md-copy stream process-fn)

View File

@ -4,7 +4,7 @@
(in-package #:pgloader.sources) (in-package #:pgloader.sources)
;; Fallback method specialized on md-copy: it never parses anything and always
;; signals an error that names the concrete type of COPY. The new version
;; (right-hand side) drops the HEADER argument.
(defmethod parse-header ((copy md-copy) header) (defmethod parse-header ((copy md-copy))
"Unsupported by default, to be implemented in each md-copy subclass." "Unsupported by default, to be implemented in each md-copy subclass."
(error "Parsing the header of a ~s is not implemented yet." (type-of copy))) (error "Parsing the header of a ~s is not implemented yet." (type-of copy)))
@ -59,12 +59,8 @@
;; about skipping the first line ;; about skipping the first line
(loop :repeat (skip-lines copy) :do (read-line input nil nil)) (loop :repeat (skip-lines copy) :do (read-line input nil nil))
;; we might now have to read the fields from the header line ;; we might now have to skip the header line
(when (header copy) (when (header copy) (read-line input nil nil))
(setf (fields copy)
(parse-header copy (read-line input nil nil)))
(log-message :debug "Parsed header columns ~s" (fields copy)))
;; read in the text file, split it into columns ;; read in the text file, split it into columns
(process-rows copy input process-row-fn)))) (process-rows copy input process-row-fn))))

View File

@ -57,19 +57,29 @@
;;; ;;;
;;; Read a file format in CSV format, and call given function on each line. ;;; Read a file format in CSV format, and call given function on each line.
;;; ;;;
(defmethod parse-header ((csv copy-csv))
  "Read the header line of the CSV source and install it as the field list.

   Opens the source of CSV for input using its encoding, skips
   (skip-lines csv) lines, then parses the next line with the separator,
   quote, escape, blank-trimming and newline settings of CSV. Each column
   name becomes a one-element list; the resulting list is stored in
   (fields csv) and returned."
  (with-connection (cnx (source csv)
                        :direction :input
                        :external-format (encoding csv)
                        :if-does-not-exist nil)
    (let ((input (md-strm cnx)))
      ;; the header is the first line found after the skipped ones
      (loop :repeat (skip-lines csv) :do (read-line input nil nil))
      ;; NOTE(review): read-line returns NIL at end of file, in which case
      ;; HEADER-LINE is NIL when handed to cl-csv:read-csv -- confirm how
      ;; empty sources should be handled.
      (let* ((header-line (read-line input nil nil))
             (field-name-list
              ;; a field entry is a list of field name and options
              (mapcar #'list             ; we need each field to be a list
                      (car               ; parsing a single line
                       (cl-csv:read-csv header-line
                                        :separator (csv-separator csv)
                                        :quote (csv-quote csv)
                                        :escape (csv-escape csv)
                                        :unquoted-empty-string-is-nil t
                                        :quoted-empty-string-is-nil nil
                                        :trim-outer-whitespace (csv-trim-blanks csv)
                                        :newline (csv-newline csv))))))
        ;; store the fields before logging, so that the message reports the
        ;; columns that were just parsed rather than the previous value
        (setf (fields csv) field-name-list)
        (log-message :notice "Parsed header columns ~s" (fields csv))
        field-name-list))))
(defmethod process-rows ((csv copy-csv) stream process-fn) (defmethod process-rows ((csv copy-csv) stream process-fn)
"Process rows from STREAM according to COPY specifications and PROCESS-FN." "Process rows from STREAM according to COPY specifications and PROCESS-FN."

View File

@ -15,11 +15,11 @@ LOAD CSV
"repl$grpid" text, "repl$grpid" text,
"repl$id" text, "repl$id" text,
another text, another text,
fields text fields integer
) )
$$; $$;
somefields,rekplcode,repl$grpid,repl$id,another,fields somefields,rekplcode,repl$grpid,repl$id,fields,another
a,b,c,d,e,f a,b,c,d,1,e
foo,bar,baz,quux,foobar,fizzbuzz foo,bar,baz,quux,2,foobar

View File

@ -1,2 +1,2 @@
a b c d e f a b c d e 1
foo bar baz quux foobar fizzbuzz foo bar baz quux foobar 2