Allow fields/columns projections when parsing header.

When using a CSV header, the fields might come in a different order than the
target table columns, and not all of the fields are necessarily going to be
read. Use the header we actually read rather than expecting the header to look
like the target table definition.

Fix #888.
This commit is contained in:
Dimitri Fontaine 2019-01-15 22:39:08 +01:00
parent 1306b4c953
commit dae5dec03c
6 changed files with 33 additions and 25 deletions

View File

@ -98,6 +98,8 @@
(loop :for path-spec :in path-list
:count t
:do (let ((table-source (clone-copy-for copy path-spec)))
(when (and (header table-source) (null (fields table-source)))
(parse-header table-source))
(incf task-count
(copy-from table-source
:concurrency concurrency

View File

@ -95,7 +95,7 @@
:initform nil)) ;
(:documentation "pgloader Multiple Files Data Source (csv, fixed, copy)."))
(defgeneric parse-header (md-copy header)
(defgeneric parse-header (md-copy)
(:documentation "Parse the file header and return a list of fields."))
(defgeneric process-rows (md-copy stream process-fn)

View File

@ -4,7 +4,7 @@
(in-package #:pgloader.sources)
(defmethod parse-header ((copy md-copy) header)
(defmethod parse-header ((copy md-copy))
"Unsupported by default, to be implemented in each md-copy subclass."
(error "Parsing the header of a ~s is not implemented yet." (type-of copy)))
@ -59,12 +59,8 @@
;; about skipping the first line
(loop :repeat (skip-lines copy) :do (read-line input nil nil))
;; we might now have to read the fields from the header line
(when (header copy)
(setf (fields copy)
(parse-header copy (read-line input nil nil)))
(log-message :debug "Parsed header columns ~s" (fields copy)))
;; we might now have to skip the header line
(when (header copy) (read-line input nil nil))
;; read in the text file, split it into columns
(process-rows copy input process-row-fn))))

View File

@ -57,19 +57,29 @@
;;;
;;; Read a file format in CSV format, and call given function on each line.
;;;
(defmethod parse-header ((csv copy-csv) header)
(defmethod parse-header ((csv copy-csv))
"Parse the header line given csv setup."
;; a field entry is a list of field name and options
(mapcar #'list
(car ; parsing a single line
(cl-csv:read-csv header
:separator (csv-separator csv)
:quote (csv-quote csv)
:escape (csv-escape csv)
:unquoted-empty-string-is-nil t
:quoted-empty-string-is-nil nil
:trim-outer-whitespace (csv-trim-blanks csv)
:newline (csv-newline csv)))))
(with-connection (cnx (source csv)
                      :direction :input
                      :external-format (encoding csv)
                      :if-does-not-exist nil)
  (let ((input (md-strm cnx)))
    ;; The header is the first line found after the SKIP-LINES leading
    ;; lines, so consume those first.
    (loop :repeat (skip-lines csv) :do (read-line input nil nil))
    (let ((header-line (read-line input nil nil)))
      ;; READ-LINE is called with a NIL eof-value: at end of file there is
      ;; no header to parse, so leave (fields csv) untouched and return NIL
      ;; rather than handing NIL to the CSV parser.
      (when header-line
        (let ((field-name-list
                (mapcar #'list      ; we need each field to be a list
                        (car        ; parsing a single line
                         (cl-csv:read-csv header-line
                                          :separator (csv-separator csv)
                                          :quote (csv-quote csv)
                                          :escape (csv-escape csv)
                                          :unquoted-empty-string-is-nil t
                                          :quoted-empty-string-is-nil nil
                                          :trim-outer-whitespace (csv-trim-blanks csv)
                                          :newline (csv-newline csv))))))
          ;; Install the parsed fields, then log the freshly parsed list
          ;; (not the previous slot value) and return it to the caller.
          ;; The last closing paren below ends the enclosing DEFMETHOD.
          (setf (fields csv) field-name-list)
          (log-message :notice "Parsed header columns ~s" field-name-list)
          field-name-list))))))
(defmethod process-rows ((csv copy-csv) stream process-fn)
"Process rows from STREAM according to COPY specifications and PROCESS-FN."

View File

@ -15,11 +15,11 @@ LOAD CSV
"repl$grpid" text,
"repl$id" text,
another text,
fields text
fields integer
)
$$;
somefields,rekplcode,repl$grpid,repl$id,another,fields
a,b,c,d,e,f
foo,bar,baz,quux,foobar,fizzbuzz
somefields,rekplcode,repl$grpid,repl$id,fields,another
a,b,c,d,1,e
foo,bar,baz,quux,2,foobar

View File

@ -1,2 +1,2 @@
a b c d e f
foo bar baz quux foobar fizzbuzz
a b c d e 1
foo bar baz quux foobar 2