mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-04 18:36:12 +02:00
Implement CSV headers support.
Some CSV files come with a header line containing the list of their column names; use that line when the "csv header" option is given. Note that when both the "skip header" and "csv header" options are used, pgloader first skips the requested number of lines and then uses the next one as the CSV header. Because of a temporary failure to install the `ronn` documentation tool, this patch only commits the changes to the source docs and does not update the man page (pgloader.1). A follow-up patch is intended to fix that. See #236, which uses shell tricks to retrieve the field list from the CSV file itself and motivated finally writing this patch.
This commit is contained in:
parent
dfb4cc2049
commit
abbc105c41
@ -841,6 +841,12 @@ The `csv` format command accepts the following clauses and options:
|
||||
Takes a numeric value as argument. Instruct pgloader to skip that
|
||||
many lines at the beginning of the input file.
|
||||
|
||||
- *csv header*
|
||||
|
||||
Use the first line read after *skip header* as the list of csv field
|
||||
names to be found in the CSV file, using the same CSV parameters as
|
||||
for the CSV data.
|
||||
|
||||
- *trim unquoted blanks*
|
||||
|
||||
When reading unquoted values in the `CSV` file, remove the blanks
|
||||
|
||||
@ -49,6 +49,9 @@
|
||||
(bind (((_ _ _ digits) osh))
|
||||
(cons :skip-lines (parse-integer (text digits))))))
|
||||
|
||||
(defrule option-csv-header (and kw-csv kw-header)
|
||||
(:constant (cons :csv-header t)))
|
||||
|
||||
(defrule option-fields-enclosed-by
|
||||
(and kw-fields (? kw-optionally) kw-enclosed kw-by separator)
|
||||
(:lambda (enc)
|
||||
@ -95,6 +98,7 @@
|
||||
option-truncate
|
||||
option-disable-triggers
|
||||
option-skip-header
|
||||
option-csv-header
|
||||
option-lines-terminated-by
|
||||
option-fields-not-enclosed
|
||||
option-fields-enclosed-by
|
||||
|
||||
@ -39,7 +39,10 @@
|
||||
:initarg :source-type) ; or :filename
|
||||
(encoding :accessor encoding ; file encoding
|
||||
:initarg :encoding) ;
|
||||
(skip-lines :accessor skip-lines ; CSV headers
|
||||
(csv-header :accessor csv-header ; CSV headers are col names
|
||||
:initarg :csv-header
|
||||
:initform nil) ;
|
||||
(skip-lines :accessor skip-lines ; CSV skip first N lines
|
||||
:initarg :skip-lines ;
|
||||
:initform 0) ;
|
||||
(separator :accessor csv-separator ; CSV separator
|
||||
@ -78,6 +81,20 @@
|
||||
;;;
|
||||
;;; Read a file format in CSV format, and call given function on each line.
|
||||
;;;
|
||||
(defun parse-csv-header (csv header)
|
||||
"Parse the header line given csv setup."
|
||||
;; a field entry is a list of field name and options
|
||||
(mapcar #'list
|
||||
(car ; parsing a single line
|
||||
(cl-csv:read-csv header
|
||||
:separator (csv-separator csv)
|
||||
:quote (csv-quote csv)
|
||||
:escape (csv-escape csv)
|
||||
:unquoted-empty-string-is-nil t
|
||||
:quoted-empty-string-is-nil nil
|
||||
:trim-outer-whitespace (csv-trim-blanks csv)
|
||||
:newline (csv-newline csv)))))
|
||||
|
||||
(defmethod map-rows ((csv copy-csv) &key process-row-fn)
|
||||
"Load data from a text file in CSV format, with support for advanced
|
||||
projecting capabilities. See `project-fields' for details.
|
||||
@ -114,6 +131,13 @@
|
||||
;; about skipping the first line
|
||||
(loop repeat (skip-lines csv) do (read-line input nil nil))
|
||||
|
||||
;; we might now have to read the CSV fields from the header line
|
||||
(when (csv-header csv)
|
||||
(setf (fields csv)
|
||||
(parse-csv-header csv (read-line input nil nil)))
|
||||
|
||||
(log-message :debug "Parsed header columns ~s" (fields csv)))
|
||||
|
||||
;; read in the text file, split it into columns, process NULL
|
||||
;; columns the way postmodern expects them, and call
|
||||
;; PROCESS-ROW-FN on them
|
||||
@ -153,7 +177,7 @@
|
||||
(with-stats-collection ((target csv)
|
||||
:dbname (db-name (target-db csv))
|
||||
:state *state* :summary summary)
|
||||
(lp:task-handler-bind ((error #'lp:invoke-transfer-error))
|
||||
(lp:task-handler-bind () ;; ((error #'lp:invoke-transfer-error))
|
||||
(log-message :notice "COPY ~a" (target csv))
|
||||
(lp:submit-task channel #'copy-to-queue csv queue)
|
||||
|
||||
|
||||
@ -10,6 +10,7 @@ REGRESS= allcols.load \
|
||||
csv-parse-date.load \
|
||||
csv-error.load \
|
||||
csv-filename-pattern.load \
|
||||
csv-header.load \
|
||||
csv-keep-extra-blanks.load \
|
||||
csv-nulls.load \
|
||||
csv-trim-extra-blanks.load \
|
||||
|
||||
25
test/csv-header.load
Normal file
25
test/csv-header.load
Normal file
@ -0,0 +1,25 @@
|
||||
LOAD CSV
|
||||
FROM INLINE
|
||||
INTO postgresql://dim@localhost/pgloader?header
|
||||
|
||||
WITH truncate,
|
||||
fields terminated by ',',
|
||||
csv header
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ drop table if exists header; $$,
|
||||
$$ CREATE TABLE header
|
||||
(
|
||||
somefields text,
|
||||
rekplcode text,
|
||||
"repl$grpid" text,
|
||||
"repl$id" text,
|
||||
another text,
|
||||
fields text
|
||||
)
|
||||
$$;
|
||||
|
||||
|
||||
somefields,rekplcode,repl$grpid,repl$id,another,fields
|
||||
a,b,c,d,e,f
|
||||
foo,bar,baz,quux,foobar,fizzbuzz
|
||||
2
test/regress/expected/csv-header.out
Normal file
2
test/regress/expected/csv-header.out
Normal file
@ -0,0 +1,2 @@
|
||||
a b c d e f
|
||||
foo bar baz quux foobar fizzbuzz
|
||||
Loading…
x
Reference in New Issue
Block a user