Implement more COPY options, fix #218.

The COPY format now supports user-defined delimiter and null options,
and we don't require the column names anymore as they are useless in
that context.
This commit is contained in:
Dimitri Fontaine 2015-04-30 14:24:22 +02:00
parent 53dcdfd8ef
commit 95a5eb3184
7 changed files with 98 additions and 26 deletions

View File

@ -1,7 +1,7 @@
.\" generated with Ronn/v0.7.3
.\" http://github.com/rtomayko/ronn/tree/0.7.3
.
.TH "PGLOADER" "1" "March 2015" "ff" ""
.TH "PGLOADER" "1" "April 2015" "ff" ""
.
.SH "NAME"
\fBpgloader\fR \- PostgreSQL data loader
@ -636,7 +636,7 @@ When omitted, the \fIuser\fR name defaults to the value of the \fBPGUSER\fR envi
\fIpassword\fR
.
.IP
Can contain any character, including that at sign (\fB@\fR) which must then be doubled (\fB@@\fR)\. To leave the password empty, when the \fIuser\fR name ends with at at sign, you then have to use the syntax user:@\.
Can contain any character, including the at sign (\fB@\fR) which must then be doubled (\fB@@\fR)\. To leave the password empty, when the \fIuser\fR name ends with an at sign, you then have to use the syntax user:@\.
.
.IP
When omitted, the \fIpassword\fR defaults to the value of the \fBPGPASSWORD\fR environment variable if it is set, otherwise the password is left unset\.
@ -1310,6 +1310,24 @@ Filename where to load the data from\. This support local files, HTTP URLs and z
When loading from a \fBCOPY\fR file, the following options are supported:
.
.IP "\(bu" 4
\fIdelimiter\fR
.
.IP
Takes a single character as argument, which must be found inside single quotes, and might be given as the printable character itself, the special value \et to denote a tabulation character, or \fB0x\fR followed by a hexadecimal value read as the ASCII code for the character\.
.
.IP
This character is used as the \fIdelimiter\fR when reading the data, in a similar way to the PostgreSQL \fBCOPY\fR option\.
.
.IP "\(bu" 4
\fInull\fR
.
.IP
Takes a quoted string as an argument (quotes can be either double quotes or single quotes) and uses that string as the \fBNULL\fR representation in the data\.
.
.IP
This is similar to the \fInull\fR \fBCOPY\fR option in PostgreSQL\.
.
.IP "\(bu" 4
\fItruncate\fR
.
.IP

View File

@ -1103,7 +1103,25 @@ The `COPY` format command accepts the following clauses and options:
When loading from a `COPY` file, the following options are supported:
- *truncate*
- *delimiter*
Takes a single character as argument, which must be found inside
single quotes, and might be given as the printable character itself,
the special value \t to denote a tabulation character, or `0x` followed
by a hexadecimal value read as the ASCII code for the character.
This character is used as the *delimiter* when reading the data, in
a similar way to the PostgreSQL `COPY` option.
- *null*
Takes a quoted string as an argument (quotes can be either double
quotes or single quotes) and uses that string as the `NULL`
representation in the data.
This is similar to the *null* `COPY` option in PostgreSQL.
- *truncate*
When this option is listed, pgloader issues a `TRUNCATE` command
against the PostgreSQL target table before reading the data file.

View File

@ -24,12 +24,23 @@
(:lambda (source)
(bind (((_ field-defs _) source)) field-defs)))
;; COPY option: the `delimiter` keyword followed by a `separator`
;; production.  The keyword itself is discarded and the rule yields the
;; pair (:delimiter . sep).
(defrule option-delimiter (and kw-delimiter separator)
  (:destructure (kw sep)
    (declare (ignore kw))
    (cons :delimiter sep)))
;; COPY option: the `null` keyword followed by a `quoted-string`
;; production.  The keyword itself is discarded and the rule yields the
;; pair (:null-as . null-string).
(defrule option-null (and kw-null quoted-string)
  (:destructure (kw null-string)
    (declare (ignore kw))
    (cons :null-as null-string)))
;; A single COPY option: matches any one of the alternatives listed
;; below, now including option-delimiter and option-null.
(defrule copy-option (or option-batch-rows
option-batch-size
option-batch-concurrency
option-truncate
option-disable-triggers
option-skip-header))
option-skip-header
option-delimiter
option-null))
(defrule another-copy-option (and comma copy-option)
(:lambda (source)
@ -40,7 +51,7 @@
(destructuring-bind (opt1 opts) source
(alexandria:alist-plist `(,opt1 ,@opts)))))
;; The WITH clause of the COPY command: the kw-with keyword followed by
;; the option list.  The keyword is dropped and the rule yields the pair
;; (:copy-options . opts).
(defrule copy-options (and kw-with csv-option-list)
(defrule copy-options (and kw-with copy-option-list)
(:lambda (source)
(bind (((_ opts) source))
(cons :copy-options opts))))
@ -51,11 +62,11 @@
(make-instance 'copy-connection :specs filename))))
(defrule copy-file-source (or stdin
inline
http-uri
copy-uri
filename-matching
maybe-quoted-filename)
inline
http-uri
copy-uri
filename-matching
maybe-quoted-filename)
(:lambda (src)
(if (typep src 'copy-connection) src
(destructuring-bind (type &rest specs) src
@ -88,7 +99,7 @@
(alexandria:alist-plist clauses-list)))
(defrule load-copy-file-command (and copy-source (? file-encoding)
copy-source-field-list
(? copy-source-field-list)
target
(? csv-target-column-list)
load-copy-file-optional-clauses)
@ -97,11 +108,11 @@
`(,source ,encoding ,fields ,target ,columns ,@clauses))))
(defun lisp-code-for-loading-from-copy (copy-conn fields pg-db-conn
&key
(encoding :utf-8)
columns
gucs before after
((:copy-options options)))
&key
(encoding :utf-8)
columns
gucs before after
((:copy-options options)))
`(lambda ()
(let* ((state-before (pgloader.utils:make-pgstate))
(summary (null *state*))
@ -125,7 +136,9 @@
:encoding ,encoding
:fields ',fields
:columns ',columns
:skip-lines ,(or (getf options :skip-line) 0))))
,@(remove-batch-control-option
options :extras '(:truncate
:disable-triggers)))))
(pgloader.sources:copy-from source
:truncate truncate
:disable-triggers disable-triggers))

View File

@ -91,6 +91,7 @@
(def-keyword-rule "keep")
(def-keyword-rule "trim")
(def-keyword-rule "unquoted")
(def-keyword-rule "delimiter")
;; option for MySQL imports
(def-keyword-rule "schema")
(def-keyword-rule "only")

View File

@ -16,7 +16,13 @@
:initarg :encoding) ;
(skip-lines :accessor skip-lines ; we might want to skip COPY lines
:initarg :skip-lines ;
:initform 0))
:initform 0) ;
(delimiter :accessor delimiter ; see COPY options for TEXT
:initarg :delimiter ; in PostgreSQL docs
:initform #\Tab)
(null-as :accessor null-as
:initarg :null-as
:initform "\\N"))
(:documentation "pgloader COPY Data Source"))
(defmethod initialize-instance :after ((copy copy-copy) &key)
@ -37,14 +43,24 @@
(declaim (inline parse-row))
(defun parse-row (line)
(defun parse-row (line &key (delimiter #\Tab) (null-as "\\N"))
"Parse a single line of COPY input file and return a row of columns."
(mapcar (lambda (x)
;; we want Postmodern compliant NULLs: a column equal to NULL-AS
;; becomes :null
(if (string= "\\N" x) :null x))
(cond ((string= null-as x) :null)
;; and we want to avoid injecting default NULL
;; representation down to PostgreSQL when null-as isn't
;; the default
((and (string/= null-as "\\N") (string= x "\\N"))
;; escape the backslash
"\\\\N")
;; default case, just use the value we've just read
(t x)))
;; splitting is easy: always on DELIMITER, which defaults to #\Tab
;; see format-row-for-copy for details
(sq:split-sequence #\Tab line)))
(sq:split-sequence delimiter line)))
(defmethod map-rows ((copy copy-copy) &key process-row-fn)
"Load data from a text file in Copy Columns format.
@ -88,7 +104,9 @@
:counting line :into read
:while line
:do (handler-case
(funcall fun (parse-row line))
(funcall fun (parse-row line
:delimiter (delimiter copy)
:null-as (null-as copy)))
(condition (e)
(progn
(log-message :error "~a" e)

View File

@ -1,8 +1,10 @@
LOAD COPY
FROM inline (id, text)
FROM inline
INTO postgresql:///pgloader?copyhex
WITH truncate
WITH truncate,
delimiter '\t',
null "--"
BEFORE LOAD DO
$$ drop table if exists copyhex; $$,
@ -12,4 +14,5 @@ LOAD COPY
2 aa
3 \x1a
4 a\x1a
5 \N
5 \N
6 --

View File

@ -2,4 +2,5 @@
2 aa
3 
4 a
5 \N
5 \\N
6 \N