diff --git a/pgloader.1 b/pgloader.1 index d9bebb6..456f42e 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -1,7 +1,7 @@ .\" generated with Ronn/v0.7.3 .\" http://github.com/rtomayko/ronn/tree/0.7.3 . -.TH "PGLOADER" "1" "March 2015" "ff" "" +.TH "PGLOADER" "1" "April 2015" "ff" "" . .SH "NAME" \fBpgloader\fR \- PostgreSQL data loader @@ -636,7 +636,7 @@ When omitted, the \fIuser\fR name defaults to the value of the \fBPGUSER\fR envi \fIpassword\fR . .IP -Can contain any character, including that at sign (\fB@\fR) which must then be doubled (\fB@@\fR)\. To leave the password empty, when the \fIuser\fR name ends with at at sign, you then have to use the syntax user:@\. +Can contain any character, including the at sign (\fB@\fR) which must then be doubled (\fB@@\fR)\. To leave the password empty, when the \fIuser\fR name ends with an at sign, you then have to use the syntax user:@\. . .IP When omitted, the \fIpassword\fR defaults to the value of the \fBPGPASSWORD\fR environment variable if it is set, otherwise the password is left unset\. @@ -1310,6 +1310,24 @@ Filename where to load the data from\. This support local files, HTTP URLs and z When loading from a \fBCOPY\fR file, the following options are supported: . .IP "\(bu" 4 +\fIdelimiter\fR +. +.IP +Takes a single character as argument, which must be found inside single quotes, and might be given as the printable character itself, the special value \et to denote a tabulation character, or \fB0x\fR then a hexadecimal value read as the ASCII code for the character\. +. +.IP +This character is used as the \fIdelimiter\fR when reading the data, in a similar way to the PostgreSQL \fBCOPY\fR option\. +. +.IP "\(bu" 4 +\fInull\fR +. +.IP +Takes a quoted string as an argument (quotes can be either double quotes or single quotes) and uses that string as the \fBNULL\fR representation in the data\. +. +.IP +This is similar to the \fInull\fR \fBCOPY\fR option in PostgreSQL\. +. +.IP "\(bu" 4 \fItruncate\fR . 
.IP diff --git a/pgloader.1.md b/pgloader.1.md index 477611e..6f9ba4d 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -1103,7 +1103,25 @@ The `COPY` format command accepts the following clauses and options: When loading from a `COPY` file, the following options are supported: - - *truncate* + - *delimiter* + + Takes a single character as argument, which must be found inside + single quotes, and might be given as the printable character itself, + the special value \t to denote a tabulation character, or `0x` then + a hexadecimal value read as the ASCII code for the character. + + This character is used as the *delimiter* when reading the data, in + a similar way to the PostgreSQL `COPY` option. + + - *null* + + Takes a quoted string as an argument (quotes can be either double + quotes or single quotes) and uses that string as the `NULL` + representation in the data. + + This is similar to the *null* `COPY` option in PostgreSQL. + + - *truncate* When this option is listed, pgloader issues a `TRUNCATE` command against the PostgreSQL target table before reading the data file. 
diff --git a/src/parsers/command-copy.lisp b/src/parsers/command-copy.lisp index 1cafdbe..660a222 100644 --- a/src/parsers/command-copy.lisp +++ b/src/parsers/command-copy.lisp @@ -24,12 +24,23 @@ (:lambda (source) (bind (((_ field-defs _) source)) field-defs))) +(defrule option-delimiter (and kw-delimiter separator) + (:lambda (delimiter) + (destructuring-bind (kw sep) delimiter + (declare (ignore kw)) + (cons :delimiter sep)))) + +(defrule option-null (and kw-null quoted-string) + (:destructure (kw null) (declare (ignore kw)) (cons :null-as null))) + (defrule copy-option (or option-batch-rows option-batch-size option-batch-concurrency option-truncate option-disable-triggers - option-skip-header)) + option-skip-header + option-delimiter + option-null)) (defrule another-copy-option (and comma copy-option) (:lambda (source) @@ -40,7 +51,7 @@ (destructuring-bind (opt1 opts) source (alexandria:alist-plist `(,opt1 ,@opts))))) -(defrule copy-options (and kw-with csv-option-list) +(defrule copy-options (and kw-with copy-option-list) (:lambda (source) (bind (((_ opts) source)) (cons :copy-options opts)))) @@ -51,11 +62,11 @@ (make-instance 'copy-connection :specs filename)))) (defrule copy-file-source (or stdin - inline - http-uri - copy-uri - filename-matching - maybe-quoted-filename) + inline + http-uri + copy-uri + filename-matching + maybe-quoted-filename) (:lambda (src) (if (typep src 'copy-connection) src (destructuring-bind (type &rest specs) src @@ -88,7 +99,7 @@ (alexandria:alist-plist clauses-list))) (defrule load-copy-file-command (and copy-source (? file-encoding) - copy-source-field-list + (? copy-source-field-list) target (? 
csv-target-column-list) load-copy-file-optional-clauses) @@ -97,11 +108,11 @@ `(,source ,encoding ,fields ,target ,columns ,@clauses)))) (defun lisp-code-for-loading-from-copy (copy-conn fields pg-db-conn - &key - (encoding :utf-8) - columns - gucs before after - ((:copy-options options))) + &key + (encoding :utf-8) + columns + gucs before after + ((:copy-options options))) `(lambda () (let* ((state-before (pgloader.utils:make-pgstate)) (summary (null *state*)) @@ -125,7 +136,9 @@ :encoding ,encoding :fields ',fields :columns ',columns - :skip-lines ,(or (getf options :skip-line) 0)))) + ,@(remove-batch-control-option + options :extras '(:truncate + :disable-triggers))))) (pgloader.sources:copy-from source :truncate truncate :disable-triggers disable-triggers)) diff --git a/src/parsers/command-keywords.lisp b/src/parsers/command-keywords.lisp index f5e8fe0..0bc91ca 100644 --- a/src/parsers/command-keywords.lisp +++ b/src/parsers/command-keywords.lisp @@ -91,6 +91,7 @@ (def-keyword-rule "keep") (def-keyword-rule "trim") (def-keyword-rule "unquoted") + (def-keyword-rule "delimiter") ;; option for MySQL imports (def-keyword-rule "schema") (def-keyword-rule "only") diff --git a/src/sources/copy.lisp b/src/sources/copy.lisp index 3228619..7c9c757 100644 --- a/src/sources/copy.lisp +++ b/src/sources/copy.lisp @@ -16,7 +16,13 @@ :initarg :encoding) ; (skip-lines :accessor skip-lines ; we might want to skip COPY lines :initarg :skip-lines ; - :initform 0)) + :initform 0) ; + (delimiter :accessor delimiter ; see COPY options for TEXT + :initarg :delimiter ; in PostgreSQL docs + :initform #\Tab) + (null-as :accessor null-as + :initarg :null-as + :initform "\\N")) (:documentation "pgloader COPY Data Source")) (defmethod initialize-instance :after ((copy copy-copy) &key) @@ -37,14 +43,24 @@ (declaim (inline parse-row)) -(defun parse-row (line) +(defun parse-row (line &key (delimiter #\Tab) (null-as "\\N")) "Parse a single line of COPY input file and return a row of columns." 
(mapcar (lambda (x) ;; we want Postmodern compliant NULLs - (if (string= "\\N" x) :null x)) + (cond ((string= null-as x) :null) + + ;; and we want to avoid injecting default NULL + ;; representation down to PostgreSQL when null-as isn't + ;; the default + ((and (string/= null-as "\\N") (string= x "\\N")) + ;; escape the backslash + "\\\\N") + + ;; default case, just use the value we've just read + (t x))) ;; splitting is easy, it's always on #\Tab ;; see format-row-for-copy for details - (sq:split-sequence #\Tab line))) + (sq:split-sequence delimiter line))) (defmethod map-rows ((copy copy-copy) &key process-row-fn) "Load data from a text file in Copy Columns format. @@ -88,7 +104,9 @@ :counting line :into read :while line :do (handler-case - (funcall fun (parse-row line)) + (funcall fun (parse-row line + :delimiter (delimiter copy) + :null-as (null-as copy))) (condition (e) (progn (log-message :error "~a" e) diff --git a/test/copy-hex.load b/test/copy-hex.load index 4c6ae4f..5bcf884 100644 --- a/test/copy-hex.load +++ b/test/copy-hex.load @@ -1,8 +1,10 @@ LOAD COPY - FROM inline (id, text) + FROM inline INTO postgresql:///pgloader?copyhex - WITH truncate + WITH truncate, + delimiter '\t', + null "--" BEFORE LOAD DO $$ drop table if exists copyhex; $$, @@ -12,4 +14,5 @@ LOAD COPY 2 aa 3 \x1a 4 a\x1a -5 \N \ No newline at end of file +5 \N +6 -- \ No newline at end of file diff --git a/test/regress/expected/copy-hex.out b/test/regress/expected/copy-hex.out index 4bc7ee4..512b876 100644 --- a/test/regress/expected/copy-hex.out +++ b/test/regress/expected/copy-hex.out @@ -2,4 +2,5 @@ 2 aa 3  4 a -5 \N +5 \\N +6 \N