mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-04 18:36:12 +02:00
Implement more COPY options, fix #218.
The COPY format now supports user-defined delimiter and null options, and we no longer require the column names, as they are useless in that context.
This commit is contained in:
parent
53dcdfd8ef
commit
95a5eb3184
22
pgloader.1
22
pgloader.1
@ -1,7 +1,7 @@
|
||||
.\" generated with Ronn/v0.7.3
|
||||
.\" http://github.com/rtomayko/ronn/tree/0.7.3
|
||||
.
|
||||
.TH "PGLOADER" "1" "March 2015" "ff" ""
|
||||
.TH "PGLOADER" "1" "April 2015" "ff" ""
|
||||
.
|
||||
.SH "NAME"
|
||||
\fBpgloader\fR \- PostgreSQL data loader
|
||||
@ -636,7 +636,7 @@ When omitted, the \fIuser\fR name defaults to the value of the \fBPGUSER\fR envi
|
||||
\fIpassword\fR
|
||||
.
|
||||
.IP
|
||||
Can contain any character, including that at sign (\fB@\fR) which must then be doubled (\fB@@\fR)\. To leave the password empty, when the \fIuser\fR name ends with at at sign, you then have to use the syntax user:@\.
|
||||
Can contain any character, including the at sign (\fB@\fR) which must then be doubled (\fB@@\fR)\. To leave the password empty, when the \fIuser\fR name ends with an at sign, you then have to use the syntax user:@\.
|
||||
.
|
||||
.IP
|
||||
When omitted, the \fIpassword\fR defaults to the value of the \fBPGPASSWORD\fR environment variable if it is set, otherwise the password is left unset\.
|
||||
@ -1310,6 +1310,24 @@ Filename where to load the data from\. This support local files, HTTP URLs and z
|
||||
When loading from a \fBCOPY\fR file, the following options are supported:
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
\fIdelimiter\fR
|
||||
.
|
||||
.IP
|
||||
Takes a single character as argument, which must be found inside single quotes, and might be given as the printable character itself, the special value \et to denote a tabulation character, or \fB0x\fR then a hexadecimal value read as the ASCII code for the character\.
|
||||
.
|
||||
.IP
|
||||
This character is used as the \fIdelimiter\fR when reading the data, in a similar way to the PostgreSQL \fBCOPY\fR option\.
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
\fInull\fR
|
||||
.
|
||||
.IP
|
||||
Takes a quoted string as an argument (quotes can be either double quotes or single quotes) and uses that string as the \fBNULL\fR representation in the data\.
|
||||
.
|
||||
.IP
|
||||
This is similar to the \fInull\fR \fBCOPY\fR option in PostgreSQL\.
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
\fItruncate\fR
|
||||
.
|
||||
.IP
|
||||
|
||||
@ -1103,7 +1103,25 @@ The `COPY` format command accepts the following clauses and options:
|
||||
|
||||
When loading from a `COPY` file, the following options are supported:
|
||||
|
||||
- *truncate*
|
||||
- *delimiter*
|
||||
|
||||
Takes a single character as argument, which must be found inside
|
||||
single quotes, and might be given as the printable character itself,
|
||||
the special value \t to denote a tabulation character, or `0x` then
|
||||
a hexadecimal value read as the ASCII code for the character.
|
||||
|
||||
This character is used as the *delimiter* when reading the data, in
|
||||
a similar way to the PostgreSQL `COPY` option.
|
||||
|
||||
- *null*
|
||||
|
||||
Takes a quoted string as an argument (quotes can be either double
|
||||
quotes or single quotes) and uses that string as the `NULL`
|
||||
representation in the data.
|
||||
|
||||
This is similar to the *null* `COPY` option in PostgreSQL.
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues a `TRUNCATE` command
|
||||
against the PostgreSQL target table before reading the data file.
|
||||
|
||||
@ -24,12 +24,23 @@
|
||||
(:lambda (source)
|
||||
(bind (((_ field-defs _) source)) field-defs)))
|
||||
|
||||
;; COPY option "delimiter": matches the kw-delimiter keyword followed by a
;; separator, drops the keyword and yields the pair (:delimiter . sep).
(defrule option-delimiter (and kw-delimiter separator)
|
||||
(:lambda (delimiter)
|
||||
(destructuring-bind (kw sep) delimiter
|
||||
(declare (ignore kw))
|
||||
(cons :delimiter sep))))
|
||||
|
||||
;; COPY option "null": matches the kw-null keyword followed by a quoted
;; string, drops the keyword and yields the pair (:null-as . string).
(defrule option-null (and kw-null quoted-string)
|
||||
(:destructure (kw null) (declare (ignore kw)) (cons :null-as null)))
|
||||
|
||||
(defrule copy-option (or option-batch-rows
|
||||
option-batch-size
|
||||
option-batch-concurrency
|
||||
option-truncate
|
||||
option-disable-triggers
|
||||
option-skip-header))
|
||||
option-skip-header
|
||||
option-delimiter
|
||||
option-null))
|
||||
|
||||
(defrule another-copy-option (and comma copy-option)
|
||||
(:lambda (source)
|
||||
@ -40,7 +51,7 @@
|
||||
(destructuring-bind (opt1 opts) source
|
||||
(alexandria:alist-plist `(,opt1 ,@opts)))))
|
||||
|
||||
(defrule copy-options (and kw-with csv-option-list)
|
||||
(defrule copy-options (and kw-with copy-option-list)
|
||||
(:lambda (source)
|
||||
(bind (((_ opts) source))
|
||||
(cons :copy-options opts))))
|
||||
@ -51,11 +62,11 @@
|
||||
(make-instance 'copy-connection :specs filename))))
|
||||
|
||||
(defrule copy-file-source (or stdin
|
||||
inline
|
||||
http-uri
|
||||
copy-uri
|
||||
filename-matching
|
||||
maybe-quoted-filename)
|
||||
inline
|
||||
http-uri
|
||||
copy-uri
|
||||
filename-matching
|
||||
maybe-quoted-filename)
|
||||
(:lambda (src)
|
||||
(if (typep src 'copy-connection) src
|
||||
(destructuring-bind (type &rest specs) src
|
||||
@ -88,7 +99,7 @@
|
||||
(alexandria:alist-plist clauses-list)))
|
||||
|
||||
(defrule load-copy-file-command (and copy-source (? file-encoding)
|
||||
copy-source-field-list
|
||||
(? copy-source-field-list)
|
||||
target
|
||||
(? csv-target-column-list)
|
||||
load-copy-file-optional-clauses)
|
||||
@ -97,11 +108,11 @@
|
||||
`(,source ,encoding ,fields ,target ,columns ,@clauses))))
|
||||
|
||||
(defun lisp-code-for-loading-from-copy (copy-conn fields pg-db-conn
|
||||
&key
|
||||
(encoding :utf-8)
|
||||
columns
|
||||
gucs before after
|
||||
((:copy-options options)))
|
||||
&key
|
||||
(encoding :utf-8)
|
||||
columns
|
||||
gucs before after
|
||||
((:copy-options options)))
|
||||
`(lambda ()
|
||||
(let* ((state-before (pgloader.utils:make-pgstate))
|
||||
(summary (null *state*))
|
||||
@ -125,7 +136,9 @@
|
||||
:encoding ,encoding
|
||||
:fields ',fields
|
||||
:columns ',columns
|
||||
:skip-lines ,(or (getf options :skip-line) 0))))
|
||||
,@(remove-batch-control-option
|
||||
options :extras '(:truncate
|
||||
:disable-triggers)))))
|
||||
(pgloader.sources:copy-from source
|
||||
:truncate truncate
|
||||
:disable-triggers disable-triggers))
|
||||
|
||||
@ -91,6 +91,7 @@
|
||||
(def-keyword-rule "keep")
|
||||
(def-keyword-rule "trim")
|
||||
(def-keyword-rule "unquoted")
|
||||
(def-keyword-rule "delimiter")
|
||||
;; option for MySQL imports
|
||||
(def-keyword-rule "schema")
|
||||
(def-keyword-rule "only")
|
||||
|
||||
@ -16,7 +16,13 @@
|
||||
:initarg :encoding) ;
|
||||
(skip-lines :accessor skip-lines ; we might want to skip COPY lines
|
||||
:initarg :skip-lines ;
|
||||
:initform 0))
|
||||
:initform 0) ;
|
||||
(delimiter :accessor delimiter ; see COPY options for TEXT
|
||||
:initarg :delimiter ; in PostgreSQL docs
|
||||
:initform #\Tab)
|
||||
(null-as :accessor null-as
|
||||
:initarg :null-as
|
||||
:initform "\\N"))
|
||||
(:documentation "pgloader COPY Data Source"))
|
||||
|
||||
(defmethod initialize-instance :after ((copy copy-copy) &key)
|
||||
@ -37,14 +43,24 @@
|
||||
|
||||
(declaim (inline parse-row))
|
||||
|
||||
(defun parse-row (line)
|
||||
(defun parse-row (line &key (delimiter #\Tab) (null-as "\\N"))
|
||||
"Parse a single line of COPY input file and return a row of columns."
|
||||
(mapcar (lambda (x)
|
||||
;; we want Postmodern compliant NULLs
|
||||
(if (string= "\\N" x) :null x))
|
||||
(cond ((string= null-as x) :null)
|
||||
|
||||
;; and we want to avoid injecting default NULL
|
||||
;; representation down to PostgreSQL when null-as isn't
|
||||
;; the default
|
||||
((and (string/= null-as "\\N") (string= x "\\N"))
|
||||
;; escape the backslash
|
||||
"\\\\N")
|
||||
|
||||
;; default case, just use the value we've just read
|
||||
(t x)))
|
||||
;; splitting is easy, it's on the given delimiter (#\Tab by default)
|
||||
;; see format-row-for-copy for details
|
||||
(sq:split-sequence #\Tab line)))
|
||||
(sq:split-sequence delimiter line)))
|
||||
|
||||
(defmethod map-rows ((copy copy-copy) &key process-row-fn)
|
||||
"Load data from a text file in Copy Columns format.
|
||||
@ -88,7 +104,9 @@
|
||||
:counting line :into read
|
||||
:while line
|
||||
:do (handler-case
|
||||
(funcall fun (parse-row line))
|
||||
(funcall fun (parse-row line
|
||||
:delimiter (delimiter copy)
|
||||
:null-as (null-as copy)))
|
||||
(condition (e)
|
||||
(progn
|
||||
(log-message :error "~a" e)
|
||||
|
||||
@ -1,8 +1,10 @@
|
||||
LOAD COPY
|
||||
FROM inline (id, text)
|
||||
FROM inline
|
||||
INTO postgresql:///pgloader?copyhex
|
||||
|
||||
WITH truncate
|
||||
WITH truncate,
|
||||
delimiter '\t',
|
||||
null "--"
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ drop table if exists copyhex; $$,
|
||||
@ -12,4 +14,5 @@ LOAD COPY
|
||||
2 aa
|
||||
3 \x1a
|
||||
4 a\x1a
|
||||
5 \N
|
||||
5 \N
|
||||
6 --
|
||||
@ -2,4 +2,5 @@
|
||||
2 aa
|
||||
3
|
||||
4 a
|
||||
5 \N
|
||||
5 \\N
|
||||
6 \N
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user