mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-08 07:16:58 +02:00
Implement concurrency and workers for files sources.
More than the syntax and API tweaks, this patch also make it so that a multi-file specification (using e.g. ALL FILENAMES IN DIRECTORY) can be loaded with several files in the group in parallel. To that effect, tweak again the md-connection and md-copy implementations.
This commit is contained in:
parent
aa8b756315
commit
7dd69a11e1
@ -622,7 +622,7 @@ All data sources specific commands support the following options:
|
|||||||
See the section BATCH BEHAVIOUR OPTIONS for more details\.
|
See the section BATCH BEHAVIOUR OPTIONS for more details\.
|
||||||
.
|
.
|
||||||
.IP
|
.IP
|
||||||
In addition, the data sources \fImysql\fR, \fIsqlite\fR, \fImssql\fR, \fIixf\fR and \fIdbf\fR all support the following settings:
|
In addition, the following settings are available:
|
||||||
.
|
.
|
||||||
.IP "\(bu" 4
|
.IP "\(bu" 4
|
||||||
\fIworkers = W\fR
|
\fIworkers = W\fR
|
||||||
|
@ -550,8 +550,7 @@ Some clauses are common to all commands:
|
|||||||
|
|
||||||
See the section BATCH BEHAVIOUR OPTIONS for more details.
|
See the section BATCH BEHAVIOUR OPTIONS for more details.
|
||||||
|
|
||||||
In addition, the data sources *mysql*, *sqlite*, *mssql*, *ixf* and
|
In addition, the following settings are available:
|
||||||
*dbf* all support the following settings:
|
|
||||||
|
|
||||||
- *workers = W*
|
- *workers = W*
|
||||||
- *concurrency = C*
|
- *concurrency = C*
|
||||||
|
@ -567,7 +567,7 @@
|
|||||||
(lisp-code-for-loading-from-copy source fields target
|
(lisp-code-for-loading-from-copy source fields target
|
||||||
:encoding (or encoding :default)
|
:encoding (or encoding :default)
|
||||||
:gucs gucs
|
:gucs gucs
|
||||||
:copy-options options
|
:options options
|
||||||
:before before
|
:before before
|
||||||
:after after))
|
:after after))
|
||||||
|
|
||||||
@ -575,7 +575,7 @@
|
|||||||
(lisp-code-for-loading-from-fixed source fields target
|
(lisp-code-for-loading-from-fixed source fields target
|
||||||
:encoding encoding
|
:encoding encoding
|
||||||
:gucs gucs
|
:gucs gucs
|
||||||
:fixed-options options
|
:options options
|
||||||
:before before
|
:before before
|
||||||
:after after))
|
:after after))
|
||||||
|
|
||||||
@ -583,21 +583,21 @@
|
|||||||
(lisp-code-for-loading-from-csv source fields target
|
(lisp-code-for-loading-from-csv source fields target
|
||||||
:encoding encoding
|
:encoding encoding
|
||||||
:gucs gucs
|
:gucs gucs
|
||||||
:csv-options options
|
:options options
|
||||||
:before before
|
:before before
|
||||||
:after after))
|
:after after))
|
||||||
|
|
||||||
(dbf-connection
|
(dbf-connection
|
||||||
(lisp-code-for-loading-from-dbf source target
|
(lisp-code-for-loading-from-dbf source target
|
||||||
:gucs gucs
|
:gucs gucs
|
||||||
:dbf-options options
|
:options options
|
||||||
:before before
|
:before before
|
||||||
:after after))
|
:after after))
|
||||||
|
|
||||||
(ixf-connection
|
(ixf-connection
|
||||||
(lisp-code-for-loading-from-ixf source target
|
(lisp-code-for-loading-from-ixf source target
|
||||||
:gucs gucs
|
:gucs gucs
|
||||||
:ixf-options options
|
:options options
|
||||||
:before before
|
:before before
|
||||||
:after after))
|
:after after))
|
||||||
|
|
||||||
@ -605,7 +605,7 @@
|
|||||||
(lisp-code-for-loading-from-sqlite source target
|
(lisp-code-for-loading-from-sqlite source target
|
||||||
:gucs gucs
|
:gucs gucs
|
||||||
:casts casts
|
:casts casts
|
||||||
:sqlite-options options
|
:options options
|
||||||
:before before
|
:before before
|
||||||
:after after))
|
:after after))
|
||||||
|
|
||||||
@ -613,7 +613,7 @@
|
|||||||
(lisp-code-for-loading-from-mysql source target
|
(lisp-code-for-loading-from-mysql source target
|
||||||
:gucs gucs
|
:gucs gucs
|
||||||
:casts casts
|
:casts casts
|
||||||
:mysql-options options
|
:options options
|
||||||
:before before
|
:before before
|
||||||
:after after))
|
:after after))
|
||||||
|
|
||||||
@ -621,7 +621,7 @@
|
|||||||
(lisp-code-for-loading-from-mssql source target
|
(lisp-code-for-loading-from-mssql source target
|
||||||
:gucs gucs
|
:gucs gucs
|
||||||
:casts casts
|
:casts casts
|
||||||
:mssql-options options
|
:options options
|
||||||
:before before
|
:before before
|
||||||
:after after))))
|
:after after))))
|
||||||
:start-logger nil)))
|
:start-logger nil)))
|
||||||
|
@ -438,7 +438,7 @@
|
|||||||
#:md-spec
|
#:md-spec
|
||||||
#:md-strm
|
#:md-strm
|
||||||
#:expand-spec
|
#:expand-spec
|
||||||
#:open-next-stream
|
#:clone-copy-for
|
||||||
|
|
||||||
;; the db-methods
|
;; the db-methods
|
||||||
#:fetch-metadata
|
#:fetch-metadata
|
||||||
@ -451,7 +451,6 @@
|
|||||||
;; file based utils for CSV, fixed etc
|
;; file based utils for CSV, fixed etc
|
||||||
#:with-open-file-or-stream
|
#:with-open-file-or-stream
|
||||||
#:get-pathname
|
#:get-pathname
|
||||||
#:get-absolute-pathname
|
|
||||||
#:project-fields
|
#:project-fields
|
||||||
#:reformat-then-process
|
#:reformat-then-process
|
||||||
|
|
||||||
|
@ -33,7 +33,9 @@
|
|||||||
(defrule option-null (and kw-null quoted-string)
|
(defrule option-null (and kw-null quoted-string)
|
||||||
(:destructure (kw null) (declare (ignore kw)) (cons :null-as null)))
|
(:destructure (kw null) (declare (ignore kw)) (cons :null-as null)))
|
||||||
|
|
||||||
(defrule copy-option (or option-batch-rows
|
(defrule copy-option (or option-workers
|
||||||
|
option-concurrency
|
||||||
|
option-batch-rows
|
||||||
option-batch-size
|
option-batch-size
|
||||||
option-batch-concurrency
|
option-batch-concurrency
|
||||||
option-truncate
|
option-truncate
|
||||||
@ -43,19 +45,9 @@
|
|||||||
option-delimiter
|
option-delimiter
|
||||||
option-null))
|
option-null))
|
||||||
|
|
||||||
(defrule another-copy-option (and comma copy-option)
|
(defrule copy-options (and kw-with
|
||||||
(:lambda (source)
|
(and copy-option (* (and comma copy-option))))
|
||||||
(bind (((_ option) source)) option)))
|
(:function flatten-option-list))
|
||||||
|
|
||||||
(defrule copy-option-list (and copy-option (* another-copy-option))
|
|
||||||
(:lambda (source)
|
|
||||||
(destructuring-bind (opt1 opts) source
|
|
||||||
(alexandria:alist-plist `(,opt1 ,@opts)))))
|
|
||||||
|
|
||||||
(defrule copy-options (and kw-with copy-option-list)
|
|
||||||
(:lambda (source)
|
|
||||||
(bind (((_ opts) source))
|
|
||||||
(cons :copy-options opts))))
|
|
||||||
|
|
||||||
(defrule copy-uri (and "copy://" filename)
|
(defrule copy-uri (and "copy://" filename)
|
||||||
(:lambda (source)
|
(:lambda (source)
|
||||||
@ -112,8 +104,10 @@
|
|||||||
&key
|
&key
|
||||||
(encoding :utf-8)
|
(encoding :utf-8)
|
||||||
columns
|
columns
|
||||||
gucs before after
|
gucs before after options
|
||||||
((:copy-options options)))
|
&aux
|
||||||
|
(worker-count (getf options :worker-count))
|
||||||
|
(concurrency (getf options :concurrency)))
|
||||||
`(lambda ()
|
`(lambda ()
|
||||||
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
|
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||||
,@(batch-control-bindings options)
|
,@(batch-control-bindings options)
|
||||||
@ -136,10 +130,16 @@
|
|||||||
:fields ',fields
|
:fields ',fields
|
||||||
:columns ',columns
|
:columns ',columns
|
||||||
,@(remove-batch-control-option
|
,@(remove-batch-control-option
|
||||||
options :extras '(:truncate
|
options :extras '(:worker-count
|
||||||
|
:concurrency
|
||||||
|
:truncate
|
||||||
:drop-indexes
|
:drop-indexes
|
||||||
:disable-triggers)))))
|
:disable-triggers)))))
|
||||||
(pgloader.sources:copy-database source
|
(pgloader.sources:copy-database source
|
||||||
|
,@ (when worker-count
|
||||||
|
(list :worker-count worker-count))
|
||||||
|
,@ (when concurrency
|
||||||
|
(list :concurrency concurrency))
|
||||||
:truncate truncate
|
:truncate truncate
|
||||||
:drop-indexes drop-indexes
|
:drop-indexes drop-indexes
|
||||||
:disable-triggers disable-triggers))
|
:disable-triggers disable-triggers))
|
||||||
@ -149,7 +149,7 @@
|
|||||||
(defrule load-copy-file load-copy-file-command
|
(defrule load-copy-file load-copy-file-command
|
||||||
(:lambda (command)
|
(:lambda (command)
|
||||||
(bind (((source encoding fields pg-db-uri columns
|
(bind (((source encoding fields pg-db-uri columns
|
||||||
&key ((:copy-options options)) gucs before after) command))
|
&key options gucs before after) command))
|
||||||
(cond (*dry-run*
|
(cond (*dry-run*
|
||||||
(lisp-code-for-csv-dry-run pg-db-uri))
|
(lisp-code-for-csv-dry-run pg-db-uri))
|
||||||
(t
|
(t
|
||||||
@ -159,4 +159,4 @@
|
|||||||
:gucs gucs
|
:gucs gucs
|
||||||
:before before
|
:before before
|
||||||
:after after
|
:after after
|
||||||
:copy-options options))))))
|
:options options))))))
|
||||||
|
@ -103,7 +103,9 @@
|
|||||||
(bind (((_ _ _ escape-mode) term))
|
(bind (((_ _ _ escape-mode) term))
|
||||||
(cons :escape-mode escape-mode))))
|
(cons :escape-mode escape-mode))))
|
||||||
|
|
||||||
(defrule csv-option (or option-batch-rows
|
(defrule csv-option (or option-workers
|
||||||
|
option-concurrency
|
||||||
|
option-batch-rows
|
||||||
option-batch-size
|
option-batch-size
|
||||||
option-batch-concurrency
|
option-batch-concurrency
|
||||||
option-truncate
|
option-truncate
|
||||||
@ -120,19 +122,9 @@
|
|||||||
option-keep-unquoted-blanks
|
option-keep-unquoted-blanks
|
||||||
option-csv-escape-mode))
|
option-csv-escape-mode))
|
||||||
|
|
||||||
(defrule another-csv-option (and comma csv-option)
|
(defrule csv-options (and kw-with
|
||||||
(:lambda (source)
|
(and csv-option (* (and comma csv-option))))
|
||||||
(bind (((_ option) source)) option)))
|
(:function flatten-option-list))
|
||||||
|
|
||||||
(defrule csv-option-list (and csv-option (* another-csv-option))
|
|
||||||
(:lambda (source)
|
|
||||||
(destructuring-bind (opt1 opts) source
|
|
||||||
(alexandria:alist-plist `(,opt1 ,@opts)))))
|
|
||||||
|
|
||||||
(defrule csv-options (and kw-with csv-option-list)
|
|
||||||
(:lambda (source)
|
|
||||||
(bind (((_ opts) source))
|
|
||||||
(cons :csv-options opts))))
|
|
||||||
|
|
||||||
;;
|
;;
|
||||||
;; CSV per-field reading options
|
;; CSV per-field reading options
|
||||||
@ -414,8 +406,10 @@
|
|||||||
&key
|
&key
|
||||||
(encoding :utf-8)
|
(encoding :utf-8)
|
||||||
columns
|
columns
|
||||||
gucs before after
|
gucs before after options
|
||||||
((:csv-options options)))
|
&aux
|
||||||
|
(worker-count (getf options :worker-count))
|
||||||
|
(concurrency (getf options :concurrency)))
|
||||||
`(lambda ()
|
`(lambda ()
|
||||||
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
|
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||||
,@(batch-control-bindings options)
|
,@(batch-control-bindings options)
|
||||||
@ -438,10 +432,16 @@
|
|||||||
:fields ',fields
|
:fields ',fields
|
||||||
:columns ',columns
|
:columns ',columns
|
||||||
,@(remove-batch-control-option
|
,@(remove-batch-control-option
|
||||||
options :extras '(:truncate
|
options :extras '(:worker-count
|
||||||
|
:concurrency
|
||||||
|
:truncate
|
||||||
:drop-indexes
|
:drop-indexes
|
||||||
:disable-triggers)))))
|
:disable-triggers)))))
|
||||||
(pgloader.sources:copy-database source
|
(pgloader.sources:copy-database source
|
||||||
|
,@ (when worker-count
|
||||||
|
(list :worker-count worker-count))
|
||||||
|
,@ (when concurrency
|
||||||
|
(list :concurrency concurrency))
|
||||||
:truncate truncate
|
:truncate truncate
|
||||||
:drop-indexes drop-indexes
|
:drop-indexes drop-indexes
|
||||||
:disable-triggers disable-triggers))
|
:disable-triggers disable-triggers))
|
||||||
@ -451,7 +451,7 @@
|
|||||||
(defrule load-csv-file load-csv-file-command
|
(defrule load-csv-file load-csv-file-command
|
||||||
(:lambda (command)
|
(:lambda (command)
|
||||||
(bind (((source encoding fields pg-db-uri columns
|
(bind (((source encoding fields pg-db-uri columns
|
||||||
&key ((:csv-options options)) gucs before after) command))
|
&key options gucs before after) command))
|
||||||
(cond (*dry-run*
|
(cond (*dry-run*
|
||||||
(lisp-code-for-csv-dry-run pg-db-uri))
|
(lisp-code-for-csv-dry-run pg-db-uri))
|
||||||
(t
|
(t
|
||||||
@ -461,4 +461,4 @@
|
|||||||
:gucs gucs
|
:gucs gucs
|
||||||
:before before
|
:before before
|
||||||
:after after
|
:after after
|
||||||
:csv-options options))))))
|
:options options))))))
|
||||||
|
@ -43,7 +43,9 @@
|
|||||||
(:lambda (source)
|
(:lambda (source)
|
||||||
(bind (((_ field-defs _) source)) field-defs)))
|
(bind (((_ field-defs _) source)) field-defs)))
|
||||||
|
|
||||||
(defrule fixed-option (or option-batch-rows
|
(defrule fixed-option (or option-workers
|
||||||
|
option-concurrency
|
||||||
|
option-batch-rows
|
||||||
option-batch-size
|
option-batch-size
|
||||||
option-batch-concurrency
|
option-batch-concurrency
|
||||||
option-truncate
|
option-truncate
|
||||||
@ -51,19 +53,9 @@
|
|||||||
option-disable-triggers
|
option-disable-triggers
|
||||||
option-skip-header))
|
option-skip-header))
|
||||||
|
|
||||||
(defrule another-fixed-option (and comma fixed-option)
|
(defrule fixed-options (and kw-with
|
||||||
(:lambda (source)
|
(and fixed-option (* (and comma fixed-option))))
|
||||||
(bind (((_ option) source)) option)))
|
(:function flatten-option-list))
|
||||||
|
|
||||||
(defrule fixed-option-list (and fixed-option (* another-fixed-option))
|
|
||||||
(:lambda (source)
|
|
||||||
(destructuring-bind (opt1 opts) source
|
|
||||||
(alexandria:alist-plist `(,opt1 ,@opts)))))
|
|
||||||
|
|
||||||
(defrule fixed-options (and kw-with csv-option-list)
|
|
||||||
(:lambda (source)
|
|
||||||
(bind (((_ opts) source))
|
|
||||||
(cons :fixed-options opts))))
|
|
||||||
|
|
||||||
(defrule fixed-uri (and "fixed://" filename)
|
(defrule fixed-uri (and "fixed://" filename)
|
||||||
(:lambda (source)
|
(:lambda (source)
|
||||||
@ -120,8 +112,10 @@
|
|||||||
&key
|
&key
|
||||||
(encoding :utf-8)
|
(encoding :utf-8)
|
||||||
columns
|
columns
|
||||||
gucs before after
|
gucs before after options
|
||||||
((:fixed-options options)))
|
&aux
|
||||||
|
(worker-count (getf options :worker-count))
|
||||||
|
(concurrency (getf options :concurrency)))
|
||||||
`(lambda ()
|
`(lambda ()
|
||||||
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
|
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||||
,@(batch-control-bindings options)
|
,@(batch-control-bindings options)
|
||||||
@ -146,6 +140,10 @@
|
|||||||
:skip-lines ,(or (getf options :skip-line) 0))))
|
:skip-lines ,(or (getf options :skip-line) 0))))
|
||||||
|
|
||||||
(pgloader.sources:copy-database source
|
(pgloader.sources:copy-database source
|
||||||
|
,@ (when worker-count
|
||||||
|
(list :worker-count worker-count))
|
||||||
|
,@ (when concurrency
|
||||||
|
(list :concurrency concurrency))
|
||||||
:truncate truncate
|
:truncate truncate
|
||||||
:drop-indexes drop-indexes
|
:drop-indexes drop-indexes
|
||||||
:disable-triggers disable-triggers))
|
:disable-triggers disable-triggers))
|
||||||
@ -155,7 +153,7 @@
|
|||||||
(defrule load-fixed-cols-file load-fixed-cols-file-command
|
(defrule load-fixed-cols-file load-fixed-cols-file-command
|
||||||
(:lambda (command)
|
(:lambda (command)
|
||||||
(bind (((source encoding fields pg-db-uri columns
|
(bind (((source encoding fields pg-db-uri columns
|
||||||
&key ((:fixed-options options)) gucs before after) command))
|
&key options gucs before after) command))
|
||||||
(cond (*dry-run*
|
(cond (*dry-run*
|
||||||
(lisp-code-for-csv-dry-run pg-db-uri))
|
(lisp-code-for-csv-dry-run pg-db-uri))
|
||||||
(t
|
(t
|
||||||
@ -165,4 +163,4 @@
|
|||||||
:gucs gucs
|
:gucs gucs
|
||||||
:before before
|
:before before
|
||||||
:after after
|
:after after
|
||||||
:fixed-options options))))))
|
:options options))))))
|
||||||
|
@ -406,7 +406,8 @@
|
|||||||
|
|
||||||
(with-stats-collection ("Index Build Completion" :section section)
|
(with-stats-collection ("Index Build Completion" :section section)
|
||||||
(loop :repeat (count-indexes table)
|
(loop :repeat (count-indexes table)
|
||||||
:do (lp:receive-result idx-channel)))
|
:do (lp:receive-result idx-channel))
|
||||||
|
(lp:end-kernel :wait t))
|
||||||
|
|
||||||
;; turn unique indexes into pkeys now
|
;; turn unique indexes into pkeys now
|
||||||
(with-pgsql-connection (target)
|
(with-pgsql-connection (target)
|
||||||
|
@ -151,6 +151,8 @@
|
|||||||
(defgeneric process-rows (md-copy stream process-fn)
|
(defgeneric process-rows (md-copy stream process-fn)
|
||||||
(:documentation "Process rows from a given input stream."))
|
(:documentation "Process rows from a given input stream."))
|
||||||
|
|
||||||
|
(defgeneric clone-copy-for (md-copy path-spec)
|
||||||
|
(:documentation "Create a new instance for copying PATH-SPEC data."))
|
||||||
|
|
||||||
|
|
||||||
;;;
|
;;;
|
||||||
@ -188,4 +190,4 @@
|
|||||||
(:documentation "Alter load duties for database sources copy support."))
|
(:documentation "Alter load duties for database sources copy support."))
|
||||||
|
|
||||||
(defgeneric instanciate-table-copy-object (db-copy table)
|
(defgeneric instanciate-table-copy-object (db-copy table)
|
||||||
(:documentation "Create an new instance for copying TABLE data."))
|
(:documentation "Create a new instance for copying TABLE data."))
|
||||||
|
@ -12,7 +12,11 @@
|
|||||||
(print-unreadable-object (c stream :type t :identity t)
|
(print-unreadable-object (c stream :type t :identity t)
|
||||||
(with-slots (type spec) c
|
(with-slots (type spec) c
|
||||||
(let ((path (when (slot-boundp c 'path) (slot-value c 'path))))
|
(let ((path (when (slot-boundp c 'path) (slot-value c 'path))))
|
||||||
(format stream "~a://~a:~{~a~^,~}" type (first spec) path)))))
|
(etypecase path
|
||||||
|
(string
|
||||||
|
(format stream "~a://~a:~a" type (first spec) path))
|
||||||
|
(list
|
||||||
|
(format stream "~a://~a:~{~a~^,~}" type (first spec) path)))))))
|
||||||
|
|
||||||
(defmethod expand :after ((md md-connection))
|
(defmethod expand :after ((md md-connection))
|
||||||
"Expand the archive for the MD connection."
|
"Expand the archive for the MD connection."
|
||||||
@ -30,15 +34,12 @@
|
|||||||
(defgeneric expand-spec (md-connection)
|
(defgeneric expand-spec (md-connection)
|
||||||
(:documentation "Expand specification for an FD source."))
|
(:documentation "Expand specification for an FD source."))
|
||||||
|
|
||||||
(defgeneric open-next-stream (md-connection &rest args &key)
|
|
||||||
(:documentation "Open the next input stream from FD."))
|
|
||||||
|
|
||||||
(defmethod expand-spec ((md md-connection))
|
(defmethod expand-spec ((md md-connection))
|
||||||
"Given fd spec as a CONS of a source type and a tagged object, expand the
|
"Given fd spec as a CONS of a source type and a tagged object, expand the
|
||||||
tagged object depending on the source type and return a list of pathnames."
|
tagged object depending on the source type and return a list of pathnames."
|
||||||
(destructuring-bind (type &rest part) (md-spec md)
|
(destructuring-bind (type &rest part) (md-spec md)
|
||||||
(ecase type
|
(ecase type
|
||||||
(:inline (list (caar part)))
|
(:inline (list (md-spec md)))
|
||||||
(:stdin (list *standard-input*))
|
(:stdin (list *standard-input*))
|
||||||
(:regex (destructuring-bind (keep regex root) part
|
(:regex (destructuring-bind (keep regex root) part
|
||||||
(filter-directory regex
|
(filter-directory regex
|
||||||
@ -51,39 +52,31 @@
|
|||||||
(if (probe-file realname) (list realname)
|
(if (probe-file realname) (list realname)
|
||||||
(error "File does not exists: '~a'." realname)))))))
|
(error "File does not exists: '~a'." realname)))))))
|
||||||
|
|
||||||
(defmethod open-next-stream ((md md-connection)
|
(defmethod open-connection ((md md-connection)
|
||||||
&rest args
|
&rest args
|
||||||
&key &allow-other-keys)
|
&key &allow-other-keys)
|
||||||
"Switch to the following file in the PATH list."
|
"We know how to open several kinds of specs here, all that target a single
|
||||||
;; first close current stream
|
file as input. The multi-file specs must have been expanded before trying
|
||||||
(when (slot-boundp md 'strm)
|
to open the connection."
|
||||||
(close (md-strm md)))
|
|
||||||
|
|
||||||
;; now open the new one
|
|
||||||
(when (fd-path md)
|
(when (fd-path md)
|
||||||
(let ((current-path (pop (fd-path md))))
|
(log-message :notice "Opening ~s" (fd-path md)) ; info
|
||||||
(when current-path
|
(cond ;; inline
|
||||||
(log-message :info "Open ~s" current-path)
|
((and (listp (fd-path md))
|
||||||
(prog1
|
(eq :inline (first (fd-path md))))
|
||||||
(setf (md-strm md)
|
(destructuring-bind (filename . position)
|
||||||
(typecase current-path
|
(second (fd-path md))
|
||||||
(stream current-path)
|
;; open the filename with given extra args
|
||||||
(t (apply #'open current-path args))))
|
(setf (md-strm md) (apply #'open filename args))
|
||||||
;;
|
;; and position the stream as expected
|
||||||
;; The :inline md spec is a little special, it's a filename and a
|
(file-position (md-strm md) position)))
|
||||||
;; position where to skip to at opening the file. It allows for
|
|
||||||
;; easier self-contained tests.
|
|
||||||
;;
|
|
||||||
(when (eq :inline (car (md-spec md)))
|
|
||||||
(file-position (md-strm md) (cdadr (md-spec md)))))))))
|
|
||||||
|
|
||||||
(defmethod open-connection ((md md-connection) &key)
|
;; stdin
|
||||||
"The fd connection supports several specs to open a connection:
|
((streamp (fd-path md))
|
||||||
- if we have a path, that's a single file to open
|
(setf (md-strm md) (fd-path md)))
|
||||||
- otherwise spec is an CONS wherin
|
|
||||||
- car is the source type
|
;; other cases should be filenames
|
||||||
- cdr is an object suitable for `get-absolute-pathname`"
|
(t
|
||||||
(setf (fd-path md) (expand-spec md))
|
(setf (md-strm md) (apply #'open (fd-path md) args)))))
|
||||||
md)
|
md)
|
||||||
|
|
||||||
(defmethod close-connection ((md md-connection))
|
(defmethod close-connection ((md md-connection))
|
||||||
@ -91,8 +84,7 @@
|
|||||||
(when (and (slot-boundp md 'strm) (md-strm md))
|
(when (and (slot-boundp md 'strm) (md-strm md))
|
||||||
(close (md-strm md)))
|
(close (md-strm md)))
|
||||||
|
|
||||||
(setf (md-strm md) nil
|
(setf (md-strm md) nil))
|
||||||
(fd-path md) nil))
|
|
||||||
|
|
||||||
(defun get-pathname (dbname table-name &key (fd-path-root *fd-path-root*))
|
(defun get-pathname (dbname table-name &key (fd-path-root *fd-path-root*))
|
||||||
"Return a pathname where to read or write the file data"
|
"Return a pathname where to read or write the file data"
|
||||||
|
@ -18,16 +18,14 @@
|
|||||||
|
|
||||||
Finally returns how many rows where read and processed."
|
Finally returns how many rows where read and processed."
|
||||||
|
|
||||||
(with-connection (cnx (source copy))
|
(with-connection (cnx (source copy)
|
||||||
(loop :for input := (open-next-stream cnx
|
|
||||||
:direction :input
|
:direction :input
|
||||||
:external-format (encoding copy)
|
:external-format (encoding copy)
|
||||||
:if-does-not-exist nil)
|
:if-does-not-exist nil)
|
||||||
:while input
|
(let ((input (md-strm cnx)))
|
||||||
:do (progn
|
|
||||||
;; we handle skipping more than one line here, as cl-copy only knows
|
;; we handle skipping more than one line here, as cl-copy only knows
|
||||||
;; about skipping the first line
|
;; about skipping the first line
|
||||||
(loop repeat (skip-lines copy) do (read-line input nil nil))
|
(loop :repeat (skip-lines copy) :do (read-line input nil nil))
|
||||||
|
|
||||||
;; we might now have to read the fields from the header line
|
;; we might now have to read the fields from the header line
|
||||||
(when (header copy)
|
(when (header copy)
|
||||||
@ -37,7 +35,7 @@
|
|||||||
(log-message :debug "Parsed header columns ~s" (fields copy)))
|
(log-message :debug "Parsed header columns ~s" (fields copy)))
|
||||||
|
|
||||||
;; read in the text file, split it into columns
|
;; read in the text file, split it into columns
|
||||||
(process-rows copy input process-row-fn)))))
|
(process-rows copy input process-row-fn))))
|
||||||
|
|
||||||
(defmethod preprocess-row ((copy md-copy))
|
(defmethod preprocess-row ((copy md-copy))
|
||||||
"The file based readers possibly have extra work to do with user defined
|
"The file based readers possibly have extra work to do with user defined
|
||||||
@ -54,6 +52,23 @@
|
|||||||
(format nil "~s" (car col)))
|
(format nil "~s" (car col)))
|
||||||
(columns copy)))
|
(columns copy)))
|
||||||
|
|
||||||
|
(defmethod clone-copy-for ((copy md-copy) path-spec)
|
||||||
|
"Create a copy of CSV for loading data from PATH-SPEC."
|
||||||
|
(make-instance (class-of copy)
|
||||||
|
;; source-db is expected unbound here, so bypassed
|
||||||
|
:target-db (clone-connection (target-db copy))
|
||||||
|
:source (make-instance (class-of (source copy))
|
||||||
|
:spec (md-spec (source copy))
|
||||||
|
:type (conn-type (source copy))
|
||||||
|
:path path-spec)
|
||||||
|
:target (target copy)
|
||||||
|
:fields (fields copy)
|
||||||
|
:columns (columns copy)
|
||||||
|
:transforms (transforms copy)
|
||||||
|
:encoding (encoding copy)
|
||||||
|
:skip-lines (skip-lines copy)
|
||||||
|
:header (header copy)))
|
||||||
|
|
||||||
(defmethod copy-database ((copy md-copy)
|
(defmethod copy-database ((copy md-copy)
|
||||||
&key
|
&key
|
||||||
truncate
|
truncate
|
||||||
@ -61,8 +76,8 @@
|
|||||||
drop-indexes
|
drop-indexes
|
||||||
|
|
||||||
;; generic API, but ignored here
|
;; generic API, but ignored here
|
||||||
(worker-count 8)
|
(worker-count 4)
|
||||||
(concurrency 2)
|
(concurrency 1)
|
||||||
|
|
||||||
data-only
|
data-only
|
||||||
schema-only
|
schema-only
|
||||||
@ -86,11 +101,42 @@
|
|||||||
(target copy)
|
(target copy)
|
||||||
:drop-indexes drop-indexes)
|
:drop-indexes drop-indexes)
|
||||||
|
|
||||||
(copy-from copy
|
;; ensure we truncate only one
|
||||||
:worker-count worker-count
|
(when truncate
|
||||||
|
(truncate-tables (clone-connection (target-db copy)) (target copy)))
|
||||||
|
|
||||||
|
;; expand the specs of our source, we might have to care about several
|
||||||
|
;; files actually.
|
||||||
|
(let* ((lp:*kernel* (make-kernel worker-count))
|
||||||
|
(channel (lp:make-channel))
|
||||||
|
(path-list (expand-spec (source copy))))
|
||||||
|
(loop :for path-spec :in path-list
|
||||||
|
:do (let ((table-source (clone-copy-for copy path-spec)))
|
||||||
|
(copy-from table-source
|
||||||
:concurrency concurrency
|
:concurrency concurrency
|
||||||
:truncate truncate
|
:kernel lp:*kernel*
|
||||||
:disable-triggers disable-triggers)
|
:channel channel
|
||||||
|
:truncate nil
|
||||||
|
:disable-triggers disable-triggers)))
|
||||||
|
|
||||||
|
;; end kernel
|
||||||
|
(with-stats-collection ("COPY Threads Completion" :section :post
|
||||||
|
:use-result-as-read t
|
||||||
|
:use-result-as-rows t)
|
||||||
|
(let ((worker-count (* (length path-list)
|
||||||
|
(task-count concurrency))))
|
||||||
|
(loop :for tasks :below worker-count
|
||||||
|
:do (destructuring-bind (task table seconds)
|
||||||
|
(lp:receive-result channel)
|
||||||
|
(log-message :debug
|
||||||
|
"Finished processing ~a for ~s ~50T~6$s"
|
||||||
|
task (format-table-name table) seconds)
|
||||||
|
(when (eq :writer task)
|
||||||
|
(update-stats :data table :secs seconds))))
|
||||||
|
(prog1
|
||||||
|
worker-count
|
||||||
|
(lp:end-kernel :wait nil))))
|
||||||
|
(lp:end-kernel :wait t))
|
||||||
|
|
||||||
;; re-create the indexes from the target table entry
|
;; re-create the indexes from the target table entry
|
||||||
(create-indexes-again (target-db copy)
|
(create-indexes-again (target-db copy)
|
||||||
|
@ -180,5 +180,8 @@
|
|||||||
|
|
||||||
;; now wait until both the tasks are over, and kill the kernel
|
;; now wait until both the tasks are over, and kill the kernel
|
||||||
(unless c-s-p
|
(unless c-s-p
|
||||||
|
(log-message :debug "waiting for ~d tasks" (task-count concurrency))
|
||||||
|
(loop :repeat (task-count concurrency)
|
||||||
|
:do (lp:receive-result channel))
|
||||||
(log-message :info "COPY ~s done." table-name)
|
(log-message :info "COPY ~s done." table-name)
|
||||||
(unless k-s-p (lp:end-kernel :wait t)))))))
|
(unless k-s-p (lp:end-kernel :wait t)))))))
|
||||||
|
@ -10,12 +10,7 @@
|
|||||||
(setf (slot-value copy 'type) "copy"))
|
(setf (slot-value copy 'type) "copy"))
|
||||||
|
|
||||||
(defclass copy-copy (md-copy)
|
(defclass copy-copy (md-copy)
|
||||||
((encoding :accessor encoding ; file encoding
|
((delimiter :accessor delimiter ; see COPY options for TEXT
|
||||||
:initarg :encoding) ;
|
|
||||||
(skip-lines :accessor skip-lines ; we might want to skip COPY lines
|
|
||||||
:initarg :skip-lines ;
|
|
||||||
:initform 0) ;
|
|
||||||
(delimiter :accessor delimiter ; see COPY options for TEXT
|
|
||||||
:initarg :delimiter ; in PostgreSQL docs
|
:initarg :delimiter ; in PostgreSQL docs
|
||||||
:initform #\Tab)
|
:initform #\Tab)
|
||||||
(null-as :accessor null-as
|
(null-as :accessor null-as
|
||||||
@ -35,6 +30,18 @@
|
|||||||
(unless transforms
|
(unless transforms
|
||||||
(setf (slot-value copy 'transforms) (make-list (length columns))))))
|
(setf (slot-value copy 'transforms) (make-list (length columns))))))
|
||||||
|
|
||||||
|
(defmethod clone-copy-for ((copy copy-copy) path-spec)
|
||||||
|
"Create a copy of FIXED for loading data from PATH-SPEC."
|
||||||
|
(let ((copy-for-path-spec
|
||||||
|
(change-class (call-next-method copy path-spec) 'copy-copy)))
|
||||||
|
(loop :for slot-name :in '(delimiter null-as)
|
||||||
|
:do (when (slot-boundp copy slot-name)
|
||||||
|
(setf (slot-value copy-for-path-spec slot-name)
|
||||||
|
(slot-value copy slot-name))))
|
||||||
|
|
||||||
|
;; return the new instance!
|
||||||
|
copy-for-path-spec))
|
||||||
|
|
||||||
(declaim (inline parse-row))
|
(declaim (inline parse-row))
|
||||||
|
|
||||||
(defun parse-row (line &key (delimiter #\Tab) (null-as "\\N"))
|
(defun parse-row (line &key (delimiter #\Tab) (null-as "\\N"))
|
||||||
|
@ -48,6 +48,24 @@
|
|||||||
(unless transforms
|
(unless transforms
|
||||||
(setf (slot-value csv 'transforms) (make-list (length columns))))))
|
(setf (slot-value csv 'transforms) (make-list (length columns))))))
|
||||||
|
|
||||||
|
(defmethod clone-copy-for ((csv copy-csv) path-spec)
|
||||||
|
"Create a copy of CSV for loading data from PATH-SPEC."
|
||||||
|
(let ((csv-for-path-spec
|
||||||
|
(change-class (call-next-method csv path-spec) 'copy-csv)))
|
||||||
|
(loop :for slot-name :in '(source-type
|
||||||
|
separator
|
||||||
|
newline
|
||||||
|
quote
|
||||||
|
escape
|
||||||
|
escape-mode
|
||||||
|
trim-blanks)
|
||||||
|
:do (when (slot-boundp csv slot-name)
|
||||||
|
(setf (slot-value csv-for-path-spec slot-name)
|
||||||
|
(slot-value csv slot-name))))
|
||||||
|
|
||||||
|
;; return the new instance!
|
||||||
|
csv-for-path-spec))
|
||||||
|
|
||||||
;;;
|
;;;
|
||||||
;;; Read a file format in CSV format, and call given function on each line.
|
;;; Read a file format in CSV format, and call given function on each line.
|
||||||
;;;
|
;;;
|
||||||
|
@ -30,6 +30,10 @@
|
|||||||
(unless transforms
|
(unless transforms
|
||||||
(setf (slot-value fixed 'transforms) (make-list (length columns))))))
|
(setf (slot-value fixed 'transforms) (make-list (length columns))))))
|
||||||
|
|
||||||
|
(defmethod clone-copy-for ((fixed copy-fixed) path-spec)
|
||||||
|
"Create a copy of FIXED for loading data from PATH-SPEC."
|
||||||
|
(change-class (call-next-method fixed path-spec) 'copy-fixed))
|
||||||
|
|
||||||
(declaim (inline parse-row))
|
(declaim (inline parse-row))
|
||||||
|
|
||||||
(defun parse-row (fixed-cols-specs line)
|
(defun parse-row (fixed-cols-specs line)
|
||||||
|
@ -133,7 +133,8 @@
|
|||||||
;;;
|
;;;
|
||||||
;;; Tools for every connection classes
|
;;; Tools for every connection classes
|
||||||
;;;
|
;;;
|
||||||
(defmacro with-connection ((var connection) &body forms)
|
(defmacro with-connection ((var connection &rest args &key &allow-other-keys)
|
||||||
|
&body forms)
|
||||||
"Connect to DB-CONNECTION and handle any condition when doing so, and when
|
"Connect to DB-CONNECTION and handle any condition when doing so, and when
|
||||||
connected execute FORMS in a protected way so that we always disconnect
|
connected execute FORMS in a protected way so that we always disconnect
|
||||||
at the end."
|
at the end."
|
||||||
@ -147,7 +148,7 @@
|
|||||||
#'(lambda (w)
|
#'(lambda (w)
|
||||||
(log-message :warning "~a" w)
|
(log-message :warning "~a" w)
|
||||||
(muffle-warning))))
|
(muffle-warning))))
|
||||||
(open-connection ,conn))
|
(apply #'open-connection ,conn (list ,@args)))
|
||||||
(condition (e)
|
(condition (e)
|
||||||
(cond ((typep ,connection 'fd-connection)
|
(cond ((typep ,connection 'fd-connection)
|
||||||
(error 'fd-connection-error
|
(error 'fd-connection-error
|
||||||
|
@ -3,9 +3,12 @@ load csv
|
|||||||
in directory 'data'
|
in directory 'data'
|
||||||
having fields (id, field)
|
having fields (id, field)
|
||||||
into postgres:///pgloader?matching
|
into postgres:///pgloader?matching
|
||||||
|
|
||||||
with fields optionally enclosed by '"',
|
with fields optionally enclosed by '"',
|
||||||
fields terminated by ',',
|
fields terminated by ',',
|
||||||
truncate
|
truncate,
|
||||||
|
workers = 8,
|
||||||
|
concurrency = 1
|
||||||
|
|
||||||
before load do
|
before load do
|
||||||
$$ drop table if exists matching; $$,
|
$$ drop table if exists matching; $$,
|
||||||
|
Loading…
Reference in New Issue
Block a user