Implement concurrency and workers for files sources.

More than the syntax and API tweaks, this patch also make it so that a
multi-file specification (using e.g. ALL FILENAMES IN DIRECTORY) can be
loaded with several files in the group in parallel.

To that effect, tweak again the md-connection and md-copy
implementations.
This commit is contained in:
Dimitri Fontaine 2016-01-16 22:53:55 +01:00
parent aa8b756315
commit 7dd69a11e1
17 changed files with 216 additions and 143 deletions

View File

@ -622,7 +622,7 @@ All data sources specific commands support the following options:
See the section BATCH BEHAVIOUR OPTIONS for more details\. See the section BATCH BEHAVIOUR OPTIONS for more details\.
. .
.IP .IP
In addition, the data sources \fImysql\fR, \fIsqlite\fR, \fImssql\fR, \fIixf\fR and \fIdbf\fR all support the following settings: In addition, the following settings are available:
. .
.IP "\(bu" 4 .IP "\(bu" 4
\fIworkers = W\fR \fIworkers = W\fR

View File

@ -550,8 +550,7 @@ Some clauses are common to all commands:
See the section BATCH BEHAVIOUR OPTIONS for more details. See the section BATCH BEHAVIOUR OPTIONS for more details.
In addition, the data sources *mysql*, *sqlite*, *mssql*, *ixf* and In addition, the following settings are available:
*dbf* all support the following settings:
- *workers = W* - *workers = W*
- *concurrency = C* - *concurrency = C*

View File

@ -567,7 +567,7 @@
(lisp-code-for-loading-from-copy source fields target (lisp-code-for-loading-from-copy source fields target
:encoding (or encoding :default) :encoding (or encoding :default)
:gucs gucs :gucs gucs
:copy-options options :options options
:before before :before before
:after after)) :after after))
@ -575,7 +575,7 @@
(lisp-code-for-loading-from-fixed source fields target (lisp-code-for-loading-from-fixed source fields target
:encoding encoding :encoding encoding
:gucs gucs :gucs gucs
:fixed-options options :options options
:before before :before before
:after after)) :after after))
@ -583,21 +583,21 @@
(lisp-code-for-loading-from-csv source fields target (lisp-code-for-loading-from-csv source fields target
:encoding encoding :encoding encoding
:gucs gucs :gucs gucs
:csv-options options :options options
:before before :before before
:after after)) :after after))
(dbf-connection (dbf-connection
(lisp-code-for-loading-from-dbf source target (lisp-code-for-loading-from-dbf source target
:gucs gucs :gucs gucs
:dbf-options options :options options
:before before :before before
:after after)) :after after))
(ixf-connection (ixf-connection
(lisp-code-for-loading-from-ixf source target (lisp-code-for-loading-from-ixf source target
:gucs gucs :gucs gucs
:ixf-options options :options options
:before before :before before
:after after)) :after after))
@ -605,7 +605,7 @@
(lisp-code-for-loading-from-sqlite source target (lisp-code-for-loading-from-sqlite source target
:gucs gucs :gucs gucs
:casts casts :casts casts
:sqlite-options options :options options
:before before :before before
:after after)) :after after))
@ -613,7 +613,7 @@
(lisp-code-for-loading-from-mysql source target (lisp-code-for-loading-from-mysql source target
:gucs gucs :gucs gucs
:casts casts :casts casts
:mysql-options options :options options
:before before :before before
:after after)) :after after))
@ -621,7 +621,7 @@
(lisp-code-for-loading-from-mssql source target (lisp-code-for-loading-from-mssql source target
:gucs gucs :gucs gucs
:casts casts :casts casts
:mssql-options options :options options
:before before :before before
:after after)))) :after after))))
:start-logger nil))) :start-logger nil)))

View File

@ -438,7 +438,7 @@
#:md-spec #:md-spec
#:md-strm #:md-strm
#:expand-spec #:expand-spec
#:open-next-stream #:clone-copy-for
;; the db-methods ;; the db-methods
#:fetch-metadata #:fetch-metadata
@ -451,7 +451,6 @@
;; file based utils for CSV, fixed etc ;; file based utils for CSV, fixed etc
#:with-open-file-or-stream #:with-open-file-or-stream
#:get-pathname #:get-pathname
#:get-absolute-pathname
#:project-fields #:project-fields
#:reformat-then-process #:reformat-then-process

View File

@ -33,7 +33,9 @@
(defrule option-null (and kw-null quoted-string) (defrule option-null (and kw-null quoted-string)
(:destructure (kw null) (declare (ignore kw)) (cons :null-as null))) (:destructure (kw null) (declare (ignore kw)) (cons :null-as null)))
(defrule copy-option (or option-batch-rows (defrule copy-option (or option-workers
option-concurrency
option-batch-rows
option-batch-size option-batch-size
option-batch-concurrency option-batch-concurrency
option-truncate option-truncate
@ -43,19 +45,9 @@
option-delimiter option-delimiter
option-null)) option-null))
(defrule another-copy-option (and comma copy-option) (defrule copy-options (and kw-with
(:lambda (source) (and copy-option (* (and comma copy-option))))
(bind (((_ option) source)) option))) (:function flatten-option-list))
(defrule copy-option-list (and copy-option (* another-copy-option))
(:lambda (source)
(destructuring-bind (opt1 opts) source
(alexandria:alist-plist `(,opt1 ,@opts)))))
(defrule copy-options (and kw-with copy-option-list)
(:lambda (source)
(bind (((_ opts) source))
(cons :copy-options opts))))
(defrule copy-uri (and "copy://" filename) (defrule copy-uri (and "copy://" filename)
(:lambda (source) (:lambda (source)
@ -112,8 +104,10 @@
&key &key
(encoding :utf-8) (encoding :utf-8)
columns columns
gucs before after gucs before after options
((:copy-options options))) &aux
(worker-count (getf options :worker-count))
(concurrency (getf options :concurrency)))
`(lambda () `(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn gucs) (let* (,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options) ,@(batch-control-bindings options)
@ -136,10 +130,16 @@
:fields ',fields :fields ',fields
:columns ',columns :columns ',columns
,@(remove-batch-control-option ,@(remove-batch-control-option
options :extras '(:truncate options :extras '(:worker-count
:concurrency
:truncate
:drop-indexes :drop-indexes
:disable-triggers))))) :disable-triggers)))))
(pgloader.sources:copy-database source (pgloader.sources:copy-database source
,@ (when worker-count
(list :worker-count worker-count))
,@ (when concurrency
(list :concurrency concurrency))
:truncate truncate :truncate truncate
:drop-indexes drop-indexes :drop-indexes drop-indexes
:disable-triggers disable-triggers)) :disable-triggers disable-triggers))
@ -149,7 +149,7 @@
(defrule load-copy-file load-copy-file-command (defrule load-copy-file load-copy-file-command
(:lambda (command) (:lambda (command)
(bind (((source encoding fields pg-db-uri columns (bind (((source encoding fields pg-db-uri columns
&key ((:copy-options options)) gucs before after) command)) &key options gucs before after) command))
(cond (*dry-run* (cond (*dry-run*
(lisp-code-for-csv-dry-run pg-db-uri)) (lisp-code-for-csv-dry-run pg-db-uri))
(t (t
@ -159,4 +159,4 @@
:gucs gucs :gucs gucs
:before before :before before
:after after :after after
:copy-options options)))))) :options options))))))

View File

@ -103,7 +103,9 @@
(bind (((_ _ _ escape-mode) term)) (bind (((_ _ _ escape-mode) term))
(cons :escape-mode escape-mode)))) (cons :escape-mode escape-mode))))
(defrule csv-option (or option-batch-rows (defrule csv-option (or option-workers
option-concurrency
option-batch-rows
option-batch-size option-batch-size
option-batch-concurrency option-batch-concurrency
option-truncate option-truncate
@ -120,19 +122,9 @@
option-keep-unquoted-blanks option-keep-unquoted-blanks
option-csv-escape-mode)) option-csv-escape-mode))
(defrule another-csv-option (and comma csv-option) (defrule csv-options (and kw-with
(:lambda (source) (and csv-option (* (and comma csv-option))))
(bind (((_ option) source)) option))) (:function flatten-option-list))
(defrule csv-option-list (and csv-option (* another-csv-option))
(:lambda (source)
(destructuring-bind (opt1 opts) source
(alexandria:alist-plist `(,opt1 ,@opts)))))
(defrule csv-options (and kw-with csv-option-list)
(:lambda (source)
(bind (((_ opts) source))
(cons :csv-options opts))))
;; ;;
;; CSV per-field reading options ;; CSV per-field reading options
@ -414,8 +406,10 @@
&key &key
(encoding :utf-8) (encoding :utf-8)
columns columns
gucs before after gucs before after options
((:csv-options options))) &aux
(worker-count (getf options :worker-count))
(concurrency (getf options :concurrency)))
`(lambda () `(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn gucs) (let* (,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options) ,@(batch-control-bindings options)
@ -438,10 +432,16 @@
:fields ',fields :fields ',fields
:columns ',columns :columns ',columns
,@(remove-batch-control-option ,@(remove-batch-control-option
options :extras '(:truncate options :extras '(:worker-count
:concurrency
:truncate
:drop-indexes :drop-indexes
:disable-triggers))))) :disable-triggers)))))
(pgloader.sources:copy-database source (pgloader.sources:copy-database source
,@ (when worker-count
(list :worker-count worker-count))
,@ (when concurrency
(list :concurrency concurrency))
:truncate truncate :truncate truncate
:drop-indexes drop-indexes :drop-indexes drop-indexes
:disable-triggers disable-triggers)) :disable-triggers disable-triggers))
@ -451,7 +451,7 @@
(defrule load-csv-file load-csv-file-command (defrule load-csv-file load-csv-file-command
(:lambda (command) (:lambda (command)
(bind (((source encoding fields pg-db-uri columns (bind (((source encoding fields pg-db-uri columns
&key ((:csv-options options)) gucs before after) command)) &key options gucs before after) command))
(cond (*dry-run* (cond (*dry-run*
(lisp-code-for-csv-dry-run pg-db-uri)) (lisp-code-for-csv-dry-run pg-db-uri))
(t (t
@ -461,4 +461,4 @@
:gucs gucs :gucs gucs
:before before :before before
:after after :after after
:csv-options options)))))) :options options))))))

View File

@ -43,7 +43,9 @@
(:lambda (source) (:lambda (source)
(bind (((_ field-defs _) source)) field-defs))) (bind (((_ field-defs _) source)) field-defs)))
(defrule fixed-option (or option-batch-rows (defrule fixed-option (or option-workers
option-concurrency
option-batch-rows
option-batch-size option-batch-size
option-batch-concurrency option-batch-concurrency
option-truncate option-truncate
@ -51,19 +53,9 @@
option-disable-triggers option-disable-triggers
option-skip-header)) option-skip-header))
(defrule another-fixed-option (and comma fixed-option) (defrule fixed-options (and kw-with
(:lambda (source) (and fixed-option (* (and comma fixed-option))))
(bind (((_ option) source)) option))) (:function flatten-option-list))
(defrule fixed-option-list (and fixed-option (* another-fixed-option))
(:lambda (source)
(destructuring-bind (opt1 opts) source
(alexandria:alist-plist `(,opt1 ,@opts)))))
(defrule fixed-options (and kw-with csv-option-list)
(:lambda (source)
(bind (((_ opts) source))
(cons :fixed-options opts))))
(defrule fixed-uri (and "fixed://" filename) (defrule fixed-uri (and "fixed://" filename)
(:lambda (source) (:lambda (source)
@ -120,8 +112,10 @@
&key &key
(encoding :utf-8) (encoding :utf-8)
columns columns
gucs before after gucs before after options
((:fixed-options options))) &aux
(worker-count (getf options :worker-count))
(concurrency (getf options :concurrency)))
`(lambda () `(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn gucs) (let* (,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options) ,@(batch-control-bindings options)
@ -146,6 +140,10 @@
:skip-lines ,(or (getf options :skip-line) 0)))) :skip-lines ,(or (getf options :skip-line) 0))))
(pgloader.sources:copy-database source (pgloader.sources:copy-database source
,@ (when worker-count
(list :worker-count worker-count))
,@ (when concurrency
(list :concurrency concurrency))
:truncate truncate :truncate truncate
:drop-indexes drop-indexes :drop-indexes drop-indexes
:disable-triggers disable-triggers)) :disable-triggers disable-triggers))
@ -155,7 +153,7 @@
(defrule load-fixed-cols-file load-fixed-cols-file-command (defrule load-fixed-cols-file load-fixed-cols-file-command
(:lambda (command) (:lambda (command)
(bind (((source encoding fields pg-db-uri columns (bind (((source encoding fields pg-db-uri columns
&key ((:fixed-options options)) gucs before after) command)) &key options gucs before after) command))
(cond (*dry-run* (cond (*dry-run*
(lisp-code-for-csv-dry-run pg-db-uri)) (lisp-code-for-csv-dry-run pg-db-uri))
(t (t
@ -165,4 +163,4 @@
:gucs gucs :gucs gucs
:before before :before before
:after after :after after
:fixed-options options)))))) :options options))))))

View File

@ -406,7 +406,8 @@
(with-stats-collection ("Index Build Completion" :section section) (with-stats-collection ("Index Build Completion" :section section)
(loop :repeat (count-indexes table) (loop :repeat (count-indexes table)
:do (lp:receive-result idx-channel))) :do (lp:receive-result idx-channel))
(lp:end-kernel :wait t))
;; turn unique indexes into pkeys now ;; turn unique indexes into pkeys now
(with-pgsql-connection (target) (with-pgsql-connection (target)

View File

@ -151,6 +151,8 @@
(defgeneric process-rows (md-copy stream process-fn) (defgeneric process-rows (md-copy stream process-fn)
(:documentation "Process rows from a given input stream.")) (:documentation "Process rows from a given input stream."))
(defgeneric clone-copy-for (md-copy path-spec)
(:documentation "Create a new instance for copying PATH-SPEC data."))
;;; ;;;
@ -188,4 +190,4 @@
(:documentation "Alter load duties for database sources copy support.")) (:documentation "Alter load duties for database sources copy support."))
(defgeneric instanciate-table-copy-object (db-copy table) (defgeneric instanciate-table-copy-object (db-copy table)
(:documentation "Create an new instance for copying TABLE data.")) (:documentation "Create a new instance for copying TABLE data."))

View File

@ -12,7 +12,11 @@
(print-unreadable-object (c stream :type t :identity t) (print-unreadable-object (c stream :type t :identity t)
(with-slots (type spec) c (with-slots (type spec) c
(let ((path (when (slot-boundp c 'path) (slot-value c 'path)))) (let ((path (when (slot-boundp c 'path) (slot-value c 'path))))
(format stream "~a://~a:~{~a~^,~}" type (first spec) path))))) (etypecase path
(string
(format stream "~a://~a:~a" type (first spec) path))
(list
(format stream "~a://~a:~{~a~^,~}" type (first spec) path)))))))
(defmethod expand :after ((md md-connection)) (defmethod expand :after ((md md-connection))
"Expand the archive for the MD connection." "Expand the archive for the MD connection."
@ -30,15 +34,12 @@
(defgeneric expand-spec (md-connection) (defgeneric expand-spec (md-connection)
(:documentation "Expand specification for an FD source.")) (:documentation "Expand specification for an FD source."))
(defgeneric open-next-stream (md-connection &rest args &key)
(:documentation "Open the next input stream from FD."))
(defmethod expand-spec ((md md-connection)) (defmethod expand-spec ((md md-connection))
"Given fd spec as a CONS of a source type and a tagged object, expand the "Given fd spec as a CONS of a source type and a tagged object, expand the
tagged object depending on the source type and return a list of pathnames." tagged object depending on the source type and return a list of pathnames."
(destructuring-bind (type &rest part) (md-spec md) (destructuring-bind (type &rest part) (md-spec md)
(ecase type (ecase type
(:inline (list (caar part))) (:inline (list (md-spec md)))
(:stdin (list *standard-input*)) (:stdin (list *standard-input*))
(:regex (destructuring-bind (keep regex root) part (:regex (destructuring-bind (keep regex root) part
(filter-directory regex (filter-directory regex
@ -51,39 +52,31 @@
(if (probe-file realname) (list realname) (if (probe-file realname) (list realname)
(error "File does not exists: '~a'." realname))))))) (error "File does not exists: '~a'." realname)))))))
(defmethod open-next-stream ((md md-connection) (defmethod open-connection ((md md-connection)
&rest args &rest args
&key &allow-other-keys) &key &allow-other-keys)
"Switch to the following file in the PATH list." "We know how to open several kinds of specs here, all that target a single
;; first close current stream file as input. The multi-file specs must have been expanded before trying
(when (slot-boundp md 'strm) to open the connection."
(close (md-strm md)))
;; now open the new one
(when (fd-path md) (when (fd-path md)
(let ((current-path (pop (fd-path md)))) (log-message :notice "Opening ~s" (fd-path md)) ; info
(when current-path (cond ;; inline
(log-message :info "Open ~s" current-path) ((and (listp (fd-path md))
(prog1 (eq :inline (first (fd-path md))))
(setf (md-strm md) (destructuring-bind (filename . position)
(typecase current-path (second (fd-path md))
(stream current-path) ;; open the filename with given extra args
(t (apply #'open current-path args)))) (setf (md-strm md) (apply #'open filename args))
;; ;; and position the stream as expected
;; The :inline md spec is a little special, it's a filename and a (file-position (md-strm md) position)))
;; position where to skip to at opening the file. It allows for
;; easier self-contained tests.
;;
(when (eq :inline (car (md-spec md)))
(file-position (md-strm md) (cdadr (md-spec md)))))))))
(defmethod open-connection ((md md-connection) &key) ;; stdin
"The fd connection supports several specs to open a connection: ((streamp (fd-path md))
- if we have a path, that's a single file to open (setf (md-strm md) (fd-path md)))
- otherwise spec is an CONS wherin
- car is the source type ;; other cases should be filenames
- cdr is an object suitable for `get-absolute-pathname`" (t
(setf (fd-path md) (expand-spec md)) (setf (md-strm md) (apply #'open (fd-path md) args)))))
md) md)
(defmethod close-connection ((md md-connection)) (defmethod close-connection ((md md-connection))
@ -91,8 +84,7 @@
(when (and (slot-boundp md 'strm) (md-strm md)) (when (and (slot-boundp md 'strm) (md-strm md))
(close (md-strm md))) (close (md-strm md)))
(setf (md-strm md) nil (setf (md-strm md) nil))
(fd-path md) nil))
(defun get-pathname (dbname table-name &key (fd-path-root *fd-path-root*)) (defun get-pathname (dbname table-name &key (fd-path-root *fd-path-root*))
"Return a pathname where to read or write the file data" "Return a pathname where to read or write the file data"

View File

@ -18,16 +18,14 @@
Finally returns how many rows where read and processed." Finally returns how many rows where read and processed."
(with-connection (cnx (source copy)) (with-connection (cnx (source copy)
(loop :for input := (open-next-stream cnx
:direction :input :direction :input
:external-format (encoding copy) :external-format (encoding copy)
:if-does-not-exist nil) :if-does-not-exist nil)
:while input (let ((input (md-strm cnx)))
:do (progn
;; we handle skipping more than one line here, as cl-copy only knows ;; we handle skipping more than one line here, as cl-copy only knows
;; about skipping the first line ;; about skipping the first line
(loop repeat (skip-lines copy) do (read-line input nil nil)) (loop :repeat (skip-lines copy) :do (read-line input nil nil))
;; we might now have to read the fields from the header line ;; we might now have to read the fields from the header line
(when (header copy) (when (header copy)
@ -37,7 +35,7 @@
(log-message :debug "Parsed header columns ~s" (fields copy))) (log-message :debug "Parsed header columns ~s" (fields copy)))
;; read in the text file, split it into columns ;; read in the text file, split it into columns
(process-rows copy input process-row-fn))))) (process-rows copy input process-row-fn))))
(defmethod preprocess-row ((copy md-copy)) (defmethod preprocess-row ((copy md-copy))
"The file based readers possibly have extra work to do with user defined "The file based readers possibly have extra work to do with user defined
@ -54,6 +52,23 @@
(format nil "~s" (car col))) (format nil "~s" (car col)))
(columns copy))) (columns copy)))
(defmethod clone-copy-for ((copy md-copy) path-spec)
"Create a copy of CSV for loading data from PATH-SPEC."
(make-instance (class-of copy)
;; source-db is expected unbound here, so bypassed
:target-db (clone-connection (target-db copy))
:source (make-instance (class-of (source copy))
:spec (md-spec (source copy))
:type (conn-type (source copy))
:path path-spec)
:target (target copy)
:fields (fields copy)
:columns (columns copy)
:transforms (transforms copy)
:encoding (encoding copy)
:skip-lines (skip-lines copy)
:header (header copy)))
(defmethod copy-database ((copy md-copy) (defmethod copy-database ((copy md-copy)
&key &key
truncate truncate
@ -61,8 +76,8 @@
drop-indexes drop-indexes
;; generic API, but ignored here ;; generic API, but ignored here
(worker-count 8) (worker-count 4)
(concurrency 2) (concurrency 1)
data-only data-only
schema-only schema-only
@ -86,11 +101,42 @@
(target copy) (target copy)
:drop-indexes drop-indexes) :drop-indexes drop-indexes)
(copy-from copy ;; ensure we truncate only one
:worker-count worker-count (when truncate
(truncate-tables (clone-connection (target-db copy)) (target copy)))
;; expand the specs of our source, we might have to care about several
;; files actually.
(let* ((lp:*kernel* (make-kernel worker-count))
(channel (lp:make-channel))
(path-list (expand-spec (source copy))))
(loop :for path-spec :in path-list
:do (let ((table-source (clone-copy-for copy path-spec)))
(copy-from table-source
:concurrency concurrency :concurrency concurrency
:truncate truncate :kernel lp:*kernel*
:disable-triggers disable-triggers) :channel channel
:truncate nil
:disable-triggers disable-triggers)))
;; end kernel
(with-stats-collection ("COPY Threads Completion" :section :post
:use-result-as-read t
:use-result-as-rows t)
(let ((worker-count (* (length path-list)
(task-count concurrency))))
(loop :for tasks :below worker-count
:do (destructuring-bind (task table seconds)
(lp:receive-result channel)
(log-message :debug
"Finished processing ~a for ~s ~50T~6$s"
task (format-table-name table) seconds)
(when (eq :writer task)
(update-stats :data table :secs seconds))))
(prog1
worker-count
(lp:end-kernel :wait nil))))
(lp:end-kernel :wait t))
;; re-create the indexes from the target table entry ;; re-create the indexes from the target table entry
(create-indexes-again (target-db copy) (create-indexes-again (target-db copy)

View File

@ -180,5 +180,8 @@
;; now wait until both the tasks are over, and kill the kernel ;; now wait until both the tasks are over, and kill the kernel
(unless c-s-p (unless c-s-p
(log-message :debug "waiting for ~d tasks" (task-count concurrency))
(loop :repeat (task-count concurrency)
:do (lp:receive-result channel))
(log-message :info "COPY ~s done." table-name) (log-message :info "COPY ~s done." table-name)
(unless k-s-p (lp:end-kernel :wait t))))))) (unless k-s-p (lp:end-kernel :wait t)))))))

View File

@ -10,12 +10,7 @@
(setf (slot-value copy 'type) "copy")) (setf (slot-value copy 'type) "copy"))
(defclass copy-copy (md-copy) (defclass copy-copy (md-copy)
((encoding :accessor encoding ; file encoding ((delimiter :accessor delimiter ; see COPY options for TEXT
:initarg :encoding) ;
(skip-lines :accessor skip-lines ; we might want to skip COPY lines
:initarg :skip-lines ;
:initform 0) ;
(delimiter :accessor delimiter ; see COPY options for TEXT
:initarg :delimiter ; in PostgreSQL docs :initarg :delimiter ; in PostgreSQL docs
:initform #\Tab) :initform #\Tab)
(null-as :accessor null-as (null-as :accessor null-as
@ -35,6 +30,18 @@
(unless transforms (unless transforms
(setf (slot-value copy 'transforms) (make-list (length columns)))))) (setf (slot-value copy 'transforms) (make-list (length columns))))))
(defmethod clone-copy-for ((copy copy-copy) path-spec)
"Create a copy of FIXED for loading data from PATH-SPEC."
(let ((copy-for-path-spec
(change-class (call-next-method copy path-spec) 'copy-copy)))
(loop :for slot-name :in '(delimiter null-as)
:do (when (slot-boundp copy slot-name)
(setf (slot-value copy-for-path-spec slot-name)
(slot-value copy slot-name))))
;; return the new instance!
copy-for-path-spec))
(declaim (inline parse-row)) (declaim (inline parse-row))
(defun parse-row (line &key (delimiter #\Tab) (null-as "\\N")) (defun parse-row (line &key (delimiter #\Tab) (null-as "\\N"))

View File

@ -48,6 +48,24 @@
(unless transforms (unless transforms
(setf (slot-value csv 'transforms) (make-list (length columns)))))) (setf (slot-value csv 'transforms) (make-list (length columns))))))
(defmethod clone-copy-for ((csv copy-csv) path-spec)
"Create a copy of CSV for loading data from PATH-SPEC."
(let ((csv-for-path-spec
(change-class (call-next-method csv path-spec) 'copy-csv)))
(loop :for slot-name :in '(source-type
separator
newline
quote
escape
escape-mode
trim-blanks)
:do (when (slot-boundp csv slot-name)
(setf (slot-value csv-for-path-spec slot-name)
(slot-value csv slot-name))))
;; return the new instance!
csv-for-path-spec))
;;; ;;;
;;; Read a file format in CSV format, and call given function on each line. ;;; Read a file format in CSV format, and call given function on each line.
;;; ;;;

View File

@ -30,6 +30,10 @@
(unless transforms (unless transforms
(setf (slot-value fixed 'transforms) (make-list (length columns)))))) (setf (slot-value fixed 'transforms) (make-list (length columns))))))
(defmethod clone-copy-for ((fixed copy-fixed) path-spec)
"Create a copy of FIXED for loading data from PATH-SPEC."
(change-class (call-next-method fixed path-spec) 'copy-fixed))
(declaim (inline parse-row)) (declaim (inline parse-row))
(defun parse-row (fixed-cols-specs line) (defun parse-row (fixed-cols-specs line)

View File

@ -133,7 +133,8 @@
;;; ;;;
;;; Tools for every connection classes ;;; Tools for every connection classes
;;; ;;;
(defmacro with-connection ((var connection) &body forms) (defmacro with-connection ((var connection &rest args &key &allow-other-keys)
&body forms)
"Connect to DB-CONNECTION and handle any condition when doing so, and when "Connect to DB-CONNECTION and handle any condition when doing so, and when
connected execute FORMS in a protected way so that we always disconnect connected execute FORMS in a protected way so that we always disconnect
at the end." at the end."
@ -147,7 +148,7 @@
#'(lambda (w) #'(lambda (w)
(log-message :warning "~a" w) (log-message :warning "~a" w)
(muffle-warning)))) (muffle-warning))))
(open-connection ,conn)) (apply #'open-connection ,conn (list ,@args)))
(condition (e) (condition (e)
(cond ((typep ,connection 'fd-connection) (cond ((typep ,connection 'fd-connection)
(error 'fd-connection-error (error 'fd-connection-error

View File

@ -3,9 +3,12 @@ load csv
in directory 'data' in directory 'data'
having fields (id, field) having fields (id, field)
into postgres:///pgloader?matching into postgres:///pgloader?matching
with fields optionally enclosed by '"', with fields optionally enclosed by '"',
fields terminated by ',', fields terminated by ',',
truncate truncate,
workers = 8,
concurrency = 1
before load do before load do
$$ drop table if exists matching; $$, $$ drop table if exists matching; $$,