From 01e5c2376390749c2b7041b17b9a974ee8efb6b2 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Fri, 25 Aug 2017 01:57:54 +0200 Subject: [PATCH] Add support for explicit TARGET TABLE clause in load commands. It used to be that you would give the target table name as an option to the PostgreSQL connection string, which is distasteful: load ... into pgsql://user@host/dbname?tablename=foo.bar ... Or even, for backwards compatibility: load ... into pgsql://user@host/dbname?foo.bar ... The new syntax makes provision for a separate clause for the target table name, possibly schema-qualified: load ... into pgsql://user@host/dbname target table foo.bar ... Which is much better, in particular when used together with the target columns clause. Implementing this seemingly quite small feature had an impact on many parsing-related features of pgloader, such as the regression testing facility. So much so that some extra refactoring found its way in here, around the lisp-code-for-loading-from-* functions and their usage in `load-data'. While at it, this patch greatly simplifies the `load-data' function by making good use of &allow-other-keys and :allow-other-keys t. Finally, this patch splits main.lisp into main.lisp and api.lisp, with the latter intended to contain functions for Common Lisp programs wanting to use pgloader as a library. The API itself is still the same as before this patch, though; it just lives in another file for clarity. 
--- pgloader.1 | 19 +- pgloader.1.md | 19 +- pgloader.asd | 7 + src/api.lisp | 403 ++++++++++++++++++++++++++++++++ src/main.lisp | 272 --------------------- src/parsers/command-copy.lisp | 21 +- src/parsers/command-csv.lisp | 31 ++- src/parsers/command-dbf.lisp | 27 ++- src/parsers/command-fixed.lisp | 22 +- src/parsers/command-ixf.lisp | 21 +- src/parsers/command-mssql.lisp | 3 +- src/parsers/command-mysql.lisp | 3 +- src/parsers/command-parser.lisp | 22 +- src/parsers/command-sqlite.lisp | 3 +- src/regress/regress.lisp | 8 +- test/copy.load | 3 +- test/csv-districts.load | 3 +- test/csv-error.load | 3 +- test/csv-escape-mode.load | 3 +- test/csv-filename-pattern.load | 4 +- test/csv-guess.load | 3 +- test/csv-json.load | 3 +- test/csv-missing-col.load | 3 +- test/csv-non-printable.load | 3 +- test/fixed.load | 3 +- test/ixf.load | 3 +- 26 files changed, 572 insertions(+), 343 deletions(-) create mode 100644 src/api.lisp diff --git a/pgloader.1 b/pgloader.1 index 5cf8d04..154bc0e 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -553,8 +553,11 @@ The pgloader commands follow the same global grammar rules\. 
Each of them might .nf LOAD - FROM [ HAVING FIELDS ] - INTO [ TARGET COLUMNS ] + FROM + [ HAVING FIELDS ] + INTO + [ TARGET TABLE [ "" ]\."" ] + [ TARGET COLUMNS ] [ WITH ] @@ -954,7 +957,8 @@ LOAD CSV ( startIpNum, endIpNum, locId ) - INTO postgresql://user@localhost:54393/dbname?geolite\.blocks + INTO postgresql://user@localhost:54393/dbname + TARGET TABLE geolite\.blocks TARGET COLUMNS ( iprange ip4r using (ip\-range startIpNum endIpNum), @@ -1235,7 +1239,8 @@ LOAD FIXED c from 18 for 8, d from 26 for 17 [null if blanks, trim right whitespace] ) - INTO postgresql:///pgloader?fixed + INTO postgresql:///pgloader + TARGET TABLE fixed ( a, b, c time using (time\-with\-no\-separator c), @@ -1473,7 +1478,8 @@ LOAD COPY trackid, track, album, media, genre, composer, milliseconds, bytes, unitprice ) - INTO postgresql:///pgloader?track_full + INTO postgresql:///pgloader + TARGET TABLE track_full WITH truncate @@ -1669,7 +1675,8 @@ This command instructs pgloader to load data from an IBM \fBIXF\fR file\. Here\' LOAD IXF FROM data/nsitra\.test1\.ixf - INTO postgresql:///pgloader?nsitra\.test1 + INTO postgresql:///pgloader + TARGET TABLE nsitra\.test1 WITH truncate, create table, timezone UTC BEFORE LOAD DO diff --git a/pgloader.1.md b/pgloader.1.md index 6bb73dd..d64c35a 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -494,8 +494,11 @@ might support only a subset of the general options and provide specific options. LOAD - FROM [ HAVING FIELDS ] - INTO [ TARGET COLUMNS ] + FROM + [ HAVING FIELDS ] + INTO + [ TARGET TABLE [ "" ]."
" ] + [ TARGET COLUMNS ] [ WITH ] @@ -817,7 +820,8 @@ example: ( startIpNum, endIpNum, locId ) - INTO postgresql://user@localhost:54393/dbname?geolite.blocks + INTO postgresql://user@localhost:54393/dbname + TARGET TABLE geolite.blocks TARGET COLUMNS ( iprange ip4r using (ip-range startIpNum endIpNum), @@ -1050,7 +1054,8 @@ columns arranged in a *fixed size* manner. Here's an example: c from 18 for 8, d from 26 for 17 [null if blanks, trim right whitespace] ) - INTO postgresql:///pgloader?fixed + INTO postgresql:///pgloader + TARGET TABLE fixed ( a, b, c time using (time-with-no-separator c), @@ -1224,7 +1229,8 @@ data as described in the PostgreSQL documentation. Here's an example: trackid, track, album, media, genre, composer, milliseconds, bytes, unitprice ) - INTO postgresql:///pgloader?track_full + INTO postgresql:///pgloader + TARGET TABLE track_full WITH truncate @@ -1384,7 +1390,8 @@ an example: LOAD IXF FROM data/nsitra.test1.ixf - INTO postgresql:///pgloader?nsitra.test1 + INTO postgresql:///pgloader + TARGET TABLE nsitra.test1 WITH truncate, create table, timezone UTC BEFORE LOAD DO diff --git a/pgloader.asd b/pgloader.asd index 9cfaf1a..12e7036 100644 --- a/pgloader.asd +++ b/pgloader.asd @@ -226,11 +226,18 @@ ;; the main entry file, used when building a stand-alone ;; executable image + (:file "api" :depends-on ("params" + "package" + "utils" + "parsers" + "sources")) + (:file "main" :depends-on ("params" "package" "utils" "parsers" "sources" + "api" "regress")))) ;; to produce the website diff --git a/src/api.lisp b/src/api.lisp new file mode 100644 index 0000000..2bfbe48 --- /dev/null +++ b/src/api.lisp @@ -0,0 +1,403 @@ +;;; +;;; The main API, or an attempt at providing pgloader as a lisp usable API +;;; rather than only an end-user program. 
+;;; + +(in-package #:pgloader) + +(define-condition source-definition-error (error) + ((mesg :initarg :mesg :reader source-definition-error-mesg)) + (:report (lambda (err stream) + (format stream "~a" (source-definition-error-mesg err))))) + +(define-condition cli-parsing-error (error) () + (:report (lambda (err stream) + (declare (ignore err)) + (format stream "Could not parse the command line: see above.")))) + +(define-condition load-files-not-found-error (error) + ((filename-list :initarg :filename-list)) + (:report (lambda (err stream) + (format stream + ;; start lines with 3 spaces because of trivial-backtrace + "~{No such file or directory: ~s~^~% ~}" + (slot-value err 'filename-list))))) + +;;; +;;; Main processing functions +;;; +(defun process-command-file (filename-list &key (flush-summary t)) + "Process each FILENAME in FILENAME-LIST as a pgloader command + file (.load)." + (loop :for filename :in filename-list + :for truename := (probe-file filename) + :unless truename :collect filename :into not-found-list + :do (if truename + (run-commands truename + :start-logger nil + :flush-summary flush-summary) + (log-message :error "Can not find file: ~s" filename)) + :finally (when not-found-list + (error 'load-files-not-found-error :filename-list not-found-list)))) + +(defun process-source-and-target (source-string target-string + &optional + type encoding set with field cast + before after) + "Given exactly 2 CLI arguments, process them as source and target URIs. +Parameters here are meant to be already parsed, see parse-cli-optargs." 
+ (let* ((type (handler-case + (parse-cli-type type) + (condition (e) + (log-message :warning + "Could not parse --type ~s: ~a" + type e)))) + (source-uri (handler-case + (if type + (parse-source-string-for-type type source-string) + (parse-source-string source-string)) + (condition (e) + (log-message :warning + "Could not parse source string ~s: ~a" + source-string e)))) + (type (when (and source-string + (typep source-uri 'connection)) + (parse-cli-type (conn-type source-uri)))) + (target-uri (handler-case + (parse-target-string target-string) + (condition (e) + (log-message :error + "Could not parse target string ~s: ~a" + target-string e))))) + + ;; some verbosity about the parsing "magic" + (log-message :info " SOURCE: ~s" source-string) + (log-message :info "SOURCE URI: ~s" source-uri) + (log-message :info " TARGET: ~s" target-string) + (log-message :info "TARGET URI: ~s" target-uri) + + (cond ((and (null source-uri) (null target-uri)) + (process-command-file (list source-string target-string))) + + ((or (null source-string) (null source-uri)) + (log-message :fatal + "Failed to parse ~s as a source URI." source-string) + (log-message :log "You might need to use --type.")) + + ((or (null target-string) (null target-uri)) + (log-message :fatal + "Failed to parse ~s as a PostgreSQL database URI." 
+ target-string))) + + (let* ((nb-errors 0) + (options (handler-case + (parse-cli-options type with) + (condition (e) + (incf nb-errors) + (log-message :error "Could not parse --with ~s:" with) + (log-message :error "~a" e)))) + (fields (handler-case + (parse-cli-fields type field) + (condition (e) + (incf nb-errors) + (log-message :error "Could not parse --fields ~s:" field) + (log-message :error "~a" e))))) + + (destructuring-bind (&key encoding gucs casts before after) + (loop :for (keyword option user-string parse-fn) + :in `((:encoding "--encoding" ,encoding ,#'parse-cli-encoding) + (:gucs "--set" ,set ,#'parse-cli-gucs) + (:casts "--cast" ,cast ,#'parse-cli-casts) + (:before "--before" ,before ,#'parse-sql-file) + (:after "--after" ,after ,#'parse-sql-file)) + :append (list keyword + (handler-case + (funcall parse-fn user-string) + (condition (e) + (incf nb-errors) + (log-message :error "Could not parse ~a ~s: ~a" + option user-string e))))) + + (unless (= 0 nb-errors) + (error 'cli-parsing-error)) + + ;; so, we actually have all the specs for the + ;; job on the command line now. + (when (and source-uri target-uri (= 0 nb-errors)) + (load-data :from source-uri + :into target-uri + :encoding encoding + :options options + :gucs gucs + :fields fields + :casts casts + :before before + :after after + :start-logger nil)))))) + +;;; +;;; Helper function to run a given command +;;; +(defun run-commands (source + &key + (start-logger t) + (flush-summary t) + ((:summary *summary-pathname*) *summary-pathname*) + ((:log-filename *log-filename*) *log-filename*) + ((:log-min-messages *log-min-messages*) *log-min-messages*) + ((:client-min-messages *client-min-messages*) *client-min-messages*)) + "SOURCE can be a function, which is run, a list, which is compiled as CL + code then run, a pathname containing one or more commands that are parsed + then run, or a commands string that is then parsed and each command run." 
+ + (with-monitor (:start-logger start-logger) + (let* ((funcs + (typecase source + (function (list source)) + + (list (list (compile nil source))) + + (pathname (mapcar (lambda (expr) (compile nil expr)) + (parse-commands-from-file source))) + + (t (mapcar (lambda (expr) (compile nil expr)) + (if (probe-file source) + (parse-commands-from-file source) + (parse-commands source))))))) + + (loop :for func :in funcs + :do (funcall func) + :do (when flush-summary + (flush-summary :reset t)))))) + +;;; +;;; Helper functions to actually do things +;;; +(defun process-command-file (filename-list &key (flush-summary t)) + "Process each FILENAME in FILENAME-LIST as a pgloader command + file (.load)." + (loop :for filename :in filename-list + :for truename := (probe-file filename) + :unless truename :collect filename :into not-found-list + :do (if truename + (run-commands truename + :start-logger nil + :flush-summary flush-summary) + (log-message :error "Can not find file: ~s" filename)) + :finally (when not-found-list + (error 'load-files-not-found-error :filename-list not-found-list)))) + +(defun process-source-and-target (source-string target-string + &optional + type encoding set with field cast + before after) + "Given exactly 2 CLI arguments, process them as source and target URIs. +Parameters here are meant to be already parsed, see parse-cli-optargs." 
+ (let* ((type (handler-case + (parse-cli-type type) + (condition (e) + (log-message :warning + "Could not parse --type ~s: ~a" + type e)))) + (source-uri (handler-case + (if type + (parse-source-string-for-type type source-string) + (parse-source-string source-string)) + (condition (e) + (log-message :warning + "Could not parse source string ~s: ~a" + source-string e)))) + (type (when (and source-string + (typep source-uri 'connection)) + (parse-cli-type (conn-type source-uri)))) + (target-uri (handler-case + (parse-target-string target-string) + (condition (e) + (log-message :error + "Could not parse target string ~s: ~a" + target-string e))))) + + ;; some verbosity about the parsing "magic" + (log-message :info " SOURCE: ~s" source-string) + (log-message :info "SOURCE URI: ~s" source-uri) + (log-message :info " TARGET: ~s" target-string) + (log-message :info "TARGET URI: ~s" target-uri) + + (cond ((and (null source-uri) (null target-uri)) + (process-command-file (list source-string target-string))) + + ((or (null source-string) (null source-uri)) + (log-message :fatal + "Failed to parse ~s as a source URI." source-string) + (log-message :log "You might need to use --type.")) + + ((or (null target-string) (null target-uri)) + (log-message :fatal + "Failed to parse ~s as a PostgreSQL database URI." 
+ target-string))) + + (let* ((nb-errors 0) + (options (handler-case + (parse-cli-options type with) + (condition (e) + (incf nb-errors) + (log-message :error "Could not parse --with ~s:" with) + (log-message :error "~a" e)))) + (fields (handler-case + (parse-cli-fields type field) + (condition (e) + (incf nb-errors) + (log-message :error "Could not parse --fields ~s:" field) + (log-message :error "~a" e))))) + + (destructuring-bind (&key encoding gucs casts before after) + (loop :for (keyword option user-string parse-fn) + :in `((:encoding "--encoding" ,encoding ,#'parse-cli-encoding) + (:gucs "--set" ,set ,#'parse-cli-gucs) + (:casts "--cast" ,cast ,#'parse-cli-casts) + (:before "--before" ,before ,#'parse-sql-file) + (:after "--after" ,after ,#'parse-sql-file)) + :append (list keyword + (handler-case + (funcall parse-fn user-string) + (condition (e) + (incf nb-errors) + (log-message :error "Could not parse ~a ~s: ~a" + option user-string e))))) + + (unless (= 0 nb-errors) + (error 'cli-parsing-error)) + + ;; so, we actually have all the specs for the + ;; job on the command line now. + (when (and source-uri target-uri (= 0 nb-errors)) + (load-data :from source-uri + :into target-uri + :encoding encoding + :options options + :gucs gucs + :fields fields + :casts casts + :before before + :after after + :start-logger nil)))))) + + +;;; +;;; Helper function to run a given command +;;; +(defun run-commands (source + &key + (start-logger t) + (flush-summary t) + ((:summary *summary-pathname*) *summary-pathname*) + ((:log-filename *log-filename*) *log-filename*) + ((:log-min-messages *log-min-messages*) *log-min-messages*) + ((:client-min-messages *client-min-messages*) *client-min-messages*)) + "SOURCE can be a function, which is run, a list, which is compiled as CL + code then run, a pathname containing one or more commands that are parsed + then run, or a commands string that is then parsed and each command run." 
+ + (with-monitor (:start-logger start-logger) + (let* ((funcs + (typecase source + (function (list source)) + + (list (list (compile nil source))) + + (pathname (mapcar (lambda (expr) (compile nil expr)) + (parse-commands-from-file source))) + + (t (mapcar (lambda (expr) (compile nil expr)) + (if (probe-file source) + (parse-commands-from-file source) + (parse-commands source))))))) + + (loop :for func :in funcs + :do (funcall func) + :do (when flush-summary + (flush-summary :reset t)))))) + + +;;; +;;; Main API to use from outside of pgloader. +;;; +(defun load-data (&key ((:from source)) ((:into target)) + encoding fields target-table options gucs casts before after + (start-logger t) (flush-summary t)) + "Load data from SOURCE into TARGET." + (declare (type connection source) + (type pgsql-connection target)) + + (when (and (typep source '(or csv-connection + copy-connection + fixed-connection)) + (null target-table) + (null (pgconn-table-name target))) + (error 'source-definition-error + :mesg (format nil + "~a data source require a table name target." 
+ (conn-type source)))) + + (with-monitor (:start-logger start-logger) + (when (and casts (not (member (type-of source) + '(sqlite-connection + mysql-connection + mssql-connection)))) + (log-message :log "Cast rules are ignored for this sources.")) + + ;; now generates the code for the command + (log-message :debug "LOAD DATA FROM ~s" source) + (let* ((target-table (or target-table + (let ((table (pgconn-table-name target))) + (etypecase (pgconn-table-name target) + (string (create-table table)) + (cons (create-table table)) + (table table) + (null nil))))) + (code (lisp-code-for-loading :from source + :into target + :encoding encoding + :fields fields + :target-table target-table + :options options + :gucs gucs + :casts casts + :before before + :after after))) + (run-commands (process-relative-pathnames (uiop:getcwd) code) + :start-logger nil + :flush-summary flush-summary)))) + +(defvar *get-code-for-source* + (list (cons 'copy-connection #'lisp-code-for-loading-from-copy) + (cons 'fixed-connection #'lisp-code-for-loading-from-fixed) + (cons 'csv-connection #'lisp-code-for-loading-from-csv) + (cons 'dbf-connection #'lisp-code-for-loading-from-dbf) + (cons 'ixf-connection #'lisp-code-for-loading-from-ixf) + (cons 'sqlite-connection #'lisp-code-for-loading-from-sqlite) + (cons 'mysql-connection #'lisp-code-for-loading-from-mysql) + (cons 'mssql-connection #'lisp-code-for-loading-from-mssql)) + "Each source type might require a different set of options.") + +(defun lisp-code-for-loading (&key + ((:from source)) ((:into target)) + encoding fields target-table + options gucs casts before after) + (let ((func (cdr (assoc (type-of source) *get-code-for-source*)))) + ;; not all functions support the same set of &key parameters, + ;; they all have &allow-other-keys in their signature tho. 
+ (assert (not (null func))) + (if func + (funcall func + source + target + :target-table target-table + :fields fields + :encoding (or encoding :default) + :gucs gucs + :casts casts + :options options + :before before + :after after + :allow-other-keys t)))) diff --git a/src/main.lisp b/src/main.lisp index 24a7eaf..3cc90c2 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -372,275 +372,3 @@ ;; done. (uiop:quit +os-code-success+))))) - - -;;; -;;; Helper functions to actually do things -;;; -(define-condition load-files-not-found-error (error) - ((filename-list :initarg :filename-list)) - (:report (lambda (err stream) - (format stream - ;; start lines with 3 spaces because of trivial-backtrace - "~{No such file or directory: ~s~^~% ~}" - (slot-value err 'filename-list))))) - -(defun process-command-file (filename-list &key (flush-summary t)) - "Process each FILENAME in FILENAME-LIST as a pgloader command - file (.load)." - (loop :for filename :in filename-list - :for truename := (probe-file filename) - :unless truename :collect filename :into not-found-list - :do (if truename - (run-commands truename - :start-logger nil - :flush-summary flush-summary) - (log-message :error "Can not find file: ~s" filename)) - :finally (when not-found-list - (error 'load-files-not-found-error :filename-list not-found-list)))) - -(define-condition cli-parsing-error (error) () - (:report (lambda (err stream) - (declare (ignore err)) - (format stream "Could not parse the command line: see above.")))) - -(defun process-source-and-target (source-string target-string - &optional - type encoding set with field cast - before after) - "Given exactly 2 CLI arguments, process them as source and target URIs. -Parameters here are meant to be already parsed, see parse-cli-optargs." 
- (let* ((type (handler-case - (parse-cli-type type) - (condition (e) - (log-message :warning - "Could not parse --type ~s: ~a" - type e)))) - (source-uri (handler-case - (if type - (parse-source-string-for-type type source-string) - (parse-source-string source-string)) - (condition (e) - (log-message :warning - "Could not parse source string ~s: ~a" - source-string e)))) - (type (when (and source-string - (typep source-uri 'connection)) - (parse-cli-type (conn-type source-uri)))) - (target-uri (handler-case - (parse-target-string target-string) - (condition (e) - (log-message :error - "Could not parse target string ~s: ~a" - target-string e))))) - - ;; some verbosity about the parsing "magic" - (log-message :info " SOURCE: ~s" source-string) - (log-message :info "SOURCE URI: ~s" source-uri) - (log-message :info " TARGET: ~s" target-string) - (log-message :info "TARGET URI: ~s" target-uri) - - (cond ((and (null source-uri) (null target-uri)) - (process-command-file (list source-string target-string))) - - ((or (null source-string) (null source-uri)) - (log-message :fatal - "Failed to parse ~s as a source URI." source-string) - (log-message :log "You might need to use --type.")) - - ((or (null target-string) (null target-uri)) - (log-message :fatal - "Failed to parse ~s as a PostgreSQL database URI." 
- target-string))) - - (let* ((nb-errors 0) - (options (handler-case - (parse-cli-options type with) - (condition (e) - (incf nb-errors) - (log-message :error "Could not parse --with ~s:" with) - (log-message :error "~a" e)))) - (fields (handler-case - (parse-cli-fields type field) - (condition (e) - (incf nb-errors) - (log-message :error "Could not parse --fields ~s:" field) - (log-message :error "~a" e))))) - - (destructuring-bind (&key encoding gucs casts before after) - (loop :for (keyword option user-string parse-fn) - :in `((:encoding "--encoding" ,encoding ,#'parse-cli-encoding) - (:gucs "--set" ,set ,#'parse-cli-gucs) - (:casts "--cast" ,cast ,#'parse-cli-casts) - (:before "--before" ,before ,#'parse-sql-file) - (:after "--after" ,after ,#'parse-sql-file)) - :append (list keyword - (handler-case - (funcall parse-fn user-string) - (condition (e) - (incf nb-errors) - (log-message :error "Could not parse ~a ~s: ~a" - option user-string e))))) - - (unless (= 0 nb-errors) - (error 'cli-parsing-error)) - - ;; so, we actually have all the specs for the - ;; job on the command line now. - (when (and source-uri target-uri (= 0 nb-errors)) - (load-data :from source-uri - :into target-uri - :encoding encoding - :options options - :gucs gucs - :fields fields - :casts casts - :before before - :after after - :start-logger nil)))))) - - -;;; -;;; Helper function to run a given command -;;; -(defun run-commands (source - &key - (start-logger t) - (flush-summary t) - ((:summary *summary-pathname*) *summary-pathname*) - ((:log-filename *log-filename*) *log-filename*) - ((:log-min-messages *log-min-messages*) *log-min-messages*) - ((:client-min-messages *client-min-messages*) *client-min-messages*)) - "SOURCE can be a function, which is run, a list, which is compiled as CL - code then run, a pathname containing one or more commands that are parsed - then run, or a commands string that is then parsed and each command run." 
- - (with-monitor (:start-logger start-logger) - (let* ((funcs - (typecase source - (function (list source)) - - (list (list (compile nil source))) - - (pathname (mapcar (lambda (expr) (compile nil expr)) - (parse-commands-from-file source))) - - (t (mapcar (lambda (expr) (compile nil expr)) - (if (probe-file source) - (parse-commands-from-file source) - (parse-commands source))))))) - - (loop :for func :in funcs - :do (funcall func) - :do (when flush-summary - (flush-summary :reset t)))))) - - -;;; -;;; Main API to use from outside of pgloader. -;;; -(define-condition source-definition-error (error) - ((mesg :initarg :mesg :reader source-definition-error-mesg)) - (:report (lambda (err stream) - (format stream "~a" (source-definition-error-mesg err))))) - -(defun load-data (&key ((:from source)) ((:into target)) - encoding fields options gucs casts before after - (start-logger t) (flush-summary t)) - "Load data from SOURCE into TARGET." - (declare (type connection source) - (type pgsql-connection target)) - - (when (and (typep source 'csv-connection) - (null (pgconn-table-name target))) - (error 'source-definition-error - :mesg "CSV data source require a table name target.")) - - (when (and (typep source 'fixed-connection) - (null (pgconn-table-name target))) - (error 'source-definition-error - :mesg "Fixed-width data source require a table name target.")) - - (when (and (typep source 'fixed-connection) - (null fields)) - (error 'source-definition-error - :mesg "Fixed-width data source require fields specs.")) - - (with-monitor (:start-logger start-logger) - (when (and casts (not (member (type-of source) - '(sqlite-connection - mysql-connection - mssql-connection)))) - (log-message :log "Cast rules are ignored for this sources.")) - - ;; now generates the code for the command - (log-message :debug "LOAD DATA FROM ~s" source) - (let ((code - (etypecase source - (copy-connection - (lisp-code-for-loading-from-copy source target - :fields fields - :encoding (or 
encoding :default) - :gucs gucs - :options options - :before before - :after after)) - - (fixed-connection - (lisp-code-for-loading-from-fixed source target - :fields fields - :encoding encoding - :gucs gucs - :options options - :before before - :after after)) - - (csv-connection - (lisp-code-for-loading-from-csv source target - :fields fields - :encoding encoding - :gucs gucs - :options options - :before before - :after after)) - - (dbf-connection - (lisp-code-for-loading-from-dbf source target - :gucs gucs - :options options - :before before - :after after)) - - (ixf-connection - (lisp-code-for-loading-from-ixf source target - :gucs gucs - :options options - :before before - :after after)) - - (sqlite-connection - (lisp-code-for-loading-from-sqlite source target - :gucs gucs - :casts casts - :options options - :before before - :after after)) - - (mysql-connection - (lisp-code-for-loading-from-mysql source target - :gucs gucs - :casts casts - :options options - :before before - :after after)) - - (mssql-connection - (lisp-code-for-loading-from-mssql source target - :gucs gucs - :casts casts - :options options - :before before - :after after))))) - (run-commands (process-relative-pathnames (uiop:getcwd) code) - :start-logger nil - :flush-summary flush-summary)))) diff --git a/src/parsers/command-copy.lisp b/src/parsers/command-copy.lisp index c773c63..5f2fef7 100644 --- a/src/parsers/command-copy.lisp +++ b/src/parsers/command-copy.lisp @@ -87,16 +87,25 @@ (defrule load-copy-file-command (and copy-source (? file-encoding) (? copy-source-field-list) target + (? csv-target-table) (? 
csv-target-column-list) load-copy-file-optional-clauses) (:lambda (command) - (destructuring-bind (source encoding fields target columns clauses) command - `(,source ,encoding ,fields ,target ,columns ,@clauses)))) + (destructuring-bind (source encoding fields pguri table-name columns clauses) + command + (list* source + encoding + fields + pguri + (create-table (or table-name (pgconn-table-name pguri))) + columns + clauses)))) (defun lisp-code-for-loading-from-copy (copy-conn pg-db-conn &key (encoding :utf-8) fields + target-table columns gucs before after options &aux @@ -107,7 +116,7 @@ ,@(batch-control-bindings options) ,@(identifier-case-binding options) (source-db (with-stats-collection ("fetch" :section :pre) - (expand (fetch-file ,copy-conn))))) + (expand (fetch-file ,copy-conn))))) (progn ,(sql-code-block pg-db-conn :pre before "before load") @@ -121,8 +130,7 @@ (make-instance 'pgloader.copy:copy-copy :target-db ,pg-db-conn :source source-db - :target (create-table - ',(pgconn-table-name pg-db-conn)) + :target ,target-table :encoding ,encoding :fields ',fields :columns ',columns @@ -149,7 +157,7 @@ (defrule load-copy-file load-copy-file-command (:lambda (command) - (bind (((source encoding fields pg-db-uri columns + (bind (((source encoding fields pg-db-uri table columns &key options gucs before after) command)) (cond (*dry-run* (lisp-code-for-csv-dry-run pg-db-uri)) @@ -157,6 +165,7 @@ (lisp-code-for-loading-from-copy source pg-db-uri :encoding encoding :fields fields + :target-table table :columns columns :gucs gucs :before before diff --git a/src/parsers/command-csv.lisp b/src/parsers/command-csv.lisp index e88fd28..535cac8 100644 --- a/src/parsers/command-csv.lisp +++ b/src/parsers/command-csv.lisp @@ -268,6 +268,12 @@ open-paren csv-target-columns close-paren) (:lambda (source) (bind (((_ _ columns _) source)) columns))) + +(defrule csv-target-table (and kw-target kw-table dsn-table-name) + (:lambda (c-t-t) + ;; dsn-table-name: (:table-name "schema" . 
"table") + (cdr (third c-t-t)))) + ;; ;; The main command parsing ;; @@ -373,11 +379,20 @@ (defrule load-csv-file-command (and csv-source (? file-encoding) (? csv-source-field-list) - target (? csv-target-column-list) + target + (? csv-target-table) + (? csv-target-column-list) load-csv-file-optional-clauses) (:lambda (command) - (destructuring-bind (source encoding fields target columns clauses) command - `(,source ,encoding ,fields ,target ,columns ,@clauses)))) + (destructuring-bind (source encoding fields pguri table-name columns clauses) + command + (list* source + encoding + fields + pguri + (create-table (or table-name (pgconn-table-name pguri))) + columns + clauses)))) (defun lisp-code-for-csv-dry-run (pg-db-conn) `(lambda () @@ -391,8 +406,10 @@ &key (encoding :utf-8) fields + target-table columns gucs before after options + &allow-other-keys &aux (worker-count (getf options :worker-count)) (concurrency (getf options :concurrency))) @@ -401,7 +418,7 @@ ,@(batch-control-bindings options) ,@(identifier-case-binding options) (source-db (with-stats-collection ("fetch" :section :pre) - (expand (fetch-file ,csv-conn))))) + (expand (fetch-file ,csv-conn))))) (progn ,(sql-code-block pg-db-conn :pre before "before load") @@ -415,8 +432,7 @@ (make-instance 'pgloader.csv:copy-csv :target-db ,pg-db-conn :source source-db - :target (create-table - ',(pgconn-table-name pg-db-conn)) + :target ,target-table :encoding ,encoding :fields ',fields :columns ',columns @@ -443,7 +459,7 @@ (defrule load-csv-file load-csv-file-command (:lambda (command) - (bind (((source encoding fields pg-db-uri columns + (bind (((source encoding fields pg-db-uri table columns &key options gucs before after) command)) (cond (*dry-run* (lisp-code-for-csv-dry-run pg-db-uri)) @@ -451,6 +467,7 @@ (lisp-code-for-loading-from-csv source pg-db-uri :encoding encoding :fields fields + :target-table table :columns columns :gucs gucs :before before diff --git a/src/parsers/command-dbf.lisp 
b/src/parsers/command-dbf.lisp index 93a8880..b3f1d0d 100644 --- a/src/parsers/command-dbf.lisp +++ b/src/parsers/command-dbf.lisp @@ -68,11 +68,19 @@ (bind (((_ _ encoding) enc)) encoding) :ascii))) -(defrule load-dbf-command (and dbf-source (? dbf-file-encoding) - target load-dbf-optional-clauses) +(defrule load-dbf-command (and dbf-source + (? dbf-file-encoding) + target + (? csv-target-table) + load-dbf-optional-clauses) (:lambda (command) - (destructuring-bind (source encoding target clauses) command - `(,source ,encoding ,target ,@clauses)))) + (destructuring-bind (source encoding pguri table-name clauses) + command + (list* source + encoding + pguri + (create-table (or table-name (pgconn-table-name pguri))) + clauses)))) (defun lisp-code-for-dbf-dry-run (dbf-db-conn pg-db-conn) `(lambda () @@ -82,14 +90,14 @@ (defun lisp-code-for-loading-from-dbf (dbf-db-conn pg-db-conn &key + target-table (encoding :ascii) - gucs before after options) + gucs before after options + &allow-other-keys) `(lambda () (let* (,@(pgsql-connection-bindings pg-db-conn gucs) ,@(batch-control-bindings options) ,@(identifier-case-binding options) - (table (create-table - ',(pgconn-table-name pg-db-conn))) (source-db (with-stats-collection ("fetch" :section :pre) (expand (fetch-file ,dbf-db-conn)))) (source @@ -97,7 +105,7 @@ :target-db ,pg-db-conn :encoding ,encoding :source-db source-db - :target table))) + :target ,target-table))) ,(sql-code-block pg-db-conn :pre before "before load") @@ -111,12 +119,13 @@ (defrule load-dbf-file load-dbf-command (:lambda (command) - (bind (((source encoding pg-db-uri + (bind (((source encoding pg-db-uri table &key options gucs before after) command)) (cond (*dry-run* (lisp-code-for-dbf-dry-run source pg-db-uri)) (t (lisp-code-for-loading-from-dbf source pg-db-uri + :target-table table :encoding encoding :gucs gucs :before before diff --git a/src/parsers/command-fixed.lisp b/src/parsers/command-fixed.lisp index dfcf246..8739607 100644 --- 
a/src/parsers/command-fixed.lisp +++ b/src/parsers/command-fixed.lisp @@ -95,18 +95,28 @@ (defrule load-fixed-cols-file-command (and fixed-source (? file-encoding) fixed-source-field-list target + (? csv-target-table) (? csv-target-column-list) load-fixed-cols-file-optional-clauses) (:lambda (command) - (destructuring-bind (source encoding fields target columns clauses) command - `(,source ,encoding ,fields ,target ,columns ,@clauses)))) + (destructuring-bind (source encoding fields pguri table-name columns clauses) + command + (list* source + encoding + fields + pguri + (create-table (or table-name (pgconn-table-name pguri))) + columns + clauses)))) (defun lisp-code-for-loading-from-fixed (fixed-conn pg-db-conn &key (encoding :utf-8) fields + target-table columns gucs before after options + &allow-other-keys &aux (worker-count (getf options :worker-count)) (concurrency (getf options :concurrency))) @@ -115,7 +125,7 @@ ,@(batch-control-bindings options) ,@(identifier-case-binding options) (source-db (with-stats-collection ("fetch" :section :pre) - (expand (fetch-file ,fixed-conn))))) + (expand (fetch-file ,fixed-conn))))) (progn ,(sql-code-block pg-db-conn :pre before "before load") @@ -129,8 +139,7 @@ (make-instance 'pgloader.fixed:copy-fixed :target-db ,pg-db-conn :source source-db - :target (create-table - ',(pgconn-table-name pg-db-conn)) + :target ,target-table :encoding ,encoding :fields ',fields :columns ',columns @@ -151,7 +160,7 @@ (defrule load-fixed-cols-file load-fixed-cols-file-command (:lambda (command) - (bind (((source encoding fields pg-db-uri columns + (bind (((source encoding fields pg-db-uri table columns &key options gucs before after) command)) (cond (*dry-run* (lisp-code-for-csv-dry-run pg-db-uri)) @@ -159,6 +168,7 @@ (lisp-code-for-loading-from-fixed source pg-db-uri :encoding encoding :fields fields + :target-table table :columns columns :gucs gucs :before before diff --git a/src/parsers/command-ixf.lisp b/src/parsers/command-ixf.lisp index 
2c12eab..9c608f5 100644 --- a/src/parsers/command-ixf.lisp +++ b/src/parsers/command-ixf.lisp @@ -62,27 +62,33 @@ (:lambda (clauses-list) (alexandria:alist-plist clauses-list))) -(defrule load-ixf-command (and ixf-source target load-ixf-optional-clauses) +(defrule load-ixf-command (and ixf-source + target + (? csv-target-table) + load-ixf-optional-clauses) (:lambda (command) - (destructuring-bind (source target clauses) command - `(,source ,target ,@clauses)))) + (destructuring-bind (source pguri table-name clauses) command + (list* source + pguri + (create-table (or table-name (pgconn-table-name pguri))) + clauses)))) (defun lisp-code-for-loading-from-ixf (ixf-db-conn pg-db-conn &key - gucs before after options) + target-table gucs before after options + &allow-other-keys) `(lambda () (let* (,@(pgsql-connection-bindings pg-db-conn gucs) ,@(batch-control-bindings options) ,@(identifier-case-binding options) (timezone (getf ',options :timezone)) - (table-name (create-table ',(pgconn-table-name pg-db-conn))) (source-db (with-stats-collection ("fetch" :section :pre) (expand (fetch-file ,ixf-db-conn)))) (source (make-instance 'pgloader.ixf:copy-ixf :target-db ,pg-db-conn :source-db source-db - :target table-name + :target ,target-table :timezone timezone))) ,(sql-code-block pg-db-conn :pre before "before load") @@ -98,12 +104,13 @@ (defrule load-ixf-file load-ixf-command (:lambda (command) - (bind (((source pg-db-uri + (bind (((source pg-db-uri table &key options gucs before after) command)) (cond (*dry-run* (lisp-code-for-csv-dry-run pg-db-uri)) (t (lisp-code-for-loading-from-ixf source pg-db-uri + :target-table table :gucs gucs :before before :after after diff --git a/src/parsers/command-mssql.lisp b/src/parsers/command-mssql.lisp index fef7bb0..fa153b0 100644 --- a/src/parsers/command-mssql.lisp +++ b/src/parsers/command-mssql.lisp @@ -140,7 +140,8 @@ gucs mssql-gucs casts before after options alter-schema alter-table - including excluding) + including excluding + 
&allow-other-keys) `(lambda () ;; now is the time to load the CFFI lib we need (freetds) (let (#+sbcl(sb-ext:*muffled-warnings* 'style-warning)) diff --git a/src/parsers/command-mysql.lisp b/src/parsers/command-mysql.lisp index aaa1691..240be3a 100644 --- a/src/parsers/command-mysql.lisp +++ b/src/parsers/command-mysql.lisp @@ -147,7 +147,8 @@ alter-table alter-schema ((:including incl)) ((:excluding excl)) - ((:decoding decoding-as))) + ((:decoding decoding-as)) + &allow-other-keys) `(lambda () (let* ((*default-cast-rules* ',*mysql-default-cast-rules*) (*cast-rules* ',casts) diff --git a/src/parsers/command-parser.lisp b/src/parsers/command-parser.lisp index ac65b3c..86ca99a 100644 --- a/src/parsers/command-parser.lisp +++ b/src/parsers/command-parser.lisp @@ -287,34 +287,42 @@ ;;; (defrule pg-db-uri-from-command (or pg-db-uri-from-files pg-db-uri-from-source-target + pg-db-uri-from-source-table-target pg-db-uri-from-source-and-encoding)) (defrule pg-db-uri-from-files (or load-csv-file-command load-copy-file-command load-fixed-cols-file-command) (:lambda (command) - (destructuring-bind (source encoding fields pg-db-uri columns + (destructuring-bind (source encoding fields pg-db-uri table columns &key gucs &allow-other-keys) command (declare (ignore source encoding fields columns)) - (list pg-db-uri gucs)))) + (list pg-db-uri table gucs)))) -(defrule pg-db-uri-from-source-target (or load-ixf-command - load-sqlite-command +(defrule pg-db-uri-from-source-target (or load-sqlite-command load-mysql-command load-mssql-command) (:lambda (command) (destructuring-bind (source pg-db-uri &key gucs &allow-other-keys) command (declare (ignore source)) - (list pg-db-uri gucs)))) + (list pg-db-uri nil gucs)))) + +(defrule pg-db-uri-from-source-table-target (or load-ixf-command) + (:lambda (command) + (destructuring-bind (source pg-db-uri table &key gucs &allow-other-keys) + command + (declare (ignore source)) + (list pg-db-uri table gucs)))) (defrule 
pg-db-uri-from-source-and-encoding (or load-dbf-command) (:lambda (command) - (destructuring-bind (source encoding pg-db-uri &key gucs &allow-other-keys) + (destructuring-bind (source encoding pg-db-uri table + &key gucs &allow-other-keys) command (declare (ignore source encoding)) - (list pg-db-uri gucs)))) + (list pg-db-uri table gucs)))) (defun parse-target-pg-db-uri (command-file) "Partially parse COMMAND-FILE and return its target connection string." diff --git a/src/parsers/command-sqlite.lisp b/src/parsers/command-sqlite.lisp index 0ee78a3..f484e57 100644 --- a/src/parsers/command-sqlite.lisp +++ b/src/parsers/command-sqlite.lisp @@ -93,7 +93,8 @@ load database gucs casts before after options alter-table alter-schema ((:including incl)) - ((:excluding excl))) + ((:excluding excl)) + &allow-other-keys) `(lambda () (let* ((*default-cast-rules* ',*sqlite-default-cast-rules*) (*cast-rules* ',casts) diff --git a/src/regress/regress.lisp b/src/regress/regress.lisp index c5e4c35..06dc696 100644 --- a/src/regress/regress.lisp +++ b/src/regress/regress.lisp @@ -27,10 +27,9 @@ (expected-data-file (make-pathname :defaults load-file :type "out" :directory expected-subdir)) - ((target-conn gucs) (parse-target-pg-db-uri load-file)) + ((target-conn target-table gucs) (parse-target-pg-db-uri load-file)) (*pg-settings* (pgloader.pgsql:sanitize-user-gucs gucs)) (*pgsql-reserved-keywords* (list-reserved-keywords target-conn)) - (target-table (create-table (pgconn-table-name target-conn))) (expected-data-source (parse-source-string-for-type @@ -47,7 +46,9 @@ ;; src/parsers/command-db-uri.lisp ;; (cons "expected" (table-name target-table))) - e-d-t))) + e-d-t)) + (expected-target-table + (create-table (cons "expected" (table-name target-table))))) (log-message :log "Comparing loaded data against ~s" expected-data-file) @@ -67,6 +68,7 @@ ;; load expected data (load-data :from expected-data-source :into expected-data-target + :target-table expected-target-table :options 
'(:truncate t) :start-logger nil :flush-summary t) diff --git a/test/copy.load b/test/copy.load index 1c81cb7..aea6f21 100644 --- a/test/copy.load +++ b/test/copy.load @@ -4,7 +4,8 @@ LOAD COPY trackid, track, album, media, genre, composer, milliseconds, bytes, unitprice ) - INTO postgresql:///pgloader?track_full + INTO postgresql:///pgloader + TARGET TABLE track_full WITH truncate, drop indexes diff --git a/test/csv-districts.load b/test/csv-districts.load index 2bde239..8eab0fa 100644 --- a/test/csv-districts.load +++ b/test/csv-districts.load @@ -21,7 +21,8 @@ LOAD CSV intptlong -- Longitude (decimal degrees) ) - INTO postgresql:///pgloader?districts + INTO postgresql:///pgloader + TARGET TABLE districts TARGET COLUMNS ( usps, geoid, aland, awater, aland_sqmi, awater_sqmi, diff --git a/test/csv-error.load b/test/csv-error.load index 5b3144f..83bab1c 100644 --- a/test/csv-error.load +++ b/test/csv-error.load @@ -1,6 +1,7 @@ LOAD CSV FROM INLINE with encoding 'ascii' - INTO postgresql:///pgloader?jordane + INTO postgresql:///pgloader + TARGET TABLE jordane WITH truncate, fields terminated by '|', diff --git a/test/csv-escape-mode.load b/test/csv-escape-mode.load index adfc34e..759467a 100644 --- a/test/csv-escape-mode.load +++ b/test/csv-escape-mode.load @@ -20,7 +20,8 @@ LOAD CSV document_height, document_width, localstorage_size, sessionstorage_size, num_iframes, num_scripts, doctype, meta_viewport) - INTO postgresql:///pgloader?csv_escape_mode + INTO postgresql:///pgloader + TARGET TABLE csv_escape_mode ( id bigint using (identity pageid), doctype diff --git a/test/csv-filename-pattern.load b/test/csv-filename-pattern.load index 1a15640..c83319d 100644 --- a/test/csv-filename-pattern.load +++ b/test/csv-filename-pattern.load @@ -2,7 +2,9 @@ load csv from all filenames matching ~ in directory 'data' having fields (id, field) - into postgres:///pgloader?matching + + into postgres:///pgloader + target table matching with fields optionally enclosed by '"', fields 
terminated by ',', diff --git a/test/csv-guess.load b/test/csv-guess.load index f7c7e9c..98462f7 100644 --- a/test/csv-guess.load +++ b/test/csv-guess.load @@ -1,6 +1,7 @@ LOAD CSV FROM data/track.csv - INTO postgresql:///pgloader?csv.track + INTO postgresql:///pgloader + TARGET TABLE csv.track WITH truncate diff --git a/test/csv-json.load b/test/csv-json.load index 9680418..6298f1f 100644 --- a/test/csv-json.load +++ b/test/csv-json.load @@ -1,6 +1,7 @@ LOAD CSV FROM INLINE - INTO postgresql:///pgloader?json + INTO postgresql:///pgloader + TARGET TABLE json WITH truncate, fields not enclosed, diff --git a/test/csv-missing-col.load b/test/csv-missing-col.load index 9390cc2..b2db7dc 100644 --- a/test/csv-missing-col.load +++ b/test/csv-missing-col.load @@ -1,6 +1,7 @@ LOAD CSV FROM inline (a, b, c, d, e, f, g) - INTO postgresql:///pgloader?missingcol (a, b, c, d, e, f, g) + INTO postgresql:///pgloader + TARGET TABLE missingcol (a, b, c, d, e, f, g) WITH truncate, fields optionally enclosed by '"', diff --git a/test/csv-non-printable.load b/test/csv-non-printable.load index b87f921..d1ea375 100644 --- a/test/csv-non-printable.load +++ b/test/csv-non-printable.load @@ -5,7 +5,8 @@ LOAD CSV FROM inline with encoding 'LATIN1' HAVING FIELDS ("Some-Field", c2, c3) - INTO postgresql:///pgloader?tab_csv + INTO postgresql:///pgloader + target table tab_csv ( c1 text using "Some-Field", c2, c3 ) diff --git a/test/fixed.load b/test/fixed.load index 9ee13b2..77fd7a3 100644 --- a/test/fixed.load +++ b/test/fixed.load @@ -19,7 +19,8 @@ LOAD FIXED c from 18 for 8, d from 26 for 17 [null if blanks, trim right whitespace] ) - INTO postgresql:///pgloader?fixed + INTO postgresql:///pgloader + TARGET TABLE fixed ( a, b, c time using (time-with-no-separator c), diff --git a/test/ixf.load b/test/ixf.load index b1e12b1..ad6365a 100644 --- a/test/ixf.load +++ b/test/ixf.load @@ -1,6 +1,7 @@ LOAD IXF FROM data/nsitra.test1.ixf - INTO postgresql:///pgloader?nsitra.test1 + INTO 
postgresql:///pgloader + TARGET TABLE nsitra.test1 WITH on error stop, truncate, create table, timezone UTC