diff --git a/pgloader.1 b/pgloader.1 index d8e4ce5..96391ec 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -1,7 +1,7 @@ .\" generated with Ronn/v0.7.3 .\" http://github.com/rtomayko/ronn/tree/0.7.3 . -.TH "PGLOADER" "1" "July 2017" "ff" "" +.TH "PGLOADER" "1" "August 2017" "ff" "" . .SH "NAME" \fBpgloader\fR \- PostgreSQL data loader @@ -571,6 +571,82 @@ LOAD .P The main clauses are the \fBLOAD\fR, \fBFROM\fR, \fBINTO\fR and \fBWITH\fR clauses that each command implements\. Some command then implement the \fBSET\fR command, or some specific clauses such as the \fBCAST\fR clause\. . +.SH "TEMPLATING WITH MUSTACHE" +pgloader implements the https://mustache\.github\.io/ templating system so that you may have dynamic parts of your commands\. See the documentation for this template system online\. +. +.P +A specific feature of pgloader is the ability to fetch a variable from the OS environment of the pgloader process, making it possible to run pgloader as in the following example: +. +.IP "" 4 +. +.nf + +$ DBPATH=sqlite/sqlite\.db pgloader \./test/sqlite\-env\.load +. +.fi +. +.IP "" 0 +. +.P +or in several steps: +. +.IP "" 4 +. +.nf + +$ export DBPATH=sqlite/sqlite\.db +$ pgloader \./test/sqlite\-env\.load +. +.fi +. +.IP "" 0 +. +.P +The variable can then be used in a typical mustache fashion: +. +.IP "" 4 +. +.nf + +load database + from \'{{DBPATH}}\' + into postgresql:///pgloader; +. +.fi +. +.IP "" 0 +. +.P +It\'s also possible to prepare an INI file such as the following: +. +.IP "" 4 +. +.nf + +[pgloader] + +DBPATH = sqlite/sqlite\.db +. +.fi +. +.IP "" 0 +. +.P +And run the following command, feeding the INI values as a \fIcontext\fR for the pgloader templating system: +. +.IP "" 4 +. +.nf + +$ pgloader \-\-context \./test/sqlite\.ini \./test/sqlite\-ini\.load +. +.fi +. +.IP "" 0 +. +.P +The mustache templates implementation with OS environment support replaces the former \fBGETENV\fR implementation, which didn\'t work anyway\. +. 
.SH "COMMON CLAUSES" Some clauses are common to all commands: . @@ -580,9 +656,6 @@ Some clauses are common to all commands: .IP The \fIFROM\fR clause specifies where to read the data from, and each command introduces its own variant of sources\. For instance, the \fICSV\fR source supports \fBinline\fR, \fBstdin\fR, a filename, a quoted filename, and a \fIFILENAME MATCHING\fR clause (see above); whereas the \fIMySQL\fR source only supports a MySQL database URI specification\. . -.IP -In all cases, the \fIFROM\fR clause is able to read its value from an environment variable when using the form \fBGETENV \'varname\'\fR\. -. .IP "\(bu" 4 \fIINTO\fR . @@ -590,9 +663,6 @@ In all cases, the \fIFROM\fR clause is able to read its value from an environmen The PostgreSQL connection URI must contains the name of the target table where to load the data into\. That table must have already been created in PostgreSQL, and the name might be schema qualified\. . .IP -The \fIINTO\fR target database connection URI can be parsed from the value of an environment variable when using the form \fBGETENV \'varname\'\fR\. -. -.IP Then \fIINTO\fR option also supports an optional comma separated list of target columns, which are either the name of an input \fIfield\fR or the white space separated list of the target column name, its PostgreSQL data type and a \fIUSING\fR expression\. . .IP diff --git a/pgloader.1.md b/pgloader.1.md index 2bcd823..715bedb 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -509,6 +509,43 @@ The main clauses are the `LOAD`, `FROM`, `INTO` and `WITH` clauses that each command implements. Some command then implement the `SET` command, or some specific clauses such as the `CAST` clause. +## TEMPLATING WITH MUSTACHE + +pgloader implements the https://mustache.github.io/ templating system so +that you may have dynamic parts of your commands. See the documentation for +this template system online. 
+ +A specific feature of pgloader is the ability to fetch a variable from the +OS environment of the pgloader process, making it possible to run pgloader +as in the following example: + + $ DBPATH=sqlite/sqlite.db pgloader ./test/sqlite-env.load + +or in several steps: + + $ export DBPATH=sqlite/sqlite.db + $ pgloader ./test/sqlite-env.load + +The variable can then be used in a typical mustache fashion: + + load database + from '{{DBPATH}}' + into postgresql:///pgloader; + +It's also possible to prepare an INI file such as the following: + + [pgloader] + + DBPATH = sqlite/sqlite.db + +And run the following command, feeding the INI values as a *context* for +the pgloader templating system: + + $ pgloader --context ./test/sqlite.ini ./test/sqlite-ini.load + +The mustache templates implementation with OS environment support replaces +the former `GETENV` implementation, which didn't work anyway. + ## COMMON CLAUSES Some clauses are common to all commands: @@ -521,18 +558,12 @@ Some clauses are common to all commands: *FILENAME MATCHING* clause (see above); whereas the *MySQL* source only supports a MySQL database URI specification. - In all cases, the *FROM* clause is able to read its value from an - environment variable when using the form `GETENV 'varname'`. - - *INTO* The PostgreSQL connection URI must contains the name of the target table where to load the data into. That table must have already been created in PostgreSQL, and the name might be schema qualified. - The *INTO* target database connection URI can be parsed from the value - of an environment variable when using the form `GETENV 'varname'`. 
- Then *INTO* option also supports an optional comma separated list of target columns, which are either the name of an input *field* or the white space separated list of the target column name, its PostgreSQL data diff --git a/pgloader.asd b/pgloader.asd index 2b57366..110dce9 100644 --- a/pgloader.asd +++ b/pgloader.asd @@ -35,6 +35,7 @@ #:uuid ; Transforming MS SQL unique identifiers #:quri ; decode URI parameters #:cl-ppcre ; Perl Compatible Regular Expressions + #:cl-mustache ; Logic-less templates ) :components ((:module "src" @@ -108,6 +109,7 @@ :serial t :components ((:file "parse-ini") + (:file "template") (:file "command-utils") (:file "command-keywords") (:file "command-regexp") diff --git a/src/main.lisp b/src/main.lisp index 55b6aa9..24a7eaf 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -51,6 +51,8 @@ ("on-error-stop" :type boolean :documentation "Refrain from handling errors properly.") + (("context" #\C) :type string :documentation "Command Context Variables") + (("with") :type string :list t :optional t :documentation "Load options") @@ -190,7 +192,7 @@ (destructuring-bind (&key help version quiet verbose debug logfile list-encodings upgrade-config - dry-run on-error-stop + dry-run on-error-stop context ((:load-lisp-file load)) client-min-messages log-min-messages summary root-dir self-upgrade @@ -227,6 +229,13 @@ ;; Set parameters that come from the environement (init-params-from-environment) + ;; Read the --context INI file, if given; only real errors abort here + (handler-case + (initialize-context context) + (error (e) + (format t "Couldn't read ini file ~s: ~a~%" context e) + (usage argv))) + ;; Then process options (when debug #+sbcl diff --git a/src/package.lisp b/src/package.lisp index 782fd0b..3c8b6f5 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -739,6 +739,7 @@ (:import-from #:pgloader.ixf #:ixf-connection) (:export #:parse-commands #:parse-commands-from-file + #:initialize-context ;; tools to enable complete cli parsing in main.lisp 
#:process-relative-pathnames diff --git a/src/params.lisp b/src/params.lisp index bb2e095..95fc688 100644 --- a/src/params.lisp +++ b/src/params.lisp @@ -30,6 +30,7 @@ #:*default-tmpdir* #:init-params-from-environment #:getenv-default + #:*context* #:+os-code-success+ #:+os-code-error+ @@ -170,6 +171,14 @@ (fad:pathname-as-directory (getenv-default "TMPDIR" *default-tmpdir*)))) +;;; +;;; Run time context to fill-in variable parts of the commands. +;;; +(defvar *context* nil + "Alist of (name . value) pairs read from the INI file given as the --context + command line argument; names missing from it are looked up in the OS + environment when the commands are rendered as Mustache templates.") + ;;; ;;; Some command line constants for OS errors codes ;;; diff --git a/src/parsers/command-copy.lisp b/src/parsers/command-copy.lisp index 6a0e161..c773c63 100644 --- a/src/parsers/command-copy.lisp +++ b/src/parsers/command-copy.lisp @@ -73,17 +73,7 @@ (:regex (make-instance 'copy-connection :spec src)) (:http (make-instance 'copy-connection :uri (first specs)))))))) -(defrule get-copy-file-source-from-environment-variable (and kw-getenv name) - (:lambda (p-e-v) - (bind (((_ varname) p-e-v) - (connstring (getenv-default varname))) - (unless connstring - (error "Environment variable ~s is unset." 
varname)) - (parse 'copy-file-source connstring)))) - -(defrule copy-source (and kw-load kw-copy kw-from - (or get-copy-file-source-from-environment-variable - copy-file-source)) +(defrule copy-source (and kw-load kw-copy kw-from copy-file-source) (:lambda (src) (bind (((_ _ _ source) src)) source))) diff --git a/src/parsers/command-csv.lisp b/src/parsers/command-csv.lisp index 2d3de1b..e88fd28 100644 --- a/src/parsers/command-csv.lisp +++ b/src/parsers/command-csv.lisp @@ -350,17 +350,7 @@ (:regex (make-instance 'csv-connection :spec src)) (:http (make-instance 'csv-connection :uri (first specs)))))))) -(defrule get-csv-file-source-from-environment-variable (and kw-getenv name) - (:lambda (p-e-v) - (bind (((_ varname) p-e-v) - (connstring (getenv-default varname))) - (unless connstring - (error "Environment variable ~s is unset." varname)) - (parse 'csv-file-source connstring)))) - -(defrule csv-source (and kw-load kw-csv kw-from - (or get-csv-file-source-from-environment-variable - csv-file-source)) +(defrule csv-source (and kw-load kw-csv kw-from csv-file-source) (:lambda (src) (bind (((_ _ _ source) src)) source))) diff --git a/src/parsers/command-db-uri.lisp b/src/parsers/command-db-uri.lisp index b3dc513..fc8a399 100644 --- a/src/parsers/command-db-uri.lisp +++ b/src/parsers/command-db-uri.lisp @@ -214,16 +214,7 @@ :use-ssl use-ssl :table-name table-name)))) -(defrule get-pgsql-uri-from-environment-variable (and kw-getenv name) - (:lambda (p-e-v) - (bind (((_ varname) p-e-v)) - (let ((connstring (getenv-default varname))) - (unless connstring - (error "Environment variable ~s is unset." 
varname)) - (parse 'pgsql-uri connstring))))) - -(defrule target (and kw-into (or pgsql-uri - get-pgsql-uri-from-environment-variable)) +(defrule target (and kw-into pgsql-uri) (:destructure (into target) (declare (ignore into)) target)) diff --git a/src/parsers/command-fixed.lisp b/src/parsers/command-fixed.lisp index d2af942..dfcf246 100644 --- a/src/parsers/command-fixed.lisp +++ b/src/parsers/command-fixed.lisp @@ -81,17 +81,7 @@ (:regex (make-instance 'fixed-connection :spec src)) (:http (make-instance 'fixed-connection :uri (first specs)))))))) -(defrule get-fixed-file-source-from-environment-variable (and kw-getenv name) - (:lambda (p-e-v) - (bind (((_ varname) p-e-v) - (connstring (getenv-default varname))) - (unless connstring - (error "Environment variable ~s is unset." varname)) - (parse 'fixed-file-source connstring)))) - -(defrule fixed-source (and kw-load kw-fixed kw-from - (or get-fixed-file-source-from-environment-variable - fixed-file-source)) +(defrule fixed-source (and kw-load kw-fixed kw-from fixed-file-source) (:lambda (src) (bind (((_ _ _ source) src)) source))) diff --git a/src/parsers/command-keywords.lisp b/src/parsers/command-keywords.lisp index fb0576a..71c4f19 100644 --- a/src/parsers/command-keywords.lisp +++ b/src/parsers/command-keywords.lisp @@ -47,7 +47,6 @@ (def-keyword-rule "default") (def-keyword-rule "typemod") (def-keyword-rule "using") - (def-keyword-rule "getenv") (def-keyword-rule "on") (def-keyword-rule "error") (def-keyword-rule "stop") diff --git a/src/parsers/command-mssql.lisp b/src/parsers/command-mssql.lisp index 2b2dabf..fef7bb0 100644 --- a/src/parsers/command-mssql.lisp +++ b/src/parsers/command-mssql.lisp @@ -114,17 +114,7 @@ (getenv-default "TDSPORT" "1433"))) :name dbname)))) -(defrule get-mssql-uri-from-environment-variable (and kw-getenv name) - (:lambda (p-e-v) - (bind (((_ varname) p-e-v)) - (let ((connstring (getenv-default varname))) - (unless connstring - (error "Environment variable ~s is unset." 
varname)) - (parse 'mssql-uri connstring))))) - -(defrule mssql-source (and kw-load kw-database kw-from - (or mssql-uri - get-mssql-uri-from-environment-variable)) +(defrule mssql-source (and kw-load kw-database kw-from mssql-uri) (:lambda (source) (bind (((_ _ _ uri) source)) uri))) (defrule load-mssql-command (and mssql-source target diff --git a/src/parsers/command-mysql.lisp b/src/parsers/command-mysql.lisp index 12baa9b..2119ca2 100644 --- a/src/parsers/command-mysql.lisp +++ b/src/parsers/command-mysql.lisp @@ -124,17 +124,7 @@ (getenv-default "MYSQL_TCP_PORT" "3306"))) :name dbname)))) -(defrule get-mysql-uri-from-environment-variable (and kw-getenv name) - (:lambda (p-e-v) - (bind (((_ varname) p-e-v)) - (let ((connstring (getenv-default varname))) - (unless connstring - (error "Environment variable ~s is unset." varname)) - (parse 'mysql-uri connstring))))) - -(defrule mysql-source (and kw-load kw-database kw-from - (or mysql-uri - get-mysql-uri-from-environment-variable)) +(defrule mysql-source (and kw-load kw-database kw-from mysql-uri) (:lambda (source) (bind (((_ _ _ uri) source)) uri))) (defrule load-mysql-command (and mysql-source target diff --git a/src/parsers/command-parser.lisp b/src/parsers/command-parser.lisp index ba870d1..ac65b3c 100644 --- a/src/parsers/command-parser.lisp +++ b/src/parsers/command-parser.lisp @@ -28,9 +28,16 @@ (defrule commands (+ command)) -(defun parse-commands (commands) +(defun parse-commands (commands-template &key (start 0) end junk-allowed) "Parse a command and return a LAMBDA form that takes no parameter." - (parse 'commands commands)) + ;; Render the whole template first: START and END are offsets into the + ;; rendered text (e.g. a position returned by a :junk-allowed parse). + (let ((commands (apply-template commands-template))) + (unless junk-allowed + (log-message :info "Parsed command:~%~a~%" (subseq commands start end))) + (parse 'commands commands + :start start :end end + :junk-allowed junk-allowed))) (defun inject-inline-data-position (command position) "We have '(:inline nil) somewhere in command, have '(:inline position) instead." 
@@ -100,14 +107,14 @@ (*data-expected-inline* nil) (content (read-file-into-string filename))) (multiple-value-bind (commands end-commands-position) - (parse 'commands content :junk-allowed t) + (parse-commands content :junk-allowed t) ;; INLINE is only allowed where we have a single command in the file (if *data-expected-inline* (progn (when (= 0 end-commands-position) ;; didn't find any command, leave error reporting to esrap - (parse 'commands content)) + (parse-commands content)) (when (and *data-expected-inline* (null end-commands-position)) @@ -122,13 +129,16 @@ ;; now we should have a single command and inline data after that ;; replace the (:inline nil) found in the first (and only) command ;; with a (:inline position) instead - (list - (inject-inline-data-position - (first commands) (cons filename end-commands-position)))) + (let ((command + (parse-commands content :end end-commands-position))) + (list + (inject-inline-data-position (first command) + (cons filename + end-commands-position))))) ;; There was no INLINE magic found in the file, reparse it so that ;; normal error processing happen - (parse 'commands content)))))) + (parse-commands content)))))) ;;; diff --git a/src/parsers/command-source.lisp b/src/parsers/command-source.lisp index 16ef929..77c37c4 100644 --- a/src/parsers/command-source.lisp +++ b/src/parsers/command-source.lisp @@ -39,17 +39,7 @@ (defrule maybe-quoted-filename-or-http-uri (or http-uri maybe-quoted-filename)) -(defrule get-filename-or-http-uri-from-environment-variable (and kw-getenv name) - (:lambda (p-e-v) - (destructuring-bind (g varname) p-e-v - (declare (ignore g)) - (let ((connstring (getenv-default varname))) - (unless connstring - (error "Environment variable ~s is unset." 
varname)) - (parse 'maybe-quoted-filename-or-http-uri connstring))))) - -(defrule filename-or-http-uri (or get-filename-or-http-uri-from-environment-variable - maybe-quoted-filename-or-http-uri)) +(defrule filename-or-http-uri maybe-quoted-filename-or-http-uri) (defrule source-uri (or stdin http-uri diff --git a/src/parsers/command-sqlite.lisp b/src/parsers/command-sqlite.lisp index 6a2d9d1..0ee78a3 100644 --- a/src/parsers/command-sqlite.lisp +++ b/src/parsers/command-sqlite.lisp @@ -60,17 +60,7 @@ load database (:http (make-instance 'sqlite-connection :uri url)) (:filename (make-instance 'sqlite-connection :path url)))))) -(defrule get-sqlite-uri-from-environment-variable (and kw-getenv name) - (:lambda (p-e-v) - (bind (((_ varname) p-e-v) - (connstring (getenv-default varname))) - (unless connstring - (error "Environment variable ~s is unset." varname)) - (parse 'sqlite-uri connstring)))) - -(defrule sqlite-source (and kw-load kw-database kw-from - (or get-sqlite-uri-from-environment-variable - sqlite-uri)) +(defrule sqlite-source (and kw-load kw-database kw-from sqlite-uri) (:lambda (source) (bind (((_ _ _ uri) source)) uri))) diff --git a/src/parsers/template.lisp b/src/parsers/template.lisp new file mode 100644 index 0000000..674fcad --- /dev/null +++ b/src/parsers/template.lisp @@ -0,0 +1,54 @@ +;;; +;;; Allow the pgloader load command to be a Mustache Template. +;;; +;;; Variables are to be found either in the OS environment for the process, +;;; or in the .ini file given as a --context command line argument. +;;; + +(in-package #:pgloader.parser) + +(defun apply-template (string) + (mustache:render* string *context*)) + +(defun initialize-context (filename) + "Set *CONTEXT* from the INI file FILENAME, when given (might be nil). The + environment is not read here: it is consulted when a template is rendered." 
+ + (when filename + (setf *context* (read-ini-file filename)))) + +(defun read-ini-file (filename) + (let ((ini (ini:make-config))) + (ini:read-files ini (list filename)) + + (loop :for section :in (ini:sections ini) + :append (loop :for option :in (ini:options ini section) + :for key := (string-upcase option) + :for val := (ini:get-option ini section option) + :collect (cons key val))))) + + +;;; +;;; cl-mustache doesn't read variables from the environment, and we want to. +;;; cl-mustache uses CLOS for finding values in a context from a key, so we +;;; can derive that. +;;; +(defmethod mustache::context-get :around ((key string) (context hash-table)) + (multiple-value-bind (data find) + (call-next-method) + (if find + (values data find) + (context-get-from-environment key)))) + +(defmethod mustache::context-get :around ((key string) (context null)) + (multiple-value-bind (data find) + (call-next-method) + (if find + (values data find) + (context-get-from-environment key)))) + +(defun context-get-from-environment (key) + (let ((val (uiop:getenv key))) + (if val + (values val t) + (values)))) diff --git a/test/sakila.load b/test/sakila.load index 9c87ed6..e6b4731 100644 --- a/test/sakila.load +++ b/test/sakila.load @@ -9,7 +9,7 @@ load database WITH on error stop, concurrency = 2, workers = 6, prefetch rows = 25000, - -- multiple readers per thread, rows per range = 50000, + multiple readers per thread, rows per range = 50000, max parallel create index = 4-- , -- quote identifiers diff --git a/test/sqlite-env.load b/test/sqlite-env.load new file mode 100644 index 0000000..24fb0d9 --- /dev/null +++ b/test/sqlite-env.load @@ -0,0 +1,26 @@ +/* + * Run with either one of those commands: + * + * DBPATH=sqlite/sqlite.db ./build/bin/pgloader ./test/sqlite-env.load + * ./build/bin/pgloader --context ./test/sqlite.ini ./test/sqlite-ini.load + * + */ + +load database + from '{{DBPATH}}' + into postgresql:///pgloader + + -- with include drop, create tables, create indexes, 
reset sequences + + before load do + $$ drop schema if exists sqlite cascade; $$, + $$ create schema if not exists sqlite; $$ + + cast column character.f1 to text drop typemod, + column appointments.time to timestamptz drop default, + type intege to integer, + type character to varchar keep typemod + + set work_mem to '16MB', + maintenance_work_mem to '512 MB', + search_path to 'sqlite'; diff --git a/test/sqlite.ini b/test/sqlite.ini new file mode 100644 index 0000000..2662a24 --- /dev/null +++ b/test/sqlite.ini @@ -0,0 +1,3 @@ +[pgloader] + +DBPATH = sqlite/sqlite.db