pgloader/src/main.lisp
Dimitri Fontaine f719d2976d Implement a template system for pgloader commands.
This feature has been asked several times, and I can't see any way to fix
the GETENV parsing mess that we have. In this patch the GETENV support is
retired and replaced with a templating system, using the Mustache syntax.

To get back the GETENV feature, our implementation of the Mustache template
system adds support for fetching the template variable values from the OS
environment.

Fixes #555, Fixes #609.
See #500, #477, #278.
2017-08-16 01:33:11 +02:00

647 lines
27 KiB
Common Lisp
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

(in-package #:pgloader)
;;;
;;; Now some tooling
;;;
(defun log-threshold (min-message &key quiet verbose debug)
"Return the internal value to use given the script parameters."
(cond ((and debug verbose) :data)
(debug :debug)
(verbose :notice)
(quiet :error)
(t (or (find-symbol (string-upcase min-message) "KEYWORD")
:notice))))
(defparameter *opt-spec*
`((("help" #\h) :type boolean :documentation "Show usage and exit.")
(("version" #\V) :type boolean
:documentation "Displays pgloader version and exit.")
(("quiet" #\q) :type boolean :documentation "Be quiet")
(("verbose" #\v) :type boolean :documentation "Be verbose")
(("debug" #\d) :type boolean :documentation "Display debug level information.")
("client-min-messages" :type string :initial-value "warning"
:documentation "Filter logs seen at the console")
("log-min-messages" :type string :initial-value "notice"
:documentation "Filter logs seen in the logfile")
(("summary" #\S) :type string :documentation "Filename where to copy the summary")
(("root-dir" #\D) :type string :initial-value ,*root-dir*
:documentation "Output root directory.")
(("upgrade-config" #\U) :type boolean
:documentation "Output the command(s) corresponding to .conf file for v2.x")
(("list-encodings" #\E) :type boolean
:documentation "List pgloader known encodings and exit.")
(("logfile" #\L) :type string
:documentation "Filename where to send the logs.")
(("load-lisp-file" #\l) :type string :list t :optional t
:documentation "Read user code from files")
("dry-run" :type boolean
:documentation "Only check database connections, don't load anything.")
("on-error-stop" :type boolean
:documentation "Refrain from handling errors properly.")
(("context" #\C) :type string :documentation "Command Context Variables")
(("with") :type string :list t :optional t
:documentation "Load options")
(("set") :type string :list t :optional t
:documentation "PostgreSQL options")
(("field") :type string :list t :optional t
:documentation "Source file fields specification")
(("cast") :type string :list t :optional t
:documentation "Specific cast rules")
(("type") :type string :optional t
:documentation "Force input source type")
(("encoding") :type string :optional t
:documentation "Source expected encoding")
(("before") :type string :optional t
:documentation "SQL script to run before loading the data")
(("after") :type string :optional t
:documentation "SQL script to run after loading the data")
("self-upgrade" :type string :optional t
:documentation "Path to pgloader newer sources")
("regress" :type boolean :optional t
:documentation "Drive regression testing")))
(defun print-backtrace (condition debug)
"Depending on DEBUG, print out the full backtrace or just a shorter
message on STREAM for given CONDITION."
(if debug
(trivial-backtrace:print-backtrace condition :output nil)
(trivial-backtrace:print-condition condition nil)))
(defun mkdir-or-die (path debug &optional (stream *standard-output*))
"Create a directory at given PATH and exit with an error message when
that's not possible."
(handler-case
(let ((dir (uiop:ensure-directory-pathname path)))
(when debug
(format stream "mkdir -p ~s~%" dir))
(uiop:parse-unix-namestring (ensure-directories-exist dir)))
(condition (e)
;; any error here is a panic
(if debug
(format stream "PANIC: ~a~%" (print-backtrace e debug))
(format stream "PANIC: ~a.~%" e))
(uiop:quit))))
(defun log-file-name (logfile)
" If the logfile has not been given by the user, default to using
pgloader.log within *root-dir*."
(cond ((null logfile)
(make-pathname :defaults *root-dir*
:name "pgloader"
:type "log"))
((fad:pathname-relative-p logfile)
(merge-pathnames logfile *root-dir*))
(t
logfile)))
(defun usage (argv &key quit)
"Show usage then QUIT if asked to."
(format t "~&~a [ option ... ] command-file ..." (first argv))
(format t "~&~a [ option ... ] SOURCE TARGET" (first argv))
(command-line-arguments:show-option-help *opt-spec*)
(when quit (uiop:quit +os-code-error-usage+)))
(defvar *self-upgraded-already* nil
"Keep track if we did reload our own source code already.")
(defun self-upgrade (namestring &optional debug)
"Load pgloader sources at PATH-TO-PGLOADER-SOURCES."
(let ((pgloader-pathname (uiop:directory-exists-p
(uiop:parse-unix-namestring namestring))))
(unless pgloader-pathname
(format t "No such directory: ~s~%" namestring)
(uiop:quit +os-code-error+))
;; now the real thing
(handler-case
(handler-bind ((warning #'muffle-warning))
(let ((asdf:*central-registry* (list* pgloader-pathname
asdf:*central-registry*)))
(format t "Self-upgrading from sources at ~s~%"
(uiop:native-namestring pgloader-pathname))
(with-output-to-string (*standard-output*)
(asdf:load-system :pgloader
:verbose nil
:force-not *self-upgrade-immutable-systems*))))
(condition (c)
(format t "Fatal: ~a~%" c)
(format t "~a~%" *self-upgrade-immutable-systems*)
(when debug (invoke-debugger c))))))
(defun parse-summary-filename (summary debug)
"Return the pathname where to write the summary output."
(when summary
(let* ((summary-pathname (uiop:parse-unix-namestring summary))
(summary-pathname (if (uiop:absolute-pathname-p summary-pathname)
summary-pathname
(uiop:merge-pathnames* summary-pathname *root-dir*)))
(summary-dir (directory-namestring summary-pathname)))
(mkdir-or-die summary-dir debug)
summary-pathname)))
(defvar *--load-list-file-extension-whitelist* '("lisp" "lsp" "cl" "asd")
"White list of file extensions allowed with the --load option.")
(defun load-extra-transformation-functions (filename &optional verbose)
"Load an extra filename to tweak pgloader's behavior."
(let ((pathname (uiop:parse-native-namestring filename)))
(unless (member (pathname-type pathname)
*--load-list-file-extension-whitelist*
:test #'string=)
(error "Unknown lisp file extension: ~s" (pathname-type pathname)))
(format t "Loading code from ~s~%" pathname)
(load (compile-file pathname :verbose verbose :print verbose))))
(defun main (argv)
"Entry point when building an executable image with buildapp"
(let ((args (rest argv)))
(multiple-value-bind (options arguments)
(handler-case
(command-line-arguments:process-command-line-options *opt-spec* args)
(condition (e)
;; print out the usage, whatever happens here
;; (declare (ignore e))
(format t "~a~%" e)
(usage argv :quit t)))
(destructuring-bind (&key help version quiet verbose debug logfile
list-encodings upgrade-config
dry-run on-error-stop context
((:load-lisp-file load))
client-min-messages log-min-messages summary
root-dir self-upgrade
with set field cast type encoding before after
regress)
options
;; parse the log thresholds
(setf *log-min-messages*
(log-threshold log-min-messages
:quiet quiet :verbose verbose :debug debug)
*client-min-messages*
(log-threshold client-min-messages
:quiet quiet :verbose verbose :debug debug)
verbose (member *client-min-messages* '(:info :debug :data))
debug (member *client-min-messages* '(:debug :data))
quiet (and (not verbose) (not debug)))
;; First thing: Self Upgrade?
(when self-upgrade
(unless *self-upgraded-already*
(self-upgrade self-upgrade debug)
(let ((*self-upgraded-already* t))
(main argv))))
;; First care about the root directory where pgloader is supposed to
;; output its data logs and reject files
(let ((root-dir-truename (or (probe-file root-dir)
(mkdir-or-die root-dir debug))))
(setf *root-dir* (uiop:ensure-directory-pathname root-dir-truename)))
;; Set parameters that come from the environement
(init-params-from-environment)
;; Read the context file (if given) and the environment
(handler-case
(initialize-context context)
(condition (e)
(format t "Couldn't read ini file ~s: ~a~%" context e)
(usage argv)))
;; Then process options
(when debug
#+sbcl
(format t "sb-impl::*default-external-format* ~s~%"
sb-impl::*default-external-format*)
(format t "tmpdir: ~s~%" *default-tmpdir*))
(when version
(format t "pgloader version ~s~%" *version-string*)
(format t "compiled with ~a ~a~%"
(lisp-implementation-type)
(lisp-implementation-version)))
(when help
(usage argv))
(when (or help version) (uiop:quit +os-code-success+))
(when list-encodings
(show-encodings)
(uiop:quit +os-code-success+))
(when upgrade-config
(loop for filename in arguments
do
(handler-case
(with-monitor ()
(pgloader.ini:convert-ini-into-commands filename))
(condition (c)
(when debug (invoke-debugger c))
(uiop:quit +os-code-error+))))
(uiop:quit +os-code-success+))
;; Should we run in dry-run mode?
(setf *dry-run* dry-run)
;; Should we stop at first error?
(setf *on-error-stop* on-error-stop)
;; load extra lisp code provided for by the user
(when load
(loop :for filename :in load :do
(handler-case
(load-extra-transformation-functions filename debug)
((or simple-condition serious-condition) (e)
(format *error-output*
"Failed to load lisp source file ~s~%" filename)
(format *error-output* "~a~%~%" e)
(uiop:quit +os-code-error+)))))
;; Now process the arguments
(when arguments
;; Start the logs system
(let* ((*log-filename* (log-file-name logfile))
(*summary-pathname* (parse-summary-filename summary debug)))
(handler-case
;; The handler-case is to catch unhandled exceptions at the
;; top level.
;;
;; The handler-bind below is to be able to offer a
;; meaningful backtrace to the user in case of unexpected
;; conditions being signaled.
(handler-bind
(((and condition (not (or cli-parsing-error
source-definition-error)))
#'(lambda (condition)
(format *error-output* "KABOOM!~%")
(format *error-output* "FATAL error: ~a~%~a~%~%"
condition
(print-backtrace condition debug)))))
(with-monitor ()
;; tell the user where to look for interesting things
(log-message :log "Main logs in '~a'"
(uiop:native-namestring *log-filename*))
(log-message :log "Data errors in '~a'~%" *root-dir*)
(cond
((and regress (= 1 (length arguments)))
;; run a regression test
(process-regression-test (first arguments)))
(regress
(log-message :fatal "Regression testing requires a single .load file as input."))
((= 2 (length arguments))
;; if there are exactly two arguments in the command
;; line, try and process them as source and target
;; arguments
(process-source-and-target (first arguments)
(second arguments)
type encoding
set with field cast
before after))
(t
;; process the files
;; other options are not going to be used here
(let ((cli-options `(("--type" ,type)
("--encoding" ,encoding)
("--set" ,set)
("--with" ,with)
("--field" ,field)
("--cast" ,cast)
("--before" ,before)
("--after" ,after))))
(loop :for (cli-option-name cli-option-value)
:in cli-options
:when cli-option-value
:do (log-message
:fatal
"Option ~s is ignored when using a load file"
cli-option-name))
;; when we issued a single error previously, do nothing
(unless (remove-if #'null (mapcar #'second cli-options))
(process-command-file arguments)))))))
((or cli-parsing-error source-definition-error) (c)
(format *error-output* "~%~a~%~%" c)
(let ((lp:*kernel* *monitoring-kernel*))
(lp:end-kernel :wait t))
(uiop:quit +os-code-error-bad-source+))
(condition (c)
(format *error-output* "~%What I am doing here?~%~%")
(format *error-output* "~a~%~%" c)
;; wait until monitor stops...
(format *error-output*
"~%Waiting for the monitor thread to complete.~%~%")
(let ((lp:*kernel* *monitoring-kernel*))
(lp:end-kernel :wait t))
(uiop:quit +os-code-error+)))))
;; done.
(uiop:quit +os-code-success+)))))
;;;
;;; Helper functions to actually do things
;;;
(define-condition load-files-not-found-error (error)
((filename-list :initarg :filename-list))
(:report (lambda (err stream)
(format stream
;; start lines with 3 spaces because of trivial-backtrace
"~{No such file or directory: ~s~^~% ~}"
(slot-value err 'filename-list)))))
(defun process-command-file (filename-list &key (flush-summary t))
"Process each FILENAME in FILENAME-LIST as a pgloader command
file (.load)."
(loop :for filename :in filename-list
:for truename := (probe-file filename)
:unless truename :collect filename :into not-found-list
:do (if truename
(run-commands truename
:start-logger nil
:flush-summary flush-summary)
(log-message :error "Can not find file: ~s" filename))
:finally (when not-found-list
(error 'load-files-not-found-error :filename-list not-found-list))))
(define-condition cli-parsing-error (error) ()
(:report (lambda (err stream)
(declare (ignore err))
(format stream "Could not parse the command line: see above."))))
(defun process-source-and-target (source-string target-string
&optional
type encoding set with field cast
before after)
"Given exactly 2 CLI arguments, process them as source and target URIs.
Parameters here are meant to be already parsed, see parse-cli-optargs."
(let* ((type (handler-case
(parse-cli-type type)
(condition (e)
(log-message :warning
"Could not parse --type ~s: ~a"
type e))))
(source-uri (handler-case
(if type
(parse-source-string-for-type type source-string)
(parse-source-string source-string))
(condition (e)
(log-message :warning
"Could not parse source string ~s: ~a"
source-string e))))
(type (when (and source-string
(typep source-uri 'connection))
(parse-cli-type (conn-type source-uri))))
(target-uri (handler-case
(parse-target-string target-string)
(condition (e)
(log-message :error
"Could not parse target string ~s: ~a"
target-string e)))))
;; some verbosity about the parsing "magic"
(log-message :info " SOURCE: ~s" source-string)
(log-message :info "SOURCE URI: ~s" source-uri)
(log-message :info " TARGET: ~s" target-string)
(log-message :info "TARGET URI: ~s" target-uri)
(cond ((and (null source-uri) (null target-uri))
(process-command-file (list source-string target-string)))
((or (null source-string) (null source-uri))
(log-message :fatal
"Failed to parse ~s as a source URI." source-string)
(log-message :log "You might need to use --type."))
((or (null target-string) (null target-uri))
(log-message :fatal
"Failed to parse ~s as a PostgreSQL database URI."
target-string)))
(let* ((nb-errors 0)
(options (handler-case
(parse-cli-options type with)
(condition (e)
(incf nb-errors)
(log-message :error "Could not parse --with ~s:" with)
(log-message :error "~a" e))))
(fields (handler-case
(parse-cli-fields type field)
(condition (e)
(incf nb-errors)
(log-message :error "Could not parse --fields ~s:" field)
(log-message :error "~a" e)))))
(destructuring-bind (&key encoding gucs casts before after)
(loop :for (keyword option user-string parse-fn)
:in `((:encoding "--encoding" ,encoding ,#'parse-cli-encoding)
(:gucs "--set" ,set ,#'parse-cli-gucs)
(:casts "--cast" ,cast ,#'parse-cli-casts)
(:before "--before" ,before ,#'parse-sql-file)
(:after "--after" ,after ,#'parse-sql-file))
:append (list keyword
(handler-case
(funcall parse-fn user-string)
(condition (e)
(incf nb-errors)
(log-message :error "Could not parse ~a ~s: ~a"
option user-string e)))))
(unless (= 0 nb-errors)
(error 'cli-parsing-error))
;; so, we actually have all the specs for the
;; job on the command line now.
(when (and source-uri target-uri (= 0 nb-errors))
(load-data :from source-uri
:into target-uri
:encoding encoding
:options options
:gucs gucs
:fields fields
:casts casts
:before before
:after after
:start-logger nil))))))
;;;
;;; Helper function to run a given command
;;;
(defun run-commands (source
&key
(start-logger t)
(flush-summary t)
((:summary *summary-pathname*) *summary-pathname*)
((:log-filename *log-filename*) *log-filename*)
((:log-min-messages *log-min-messages*) *log-min-messages*)
((:client-min-messages *client-min-messages*) *client-min-messages*))
"SOURCE can be a function, which is run, a list, which is compiled as CL
code then run, a pathname containing one or more commands that are parsed
then run, or a commands string that is then parsed and each command run."
(with-monitor (:start-logger start-logger)
(let* ((funcs
(typecase source
(function (list source))
(list (list (compile nil source)))
(pathname (mapcar (lambda (expr) (compile nil expr))
(parse-commands-from-file source)))
(t (mapcar (lambda (expr) (compile nil expr))
(if (probe-file source)
(parse-commands-from-file source)
(parse-commands source)))))))
(loop :for func :in funcs
:do (funcall func)
:do (when flush-summary
(flush-summary :reset t))))))
;;;
;;; Main API to use from outside of pgloader.
;;;
(define-condition source-definition-error (error)
((mesg :initarg :mesg :reader source-definition-error-mesg))
(:report (lambda (err stream)
(format stream "~a" (source-definition-error-mesg err)))))
(defun load-data (&key ((:from source)) ((:into target))
encoding fields options gucs casts before after
(start-logger t) (flush-summary t))
"Load data from SOURCE into TARGET."
(declare (type connection source)
(type pgsql-connection target))
(when (and (typep source 'csv-connection)
(null (pgconn-table-name target)))
(error 'source-definition-error
:mesg "CSV data source require a table name target."))
(when (and (typep source 'fixed-connection)
(null (pgconn-table-name target)))
(error 'source-definition-error
:mesg "Fixed-width data source require a table name target."))
(when (and (typep source 'fixed-connection)
(null fields))
(error 'source-definition-error
:mesg "Fixed-width data source require fields specs."))
(with-monitor (:start-logger start-logger)
(when (and casts (not (member (type-of source)
'(sqlite-connection
mysql-connection
mssql-connection))))
(log-message :log "Cast rules are ignored for this sources."))
;; now generates the code for the command
(log-message :debug "LOAD DATA FROM ~s" source)
(let ((code
(etypecase source
(copy-connection
(lisp-code-for-loading-from-copy source target
:fields fields
:encoding (or encoding :default)
:gucs gucs
:options options
:before before
:after after))
(fixed-connection
(lisp-code-for-loading-from-fixed source target
:fields fields
:encoding encoding
:gucs gucs
:options options
:before before
:after after))
(csv-connection
(lisp-code-for-loading-from-csv source target
:fields fields
:encoding encoding
:gucs gucs
:options options
:before before
:after after))
(dbf-connection
(lisp-code-for-loading-from-dbf source target
:gucs gucs
:options options
:before before
:after after))
(ixf-connection
(lisp-code-for-loading-from-ixf source target
:gucs gucs
:options options
:before before
:after after))
(sqlite-connection
(lisp-code-for-loading-from-sqlite source target
:gucs gucs
:casts casts
:options options
:before before
:after after))
(mysql-connection
(lisp-code-for-loading-from-mysql source target
:gucs gucs
:casts casts
:options options
:before before
:after after))
(mssql-connection
(lisp-code-for-loading-from-mssql source target
:gucs gucs
:casts casts
:options options
:before before
:after after)))))
(run-commands (process-relative-pathnames (uiop:getcwd) code)
:start-logger nil
:flush-summary flush-summary))))