pgloader/parse-ini.lisp

336 lines
12 KiB
Common Lisp

;;;
;;; Compatibility package to read old configuration file format.
;;;
(in-package :pgloader.ini)
(defparameter *global-section* "pgsql")
(defstruct params
filename table format is-template use-template
fields columns
truncate encoding logs rejects gucs
separator null-as empty-string skip-lines)
(defun process-true-false (value)
"parse python boolean values"
(cond ((string-equal "True" value) t)
((string-equal "False" value) nil)
(t value)))
(defun read-default-value-for-param (config option &optional default)
"Fetch value for OPTION in the global section."
(if (ini:has-option-p config *global-section* option)
(ini:get-option config *global-section* option)
default))
(defun read-value-for-param (config section option &key template default)
"Read the value of OPTION in the SECTION part of the CONFIG or its
TEMPLATE when one is defined, finally using provided DEFAULT."
(cond ((ini:has-option-p config section option)
(ini:get-option config section option))
(template
(if (ini:has-option-p config template option)
(ini:get-option config template option)
(read-default-value-for-param config option default)))
(t (read-default-value-for-param config option default))))
(defmethod set-param ((params params) config section option param
&optional default)
"Set the params structure slot PARAM, reading its value in the SECTION
part of the CONFIG or its TEMPLATE when one is defined, finally using
provided DEFAULT."
(let ((value
(process-true-false
(read-value-for-param config section option
:template (params-use-template params)
:default default))))
(setf (slot-value params param) value)))
(defmethod set-gucs ((params params) config section)
(let* ((template (params-use-template params))
(encoding
(string-trim "'"
(read-value-for-param config section "client_encoding"
:template template)))
(datestyle (read-value-for-param config section "datestyle"
:template template)))
(setf (params-gucs params)
(append
(when encoding (list (cons "client_encoding" encoding)))
(when datestyle (list (cons "datestyle" datestyle)))
(get-gucs config section)
(when template (get-gucs config template))
(get-gucs config *global-section*)))))
(defun get-gucs (config section)
"Get PostgreSQL settings from SECTION."
(loop
for (option . value) in (ini:items config section)
when (and (< 10 (length option)) (string= "pg_option_" option :end2 10))
collect (cons (subseq option 10) value)))
(defun user-defined-columns (config section)
"Fetch all option that begin with udc_ as user defined columns"
(loop for (option . value) in (ini:items config section)
when (and (< 4 (length option)) (string= "udc_" option :end2 4))
collect (cons (subseq option 4) value)))
(defun split-columns-specs (colspecs)
"Return an alist of column name and column position from given COLSPEC"
(loop
for count from 1
for raw in (sq:split-sequence #\, colspecs)
for colspec = (string-trim " " raw)
for (name pos) = (sq:split-sequence #\: colspec)
collect (cons name (or (when pos (parse-integer pos)) count))))
(defun get-pgsql-column-specs (config section)
"Connect to PostgreSQL to get the column specs."
(with-database-connection (config section)
(loop
for pos from 1
for name in (list-columns dbname table-name)
collect (cons name pos))))
(defun parse-columns-spec (string config section)
"Parse old-style columns specification, such as:
* --> nil
x, y, a, b, d:6, c:5 --> \"x, y, a, b, d, c\"
Returns the list of fields to read from the file and the list of columns
to fill-in in the database as separate values."
(let* ((colspecs
(if (string= string "*")
(get-pgsql-column-specs config section)
(split-columns-specs string))))
(values (mapcar #'car (sort (copy-list colspecs) #'< :key #'cdr))
(mapcar #'car colspecs))))
(defun parse-only-cols (columns only-cols)
" columns = x, y, a, b, d:6, c:5
only_cols = 3-6
Note that parsing the columns value has already been done for us, what
we are given here actually is (x y a b d c)
Returns (a b d c)"
(let ((indices
(loop
for raw in (sq:split-sequence #\, only-cols)
for range = (string-trim " " raw)
for (lower upper) = (mapcar #'parse-integer
(sq:split-sequence #\- range))
when upper append (loop for i from lower to upper collect i)
else collect lower)))
(loop
with cols = (coerce columns 'vector)
for i in indices
collect (aref cols (- i 1)))))
(defun compute-columns (columns only-cols copy-columns user-defined
config section)
"For columns, if only-cols is set, restrict to that. If copy-columns is
set, use that and replace references to user defined columns."
(cond (only-cols
;; that's again something kind of special
(parse-only-cols columns only-cols))
(copy-columns
;; that's the format used when user-defined columns are in play
(multiple-value-bind (fields columns)
(parse-columns-spec copy-columns config section)
(declare (ignore fields))
(mapcar
(lambda (colname)
(let ((constant
(cdr (assoc colname user-defined :test #'string=))))
(if constant
(format nil "~a ~a using ~s" colname "text" constant)
colname)))
columns)))
(t
columns)))
(defun parse-section (config section &optional (params (make-params)))
"Parse a configuration section into a params structure."
(unless (params-is-template params)
(loop for (option . param) in '(("use_template" . use-template)
("template" . is-template)
("reject_log" . logs)
("reject_data" . rejects)
("table" . table)
("format" . format)
("filename" . filename)
("truncate" . truncate)
("input_encoding" . encoding)
("reject_log" . logs)
("reject_data" . rejects)
("field_sep" . separator)
("null" . null-as)
("empty_string" . empty-string)
("skip_head_lines" . skip-lines))
do (set-param params config section option param))
;; now parse gucs
(set-gucs params config section)
;; now parse fields and columns
(let* ((template (params-use-template params))
(columns (read-value-for-param config section "columns"
:template template))
(user-defined (append
(user-defined-columns config section)
(when template
(user-defined-columns config template))
(user-defined-columns config *global-section*)))
(copy-columns (read-value-for-param config section "copy_columns"
:template template))
(only-cols (read-value-for-param config section "only_cols"
:template template)))
;; make sense of the old cruft
(multiple-value-bind (fields columns)
(parse-columns-spec columns config section)
(setf (params-fields params) fields)
(setf (params-columns params)
(compute-columns columns only-cols copy-columns user-defined
config section))))
params))
(defun get-connection-params (config section)
"Return a property list with connection parameters for SECTION."
(loop
for (param option section default)
in `((:host "host" ,*global-section* ,*pgconn-host*)
(:port "port" ,*global-section* ,*pgconn-port*)
(:user "user" ,*global-section* ,*pgconn-user*)
(:pass "pass" ,*global-section* ,*pgconn-pass*)
(:dbname "base" ,*global-section* nil)
(:table-name "table" ,section nil))
append
(list param
(coerce
(read-value-for-param config section option :default default)
'simple-string))))
(defun get-connection-string (config section)
"Return the connection parameters as a postgresql:// string."
(destructuring-bind (&key host port user pass dbname table-name)
(get-connection-params config section)
(format nil "postgresql://~a:~a@~a:~a/~a?~a"
user pass host port dbname table-name)))
(defmacro with-database-connection ((config section) &body body)
"Execute given SQL in a PostgreSQL connection suitable for CONFIG, SECTION."
`(destructuring-bind (&key host port user pass dbname table-name)
(get-connection-params ,config ,section)
(let ((*pgconn-host* host)
(*pgconn-port* (typecase port
(integer port)
(string (parse-integer port))))
(*pgconn-user* user)
(*pgconn-pass* pass)
(dbname dbname)
(table-name table-name))
,@body)))
(defun read-ini-file (filename)
(let ((config (ini:make-config)))
(ini:read-files config (list filename))))
(defun parse-ini-file (filename)
"Parse an old-style INI file into a list of PARAMS structures"
(let* ((config (read-ini-file filename))
(sections
(remove-if
(lambda (s) (member s '("default" *global-section*) :test #'string=))
(ini:sections config))))
(remove-if #'null (mapcar (lambda (s) (parse-section config s)) sections))))
(defun print-csv-option (params option)
"Print a CSV option in the new format."
(let ((value (when (slot-exists-p params option)
(slot-value params option))))
(case option
(truncate (when value "truncate"))
(quote (format nil "fields optionally enclosed by '~c'" #\"))
(escape (format nil "fields escaped by double-quote"))
(separator (format nil "fields terminated by '~c'" (aref value 0)))
(skip-lines (when value
(format nil "skip header = ~a" value))))))
(defun write-command-to-string (config section &key with-data-inline)
"Return the new syntax for the command found in SECTION."
(let ((params (parse-section config section)))
(when (and (params-filename params)
(params-separator params))
(with-output-to-string (s)
(format s "LOAD CSV~%")
(format s " FROM ~a ~@[WITH ENCODING ~a~]~%"
(if with-data-inline "inline"
(format nil "'~a'" (params-filename params)))
(when (params-encoding params)
(string-trim "'" (params-encoding params))))
(format s " ~@[(~{~&~10T~a~^,~}~%~8T)~]~%" (params-fields params))
(format s " INTO ~a~%" (get-connection-string config section))
(format s " ~@[(~{~&~10T~a~^,~}~%~8T)~]~%" (params-columns params))
;; CSV options
(format s "~% WITH ~{~a~^,~%~10T~}~%"
(loop for name in '(truncate skip-lines quote escape separator)
for option = (print-csv-option params name)
when option collect it))
;; GUCs
(format s "~% SET ~{~a~^,~&~10T~};"
(loop for (name . setting) in (params-gucs params)
collect (format nil "~a to '~a'" name setting)))))))
(defun convert-ini-into-commands (filename)
"Read the INI file at FILENAME and convert each section of it to a command
in the new pgloader format."
(let ((config (read-ini-file filename)))
(format t "~{~a~^~%~%~%~}"
(loop for section in (ini:sections config)
for command = (write-command-to-string config section)
when command collect it))))
(defun convert-ini-into-files (filename target-directory &key with-data-inline)
"Reads the INI file at FILENAME and creates files names <section>.load for
each section in the INI file, in TARGET-DIRECTORY."
(let ((config (read-ini-file filename)))
;; first mkdir -p
(ensure-directories-exist target-directory)
(loop
for section in (ini:sections config)
for target = (make-pathname :directory target-directory
:name section
:type "load")
for command = (write-command-to-string config section
:with-data-inline with-data-inline)
when command
do (with-open-file (c target
:direction :output
:if-exists :supersede
:if-does-not-exist :create
:external-format :utf-8)
(format c "~a" command)
(when with-data-inline
(let* ((params (parse-section config section))
(datafile
(merge-pathnames (params-filename params)
(directory-namestring filename))))
(format c "~%~%~%~%~a"
(slurp-file-into-string datafile)))))
and collect target)))