From 87e157bee20a5c54853c698f6a5fa05de8f35a62 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Mon, 17 Nov 2014 00:23:06 +0100 Subject: [PATCH] Add a new database source type in the parser. Now it's possible to parse a command to load data from MS SQL. The parser was until now parsing all database URI within the same common rule and that isn't possible anymore if we want to distinguish in between source database right from the parser, which we actually want to do. This patch also implement in-passing fixes all over the place, including the transformation function float-to-string that only happened to work on double-float data. --- Makefile | 7 +- pgloader.asd | 4 +- src/package.lisp | 1 + src/parsers/command-db-uri.lisp | 106 ++++++++------------- src/parsers/command-mssql.lisp | 121 ++++++++++++++++++++++++ src/parsers/command-mysql.lisp | 39 +++++++- src/parsers/command-parser.lisp | 1 + src/parsers/command-source.lisp | 6 -- src/sources/mssql.lisp | 37 ++++---- src/sources/mssql/mssql-cast-rules.lisp | 2 +- src/utils/transforms.lisp | 6 +- test/parse/AdventureWorks2008R2.load | 16 ++++ 12 files changed, 248 insertions(+), 98 deletions(-) create mode 100644 src/parsers/command-mssql.lisp create mode 100644 test/parse/AdventureWorks2008R2.load diff --git a/Makefile b/Makefile index fb0ff35..1c3a4cd 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,11 @@ CL = sbcl # default to 4096 MB of RAM size in the image DYNSIZE = 4096 -LISP_SRC = $(wildcard src/*lisp) \ - $(wildcard src/pgsql/*lisp) \ +LISP_SRC = $(wildcard src/*lisp) \ + $(wildcard src/monkey/*lisp) \ + $(wildcard src/utils/*lisp) \ + $(wildcard src/parsers/*lisp) \ + $(wildcard src/pgsql/*lisp) \ $(wildcard src/sources/*lisp) \ pgloader.asd diff --git a/pgloader.asd b/pgloader.asd index 0f72f0f..e40367a 100644 --- a/pgloader.asd +++ b/pgloader.asd @@ -92,6 +92,7 @@ (:file "command-dbf") (:file "command-cast-rules") (:file "command-mysql") + (:file "command-mssql") (:file "command-sqlite") (:file "command-archive") (:file "command-parser") @@ -104,7 +105,8 @@ ;; Source format specific implementations (:module sources - :depends-on ("params" + :depends-on ("monkey" ; mssql driver patches + "params" "package" "sources-api" "pgsql" diff --git a/src/package.lisp b/src/package.lisp index b4ae4e7..7e6f26e 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -352,6 +352,7 @@ #:*default-cast-rules* #:*cast-rules*) (:import-from #:pgloader.mysql #:*mysql-default-cast-rules*) + (:import-from #:pgloader.mssql #:*mssql-default-cast-rules*) (:import-from #:pgloader.sqlite #:*sqlite-default-cast-rules*) (:export #:parse-commands #:run-commands diff --git a/src/parsers/command-db-uri.lisp b/src/parsers/command-db-uri.lisp index fd2f39c..ded1581 100644 --- a/src/parsers/command-db-uri.lisp +++ b/src/parsers/command-db-uri.lisp @@ -65,8 +65,16 @@ (:identity t)) (defrule dsn-hostname (and (? hostname) (? dsn-port)) - (:destructure (hostname &optional port) - (append (list :host hostname) port))) + (:lambda (host-port) + (destructuring-bind (host &optional port) host-port + (append (list :host + (when host + (destructuring-bind (type &optional name) host + (ecase type + (:unix (if name (cons :unix name) :unix)) + (:ipv4 name) + (:host name))))) + port)))) (defrule dsn-dbname (and "/" (? namestring)) (:destructure (slash dbname) @@ -83,90 +91,52 @@ (declare (ignore qm)) (list :table-name name))) -(defrule dsn-prefix (and (or "postgresql" "pgsql" "mysql" "syslog") "://") - (:lambda (db) - (bind (((prefix _) db)) - (cond ((string= "postgresql" prefix) (list :type :postgresql)) - ((string= "pgsql" prefix) (list :type :postgresql)) - ((string= "mysql" prefix) (list :type :mysql)) - ((string= "syslog" prefix) (list :type :syslog)))))) +(defrule pgsql-prefix (and (or "postgresql" "pgsql") "://") + (:constant (list :type :postgresql))) -(defrule db-connection-uri (and dsn-prefix - (? dsn-user-password) - (? dsn-hostname) - dsn-dbname - (? dsn-table-name)) +(defrule pgsql-uri (and pgsql-prefix + (? dsn-user-password) + (? dsn-hostname) + dsn-dbname + (? dsn-table-name)) (:lambda (uri) (destructuring-bind (&key type - user + user password host port dbname - table-name) - (apply #'append uri) - ;; + table-name) + (apply #'append uri) ;; Default to environment variables as described in ;; http://www.postgresql.org/docs/9.3/static/app-psql.html - ;; http://dev.mysql.com/doc/refman/5.0/en/environment-variables.html - ;; - (let ((user - (or user - (case type - (:postgresql - (getenv-default "PGUSER" - #+unix (getenv-default "USER") - #-unix (getenv-default "UserName"))) - (:mysql - (getenv-default "USER"))))) - (password (or password - (case type - (:postgresql (getenv-default "PGPASSWORD")) - (:mysql (getenv-default "MYSQL_PWD")))))) - (list :type type - :user user - :password password - :host (or (when host - (destructuring-bind (type &optional name) host - (ecase type - (:unix (if name (cons :unix name) :unix)) - (:ipv4 name) - (:host name)))) - (case type - (:postgresql (getenv-default "PGHOST" - #+unix :unix - #-unix "localhost")) - (:mysql (getenv-default "MYSQL_HOST" "localhost")))) - :port (or port - (parse-integer - ;; avoid a NIL is not a STRING style warning by - ;; using ecase here - (ecase type - (:postgresql (getenv-default "PGPORT" "5432")) - (:mysql (getenv-default "MYSQL_TCP_PORT" "3306"))))) - :dbname (or dbname - (case type - (:postgresql (getenv-default "PGDATABASE" user)))) - :table-name table-name))))) + (list :type type + :user (or user + (getenv-default "PGUSER" + #+unix (getenv-default "USER") + #-unix (getenv-default "UserName"))) + :password (or password (getenv-default "PGPASSWORD")) + :host (or host (getenv-default "PGHOST" + #+unix :unix + #-unix "localhost")) + :port (or port (parse-integer + (getenv-default "PGPORT" "5432"))) + :dbname (or dbname (getenv-default "PGDATABASE" user)) + :table-name table-name)))) -(defrule get-dburi-from-environment-variable (and kw-getenv name) +(defrule get-pgsql-uri-from-environment-variable (and kw-getenv name) (:lambda (p-e-v) (bind (((_ varname) p-e-v)) (let ((connstring (getenv-default varname))) (unless connstring (error "Environment variable ~s is unset." varname)) - (parse 'db-connection-uri connstring))))) + (parse 'pgsql-uri connstring))))) -(defrule target (and kw-into (or db-connection-uri - get-dburi-from-environment-variable)) +(defrule target (and kw-into (or pgsql-uri + get-pgsql-uri-from-environment-variable)) (:destructure (into target) (declare (ignore into)) - (destructuring-bind (&key type &allow-other-keys) target - (unless (eq type :postgresql) - (error "The target must be a PostgreSQL connection string.")) - target))) - - + target)) (defun pgsql-connection-bindings (pg-db-uri gucs) diff --git a/src/parsers/command-mssql.lisp b/src/parsers/command-mssql.lisp new file mode 100644 index 0000000..7bcc051 --- /dev/null +++ b/src/parsers/command-mssql.lisp @@ -0,0 +1,121 @@ +;;; +;;; Parse the pgloader commands grammar +;;; + +(in-package :pgloader.parser) + +(defun mssql-connection-bindings (ms-db-uri) + "Generate the code needed to set MSSQL connection bindings." + (destructuring-bind (&key ((:host mshost)) + ((:port msport)) + ((:user msuser)) + ((:password mspass)) + ((:dbname msdb)) + &allow-other-keys) + ms-db-uri + `((*msconn-host* ',mshost) + (*msconn-port* ,msport) + (*msconn-user* ,msuser) + (*msconn-pass* ,mspass) + (*ms-dbname* ,msdb)))) + + +;;; +;;; Allow clauses to appear in any order +;;; +(defrule load-mssql-optional-clauses (* (or mysql-options + gucs + casts + before-load + after-load)) + (:lambda (clauses-list) + (alexandria:alist-plist clauses-list))) + +(defrule mssql-prefix "mssql://" (:constant (list :type :mssql))) + +(defrule mssql-uri (and mssql-prefix + (? dsn-user-password) + (? dsn-hostname) + dsn-dbname) + (:lambda (uri) + (destructuring-bind (&key type + user + password + host + port + dbname) + (apply #'append uri) + ;; Default to environment variables as described in + ;; http://www.freetds.org/userguide/envvar.htm + (list :type type + :user (or user (getenv-default "USER")) + :password password + :host (or host (getenv-default "TDSHOST" "localhost")) + :port (or port (parse-integer + (getenv-default "TDSPORT" "1433"))) + :dbname dbname)))) + +(defrule get-mssql-uri-from-environment-variable (and kw-getenv name) + (:lambda (p-e-v) + (bind (((_ varname) p-e-v)) + (let ((connstring (getenv-default varname))) + (unless connstring + (error "Environment variable ~s is unset." varname)) + (parse 'mssql-uri connstring))))) + +(defrule mssql-source (and kw-load kw-database kw-from + (or mssql-uri + get-mssql-uri-from-environment-variable)) + (:lambda (source) (bind (((_ _ _ uri) source)) uri))) + +(defrule load-mssql-command (and mssql-source target + load-mssql-optional-clauses) + (:lambda (command) + (destructuring-bind (source target clauses) command + `(,source ,target ,@clauses)))) + + +;;; LOAD DATABASE FROM mssql:// +(defrule load-mssql-database load-mssql-command + (:lambda (source) + (bind (((ms-db-uri pg-db-uri + &key + gucs casts before after + ((:mssql-options options))) source) + + ((&key ((:dbname msdb)) table-name + &allow-other-keys) ms-db-uri) + + ((&key ((:dbname pgdb)) &allow-other-keys) pg-db-uri)) + `(lambda () + (let* ((state-before (pgloader.utils:make-pgstate)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (state-idx (pgloader.utils:make-pgstate)) + (state-after (pgloader.utils:make-pgstate)) + (*default-cast-rules* ',*mssql-default-cast-rules*) + (*cast-rules* ',casts) + ,@(mssql-connection-bindings ms-db-uri) + ,@(pgsql-connection-bindings pg-db-uri gucs) + ,@(batch-control-bindings options) + (source + (make-instance 'pgloader.mssql::copy-mssql + :target-db ,pgdb + :source-db ,msdb))) + + ,(sql-code-block pgdb 'state-before before "before load") + + (pgloader.mssql:copy-database source + ,@(when table-name + `(:only-tables ',(list table-name))) + :state-before state-before + :state-after state-after + :state-indexes state-idx + ,@(remove-batch-control-option options)) + + ,(sql-code-block pgdb 'state-after after "after load") + + (report-full-summary "Total import time" *state* + :before state-before + :finally state-after + :parallel state-idx)))))) + diff --git a/src/parsers/command-mysql.lisp b/src/parsers/command-mysql.lisp index 248a30e..5ddbf4b 100644 --- a/src/parsers/command-mysql.lisp +++ b/src/parsers/command-mysql.lisp @@ -112,7 +112,44 @@ (:lambda (clauses-list) (alexandria:alist-plist clauses-list))) -(defrule load-mysql-command (and database-source target +(defrule mysql-prefix "mysql://" (:constant (list :type :mysql))) + +(defrule mysql-uri (and mysql-prefix + (? dsn-user-password) + (? dsn-hostname) + dsn-dbname) + (:lambda (uri) + (destructuring-bind (&key type + user + password + host + port + dbname) + (apply #'append uri) + ;; Default to environment variables as described in + ;; http://dev.mysql.com/doc/refman/5.0/en/environment-variables.html + (list :type type + :user (or user (getenv-default "USER")) + :password (or password (getenv-default "MYSQL_PWD")) + :host (or host (getenv-default "MYSQL_HOST" "localhost")) + :port (or port (parse-integer + (getenv-default "MYSQL_TCP_PORT" "3306"))) + :dbname dbname)))) + +(defrule get-mysql-uri-from-environment-variable (and kw-getenv name) + (:lambda (p-e-v) + (bind (((_ varname) p-e-v)) + (let ((connstring (getenv-default varname))) + (unless connstring + (error "Environment variable ~s is unset." varname)) + (parse 'mysql-uri connstring))))) + +(defrule mysql-source (and kw-load kw-database kw-from + (or mysql-uri + get-mysql-uri-from-environment-variable)) + (:lambda (source) (bind (((_ _ _ uri) source)) uri))) + +(defrule load-mysql-command (and mysql-source target load-mysql-optional-clauses) (:lambda (command) (destructuring-bind (source target clauses) command diff --git a/src/parsers/command-parser.lisp b/src/parsers/command-parser.lisp index 9c7e3ef..496b5ea 100644 --- a/src/parsers/command-parser.lisp +++ b/src/parsers/command-parser.lisp @@ -17,6 +17,7 @@ load-dbf-file load-ixf-file load-mysql-database + load-mssql-database load-sqlite-database ;; load-syslog-messages ) diff --git a/src/parsers/command-source.lisp b/src/parsers/command-source.lisp index 37fbf2c..45a883a 100644 --- a/src/parsers/command-source.lisp +++ b/src/parsers/command-source.lisp @@ -62,9 +62,3 @@ (:destructure (load-from ws source) (declare (ignore load-from ws)) source)) - -(defrule database-source (and kw-load kw-database kw-from - (or db-connection-uri - get-dburi-from-environment-variable)) - (:lambda (source) - (bind (((_ _ _ uri) source)) uri))) diff --git a/src/sources/mssql.lisp b/src/sources/mssql.lisp index e408b0c..184b920 100644 --- a/src/sources/mssql.lisp +++ b/src/sources/mssql.lisp @@ -109,31 +109,32 @@ (defmethod copy-database ((mssql copy-mssql) &key state-before - data-only - schema-only + state-after + state-indexes (truncate nil) + (data-only nil) + (schema-only nil) (create-tables t) (include-drop t) (create-indexes t) (reset-sequences t) - only-tables - including - excluding + (foreign-keys t) (identifier-case :downcase) - (encoding :utf-8)) + (encoding :utf-8) + only-tables) "Stream the given MS SQL database down to PostgreSQL." - (declare (ignore create-indexes reset-sequences)) + (declare (ignore create-indexes reset-sequences foreign-keys)) (let* ((summary (null *state*)) (*state* (or *state* (make-pgstate))) + (idx-state (or state-indexes (make-pgstate))) (state-before (or state-before (make-pgstate))) - (idx-state (make-pgstate)) - (seq-state (make-pgstate)) + (state-after (or state-after (make-pgstate))) (cffi:*default-foreign-encoding* encoding) (copy-kernel (make-kernel 2)) - (all-columns (filter-column-list (list-all-columns) - :only-tables only-tables - :including including - :excluding excluding)) + (all-columns (filter-column-list + (with-mssql-connection () + (list-all-columns)) + :only-tables only-tables)) ;; (all-indexes (filter-column-list (list-all-indexes) ;; :only-tables only-tables ;; :including including @@ -194,6 +195,7 @@ identifier-case) :fields columns))) (log-message :debug "TARGET: ~a" (target table-source)) + (log-message :log "target: ~s" table-source) ;; COPY the data to PostgreSQL, using copy-kernel (unless schema-only (copy-from table-source :kernel copy-kernel))))) @@ -202,7 +204,8 @@ (let ((lp:*kernel* copy-kernel)) (lp:end-kernel)) ;; and report the total time spent on the operation - (report-full-summary "Total streaming time" *state* - :before state-before - :finally seq-state - :parallel idx-state))) + (when summary + (report-full-summary "Total streaming time" *state* + :before state-before + :finally state-after + :parallel idx-state)))) diff --git a/src/sources/mssql/mssql-cast-rules.lisp b/src/sources/mssql/mssql-cast-rules.lisp index 7beb000..6dda02e 100644 --- a/src/sources/mssql/mssql-cast-rules.lisp +++ b/src/sources/mssql/mssql-cast-rules.lisp @@ -36,7 +36,7 @@ (:source (:type "numeric") :target (:type "numeric") :using pgloader.transforms::float-to-string) - (:source (:type "money") :target (:type "numeric") + (:source (:type "money") :target (:type "numeric") :using pgloader.transforms::float-to-string) (:source (:type "smallmoney") :target (:type "numeric") diff --git a/src/utils/transforms.lisp b/src/utils/transforms.lisp index 3f78d7c..cb4e52b 100644 --- a/src/utils/transforms.lisp +++ b/src/utils/transforms.lisp @@ -151,8 +151,10 @@ accepted by PostgreSQL, that is 100.0 rather than 100.0d0." (declare (type (or null float) float)) (when float - (let ((*read-default-float-format* 'double-float)) - (princ-to-string float)))) + (typecase float + (double-float (let ((*read-default-float-format* 'double-float)) + (princ-to-string float))) + (t (princ-to-string float))))) (defun set-to-enum-array (set-string) "Transform a MySQL SET value into a PostgreSQL ENUM Array" diff --git a/test/parse/AdventureWorks2008R2.load b/test/parse/AdventureWorks2008R2.load new file mode 100644 index 0000000..c509148 --- /dev/null +++ b/test/parse/AdventureWorks2008R2.load @@ -0,0 +1,16 @@ +load database + from mssql://dim:pass@localhost/AdventureWorks2008R2 + into postgresql:///advworks + + -- WITH include drop, create tables, no truncate, + -- create indexes, reset sequences, foreign keys + + SET maintenance_work_mem to '128MB', work_mem to '12MB' + + BEFORE LOAD DO + $$ drop schema if exists humanresources cascade; $$, + $$ drop schema if exists person cascade; $$, + $$ drop schema if exists production cascade; $$, + $$ drop schema if exists public cascade; $$, + $$ drop schema if exists purchasing cascade; $$, + $$ drop schema if exists sales cascade; $$;