Add a new database source type in the parser.

Now it's possible to parse a command to load data from MS SQL. The
parser was until now parsing all database URI within the same common
rule and that isn't possible anymore if we want to distinguish in
between source database right from the parser, which we actually want to
do.

This patch also implement in-passing fixes all over the place, including
the transformation function float-to-string that only happened to work
on double-float data.
This commit is contained in:
Dimitri Fontaine 2014-11-17 00:23:06 +01:00
parent fff756f95f
commit 87e157bee2
12 changed files with 248 additions and 98 deletions

View File

@ -8,8 +8,11 @@ CL = sbcl
# default to 4096 MB of RAM size in the image
DYNSIZE = 4096
LISP_SRC = $(wildcard src/*lisp) \
$(wildcard src/pgsql/*lisp) \
LISP_SRC = $(wildcard src/*lisp) \
$(wildcard src/monkey/*lisp) \
$(wildcard src/utils/*lisp) \
$(wildcard src/parsers/*lisp) \
$(wildcard src/pgsql/*lisp) \
$(wildcard src/sources/*lisp) \
pgloader.asd

View File

@ -92,6 +92,7 @@
(:file "command-dbf")
(:file "command-cast-rules")
(:file "command-mysql")
(:file "command-mssql")
(:file "command-sqlite")
(:file "command-archive")
(:file "command-parser")
@ -104,7 +105,8 @@
;; Source format specific implementations
(:module sources
:depends-on ("params"
:depends-on ("monkey" ; mssql driver patches
"params"
"package"
"sources-api"
"pgsql"

View File

@ -352,6 +352,7 @@
#:*default-cast-rules*
#:*cast-rules*)
(:import-from #:pgloader.mysql #:*mysql-default-cast-rules*)
(:import-from #:pgloader.mssql #:*mssql-default-cast-rules*)
(:import-from #:pgloader.sqlite #:*sqlite-default-cast-rules*)
(:export #:parse-commands
#:run-commands

View File

@ -65,8 +65,16 @@
(:identity t))
(defrule dsn-hostname (and (? hostname) (? dsn-port))
(:destructure (hostname &optional port)
(append (list :host hostname) port)))
(:lambda (host-port)
(destructuring-bind (host &optional port) host-port
(append (list :host
(when host
(destructuring-bind (type &optional name) host
(ecase type
(:unix (if name (cons :unix name) :unix))
(:ipv4 name)
(:host name)))))
port))))
(defrule dsn-dbname (and "/" (? namestring))
(:destructure (slash dbname)
@ -83,90 +91,52 @@
(declare (ignore qm))
(list :table-name name)))
(defrule dsn-prefix (and (or "postgresql" "pgsql" "mysql" "syslog") "://")
(:lambda (db)
(bind (((prefix _) db))
(cond ((string= "postgresql" prefix) (list :type :postgresql))
((string= "pgsql" prefix) (list :type :postgresql))
((string= "mysql" prefix) (list :type :mysql))
((string= "syslog" prefix) (list :type :syslog))))))
(defrule pgsql-prefix (and (or "postgresql" "pgsql") "://")
(:constant (list :type :postgresql)))
(defrule db-connection-uri (and dsn-prefix
(? dsn-user-password)
(? dsn-hostname)
dsn-dbname
(? dsn-table-name))
(defrule pgsql-uri (and pgsql-prefix
(? dsn-user-password)
(? dsn-hostname)
dsn-dbname
(? dsn-table-name))
(:lambda (uri)
(destructuring-bind (&key type
user
user
password
host
port
dbname
table-name)
(apply #'append uri)
;;
table-name)
(apply #'append uri)
;; Default to environment variables as described in
;; http://www.postgresql.org/docs/9.3/static/app-psql.html
;; http://dev.mysql.com/doc/refman/5.0/en/environment-variables.html
;;
(let ((user
(or user
(case type
(:postgresql
(getenv-default "PGUSER"
#+unix (getenv-default "USER")
#-unix (getenv-default "UserName")))
(:mysql
(getenv-default "USER")))))
(password (or password
(case type
(:postgresql (getenv-default "PGPASSWORD"))
(:mysql (getenv-default "MYSQL_PWD"))))))
(list :type type
:user user
:password password
:host (or (when host
(destructuring-bind (type &optional name) host
(ecase type
(:unix (if name (cons :unix name) :unix))
(:ipv4 name)
(:host name))))
(case type
(:postgresql (getenv-default "PGHOST"
#+unix :unix
#-unix "localhost"))
(:mysql (getenv-default "MYSQL_HOST" "localhost"))))
:port (or port
(parse-integer
;; avoid a NIL is not a STRING style warning by
;; using ecase here
(ecase type
(:postgresql (getenv-default "PGPORT" "5432"))
(:mysql (getenv-default "MYSQL_TCP_PORT" "3306")))))
:dbname (or dbname
(case type
(:postgresql (getenv-default "PGDATABASE" user))))
:table-name table-name)))))
(list :type type
:user (or user
(getenv-default "PGUSER"
#+unix (getenv-default "USER")
#-unix (getenv-default "UserName")))
:password (or password (getenv-default "PGPASSWORD"))
:host (or host (getenv-default "PGHOST"
#+unix :unix
#-unix "localhost"))
:port (or port (parse-integer
(getenv-default "PGPORT" "5432")))
:dbname (or dbname (getenv-default "PGDATABASE" user))
:table-name table-name))))
(defrule get-dburi-from-environment-variable (and kw-getenv name)
(defrule get-pgsql-uri-from-environment-variable (and kw-getenv name)
(:lambda (p-e-v)
(bind (((_ varname) p-e-v))
(let ((connstring (getenv-default varname)))
(unless connstring
(error "Environment variable ~s is unset." varname))
(parse 'db-connection-uri connstring)))))
(parse 'pgsql-uri connstring)))))
(defrule target (and kw-into (or db-connection-uri
get-dburi-from-environment-variable))
(defrule target (and kw-into (or pgsql-uri
get-pgsql-uri-from-environment-variable))
(:destructure (into target)
(declare (ignore into))
(destructuring-bind (&key type &allow-other-keys) target
(unless (eq type :postgresql)
(error "The target must be a PostgreSQL connection string."))
target)))
target))
(defun pgsql-connection-bindings (pg-db-uri gucs)

View File

@ -0,0 +1,121 @@
;;;
;;; Parse the pgloader commands grammar
;;;
(in-package :pgloader.parser)
(defun mssql-connection-bindings (ms-db-uri)
"Generate the code needed to set MSSQL connection bindings."
(destructuring-bind (&key ((:host mshost))
((:port msport))
((:user msuser))
((:password mspass))
((:dbname msdb))
&allow-other-keys)
ms-db-uri
`((*msconn-host* ',mshost)
(*msconn-port* ,msport)
(*msconn-user* ,msuser)
(*msconn-pass* ,mspass)
(*ms-dbname* ,msdb))))
;;;
;;; Allow clauses to appear in any order
;;;
(defrule load-mssql-optional-clauses (* (or mysql-options
gucs
casts
before-load
after-load))
(:lambda (clauses-list)
(alexandria:alist-plist clauses-list)))
(defrule mssql-prefix "mssql://" (:constant (list :type :mssql)))
(defrule mssql-uri (and mssql-prefix
(? dsn-user-password)
(? dsn-hostname)
dsn-dbname)
(:lambda (uri)
(destructuring-bind (&key type
user
password
host
port
dbname)
(apply #'append uri)
;; Default to environment variables as described in
;; http://www.freetds.org/userguide/envvar.htm
(list :type type
:user (or user (getenv-default "USER"))
:password password
:host (or host (getenv-default "TDSHOST" "localhost"))
:port (or port (parse-integer
(getenv-default "TDSPORT" "1433")))
:dbname dbname))))
(defrule get-mssql-uri-from-environment-variable (and kw-getenv name)
(:lambda (p-e-v)
(bind (((_ varname) p-e-v))
(let ((connstring (getenv-default varname)))
(unless connstring
(error "Environment variable ~s is unset." varname))
(parse 'mssql-uri connstring)))))
(defrule mssql-source (and kw-load kw-database kw-from
(or mssql-uri
get-mssql-uri-from-environment-variable))
(:lambda (source) (bind (((_ _ _ uri) source)) uri)))
(defrule load-mssql-command (and mssql-source target
load-mssql-optional-clauses)
(:lambda (command)
(destructuring-bind (source target clauses) command
`(,source ,target ,@clauses))))
;;; LOAD DATABASE FROM mssql://
(defrule load-mssql-database load-mssql-command
(:lambda (source)
(bind (((ms-db-uri pg-db-uri
&key
gucs casts before after
((:mssql-options options))) source)
((&key ((:dbname msdb)) table-name
&allow-other-keys) ms-db-uri)
((&key ((:dbname pgdb)) &allow-other-keys) pg-db-uri))
`(lambda ()
(let* ((state-before (pgloader.utils:make-pgstate))
(*state* (or *state* (pgloader.utils:make-pgstate)))
(state-idx (pgloader.utils:make-pgstate))
(state-after (pgloader.utils:make-pgstate))
(*default-cast-rules* ',*mssql-default-cast-rules*)
(*cast-rules* ',casts)
,@(mssql-connection-bindings ms-db-uri)
,@(pgsql-connection-bindings pg-db-uri gucs)
,@(batch-control-bindings options)
(source
(make-instance 'pgloader.mssql::copy-mssql
:target-db ,pgdb
:source-db ,msdb)))
,(sql-code-block pgdb 'state-before before "before load")
(pgloader.mssql:copy-database source
,@(when table-name
`(:only-tables ',(list table-name)))
:state-before state-before
:state-after state-after
:state-indexes state-idx
,@(remove-batch-control-option options))
,(sql-code-block pgdb 'state-after after "after load")
(report-full-summary "Total import time" *state*
:before state-before
:finally state-after
:parallel state-idx))))))

View File

@ -112,7 +112,44 @@
(:lambda (clauses-list)
(alexandria:alist-plist clauses-list)))
(defrule load-mysql-command (and database-source target
(defrule mysql-prefix "mysql://" (:constant (list :type :mysql)))
(defrule mysql-uri (and mysql-prefix
(? dsn-user-password)
(? dsn-hostname)
dsn-dbname)
(:lambda (uri)
(destructuring-bind (&key type
user
password
host
port
dbname)
(apply #'append uri)
;; Default to environment variables as described in
;; http://dev.mysql.com/doc/refman/5.0/en/environment-variables.html
(list :type type
:user (or user (getenv-default "USER"))
:password (or password (getenv-default "MYSQL_PWD"))
:host (or host (getenv-default "MYSQL_HOST" "localhost"))
:port (or port (parse-integer
(getenv-default "MYSQL_TCP_PORT" "3306")))
:dbname dbname))))
(defrule get-mysql-uri-from-environment-variable (and kw-getenv name)
(:lambda (p-e-v)
(bind (((_ varname) p-e-v))
(let ((connstring (getenv-default varname)))
(unless connstring
(error "Environment variable ~s is unset." varname))
(parse 'mysql-uri connstring)))))
(defrule mysql-source (and kw-load kw-database kw-from
(or mysql-uri
get-mysql-uri-from-environment-variable))
(:lambda (source) (bind (((_ _ _ uri) source)) uri)))
(defrule load-mysql-command (and mysql-source target
load-mysql-optional-clauses)
(:lambda (command)
(destructuring-bind (source target clauses) command

View File

@ -17,6 +17,7 @@
load-dbf-file
load-ixf-file
load-mysql-database
load-mssql-database
load-sqlite-database
;; load-syslog-messages
)

View File

@ -62,9 +62,3 @@
(:destructure (load-from ws source)
(declare (ignore load-from ws))
source))
(defrule database-source (and kw-load kw-database kw-from
(or db-connection-uri
get-dburi-from-environment-variable))
(:lambda (source)
(bind (((_ _ _ uri) source)) uri)))

View File

@ -109,31 +109,32 @@
(defmethod copy-database ((mssql copy-mssql)
&key
state-before
data-only
schema-only
state-after
state-indexes
(truncate nil)
(data-only nil)
(schema-only nil)
(create-tables t)
(include-drop t)
(create-indexes t)
(reset-sequences t)
only-tables
including
excluding
(foreign-keys t)
(identifier-case :downcase)
(encoding :utf-8))
(encoding :utf-8)
only-tables)
"Stream the given MS SQL database down to PostgreSQL."
(declare (ignore create-indexes reset-sequences))
(declare (ignore create-indexes reset-sequences foreign-keys))
(let* ((summary (null *state*))
(*state* (or *state* (make-pgstate)))
(idx-state (or state-indexes (make-pgstate)))
(state-before (or state-before (make-pgstate)))
(idx-state (make-pgstate))
(seq-state (make-pgstate))
(state-after (or state-after (make-pgstate)))
(cffi:*default-foreign-encoding* encoding)
(copy-kernel (make-kernel 2))
(all-columns (filter-column-list (list-all-columns)
:only-tables only-tables
:including including
:excluding excluding))
(all-columns (filter-column-list
(with-mssql-connection ()
(list-all-columns))
:only-tables only-tables))
;; (all-indexes (filter-column-list (list-all-indexes)
;; :only-tables only-tables
;; :including including
@ -194,6 +195,7 @@
identifier-case)
:fields columns)))
(log-message :debug "TARGET: ~a" (target table-source))
(log-message :log "target: ~s" table-source)
;; COPY the data to PostgreSQL, using copy-kernel
(unless schema-only
(copy-from table-source :kernel copy-kernel)))))
@ -202,7 +204,8 @@
(let ((lp:*kernel* copy-kernel)) (lp:end-kernel))
;; and report the total time spent on the operation
(report-full-summary "Total streaming time" *state*
:before state-before
:finally seq-state
:parallel idx-state)))
(when summary
(report-full-summary "Total streaming time" *state*
:before state-before
:finally state-after
:parallel idx-state))))

View File

@ -36,7 +36,7 @@
(:source (:type "numeric") :target (:type "numeric")
:using pgloader.transforms::float-to-string)
(:source (:type "money") :target (:type "numeric")
(:source (:type "money") :target (:type "numeric")
:using pgloader.transforms::float-to-string)
(:source (:type "smallmoney") :target (:type "numeric")

View File

@ -151,8 +151,10 @@
accepted by PostgreSQL, that is 100.0 rather than 100.0d0."
(declare (type (or null float) float))
(when float
(let ((*read-default-float-format* 'double-float))
(princ-to-string float))))
(typecase float
(double-float (let ((*read-default-float-format* 'double-float))
(princ-to-string float)))
(t (princ-to-string float)))))
(defun set-to-enum-array (set-string)
"Transform a MySQL SET value into a PostgreSQL ENUM Array"

View File

@ -0,0 +1,16 @@
load database
from mssql://dim:pass@localhost/AdventureWorks2008R2
into postgresql:///advworks
-- WITH include drop, create tables, no truncate,
-- create indexes, reset sequences, foreign keys
SET maintenance_work_mem to '128MB', work_mem to '12MB'
BEFORE LOAD DO
$$ drop schema if exists humanresources cascade; $$,
$$ drop schema if exists person cascade; $$,
$$ drop schema if exists production cascade; $$,
$$ drop schema if exists public cascade; $$,
$$ drop schema if exists purchasing cascade; $$,
$$ drop schema if exists sales cascade; $$;