diff --git a/pgloader.asd b/pgloader.asd index e40367a..306dca2 100644 --- a/pgloader.asd +++ b/pgloader.asd @@ -62,9 +62,12 @@ (:file "transforms") (:file "read-sql-files"))) + ;; generic connection api + (:file "connection" :depends-on ("utils")) + ;; package pgloader.pgsql (:module pgsql - :depends-on ("package" "params" "utils") + :depends-on ("package" "params" "utils" "connection") :components ((:file "copy-format") (:file "queries") @@ -75,7 +78,8 @@ "schema")))) (:module "parsers" - :depends-on ("params" "package" "utils" "pgsql" "monkey") + :depends-on ("params" "package" "utils" + "pgsql" "monkey" "connection") :serial t :components ((:file "parse-ini") @@ -101,52 +105,69 @@ ;; generic API for Sources (:file "sources-api" :pathname "sources" - :depends-on ("params" "package" "utils" "parsers")) + :depends-on ("params" "package" "utils" + "parsers" "connection")) ;; Source format specific implementations (:module sources :depends-on ("monkey" ; mssql driver patches "params" "package" + "connection" "sources-api" "pgsql" "utils" "queue") :components - ((:file "csv") - (:file "fixed") - (:file "db3") - (:file "ixf") - (:file "syslog") + ((:module "csv" + :components + ((:file "csv-guess") + (:file "csv-database") + (:file "csv"))) - (:module "sqlite-utils" - :pathname "sqlite" + (:file "fixed" + :depends-on ("csv")) + + (:module "db3" + :components + ((:file "db3-schema") + (:file "db3" :depends-on ("db3-schema")))) + + (:module "ixf" + :components + ((:file "ixf-schema") + (:file "ixf" :depends-on ("ixf-schema")))) + + (:file "syslog") ; experimental... 
+ + (:module "sqlite" :components ((:file "sqlite-cast-rules") (:file "sqlite-schema" - :depends-on ("sqlite-cast-rules")))) + :depends-on ("sqlite-cast-rules")) + (:file "sqlite" + :depends-on ("sqlite-cast-rules" + "sqlite-schema")))) - (:file "sqlite" :depends-on ("sqlite-utils")) - - (:module "mssql-utils" - :pathname "mssql" + (:module "mssql" :components ((:file "mssql-cast-rules") (:file "mssql-schema" - :depends-on ("mssql-cast-rules")))) + :depends-on ("mssql-cast-rules")) + (:file "mssql" + :depends-on ("mssql-cast-rules" + "mssql-schema")))) - (:file "mssql" :depends-on ("mssql-utils")) - - (:module "mysql-utils" - :pathname "mysql" + (:module "mysql" :components ((:file "mysql-cast-rules") (:file "mysql-schema" :depends-on ("mysql-cast-rules")) (:file "mysql-csv" - :depends-on ("mysql-schema")))) - - (:file "mysql" :depends-on ("mysql-utils")))) + :depends-on ("mysql-schema")) + (:file "mysql" + :depends-on ("mysql-cast-rules" + "mysql-schema")))))) ;; the main entry file, used when building a stand-alone ;; executable image diff --git a/src/connection.lisp b/src/connection.lisp new file mode 100644 index 0000000..612016f --- /dev/null +++ b/src/connection.lisp @@ -0,0 +1,122 @@ +;; +;; Abstrat classes to define the API to connect to a data source +;; +(in-package :pgloader.connection) + +(defclass connection () + ((type :initarg :type :accessor conn-type) + (handle :initarg :conn :accessor conn-handle :initform nil)) + (:documentation "pgloader connection parameters, base class")) + +(define-condition connection-error (error) + ((type :initarg :type :reader connection-error-type) + (mesg :initarg :mesg :reader connection-error-mesg))) + +(defgeneric open-connection (connection &key) + (:documentation "Open a connection to the data source.")) + +(defgeneric close-connection (connection) + (:documentation "Close a connection to the data source.")) + +(defclass fd-connection (connection) + ((uri :initarg :uri :accessor fd-uri) + (arch :initarg :arch 
:accessor fd-arch) + (path :initarg :path :accessor fd-path)) + (:documentation "pgloader connection parameters for a file based data source.")) + +(define-condition fd-connection-error (connection-error) + ((path :initarg :path :reader connection-error-path)) + (:report (lambda (err stream) + (format stream "Failed to open ~a file ~s: ~a" + (connection-error-type err) + (connection-error-path err) + (connection-error-mesg err))))) + +(defmethod print-object ((fd fd-connection) stream) + (print-unreadable-object (fd stream :type t :identity t) + (let ((url (cond ((and (slot-boundp fd 'path) (slot-value fd 'path)) + (slot-value fd 'path)) + ((and (slot-boundp fd 'arch) (slot-value fd 'arch)) + (slot-value fd 'arch)) + ((and (slot-boundp fd 'uri) (slot-value fd 'uri)) + (slot-value fd 'uri))))) + (with-slots (type) fd + (format stream "~a://~a" type url))))) + +(defgeneric fetch-file (fd-connection) + (:documentation "Suport for HTTP URI for files.")) + +(defgeneric expand (fd-connection) + (:documentation "Suport for file archives.")) + +(defmethod expand ((fd fd-connection)) + "Expand the archive for the FD connection." + (when (and (slot-boundp fd 'arch) (slot-value fd 'arch)) + (let ((archive-directory (expand-archive (fd-arch fd)))) + ;; if there's a single file in the archive, it must the the path + (let ((files (uiop:directory-files archive-directory))) + (if (= 1 (length files)) + (setf (fd-path fd) (first files)) + (setf (fd-path fd) archive-directory))))) + fd) + +(defmethod fetch-file ((fd fd-connection)) + "When the fd-connection has an URI slot, download its file." 
+ (when (and (slot-boundp fd 'uri) (slot-value fd 'uri)) + (let ((local-filename (http-fetch-file (fd-uri fd)))) + (if (archivep local-filename) + (setf (fd-arch fd) local-filename) + (setf (fd-path fd) local-filename)))) + fd) + +(defclass db-connection (connection) + ((name :initarg :name :accessor db-name) + (host :initarg :host :accessor db-host) + (port :initarg :port :accessor db-port) + (user :initarg :user :accessor db-user) + (pass :initarg :pass :accessor db-pass)) + (:documentation "pgloader connection parameters for a database service.")) + +(defmethod print-object ((c db-connection) stream) + (print-unreadable-object (c stream :type t :identity t) + (with-slots (type name host port user) c + (format stream "~a://~a@~a:~a/~a" type user host port name)))) + +(define-condition db-connection-error (connection-error) + ((host :initarg :host :reader connection-error-host) + (port :initarg :port :reader connection-error-port) + (user :initarg :user :reader connection-error-user)) + (:report (lambda (err stream) + (format stream "Failed to connect to ~a at ~s ~@[(port ~d)~]~@[ as user ~s~]: ~a" + (connection-error-type err) + (connection-error-host err) + (connection-error-port err) + (connection-error-user err) + (connection-error-mesg err))))) + +(defmacro with-connection ((var connection) &body forms) + "Connect to DB-CONNECTION and handle any condition when doing so, and when + connected execute FORMS in a protected way so that we always disconnect + at the end." 
+ (let ((conn (gensym "conn"))) + `(let* ((,conn ,connection) + (,var (handler-case + (open-connection ,conn) + (condition (e) + (cond ((typep ,connection 'fd-connection) + (error 'fd-connection-error + :mesg (format nil "~a" e) + :type (conn-type ,conn) + :path (fd-path ,conn))) + + ((typep ,connection 'db-connection) + (error 'db-connection-error + :mesg (format nil "~a" e) + :type (conn-type ,conn) + :host (db-host ,conn) + :port (db-port ,conn) + :user (db-user ,conn)))))))) + (unwind-protect + (progn ,@forms) + (close-connection ,var))))) + diff --git a/src/main.lisp b/src/main.lisp index ab8d764..df4f349 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -281,53 +281,50 @@ ;; line, try and process them as source and target ;; arguments (if (= 2 (length arguments)) - (let ((type (parse-cli-type type)) - (source (first arguments)) - (target (parse-target-string (second arguments)))) + (let* ((type (parse-cli-type type)) + (source (first arguments)) + (source (if type + (parse-source-string-for-type type source) + (parse-source-string source))) + (target (parse-target-string (second arguments)))) - (multiple-value-bind (type source) - (if type - (parse-source-string-for-type type source) - (parse-source-string source)) + ;; some verbosity about the parsing "magic" + (log-message :info "SOURCE: ~s" source) + (log-message :info "TARGET: ~s" target) - ;; some verbosity about the parsing "magic" - (log-message :info "SOURCE: ~s" source) - (log-message :info "TARGET: ~s" target) + (cond ((and (null source) (null target) + (probe-file + (uiop:parse-unix-namestring + (first arguments))) + (probe-file + (uiop:parse-unix-namestring + (second arguments)))) + (mapcar #'process-command-file arguments)) - (cond ((and (null source) (null target) - (probe-file - (uiop:parse-unix-namestring - (first arguments))) - (probe-file - (uiop:parse-unix-namestring - (second arguments)))) - (mapcar #'process-command-file arguments)) + ((null source) + (log-message :fatal + "Failed to 
parse ~s as a source URI." + (first arguments)) + (log-message :log "You might need to use --type.")) - ((null source) - (log-message :fatal - "Failed to parse ~s as a source URI." - (first arguments)) - (log-message :log "You might need to use --type.")) + ((null target) + (log-message :fatal + "Failed to parse ~s as a PostgreSQL database URI." + (second arguments)))) - ((null target) - (log-message :fatal - "Failed to parse ~s as a PostgreSQL database URI." - (second arguments)))) - - ;; so, we actually have all the specs for the - ;; job on the command line now. - (when (and source target) - (let ((type (or type (getf source :type)))) - (load-data :from source - :into target - :encoding (parse-cli-encoding encoding) - :options (parse-cli-options type with) - :gucs (parse-cli-gucs set) - :type type - :fields (parse-cli-fields type field) - :casts (parse-cli-casts cast) - :before (parse-sql-file before) - :after (parse-sql-file after)))))) + ;; so, we actually have all the specs for the + ;; job on the command line now. + (when (and source target) + (load-data :from source + :into target + :encoding (parse-cli-encoding encoding) + :options (parse-cli-options type with) + :gucs (parse-cli-gucs set) + :fields (parse-cli-fields type field) + :casts (parse-cli-casts cast) + :before (parse-sql-file before) + :after (parse-sql-file after) + :start-logger nil))) ;; process the files (mapcar #'process-command-file arguments))) @@ -352,71 +349,78 @@ ;;; Main API to use from outside of pgloader. ;;; (defun load-data (&key ((:from source)) ((:into target)) - (type (getf source :type)) encoding fields options gucs casts before after - start-logger) + (start-logger t)) "Load data from SOURCE into TARGET." 
+ (declare (type connection source) + (type pgsql-connection target)) (with-monitor (:start-logger start-logger) ;; some preliminary checks - (when (and (eq source :stdin) (null type)) - (log-message :fatal "When loading from stdin, option --type is mandatory.") + (when (and (typep source 'csv-connection) (null fields)) + (log-message :fatal "This source type requires --fields arguments.") (return-from load-data)) - (when (and (member type '(:csv :fixed)) (null fields)) - (log-message :fatal "Source type ~a requires --fields arguments." type) - (return-from load-data)) - - (when (and casts (not (member type '(:sqlite :mysql :mssql)))) - (log-message :log "Cast rules are not considered for ~s sources." type)) + (when (and casts (not (member (type-of source) + '(sqlite-connection + mysql-connection + mssql-connection)))) + (log-message :log "Cast rules are ignored for this sources.")) ;; now generates the code for the command - (log-message :debug "LOAD DATA ~a FROM ~s" type source) + (log-message :debug "LOAD DATA FROM ~s" source) (run-commands (process-relative-pathnames (uiop:getcwd) - (case type - (:csv (lisp-code-for-loading-from-csv source fields target - :encoding encoding - :gucs gucs - :csv-options options - :before before - :after after)) + (typecase source + (csv-connection + (lisp-code-for-loading-from-csv source fields target + :encoding encoding + :gucs gucs + :csv-options options + :before before + :after after)) - (:fixed (lisp-code-for-loading-from-fixed source fields target - :encoding encoding - :gucs gucs - :fixed-options options - :before before - :after after)) + (fixed-connection + (lisp-code-for-loading-from-fixed source fields target + :encoding encoding + :gucs gucs + :fixed-options options + :before before + :after after)) - (:dbf (lisp-code-for-loading-from-dbf source target - :gucs gucs - :dbf-options options - :before before - :after after)) + (dbf-connection + (lisp-code-for-loading-from-dbf source target + :gucs gucs + :dbf-options 
options + :before before + :after after)) - (:ixf (lisp-code-for-loading-from-ixf source target - :gucs gucs - :ixf-options options - :before before - :after after)) + (ixf-connection + (lisp-code-for-loading-from-ixf source target + :gucs gucs + :ixf-options options + :before before + :after after)) - (:sqlite (lisp-code-for-loading-from-sqlite source target - :gucs gucs - :casts casts - :sqlite-options options)) + (sqlite-connection + (lisp-code-for-loading-from-sqlite source target + :gucs gucs + :casts casts + :sqlite-options options)) - (:mysql (lisp-code-for-loading-from-mysql source target - :gucs gucs - :casts casts - :mysql-options options - :before before - :after after)) + (mysql-connection + (lisp-code-for-loading-from-mysql source target + :gucs gucs + :casts casts + :mysql-options options + :before before + :after after)) - (:mssql (lisp-code-for-loading-from-mssql source target - :gucs gucs - :casts casts - :mssql-options options - :before before - :after after)))) + (mssql-connection + (lisp-code-for-loading-from-mssql source target + :gucs gucs + :casts casts + :mssql-options options + :before before + :after after)))) :start-logger start-logger))) diff --git a/src/package.lisp b/src/package.lisp index 8d2be26..120d8e6 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -66,6 +66,20 @@ #:show-encodings #:make-external-format)) + +;; +;; Not really a source, more a util package to deal with http and zip +;; +(defpackage #:pgloader.archive + (:use #:cl #:pgloader.params) + (:import-from #:pgloader.monitor + #:log-message) + (:export #:*supporter-archive-types* + #:archivep + #:http-fetch-file + #:expand-archive + #:get-matching-filenames)) + ;;; ;;; PostgreSQL COPY support, and generic sources API. 
@@ -75,8 +89,35 @@ (:export #:parse-date-string #:parse-date-format)) +(defpackage #:pgloader.connection + (:use #:cl #:pgloader.archive) + (:export #:connection + #:open-connection + #:close-connection + #:fd-connection + #:db-connection + #:connection-error + #:fd-connection-error + #:db-connection-error + #:with-connection + + ;; file based connections API for HTTP and Archives support + #:fetch-file + #:expand + + ;; connection classes accessors + #:conn-type + #:conn-handle + #:db-conn + #:fd-path + #:db-name + #:db-host + #:db-port + #:db-user + #:db-pass)) + (defpackage #:pgloader.sources - (:use #:cl #:pgloader.params #:pgloader.utils) + (:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.connection) (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.parse-date #:parse-date-string @@ -95,9 +136,6 @@ #:copy-to #:copy-database - ;; conditions, error handling - #:connection-error - ;; file based utils for CSV, fixed etc #:filter-column-list #:with-open-file-or-stream @@ -112,13 +150,16 @@ #:cast)) (defpackage #:pgloader.pgsql - (:use #:cl #:pgloader.params #:pgloader.utils) - (:import-from #:pgloader.sources - #:connection-error) - (:export #:with-pgsql-transaction + (:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.connection) + (:export #:pgsql-connection + #:pgconn-use-ssl + #:pgconn-table-name + #:new-pgsql-connection + #:with-pgsql-transaction #:with-pgsql-connection #:pgsql-execute #:pgsql-execute-with-timing + #:pgsql-connect-and-execute-with-timing #:truncate-tables #:copy-from-file #:copy-from-queue @@ -186,9 +227,12 @@ ;; (defpackage #:pgloader.csv (:use #:cl - #:pgloader.params #:pgloader.utils + #:pgloader.params #:pgloader.utils #:pgloader.connection #:pgloader.sources #:pgloader.queue) (:export #:*csv-path-root* + #:csv-connection + #:specs + #:csv-specs #:get-pathname #:copy-csv #:copy-to-queue @@ -199,15 +243,20 @@ (defpackage #:pgloader.fixed (:use #:cl - #:pgloader.params #:pgloader.utils + 
#:pgloader.params #:pgloader.utils #:pgloader.connection #:pgloader.sources #:pgloader.queue) - (:export #:copy-fixed + (:import-from #:pgloader.csv + #:csv-connection + #:specs + #:csv-specs) + (:export #:fixed-connection + #:copy-fixed #:copy-to-queue #:copy-from)) (defpackage #:pgloader.ixf (:use #:cl - #:pgloader.params #:pgloader.utils + #:pgloader.params #:pgloader.utils #:pgloader.connection #:pgloader.sources #:pgloader.queue) (:import-from #:pgloader.pgsql #:with-pgsql-transaction @@ -217,14 +266,15 @@ #:create-tables #:format-pgsql-column #:format-vector-row) - (:export #:copy-ixf + (:export #:ixf-connection + #:copy-ixf #:map-rows #:copy-to-queue #:copy-from)) (defpackage #:pgloader.db3 (:use #:cl - #:pgloader.params #:pgloader.utils + #:pgloader.params #:pgloader.utils #:pgloader.connection #:pgloader.sources #:pgloader.queue) (:import-from #:pgloader.pgsql #:with-pgsql-transaction @@ -234,7 +284,8 @@ #:create-tables #:format-pgsql-column #:format-vector-row) - (:export #:copy-db3 + (:export #:dbf-connection + #:copy-db3 #:map-rows #:copy-to #:copy-to-queue @@ -242,10 +293,12 @@ (defpackage #:pgloader.mysql (:use #:cl - #:pgloader.params #:pgloader.utils + #:pgloader.params #:pgloader.utils #:pgloader.connection #:pgloader.sources #:pgloader.queue) (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.pgsql + #:new-pgsql-connection + #:with-pgsql-connection #:with-pgsql-transaction #:pgsql-execute #:pgsql-execute-with-timing @@ -267,7 +320,8 @@ #:set-table-oids #:format-vector-row #:reset-sequences) - (:export #:copy-mysql + (:export #:mysql-connection + #:copy-mysql #:*mysql-default-cast-rules* #:with-mysql-connection #:map-rows @@ -281,7 +335,7 @@ (defpackage #:pgloader.sqlite (:use #:cl - #:pgloader.params #:pgloader.utils + #:pgloader.params #:pgloader.utils #:pgloader.connection #:pgloader.sources #:pgloader.queue) (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.pgsql @@ -298,7 +352,8 @@ 
#:create-indexes-in-kernel #:set-table-oids #:reset-sequences) - (:export #:copy-sqlite + (:export #:sqlite-connection + #:copy-sqlite #:*sqlite-default-cast-rules* #:map-rows #:copy-to @@ -308,13 +363,16 @@ (defpackage #:pgloader.mssql (:use #:cl - #:pgloader.params #:pgloader.utils + #:pgloader.params #:pgloader.utils #:pgloader.connection #:pgloader.sources #:pgloader.queue) (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.pgsql + #:new-pgsql-connection + #:with-pgsql-connection #:with-pgsql-transaction #:pgsql-execute #:pgsql-execute-with-timing + #:pgsql-connect-and-execute-with-timing #:apply-identifier-case #:list-tables-and-fkeys #:list-table-oids @@ -333,7 +391,8 @@ #:set-table-oids #:format-vector-row #:reset-sequences) - (:export #:copy-mssql + (:export #:mssql-connection + #:copy-mssql #:*mssql-default-cast-rules* #:map-rows #:copy-to @@ -350,35 +409,40 @@ #:start-syslog-server #:send-message)) - -;; -;; Not really a source, more a util package to deal with http and zip -;; -(defpackage #:pgloader.archive - (:use #:cl #:pgloader.params) - (:import-from #:pgloader.monitor - #:log-message) - (:export #:http-fetch-file - #:expand-archive - #:get-matching-filenames)) - ;;; ;;; The Command Parser ;;; (defpackage #:pgloader.parser (:use #:cl #:esrap #:metabang.bind - #:pgloader.params #:pgloader.utils #:pgloader.sql) + #:pgloader.params #:pgloader.utils #:pgloader.sql #:pgloader.connection) (:import-from #:alexandria #:read-file-into-string) (:import-from #:pgloader.pgsql + #:pgsql-connection #:with-pgsql-transaction - #:pgsql-execute) + #:pgsql-execute + #:pgconn-use-ssl + #:pgconn-table-name) + (:import-from #:pgloader.csv + #:csv-connection + #:specs + #:csv-specs) + (:import-from #:pgloader.fixed + #:fixed-connection) (:import-from #:pgloader.sources #:*default-cast-rules* #:*cast-rules*) - (:import-from #:pgloader.mysql #:*mysql-default-cast-rules*) - (:import-from #:pgloader.mssql #:*mssql-default-cast-rules*) - 
(:import-from #:pgloader.sqlite #:*sqlite-default-cast-rules*) + (:import-from #:pgloader.mysql + #:mysql-connection + #:*mysql-default-cast-rules*) + (:import-from #:pgloader.mssql + #:mssql-connection + #:*mssql-default-cast-rules*) + (:import-from #:pgloader.sqlite + #:sqlite-connection + #:*sqlite-default-cast-rules*) + (:import-from #:pgloader.db3 #:dbf-connection) + (:import-from #:pgloader.ixf #:ixf-connection) (:export #:parse-commands #:run-commands @@ -395,6 +459,16 @@ #:parse-cli-casts #:parse-sql-file + ;; connection types / classes symbols for use in main + #:connection + #:csv-connection + #:fixed-connection + #:dbf-connection + #:ixf-connection + #:sqlite-connection + #:mysql-connection + #:mssql-connection + ;; functions to generate lisp code from parameters #:lisp-code-for-loading-from-mysql #:lisp-code-for-loading-from-csv @@ -411,10 +485,11 @@ (defpackage #:pgloader (:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.parser) (:import-from #:pgloader.pgsql + #:pgsql-connection #:copy-from-file #:list-databases #:list-tables) - (:import-from #:pgloader.sources + (:import-from #:pgloader.connection #:connection-error) (:export #:*version-string* #:*state* diff --git a/src/params.lisp b/src/params.lisp index 4ee427b..4c83ce0 100644 --- a/src/params.lisp +++ b/src/params.lisp @@ -17,19 +17,7 @@ #:*copy-batch-rows* #:*copy-batch-size* #:*concurrent-batches* - #:*pgconn* - #:pgconn-dbname #:*pg-settings* - #:*myconn-host* - #:*myconn-port* - #:*myconn-user* - #:*myconn-pass* - #:*my-dbname* - #:*msconn-host* - #:*msconn-port* - #:*msconn-user* - #:*msconn-pass* - #:*ms-dbname* #:*state* #:*default-tmpdir* #:init-params-from-environment @@ -118,45 +106,8 @@ (defparameter *concurrent-batches* 10 "How many batches do we stack in the queue in advance.") -;;; -;;; PostgreSQL Connection Credentials and Session Settings -;;; -(defparameter *pgconn* - '(:type :postgresql - :host "localhost" - :port (parse-integer (getenv-default "PGPORT" "5432")) - :user 
(uiop:getenv "USER") - :pass "pgpass" - :dbname nil - :table-name nil - :use-ssl nil) - "Default PostgreSQL connection string.") - -(defun pgconn-dbname () - "Return the current dbname from *pgconn* setting." - (destructuring-bind (&key dbname &allow-other-keys) *pgconn* - dbname)) - (defparameter *pg-settings* nil "An alist of GUC names and values.") -;;; -;;; MySQL Connection Credentials -;;; -(defparameter *myconn-host* "localhost") -(defparameter *myconn-port* 3306) -(defparameter *myconn-user* (uiop:getenv "USER")) -(defparameter *myconn-pass* nil) -(defparameter *my-dbname* nil) - -;;; -;;; MSSQL Connection Credentials -;;; -(defparameter *msconn-host* "localhost") -(defparameter *msconn-port* 1433) -(defparameter *msconn-user* (uiop:getenv "USER")) -(defparameter *msconn-pass* nil) -(defparameter *ms-dbname* nil) - ;;; ;;; Archive processing: downloads and unzip. ;;; diff --git a/src/parsers/command-archive.lisp b/src/parsers/command-archive.lisp index 41f8f09..9bd63eb 100644 --- a/src/parsers/command-archive.lisp +++ b/src/parsers/command-archive.lisp @@ -38,35 +38,34 @@ (defrule load-archive load-archive-clauses (:lambda (archive) - (destructuring-bind (source pg-db-uri &key before commands finally) archive - (when (and (or before finally) (null pg-db-uri)) + (destructuring-bind (source pg-db-conn &key before commands finally) archive + (when (and (or before finally) (null pg-db-conn)) (error "When using a BEFORE LOAD DO or a FINALLY block, you must provide an archive level target database connection.")) - (destructuring-bind (&key dbname &allow-other-keys) pg-db-uri - `(lambda () - (let* ((state-before (pgloader.utils:make-pgstate)) - (*state* (pgloader.utils:make-pgstate)) - ,@(pgsql-connection-bindings pg-db-uri nil) - (state-finally ,(when finally `(pgloader.utils:make-pgstate))) - (archive-file - ,(destructuring-bind (kind url) source - (ecase kind - (:http `(with-stats-collection - ("download" :state state-before) - (pgloader.archive:http-fetch-file 
,url))) - (:filename url)))) - (*csv-path-root* - (with-stats-collection ("extract" :state state-before) - (pgloader.archive:expand-archive archive-file)))) - (progn - ,(sql-code-block dbname 'state-before before "before load") + `(lambda () + (let* ((state-before (pgloader.utils:make-pgstate)) + (*state* (pgloader.utils:make-pgstate)) + ,@(pgsql-connection-bindings pg-db-conn nil) + (state-finally ,(when finally `(pgloader.utils:make-pgstate))) + (archive-file + ,(destructuring-bind (kind url) source + (ecase kind + (:http `(with-stats-collection + ("download" :state state-before) + (pgloader.archive:http-fetch-file ,url))) + (:filename url)))) + (*csv-path-root* + (with-stats-collection ("extract" :state state-before) + (pgloader.archive:expand-archive archive-file)))) + (progn + ,(sql-code-block pg-db-conn 'state-before before "before load") - ;; import from files block - ,@(loop for command in commands - collect `(funcall ,command)) + ;; import from files block + ,@(loop for command in commands + collect `(funcall ,command)) - ,(sql-code-block dbname 'state-finally finally "finally") + ,(sql-code-block pg-db-conn 'state-finally finally "finally") - ;; reporting - (report-full-summary "Total import time" *state* - :before state-before - :finally state-finally)))))))) + ;; reporting + (report-full-summary "Total import time" *state* + :before state-before + :finally state-finally))))))) diff --git a/src/parsers/command-csv.lisp b/src/parsers/command-csv.lisp index 5c9c72a..a034663 100644 --- a/src/parsers/command-csv.lisp +++ b/src/parsers/command-csv.lisp @@ -354,10 +354,26 @@ (uiop:native-namestring dir))) `(:regex ,first-or-all ,regex ,root)))) +(defrule csv-uri (and "csv://" filename) + (:lambda (source) + (bind (((_ filename) source)) + (make-instance 'csv-connection :specs filename)))) + (defrule csv-file-source (or stdin inline + http-uri + csv-uri filename-matching - maybe-quoted-filename)) + maybe-quoted-filename) + (:lambda (src) + (if (typep src 
'csv-connection) src + (destructuring-bind (type &rest specs) src + (case type + (:stdin (make-instance 'csv-connection :specs src)) + (:inline (make-instance 'csv-connection :specs src)) + (:filename (make-instance 'csv-connection :specs src)) + (:regex (make-instance 'csv-connection :specs src)) + (:http (make-instance 'csv-connection :uri (first specs)))))))) (defrule get-csv-file-source-from-environment-variable (and kw-getenv name) (:lambda (p-e-v) @@ -371,14 +387,7 @@ (or get-csv-file-source-from-environment-variable csv-file-source)) (:lambda (src) - (bind (((_ _ _ source) src) - ;; source is (:filename #P"pathname/here") - ((type &rest _) source)) - (ecase type - (:stdin source) - (:inline source) - (:filename source) - (:regex source))))) + (bind (((_ _ _ source) src)) source))) (defun list-symbols (expression &optional s) "Return a list of the symbols used in EXPRESSION." @@ -405,44 +414,43 @@ (destructuring-bind (source encoding fields target columns clauses) command `(,source ,encoding ,fields ,target ,columns ,@clauses)))) -(defun lisp-code-for-loading-from-csv (source fields pg-db-uri +(defun lisp-code-for-loading-from-csv (csv-conn fields pg-db-conn &key (encoding :utf-8) columns gucs before after ((:csv-options options))) - (bind (((&key dbname table-name &allow-other-keys) pg-db-uri)) - `(lambda () - (let* ((state-before ,(when before `(pgloader.utils:make-pgstate))) - (summary (null *state*)) - (*state* (or *state* (pgloader.utils:make-pgstate))) - (state-after ,(when after `(pgloader.utils:make-pgstate))) - ,@(pgsql-connection-bindings pg-db-uri gucs) - ,@(batch-control-bindings options)) + `(lambda () + (let* ((state-before ,(when before `(pgloader.utils:make-pgstate))) + (summary (null *state*)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (state-after ,(when after `(pgloader.utils:make-pgstate))) + ,@(pgsql-connection-bindings pg-db-conn gucs) + ,@(batch-control-bindings options)) - (progn - ,(sql-code-block dbname 'state-before 
before "before load") + (progn + ,(sql-code-block pg-db-conn 'state-before before "before load") - (let ((truncate (getf ',options :truncate)) - (source - (make-instance 'pgloader.csv:copy-csv - :target-db ,dbname - :source ',source - :target ,table-name - :encoding ,encoding - :fields ',fields - :columns ',columns - ,@(remove-batch-control-option - options :extras '(:truncate))))) - (pgloader.sources:copy-from source :truncate truncate)) + (let ((truncate (getf ',options :truncate)) + (source + (make-instance 'pgloader.csv:copy-csv + :target-db ,pg-db-conn + :source ,(expand (fetch-file csv-conn)) + :target ,(pgconn-table-name pg-db-conn) + :encoding ,encoding + :fields ',fields + :columns ',columns + ,@(remove-batch-control-option + options :extras '(:truncate))))) + (pgloader.sources:copy-from source :truncate truncate)) - ,(sql-code-block dbname 'state-after after "after load") + ,(sql-code-block pg-db-conn 'state-after after "after load") - ;; reporting - (when summary - (report-full-summary "Total import time" *state* - :before state-before - :finally state-after))))))) + ;; reporting + (when summary + (report-full-summary "Total import time" *state* + :before state-before + :finally state-after)))))) (defrule load-csv-file load-csv-file-command (:lambda (command) diff --git a/src/parsers/command-db-uri.lisp b/src/parsers/command-db-uri.lisp index 4de0fd9..ac6a5ef 100644 --- a/src/parsers/command-db-uri.lisp +++ b/src/parsers/command-db-uri.lisp @@ -143,20 +143,22 @@ (apply #'append uri) ;; Default to environment variables as described in ;; http://www.postgresql.org/docs/9.3/static/app-psql.html - (list :type type - :user (or user - (getenv-default "PGUSER" - #+unix (getenv-default "USER") - #-unix (getenv-default "UserName"))) - :password (or password (getenv-default "PGPASSWORD")) - :host (or host (getenv-default "PGHOST" - #+unix :unix - #-unix "localhost")) - :port (or port (parse-integer - (getenv-default "PGPORT" "5432"))) - :use-ssl use-ssl - :dbname 
(or dbname (getenv-default "PGDATABASE" user)) - :table-name table-name)))) + (declare (ignore type)) + (make-instance 'pgsql-connection + :user (or user + (getenv-default "PGUSER" + #+unix (getenv-default "USER") + #-unix (getenv-default "UserName"))) + :pass (or password (getenv-default "PGPASSWORD")) + :host (or host (getenv-default "PGHOST" + #+unix :unix + #-unix "localhost")) + :port (or port (parse-integer + (getenv-default "PGPORT" "5432"))) + :name (or dbname (getenv-default "PGDATABASE" user)) + + :use-ssl use-ssl + :table-name table-name)))) (defrule get-pgsql-uri-from-environment-variable (and kw-getenv name) (:lambda (p-e-v) @@ -175,9 +177,7 @@ (defun pgsql-connection-bindings (pg-db-uri gucs) "Generate the code needed to set PostgreSQL connection bindings." - (destructuring-bind (&key ((:dbname pgdb)) &allow-other-keys) pg-db-uri - `((*pgconn* ',pg-db-uri) - (*pg-settings* ',gucs) - (pgloader.pgsql::*pgsql-reserved-keywords* - (pgloader.pgsql:list-reserved-keywords ,pgdb))))) + `((*pg-settings* ',gucs) + (pgloader.pgsql::*pgsql-reserved-keywords* + (pgloader.pgsql:list-reserved-keywords ,pg-db-uri)))) diff --git a/src/parsers/command-dbf.lisp b/src/parsers/command-dbf.lisp index 3fec340..40a349a 100644 --- a/src/parsers/command-dbf.lisp +++ b/src/parsers/command-dbf.lisp @@ -7,7 +7,7 @@ (in-package #:pgloader.parser) (defrule option-create-table (and kw-create kw-table) - (:constant (cons :create-table t))) + (:constant (cons :create-tables t))) (defrule quoted-table-name (and #\' (or qualified-table-name namestring) #\') (:lambda (qtn) @@ -22,7 +22,11 @@ option-batch-size option-batch-concurrency option-truncate + option-data-only + option-schema-only + option-include-drop option-create-table + option-create-tables option-table-name)) (defrule another-dbf-option (and comma dbf-option) @@ -39,7 +43,20 @@ (bind (((_ opts) source)) (cons :dbf-options opts)))) -(defrule dbf-source (and kw-load kw-dbf kw-from filename-or-http-uri) +(defrule dbf-uri (and 
"dbf://" filename) + (:lambda (source) + (bind (((_ filename) source)) + (make-instance 'dbf-connection :path (second filename))))) + +(defrule dbf-file-source (or dbf-uri filename-or-http-uri) + (:lambda (conn-or-path-or-uri) + (if (typep conn-or-path-or-uri 'dbf-connection) conn-or-path-or-uri + (destructuring-bind (kind url) conn-or-path-or-uri + (case kind + (:filename (make-instance 'dbf-connection :path url)) + (:http (make-instance 'dbf-connection :uri url))))))) + +(defrule dbf-source (and kw-load kw-dbf kw-from dbf-file-source) (:lambda (src) (bind (((_ _ _ source) src)) source))) @@ -55,54 +72,40 @@ (destructuring-bind (source target clauses) command `(,source ,target ,@clauses)))) -(defun lisp-code-for-loading-from-dbf (source pg-db-uri +(defun lisp-code-for-loading-from-dbf (dbf-db-conn pg-db-conn &key gucs before after ((:dbf-options options))) - (bind (((&key dbname table-name &allow-other-keys) pg-db-uri)) - `(lambda () - (let* ((state-before (pgloader.utils:make-pgstate)) - (summary (null *state*)) - (*state* (or *state* (pgloader.utils:make-pgstate))) - (state-after ,(when after `(pgloader.utils:make-pgstate))) - ,@(pgsql-connection-bindings pg-db-uri gucs) - ,@(batch-control-bindings options) - ,@(identifier-case-binding options) - (source - ,(bind (((kind url) source)) - (ecase kind - (:http `(with-stats-collection - ("download" :state state-before) - (pgloader.archive:http-fetch-file ,url))) - (:filename url)))) - (source - (if (string= "zip" (pathname-type source)) - (progn - (with-stats-collection ("extract" :state state-before) - (let ((d (pgloader.archive:expand-archive source))) - (make-pathname :defaults d - :name (pathname-name source) - :type "dbf")))) - source)) - (source - (make-instance 'pgloader.db3:copy-db3 - :target-db ,dbname - :source source - :target ,table-name))) + `(lambda () + (let* ((state-before (pgloader.utils:make-pgstate)) + (summary (null *state*)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (state-after 
,(when after `(pgloader.utils:make-pgstate))) + ,@(pgsql-connection-bindings pg-db-conn gucs) + ,@(batch-control-bindings options) + ,@(identifier-case-binding options) + (table-name ,(pgconn-table-name pg-db-conn)) + (source-db (with-stats-collection ("fetch" :state state-before) + (expand (fetch-file ,dbf-db-conn)))) + (source + (make-instance 'pgloader.db3:copy-db3 + :target-db ,pg-db-conn + :source-db source-db + :target table-name))) - ,(sql-code-block dbname 'state-before before "before load") + ,(sql-code-block pg-db-conn 'state-before before "before load") - (pgloader.sources:copy-from source - :state-before state-before - ,@(remove-batch-control-option options)) + (pgloader.sources:copy-database source + :state-before state-before + ,@(remove-batch-control-option options)) - ,(sql-code-block dbname 'state-after after "after load") + ,(sql-code-block pg-db-conn 'state-after after "after load") - ;; reporting - (when summary - (report-full-summary "Total import time" *state* - :before state-before - :finally state-after)))))) + ;; reporting + (when summary + (report-full-summary "Total import time" *state* + :before state-before + :finally state-after))))) (defrule load-dbf-file load-dbf-command (:lambda (command) diff --git a/src/parsers/command-fixed.lisp b/src/parsers/command-fixed.lisp index 3dfed67..71f7e56 100644 --- a/src/parsers/command-fixed.lisp +++ b/src/parsers/command-fixed.lisp @@ -63,11 +63,26 @@ (bind (((_ opts) source)) (cons :fixed-options opts)))) +(defrule fixed-uri (and "fixed://" filename) + (:lambda (source) + (bind (((_ filename) source)) + (make-instance 'fixed-connection :specs filename)))) (defrule fixed-file-source (or stdin inline + http-uri + fixed-uri filename-matching - maybe-quoted-filename)) + maybe-quoted-filename) + (:lambda (src) + (if (typep src 'fixed-connection) src + (destructuring-bind (type &rest specs) src + (case type + (:stdin (make-instance 'fixed-connection :specs src)) + (:inline (make-instance 
'fixed-connection :specs src)) + (:filename (make-instance 'fixed-connection :specs src)) + (:regex (make-instance 'fixed-connection :specs src)) + (:http (make-instance 'fixed-connection :uri (first specs)))))))) (defrule get-fixed-file-source-from-environment-variable (and kw-getenv name) (:lambda (p-e-v) @@ -81,14 +96,7 @@ (or get-fixed-file-source-from-environment-variable fixed-file-source)) (:lambda (src) - (bind (((_ _ _ source) src) - ;; source is (:filename #P"pathname/here") - ((type &rest _) source)) - (ecase type - (:stdin source) - (:inline source) - (:filename source) - (:regex source))))) + (bind (((_ _ _ source) src)) source))) (defrule load-fixed-cols-file-optional-clauses (* (or fixed-options gucs @@ -106,43 +114,42 @@ (destructuring-bind (source encoding fields target columns clauses) command `(,source ,encoding ,fields ,target ,columns ,@clauses)))) -(defun lisp-code-for-loading-from-fixed (source fields pg-db-uri +(defun lisp-code-for-loading-from-fixed (fixed-conn fields pg-db-conn &key (encoding :utf-8) columns gucs before after ((:fixed-options options))) - (bind (((&key dbname table-name &allow-other-keys) pg-db-uri)) - `(lambda () - (let* ((state-before ,(when before `(pgloader.utils:make-pgstate))) - (summary (null *state*)) - (*state* (or *state* (pgloader.utils:make-pgstate))) - (state-after ,(when after `(pgloader.utils:make-pgstate))) - ,@(pgsql-connection-bindings pg-db-uri gucs) - ,@(batch-control-bindings options)) + `(lambda () + (let* ((state-before ,(when before `(pgloader.utils:make-pgstate))) + (summary (null *state*)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (state-after ,(when after `(pgloader.utils:make-pgstate))) + ,@(pgsql-connection-bindings pg-db-conn gucs) + ,@(batch-control-bindings options)) - (progn - ,(sql-code-block dbname 'state-before before "before load") + (progn + ,(sql-code-block pg-db-conn 'state-before before "before load") - (let ((truncate ,(getf options :truncate)) - (source - 
(make-instance 'pgloader.fixed:copy-fixed - :target-db ,dbname - :source ',source - :target ,table-name - :encoding ,encoding - :fields ',fields - :columns ',columns - :skip-lines ,(or (getf options :skip-line) 0)))) - (pgloader.sources:copy-from source :truncate truncate)) + (let ((truncate ,(getf options :truncate)) + (source + (make-instance 'pgloader.fixed:copy-fixed + :target-db ,pg-db-conn + :source ,(expand (fetch-file fixed-conn)) + :target ,(pgconn-table-name pg-db-conn) + :encoding ,encoding + :fields ',fields + :columns ',columns + :skip-lines ,(or (getf options :skip-line) 0)))) + (pgloader.sources:copy-from source :truncate truncate)) - ,(sql-code-block dbname 'state-after after "after load") + ,(sql-code-block pg-db-conn 'state-after after "after load") - ;; reporting - (when summary - (report-full-summary "Total import time" *state* - :before state-before - :finally state-after))))))) + ;; reporting + (when summary + (report-full-summary "Total import time" *state* + :before state-before + :finally state-after)))))) (defrule load-fixed-cols-file load-fixed-cols-file-command (:lambda (command) diff --git a/src/parsers/command-ixf.lisp b/src/parsers/command-ixf.lisp index 4700f98..73a23bc 100644 --- a/src/parsers/command-ixf.lisp +++ b/src/parsers/command-ixf.lisp @@ -7,7 +7,7 @@ (in-package #:pgloader.parser) (defrule option-create-table (and kw-create kw-table) - (:constant (cons :create-table t))) + (:constant (cons :create-tables t))) ;;; piggyback on DBF parsing (defrule ixf-options (and kw-with dbf-option-list) @@ -15,7 +15,20 @@ (bind (((_ opts) source)) (cons :ixf-options opts)))) -(defrule ixf-source (and kw-load kw-ixf kw-from filename-or-http-uri) +(defrule ixf-uri (and "ixf://" filename) + (:lambda (source) + (bind (((_ filename) source)) + (make-instance 'ixf-connection :path (second filename))))) + +(defrule ixf-file-source (or ixf-uri filename-or-http-uri) + (:lambda (conn-or-path-or-uri) + (if (typep conn-or-path-or-uri 'ixf-connection) 
conn-or-path-or-uri + (destructuring-bind (kind url) conn-or-path-or-uri + (case kind + (:filename (make-instance 'ixf-connection :path url)) + (:http (make-instance 'ixf-connection :uri url))))))) + +(defrule ixf-source (and kw-load kw-ixf kw-from ixf-file-source) (:lambda (src) (bind (((_ _ _ source) src)) source))) @@ -31,53 +44,39 @@ (destructuring-bind (source target clauses) command `(,source ,target ,@clauses)))) -(defun lisp-code-for-loading-from-ixf (source pg-db-uri +(defun lisp-code-for-loading-from-ixf (ixf-db-conn pg-db-conn &key gucs before after ((:ixf-options options))) - (bind (((&key dbname table-name &allow-other-keys) pg-db-uri)) - `(lambda () - (let* ((state-before (pgloader.utils:make-pgstate)) - (summary (null *state*)) - (*state* (or *state* (pgloader.utils:make-pgstate))) - (state-after ,(when after `(pgloader.utils:make-pgstate))) - ,@(pgsql-connection-bindings pg-db-uri gucs) - ,@(batch-control-bindings options) - ,@(identifier-case-binding options) - (source - ,(bind (((kind url) source)) - (ecase kind - (:http `(with-stats-collection - ("download" :state state-before) - (pgloader.archive:http-fetch-file ,url))) - (:filename url)))) - (source - (if (string= "zip" (pathname-type source)) - (progn - (with-stats-collection ("extract" :state state-before) - (let ((d (pgloader.archive:expand-archive source))) - (make-pathname :defaults d - :name (pathname-name source) - :type "ixf")))) - source)) - (source - (make-instance 'pgloader.ixf:copy-ixf - :target-db ,dbname - :source source - :target ,table-name))) + `(lambda () + (let* ((state-before (pgloader.utils:make-pgstate)) + (summary (null *state*)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (state-after ,(when after `(pgloader.utils:make-pgstate))) + ,@(pgsql-connection-bindings pg-db-conn gucs) + ,@(batch-control-bindings options) + ,@(identifier-case-binding options) + (table-name ,(pgconn-table-name pg-db-conn)) + (source-db (with-stats-collection ("fetch" :state 
state-before) + (expand (fetch-file ,ixf-db-conn)))) + (source + (make-instance 'pgloader.ixf:copy-ixf + :target-db ,pg-db-conn + :source-db source-db + :target table-name))) - ,(sql-code-block dbname 'state-before before "before load") + ,(sql-code-block pg-db-conn 'state-before before "before load") - (pgloader.sources:copy-from source - :state-before state-before - ,@(remove-batch-control-option options)) + (pgloader.sources:copy-database source + :state-before state-before + ,@(remove-batch-control-option options)) - ,(sql-code-block dbname 'state-after after "after load") + ,(sql-code-block pg-db-conn 'state-after after "after load") - (when summary - (report-full-summary "Total import time" *state* - :before state-before - :finally state-after)))))) + (when summary + (report-full-summary "Total import time" *state* + :before state-before + :finally state-after))))) (defrule load-ixf-file load-ixf-command (:lambda (command) diff --git a/src/parsers/command-mssql.lisp b/src/parsers/command-mssql.lisp index 1ff8f99..c2efb57 100644 --- a/src/parsers/command-mssql.lisp +++ b/src/parsers/command-mssql.lisp @@ -4,22 +4,6 @@ (in-package :pgloader.parser) -(defun mssql-connection-bindings (ms-db-uri) - "Generate the code needed to set MSSQL connection bindings." 
- (destructuring-bind (&key ((:host mshost)) - ((:port msport)) - ((:user msuser)) - ((:password mspass)) - ((:dbname msdb)) - &allow-other-keys) - ms-db-uri - `((*msconn-host* ',mshost) - (*msconn-port* ,msport) - (*msconn-user* ,msuser) - (*msconn-pass* ,mspass) - (*ms-dbname* ,msdb)))) - - ;;; ;;; INCLUDING ONLY and EXCLUDING clauses for MS SQL ;;; @@ -95,13 +79,14 @@ (apply #'append uri) ;; Default to environment variables as described in ;; http://www.freetds.org/userguide/envvar.htm - (list :type type - :user (or user (getenv-default "USER")) - :password password - :host (or host (getenv-default "TDSHOST" "localhost")) - :port (or port (parse-integer + (declare (ignore type)) + (make-instance 'mssql-connection + :user (or user (getenv-default "USER")) + :pass password + :host (or host (getenv-default "TDSHOST" "localhost")) + :port (or port (parse-integer (getenv-default "TDSPORT" "1433"))) - :dbname dbname)))) + :name dbname)))) (defrule get-mssql-uri-from-environment-variable (and kw-getenv name) (:lambda (p-e-v) @@ -124,50 +109,43 @@ ;;; LOAD DATABASE FROM mssql:// -(defun lisp-code-for-loading-from-mssql (source pg-db-uri +(defun lisp-code-for-loading-from-mssql (ms-db-conn pg-db-conn &key gucs casts before after ((:mssql-options options)) (including) (excluding)) - (bind (((&key ((:dbname msdb)) table-name - &allow-other-keys) source) + `(lambda () + (let* ((state-before (pgloader.utils:make-pgstate)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (state-idx (pgloader.utils:make-pgstate)) + (state-after (pgloader.utils:make-pgstate)) + (*default-cast-rules* ',*mssql-default-cast-rules*) + (*cast-rules* ',casts) + ,@(pgsql-connection-bindings pg-db-conn gucs) + ,@(batch-control-bindings options) + ,@(identifier-case-binding options) + (source + (make-instance 'pgloader.mssql::copy-mssql + :target-db ,pg-db-conn + :source-db ,ms-db-conn))) - ((&key ((:dbname pgdb)) &allow-other-keys) pg-db-uri)) - `(lambda () - (let* ((state-before 
(pgloader.utils:make-pgstate)) - (*state* (or *state* (pgloader.utils:make-pgstate))) - (state-idx (pgloader.utils:make-pgstate)) - (state-after (pgloader.utils:make-pgstate)) - (*default-cast-rules* ',*mssql-default-cast-rules*) - (*cast-rules* ',casts) - ,@(mssql-connection-bindings source) - ,@(pgsql-connection-bindings pg-db-uri gucs) - ,@(batch-control-bindings options) - ,@(identifier-case-binding options) - (source - (make-instance 'pgloader.mssql::copy-mssql - :target-db ,pgdb - :source-db ,msdb))) + ,(sql-code-block pg-db-conn 'state-before before "before load") - ,(sql-code-block pgdb 'state-before before "before load") + (pgloader.mssql:copy-database source + :state-before state-before + :state-after state-after + :state-indexes state-idx + :including ',including + :excluding ',excluding + ,@(remove-batch-control-option options)) - (pgloader.mssql:copy-database source - ,@(when table-name - `(:only-tables ',(list table-name))) - :state-before state-before - :state-after state-after - :state-indexes state-idx - :including ',including - :excluding ',excluding - ,@(remove-batch-control-option options)) + ,(sql-code-block pg-db-conn 'state-after after "after load") - ,(sql-code-block pgdb 'state-after after "after load") - - (report-full-summary "Total import time" *state* - :before state-before - :finally state-after - :parallel state-idx))))) + (report-full-summary "Total import time" *state* + :before state-before + :finally state-after + :parallel state-idx)))) (defrule load-mssql-database load-mssql-command (:lambda (source) diff --git a/src/parsers/command-mysql.lisp b/src/parsers/command-mysql.lisp index b99f883..823cee7 100644 --- a/src/parsers/command-mysql.lisp +++ b/src/parsers/command-mysql.lisp @@ -4,21 +4,6 @@ (in-package :pgloader.parser) -(defun mysql-connection-bindings (my-db-uri) - "Generate the code needed to set MySQL connection bindings." 
- (destructuring-bind (&key ((:host myhost)) - ((:port myport)) - ((:user myuser)) - ((:password mypass)) - ((:dbname mydb)) - &allow-other-keys) - my-db-uri - `((*myconn-host* ',myhost) - (*myconn-port* ,myport) - (*myconn-user* ,myuser) - (*myconn-pass* ,mypass) - (*my-dbname* ,mydb)))) - ;;; ;;; Materialize views by copying their data over, allows for doing advanced ;;; ETL processing by having parts of the processing happen on the MySQL @@ -130,13 +115,14 @@ (apply #'append uri) ;; Default to environment variables as described in ;; http://dev.mysql.com/doc/refman/5.0/en/environment-variables.html - (list :type type - :user (or user (getenv-default "USER")) - :password (or password (getenv-default "MYSQL_PWD")) - :host (or host (getenv-default "MYSQL_HOST" "localhost")) - :port (or port (parse-integer - (getenv-default "MYSQL_TCP_PORT" "3306"))) - :dbname dbname)))) + (declare (ignore type)) + (make-instance 'mysql-connection + :user (or user (getenv-default "USER")) + :pass (or password (getenv-default "MYSQL_PWD")) + :host (or host (getenv-default "MYSQL_HOST" "localhost")) + :port (or port (parse-integer + (getenv-default "MYSQL_TCP_PORT" "3306"))) + :name dbname)))) (defrule get-mysql-uri-from-environment-variable (and kw-getenv name) (:lambda (p-e-v) @@ -159,53 +145,46 @@ ;;; LOAD DATABASE FROM mysql:// -(defun lisp-code-for-loading-from-mysql (my-db-uri pg-db-uri +(defun lisp-code-for-loading-from-mysql (my-db-conn pg-db-conn &key gucs casts views before after ((:mysql-options options)) ((:including incl)) ((:excluding excl)) ((:decoding decoding-as))) - (bind (((&key ((:dbname mydb)) table-name - &allow-other-keys) my-db-uri) + `(lambda () + (let* ((state-before (pgloader.utils:make-pgstate)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (state-idx (pgloader.utils:make-pgstate)) + (state-after (pgloader.utils:make-pgstate)) + (*default-cast-rules* ',*mysql-default-cast-rules*) + (*cast-rules* ',casts) + ,@(pgsql-connection-bindings pg-db-conn 
gucs) + ,@(batch-control-bindings options) + ,@(identifier-case-binding options) + (source + (make-instance 'pgloader.mysql::copy-mysql + :target-db ,pg-db-conn + :source-db ,my-db-conn))) - ((&key ((:dbname pgdb)) &allow-other-keys) pg-db-uri)) - `(lambda () - (let* ((state-before (pgloader.utils:make-pgstate)) - (*state* (or *state* (pgloader.utils:make-pgstate))) - (state-idx (pgloader.utils:make-pgstate)) - (state-after (pgloader.utils:make-pgstate)) - (*default-cast-rules* ',*mysql-default-cast-rules*) - (*cast-rules* ',casts) - ,@(mysql-connection-bindings my-db-uri) - ,@(pgsql-connection-bindings pg-db-uri gucs) - ,@(batch-control-bindings options) - ,@(identifier-case-binding options) - (source - (make-instance 'pgloader.mysql::copy-mysql - :target-db ,pgdb - :source-db ,mydb))) + ,(sql-code-block pg-db-conn 'state-before before "before load") - ,(sql-code-block pgdb 'state-before before "before load") + (pgloader.mysql:copy-database source + :including ',incl + :excluding ',excl + :decoding-as ',decoding-as + :materialize-views ',views + :state-before state-before + :state-after state-after + :state-indexes state-idx + ,@(remove-batch-control-option options)) - (pgloader.mysql:copy-database source - ,@(when table-name - `(:only-tables ',(list table-name))) - :including ',incl - :excluding ',excl - :decoding-as ',decoding-as - :materialize-views ',views - :state-before state-before - :state-after state-after - :state-indexes state-idx - ,@(remove-batch-control-option options)) + ,(sql-code-block pg-db-conn 'state-after after "after load") - ,(sql-code-block pgdb 'state-after after "after load") - - (report-full-summary "Total import time" *state* - :before state-before - :finally state-after - :parallel state-idx))))) + (report-full-summary "Total import time" *state* + :before state-before + :finally state-after + :parallel state-idx)))) (defrule load-mysql-database load-mysql-command (:lambda (source) diff --git a/src/parsers/command-parser.lisp 
b/src/parsers/command-parser.lisp index 5b858ee..4a31c1d 100644 --- a/src/parsers/command-parser.lisp +++ b/src/parsers/command-parser.lisp @@ -34,25 +34,56 @@ (defun inject-inline-data-position (command position) "We have '(:inline nil) somewhere in command, have '(:inline position) instead." (loop - for s-exp in command - when (equal '(:inline nil) s-exp) collect (list :inline position) - else collect (if (and (consp s-exp) (listp (cdr s-exp))) - (inject-inline-data-position s-exp position) - s-exp))) + :for s-exp :in command + + :when (and (or (typep s-exp 'csv-connection) + (typep s-exp 'fixed-connection)) + (slot-boundp s-exp 'specs) + (eq :inline (first (csv-specs s-exp)))) + :collect (progn (setf (second (csv-specs s-exp)) position) + s-exp) + + :else :collect (if (and (consp s-exp) (listp (cdr s-exp))) + (inject-inline-data-position s-exp position) + s-exp))) (defun process-relative-pathnames (filename command) "Walk the COMMAND to replace relative pathname with absolute ones, merging them within the directory where we found the command FILENAME." 
(loop - for s-exp in command - when (pathnamep s-exp) - collect (if (uiop:relative-pathname-p s-exp) - (uiop:merge-pathnames* s-exp filename) - s-exp) - else - collect (if (and (consp s-exp) (listp (cdr s-exp))) - (process-relative-pathnames filename s-exp) - s-exp))) + :for s-exp :in command + + :collect (cond ((pathnamep s-exp) + (if (uiop:relative-pathname-p s-exp) + (uiop:merge-pathnames* s-exp filename) + s-exp)) + + ((and (typep s-exp 'fd-connection) + (slot-boundp s-exp 'pgloader.connection::path)) + (if (uiop:relative-pathname-p (fd-path s-exp)) + (progn (setf (fd-path s-exp) + (uiop:merge-pathnames* (fd-path s-exp) + filename)) + s-exp) + s-exp)) + + ((and (or (typep s-exp 'csv-connection) + (typep s-exp 'fixed-connection)) + (slot-boundp s-exp 'specs) + (eq :filename (car (csv-specs s-exp)))) + (let ((path (second (csv-specs s-exp)))) + (if (uiop:relative-pathname-p path) + (progn (setf (csv-specs s-exp) + `(:filename + ,(uiop:merge-pathnames* path + filename))) + s-exp) + s-exp))) + + (t + (if (and (consp s-exp) (listp (cdr s-exp))) + (process-relative-pathnames filename s-exp) + s-exp))))) (defun parse-commands-from-file (maybe-relative-filename &aux (filename @@ -145,23 +176,6 @@ ;;; ;;; Parse an URI without knowing before hand what kind of uri it is. ;;; -(defvar *db-uri-prefix-list* '((:postgresql . ("pgsql://" - "postgres://" - "postgresql://")) - (:sqlite . ("sqlite://")) - (:mysql . ("mysql://")) - (:mssql . ("mssql://")))) - -(defun parse-db-uri-prefix (source-string) - "See if SOURCE-STRING starts with a known dburi" - (loop :for (type . prefixes) :in *db-uri-prefix-list* - :for prefix := (loop :for prefix :in prefixes - :when (let ((plen (length prefix))) - (and (< plen (length source-string)) - (string-equal prefix source-string :end2 plen))) - :return prefix) - :when prefix :return type)) - (defvar *data-source-filename-extensions* '((:csv . ("csv" "tsv" "txt" "text")) (:sqlite . 
("sqlite" "db")) @@ -186,44 +200,39 @@ (defun parse-source-string-for-type (type source-string) "use the parse rules as per xxx-source rules" - (let ((source (case type - (:csv (parse 'csv-file-source source-string)) - (:fixed (parse 'fixed-file-source source-string)) - (:dbf (parse 'filename-or-http-uri source-string)) - (:ixf (parse 'filename-or-http-uri source-string)) - (:sqlite (parse 'sqlite-uri source-string)) - (:postgresql (parse 'pgsql-uri source-string)) - (:mysql (parse 'mysql-uri source-string)) - (:mssql (parse 'mssql-uri source-string))))) - (values type source))) + (case type + (:csv (parse 'csv-file-source source-string)) + (:fixed (parse 'fixed-file-source source-string)) + (:dbf (parse 'dbf-file-source source-string)) + (:ixf (parse 'ixf-file-source source-string)) + (:sqlite (parse 'sqlite-uri source-string)) + (:postgresql (parse 'pgsql-uri source-string)) + (:mysql (parse 'mysql-uri source-string)) + (:mssql (parse 'mssql-uri source-string)))) + +(defrule source-uri (or csv-uri + fixed-uri + dbf-uri + ixf-uri + sqlite-db-uri + pgsql-uri + mysql-uri + mssql-uri + filename-or-http-uri)) (defun parse-source-string (source-string) - "Guess type from SOURCE-STRING then parse it accordingly." 
- (cond ((probe-file (uiop:parse-unix-namestring source-string)) - (let ((type (parse-filename-for-source-type source-string))) - (parse-source-string-for-type type source-string))) + (let ((source (parse 'source-uri source-string))) + (cond ((typep source 'connection) + source) - ((and source-string - (< (length "http://") (length source-string)) - (string-equal "http://" source-string :end2 (length "http://"))) - (let ((type (parse-filename-for-source-type - (puri:uri-path (puri:parse-uri source-string))))) - (case type - (:csv (log-message :fatal "No HTTP support for CSV files yet.")) - (:fixed (log-message :fatal "No HTTP support for FIXED files yet.")) - (:sqlite (parse-source-string-for-type :sqlite source-string)) - (:db3 (parse-source-string-for-type :db3 source-string)) - (:ixf (parse-source-string-for-type :ixf source-string))))) - - ((and source-string (parse-db-uri-prefix source-string)) - (let* ((type (parse-db-uri-prefix source-string))) - (multiple-value-bind (type conn) - (parse-source-string-for-type type source-string) - (if (eq type (getf conn :type)) - (values type conn) - (log-message :fatal "Parsed a ~s connection string for type ~s"))))) - - (t nil))) + (t + (destructuring-bind (kind url) source + (let ((type + (case kind + (:filename (parse-filename-for-source-type url)) + (:http (parse-filename-for-source-type + (puri:uri-path (puri:parse-uri url))))))) + (parse-source-string-for-type type source-string))))))) (defun parse-target-string (target-string) (parse 'pgsql-uri target-string)) diff --git a/src/parsers/command-sql-block.lisp b/src/parsers/command-sql-block.lisp index e941312..f58f35f 100644 --- a/src/parsers/command-sql-block.lisp +++ b/src/parsers/command-sql-block.lisp @@ -71,15 +71,15 @@ (:lambda (after) (cons :after after))) -(defun sql-code-block (dbname state commands label) +(defun sql-code-block (pgconn state commands label) "Return lisp code to run COMMANDS against DBNAME, updating STATE." 
(when commands `(with-stats-collection (,label - :dbname ,dbname + :dbname ,(db-name pgconn) :state ,state :use-result-as-read t :use-result-as-rows t) - (with-pgsql-transaction (:dbname ,dbname) + (with-pgsql-transaction (:pgconn ,pgconn) (loop for command in ',commands do (log-message :notice command) diff --git a/src/parsers/command-sqlite.lisp b/src/parsers/command-sqlite.lisp index 5b29cc2..cb66dad 100644 --- a/src/parsers/command-sqlite.lisp +++ b/src/parsers/command-sqlite.lisp @@ -50,11 +50,14 @@ load database (defrule sqlite-db-uri (and "sqlite://" filename) (:lambda (source) - (bind (((_ filename) source) ; (prefix filename) - ((_ path) filename)) ; (type path) - (list :sqlite path)))) + (bind (((_ filename) source)) filename))) -(defrule sqlite-uri (or sqlite-db-uri http-uri maybe-quoted-filename)) +(defrule sqlite-uri (or sqlite-db-uri http-uri maybe-quoted-filename) + (:lambda (source) + (destructuring-bind (kind url) source + (case kind + (:http (make-instance 'sqlite-connection :uri url)) + (:filename (make-instance 'sqlite-connection :path url)))))) (defrule get-sqlite-uri-from-environment-variable (and kw-getenv name) (:lambda (p-e-v) @@ -84,49 +87,28 @@ load database (destructuring-bind (source target clauses) command `(,source ,target ,@clauses)))) -(defun lisp-code-for-loading-from-sqlite (source pg-db-uri +(defun lisp-code-for-loading-from-sqlite (sqlite-db-conn pg-db-conn &key gucs casts ((:sqlite-options options)) ((:including incl)) ((:excluding excl))) - (bind (((&key dbname table-name &allow-other-keys) pg-db-uri)) - `(lambda () - (let* ((state-before (pgloader.utils:make-pgstate)) - (*state* (pgloader.utils:make-pgstate)) - (*default-cast-rules* ',*sqlite-default-cast-rules*) - (*cast-rules* ',casts) - ,@(pgsql-connection-bindings pg-db-uri gucs) - ,@(batch-control-bindings options) - (db - ,(bind (((kind url) source)) - (ecase kind - (:http `(with-stats-collection - ("download" :state state-before) - (pgloader.archive:http-fetch-file 
,url))) - (:sqlite url) - (:filename url)))) - (db - (if (string= "zip" (pathname-type db)) - (progn - (with-stats-collection ("extract" :state state-before) - (let ((d (pgloader.archive:expand-archive db))) - (make-pathname :defaults d - :name (pathname-name db) - :type "db")))) - db)) - (source - (make-instance 'pgloader.sqlite::copy-sqlite - :target-db ,dbname - :source-db db))) - (pgloader.sqlite:copy-database - source - :state-before state-before - ,@(when table-name - `(:only-tables ',(list table-name))) - :including ',incl - :excluding ',excl - ,@(remove-batch-control-option options)))))) + `(lambda () + (let* ((state-before (pgloader.utils:make-pgstate)) + (*state* (pgloader.utils:make-pgstate)) + (*default-cast-rules* ',*sqlite-default-cast-rules*) + (*cast-rules* ',casts) + ,@(pgsql-connection-bindings pg-db-conn gucs) + ,@(batch-control-bindings options) + (source + (make-instance 'pgloader.sqlite::copy-sqlite + :target-db ,pg-db-conn + :source-db ,(expand (fetch-file sqlite-db-conn))))) + (pgloader.sqlite:copy-database source + :state-before state-before + :including ',incl + :excluding ',excl + ,@(remove-batch-control-option options))))) (defrule load-sqlite-database load-sqlite-command (:lambda (source) diff --git a/src/pgsql/pgsql.lisp b/src/pgsql/pgsql.lisp index f1094dc..162513c 100644 --- a/src/pgsql/pgsql.lisp +++ b/src/pgsql/pgsql.lisp @@ -12,7 +12,7 @@ &key (db pomo:*database*)) "Copy current *writer-batch* into TABLE-NAME." (handler-case - (with-pgsql-transaction (:dbname dbname :database db) + (with-pgsql-transaction (:database db) ;; We need to keep a copy of the rows we send through the COPY ;; protocol to PostgreSQL to be able to process them again in case ;; of a data error being signaled, that's the BATCH here. @@ -40,17 +40,17 @@ ;;; We receive fully prepared batch from an lparallel queue, push their ;;; content down to PostgreSQL, handling any data related errors in the way. 
;;; -(defun copy-from-queue (dbname table-name queue +(defun copy-from-queue (pgconn table-name queue &key columns (truncate t) ((:state *state*) *state*)) "Fetch from the QUEUE messages containing how many rows are in the *writer-batch* for us to send down to PostgreSQL, and when that's done update *state*." (when truncate - (truncate-tables dbname (list table-name))) + (truncate-tables pgconn (list table-name))) (log-message :debug "pgsql:copy-from-queue: ~a ~a" table-name columns) - (with-pgsql-connection (dbname) + (with-pgsql-connection (pgconn) (loop for (mesg batch read oversized?) = (lq:pop-queue queue) until (eq mesg :end-of-data) diff --git a/src/pgsql/queries.lisp b/src/pgsql/queries.lisp index 663305f..bba1d66 100644 --- a/src/pgsql/queries.lisp +++ b/src/pgsql/queries.lisp @@ -6,6 +6,45 @@ ;;; ;;; PostgreSQL Tools connecting to a database ;;; +(defclass pgsql-connection (db-connection) + ((use-ssl :initarg :use-ssl :accessor pgconn-use-ssl) + (table-name :initarg :table-name :accessor pgconn-table-name)) + (:documentation "PostgreSQL connection for pgloader")) + +(defmethod initialize-instance :after ((pgconn pgsql-connection) &key) + "Assign the type slot to pgsql." + (setf (slot-value pgconn 'type) "pgsql")) + +(defun new-pgsql-connection (pgconn) + "Prepare a new connection object with all the same properties as pgconn, + so as to avoid stepping on it's handle" + (make-instance 'pgsql-connection + :user (db-user pgconn) + :pass (db-pass pgconn) + :host (db-host pgconn) + :port (db-port pgconn) + :name (db-name pgconn) + :use-ssl (pgconn-use-ssl pgconn) + :table-name (pgconn-table-name pgconn))) + +(defmethod open-connection ((pgconn pgsql-connection) &key username) + "Open a PostgreSQL connection." 
+ (setf (conn-handle pgconn) + (pomo:connect (db-name pgconn) + (or username (db-user pgconn)) + (db-pass pgconn) + (let ((host (db-host pgconn))) + (if (and (consp host) (eq :unix (car host))) :unix host)) + :port (db-port pgconn) + :use-ssl (or (pgconn-use-ssl pgconn) :no))) + pgconn) + +(defmethod close-connection ((pgconn pgsql-connection)) + "Close a PostgreSQL connection." + (pomo:disconnect (conn-handle pgconn)) + (setf (conn-handle pgconn) nil) + pgconn) + (defmacro handling-pgsql-notices (&body forms) "The BODY is run within a PostgreSQL transaction where *pg-settings* have been applied. PostgreSQL warnings and errors are logged at the @@ -20,94 +59,42 @@ (muffle-warning)))) (progn ,@forms))) -(defmacro with-pgsql-transaction ((&key dbname username database) &body forms) +(defmacro with-pgsql-transaction ((&key pgconn database) &body forms) "Run FORMS within a PostgreSQL transaction to DBNAME, reusing DATABASE if - given. To get the connection spec from the DBNAME, use `get-connection-spec'." + given." 
(if database `(let ((pomo:*database* ,database)) (handling-pgsql-notices - (pomo:with-transaction () - (log-message :debug "BEGIN") - (set-session-gucs *pg-settings* :transaction t) - ,@forms))) + (pomo:with-transaction () + (log-message :debug "BEGIN") + ,@forms))) ;; no database given, create a new database connection - `(let (#+unix (cl-postgres::*unix-socket-dir* (get-unix-socket-dir))) - (let ((pomo:*database* - (handler-case - (apply #'pomo:connect (get-connection-spec :dbname ,dbname - :username ,username)) - (condition (e) - (destructuring-bind (&key host port user &allow-other-keys) - *pgconn* - (error 'connection-error - :mesg (format nil "~a" e) - :type "PostgreSQL" - :host (if (consp host) (cdr host) host) - :port port - :user user)))))) - (unwind-protect - (progn - (log-message :debug "CONNECT") - (set-session-gucs *pg-settings*) - (handling-pgsql-notices () - (pomo:with-transaction () - (log-message :debug "BEGIN") - ,@forms))) - (pomo:disconnect pomo:*database*)))))) + `(with-pgsql-connection (,pgconn) + (pomo:with-transaction () + (log-message :debug "BEGIN") + ,@forms)))) -(defmacro with-pgsql-connection ((dbname) &body forms) +(defmacro with-pgsql-connection ((pgconn) &body forms) "Run FROMS within a PostgreSQL connection to DBNAME. To get the connection spec from the DBNAME, use `get-connection-spec'." 
- `(let (#+unix (cl-postgres::*unix-socket-dir* (get-unix-socket-dir))) - (let ((pomo:*database* - (handler-case - (apply #'pomo:connect (get-connection-spec :dbname ,dbname)) - (condition (e) - (destructuring-bind (&key host port user &allow-other-keys) *pgconn* - (error 'connection-error - :mesg (format nil "~a" e) - :type "PostgreSQL" - :host (if (consp host) (cdr host) host) - :port port - :user user)))))) - (unwind-protect - (progn - (log-message :debug "CONNECT ~s" (get-connection-spec :dbname ,dbname)) - (set-session-gucs *pg-settings*) - (handling-pgsql-notices () - ,@forms)) - (pomo:disconnect pomo:*database*))))) + `(let (#+unix (cl-postgres::*unix-socket-dir* (get-unix-socket-dir ,pgconn))) + (with-connection (conn ,pgconn) + (let ((pomo:*database* (conn-handle conn))) + (log-message :debug "CONNECT ~s" conn) + (set-session-gucs *pg-settings*) + (handling-pgsql-notices + ,@forms))))) -(defun get-unix-socket-dir () +(defun get-unix-socket-dir (pgconn) "When *pgconn* host is a (cons :unix path) value, return the right value for cl-postgres::*unix-socket-dir*." - (destructuring-bind (&key host &allow-other-keys) *pgconn* + (let ((host (db-host pgconn))) (if (and (consp host) (eq :unix (car host))) ;; set to *pgconn* host value (directory-namestring (fad:pathname-as-directory (cdr host))) ;; keep as is. cl-postgres::*unix-socket-dir*))) -(defun get-connection-spec (&key dbname username (with-port t)) - "pomo:with-connection and cl-postgres:open-database and open-db-writer are - not using the same connection spec format..." 
- (destructuring-bind (&key type host port user password - ((:dbname pgconn-dbname)) - use-ssl - table-name) - *pgconn* - (declare (ignore type table-name)) - (let* ((host (if (and (consp host) (eq :unix (car host))) :unix host)) - (conspec (list (or dbname pgconn-dbname) - (or username user) - password - host))) - (if with-port - (setf conspec (append conspec (list :port port))) - (setf conspec (append conspec (list port)))) - - (if use-ssl (append conspec (list :use-ssl use-ssl)) conspec)))) - (defun set-session-gucs (alist &key transaction database) "Set given GUCs to given values for the current session." (let ((pomo:*database* (or database pomo:*database*))) @@ -118,16 +105,20 @@ (log-message :debug set) (pomo:execute set)))) -(defun pgsql-execute-with-timing (dbname label sql state &key (count 1)) +(defun pgsql-connect-and-execute-with-timing (pgconn label sql state &key (count 1)) + "Run pgsql-execute-with-timing within a newly establised connection." + (with-pgsql-connection (pgconn) + (pomo:with-transaction () + (pgsql-execute-with-timing label sql state :count count)))) + +(defun pgsql-execute-with-timing (label sql state &key (count 1)) "Execute given SQL and resgister its timing into STATE." 
(multiple-value-bind (res secs) (timing - (handler-case - (with-pgsql-transaction (:dbname dbname) - (pgsql-execute sql)) - (cl-postgres:database-error (e) - (declare (ignore e)) ; a log has already been printed - (pgstate-incf state label :errs 1 :rows (- count))))) + (handler-case (pgsql-execute sql) + (cl-postgres:database-error (e) + (declare (ignore e)) ; a log has already been printed + (pgstate-incf state label :errs 1 :rows (- count))))) (declare (ignore res)) (pgstate-incf state label :read count :rows count :secs secs))) @@ -145,35 +136,35 @@ ;;; PostgreSQL Utility Queries ;;; -(defun list-databases (&optional (username "postgres")) - "Connect to a local database and get the database list" - (with-pgsql-transaction (:dbname "postgres" :username username) - (loop for (dbname) in (pomo:query - "select datname - from pg_database - where datname !~ 'postgres|template'") - collect dbname))) +;; (defun list-databases (&optional (username "postgres")) +;; "Connect to a local database and get the database list" +;; (with-pgsql-transaction (:dbname "postgres" :username username) +;; (loop for (dbname) in (pomo:query +;; "select datname +;; from pg_database +;; where datname !~ 'postgres|template'") +;; collect dbname))) -(defun list-tables (&optional dbname) - "Return an alist of tables names and list of columns to pay attention to." 
- (with-pgsql-transaction (:dbname dbname) - (loop for (relname colarray) in (pomo:query " -select relname, array_agg(case when typname in ('date', 'timestamptz') - then attnum end - order by attnum) - from pg_class c - join pg_namespace n on n.oid = c.relnamespace - left join pg_attribute a on c.oid = a.attrelid - join pg_type t on t.oid = a.atttypid - where c.relkind = 'r' - and attnum > 0 - and n.nspname = 'public' - group by relname -") - collect (cons relname (loop - for attnum across colarray - unless (eq attnum :NULL) - collect attnum))))) +;; (defun list-tables (&optional dbname) +;; "Return an alist of tables names and list of columns to pay attention to." +;; (with-pgsql-transaction (:dbname dbname) +;; (loop for (relname colarray) in (pomo:query " +;; select relname, array_agg(case when typname in ('date', 'timestamptz') +;; then attnum end +;; order by attnum) +;; from pg_class c +;; join pg_namespace n on n.oid = c.relnamespace +;; left join pg_attribute a on c.oid = a.attrelid +;; join pg_type t on t.oid = a.atttypid +;; where c.relkind = 'r' +;; and attnum > 0 +;; and n.nspname = 'public' +;; group by relname +;; ") +;; collect (cons relname (loop +;; for attnum across colarray +;; unless (eq attnum :NULL) +;; collect attnum))))) (defun list-tables-and-fkeys (&optional schema) "Yet another table listing query." @@ -186,9 +177,9 @@ select relname, array_agg(case when typname in ('date', 'timestamptz') group by relname;" schema schema)) collect (cons relname (sq:split-sequence #\, fkeys)))) -(defun list-columns (dbname table-name &key schema) +(defun list-columns (pgconn table-name &key schema) "Return a list of column names for given TABLE-NAME." 
- (with-pgsql-transaction (:dbname dbname) + (with-pgsql-transaction (:pgconn pgconn) (pomo:query (format nil " select attname from pg_class c @@ -198,27 +189,28 @@ select relname, array_agg(case when typname in ('date', 'timestamptz') where c.oid = '~:[~*~a~;~a.~a~]'::regclass and attnum > 0 order by attnum" schema schema table-name) :column))) -(defun list-reserved-keywords (dbname) +(defun list-reserved-keywords (pgconn) "Connect to PostgreSQL DBNAME and fetch reserved keywords." - (with-pgsql-transaction (:dbname dbname) + (with-pgsql-connection (pgconn) (pomo:query "select word from pg_get_keywords() where catcode IN ('R', 'T')" :column))) -(defun reset-all-sequences (dbname &key tables) +(defun reset-all-sequences (pgconn &key tables) "Reset all sequences to the max value of the column they are attached to." - (with-pgsql-connection (dbname) - (set-session-gucs *pg-settings*) - (pomo:execute "set client_min_messages to warning;") - (pomo:execute "listen seqs") + (let ((newconn (new-pgsql-connection pgconn))) + (with-pgsql-connection (newconn) + (set-session-gucs *pg-settings*) + (pomo:execute "set client_min_messages to warning;") + (pomo:execute "listen seqs") - (when tables - (pomo:execute - (format nil "create temp table reloids(oid) as values ~{('~a'::regclass)~^,~}" - tables))) + (when tables + (pomo:execute + (format nil "create temp table reloids(oid) as values ~{('~a'::regclass)~^,~}" + tables))) - (handler-case - (let ((sql (format nil " + (handler-case + (let ((sql (format nil " DO $$ DECLARE n integer := 0; @@ -248,10 +240,10 @@ BEGIN PERFORM pg_notify('seqs', n::text); END; $$; " tables))) - (pomo:execute sql)) - ;; now get the notification signal - (cl-postgres:postgresql-notification (c) - (parse-integer (cl-postgres:postgresql-notification-payload c)))))) + (pomo:execute sql)) + ;; now get the notification signal + (cl-postgres:postgresql-notification (c) + (parse-integer (cl-postgres:postgresql-notification-payload c))))))) (defun 
list-table-oids (table-names) "Return an alist of (TABLE-NAME . TABLE-OID) for all table in the diff --git a/src/pgsql/schema.lisp b/src/pgsql/schema.lisp index f633834..f2684a4 100644 --- a/src/pgsql/schema.lisp +++ b/src/pgsql/schema.lisp @@ -97,9 +97,9 @@ ;; alter table if exists ... drop constraint if exists ... (format nil "ALTER TABLE ~a DROP CONSTRAINT ~a" table-name constraint-name)))) -(defun drop-pgsql-fkeys (all-fkeys &key (dbname (pgconn-dbname))) +(defun drop-pgsql-fkeys (all-fkeys) "Drop all Foreign Key Definitions given, to prepare for a clean run." - (let ((all-pgsql-fkeys (list-tables-and-fkeys dbname))) + (let ((all-pgsql-fkeys (list-tables-and-fkeys))) (loop for (table-name . fkeys) in all-fkeys do (loop for fkey in fkeys @@ -110,20 +110,19 @@ (log-message :notice "~a;" sql) (pgsql-execute sql))))) -(defun create-pgsql-fkeys (all-fkeys +(defun create-pgsql-fkeys (pgconn all-fkeys &key - (dbname (pgconn-dbname)) state (label "Foreign Keys")) "Actually create the Foreign Key References that where declared in the MySQL database" - (pgstate-add-table state dbname label) + (pgstate-add-table state (db-name pgconn) label) (loop for (table-name . fkeys) in all-fkeys do (loop for fkey in fkeys for sql = (format-pgsql-create-fkey fkey) do (log-message :notice "~a;" sql) - (pgsql-execute-with-timing dbname "Foreign Keys" sql state)))) + (pgsql-execute-with-timing "Foreign Keys" sql state)))) ;;; @@ -195,9 +194,9 @@ (pgsql-execute sql :client-min-messages client-min-messages) finally (return nb-tables))) -(defun truncate-tables (dbname table-name-list) +(defun truncate-tables (pgconn table-name-list) "Truncate given TABLE-NAME in database DBNAME" - (with-pgsql-transaction (:dbname dbname) + (with-pgsql-transaction (:pgconn pgconn) (let ((sql (format nil "TRUNCATE ~{~a~^,~};" (loop :for table-name :in table-name-list :collect (apply-identifier-case table-name))))) @@ -251,7 +250,7 @@ ;;; ;;; Parallel index building. 
;;; -(defun create-indexes-in-kernel (dbname indexes kernel channel +(defun create-indexes-in-kernel (pgconn indexes kernel channel &key state (label "Create Indexes")) @@ -260,7 +259,7 @@ copying." (let* ((lp:*kernel* kernel)) ;; ensure we have a stats entry - (pgstate-add-table state dbname label) + (pgstate-add-table state (db-name pgconn) label) (loop :for index :in indexes @@ -269,8 +268,11 @@ (format-pgsql-create-index index) (log-message :notice "~a" sql) - (lp:submit-task channel #'pgsql-execute-with-timing - dbname label sql state) + (lp:submit-task channel + #'pgsql-connect-and-execute-with-timing + ;; each thread must have its own connection + (new-pgsql-connection pgconn) + label sql state) ;; return the pkey "upgrade" statement pkey)))) @@ -293,19 +295,18 @@ for table-oid = (cdr (assoc table-name table-oids :test #'string=)) unless table-oid do (error "OID not found for ~s." table-name) do (loop for index in indexes - do (setf (pgloader.pgsql::pgsql-index-table-oid index) table-oid))))) + do (setf (pgsql-index-table-oid index) table-oid))))) ;;; ;;; Sequences ;;; -(defun reset-sequences (table-names - &key (dbname (pgconn-dbname)) state) +(defun reset-sequences (table-names &key pgconn state) "Reset all sequences created during this MySQL migration." 
(log-message :notice "Reset sequences") (with-stats-collection ("Reset Sequences" - :dbname dbname + :dbname (db-name pgconn) :use-result-as-rows t :state state) - (reset-all-sequences dbname :tables table-names))) + (reset-all-sequences pgconn :tables table-names))) diff --git a/src/sources.lisp b/src/sources.lisp index 10f7615..6e69a0d 100644 --- a/src/sources.lisp +++ b/src/sources.lisp @@ -3,21 +3,20 @@ ;;; (in-package :pgloader.sources) -(define-condition connection-error (error) - ((type :initarg :type :reader connection-error-type) - (mesg :initarg :mesg :reader connection-error-mesg) - (host :initarg :host :reader connection-error-host) - (port :initarg :port :reader connection-error-port) - (user :initarg :user :reader connection-error-user)) - (:report (lambda (err stream) - (format stream "Failed to connect to ~a at ~s ~@[(port ~d)~]~@[ as user ~s: ~a~]" - (connection-error-type err) - (connection-error-host err) - (connection-error-port err) - (connection-error-user err) - (connection-error-mesg err))))) +;; (defgeneric list-all-columns (connection &key) +;; (:documentation "Discover all columns in CONNECTION source.")) -;; Abstract classes to define the API with +;; (defgeneric list-all-indexes (connection &key) +;; (:documentation "Discover all indexes in CONNECTION source.")) + +;; (defgeneric list-all-fkeys (connection &key) +;; (:documentation "Discover all foreign keys in CONNECTION source.")) + +;; (defgeneric fetch-metadata (connection &key) +;; (:documentation "Full discovery of the CONNECTION data source.")) + + +;; Abstract classes to define the data loading API with ;; ;; The source name might be a table name (database server source) or a ;; filename (csv, dbase, etc), or something else entirely, like e.g. 
mongodb @@ -83,8 +82,7 @@ create-tables include-drop create-indexes - reset-sequences - only-tables) + reset-sequences) (:documentation "Auto-discover source schema, convert it to PostgreSQL, migrate the data from the source definition to PostgreSQL for all the discovered diff --git a/src/sources/csv/csv-database.lisp b/src/sources/csv/csv-database.lisp new file mode 100644 index 0000000..49b8432 --- /dev/null +++ b/src/sources/csv/csv-database.lisp @@ -0,0 +1,43 @@ +;;; +;;; Experimental code, used to be used a long time ago, before this lisp +;;; code became pgloader. The idea is to use it again sometimes, someway. +;;; +(in-package #:pgloader.csv) + +;;; +;;; When you exported a whole database as a bunch of CSV files to be found +;;; in the same directory, each file name being the name of the target +;;; table, then this function allows to import them all at once. +;;; +;;; TODO: expose it from the command language, and test it. +;;; +(defun import-database (dbname + &key + (csv-path-root *csv-path-root*) + (skip-lines 0) + (separator #\Tab) + (quote cl-csv:*quote*) + (escape cl-csv:*quote-escape*) + (truncate t) + only-tables) + "Export MySQL data and Import it into PostgreSQL" + (let ((*state* (pgloader.utils:make-pgstate))) + (report-header) + (loop + for (table-name . 
date-columns) in (pgloader.pgsql:list-tables dbname) + for filename = (get-pathname dbname table-name + :csv-path-root csv-path-root) + when (or (null only-tables) + (member table-name only-tables :test #'equal)) + do + (let ((source (make-instance 'copy-csv + :target-db dbname + :source (list :filename filename) + :target table-name + :skip-lines skip-lines + :separator separator + :quote quote + :escape escape))) + (copy-from source :truncate truncate)) + finally + (report-pgstate-stats *state* "Total import time")))) diff --git a/src/sources/csv/csv-guess.lisp b/src/sources/csv/csv-guess.lisp new file mode 100644 index 0000000..8238b85 --- /dev/null +++ b/src/sources/csv/csv-guess.lisp @@ -0,0 +1,60 @@ +;;; +;;; Automatic guess the CSV format parameters +;;; +(in-package #:pgloader.csv) + +(defparameter *separators* '(#\Tab #\, #\; #\| #\% #\^ #\! #\$) + "Common CSV separators to try when guessing file parameters.") + +(defparameter *escape-quotes* '("\\\"" "\"\"") + "Common CSV quotes to try when guessing file parameters.") + +(defun get-file-sample (filename &key (sample-size 10)) + "Return the first SAMPLE-SIZE lines in FILENAME (or less), or nil if the + file does not exists." 
+ (with-open-file + ;; we just ignore files that don't exist + (input filename + :direction :input + :external-format :utf-8 + :if-does-not-exist nil) + (when input + (loop + for line = (read-line input nil) + while line + repeat sample-size + collect line)))) + +(defun try-csv-params (lines cols &key separator quote escape) + "Read LINES as CSV with SEPARATOR and ESCAPE params, and return T when + each line in LINES then contains exactly COLS columns" + (let ((rows (loop + for line in lines + append + (handler-case + (cl-csv:read-csv line + :quote quote + :separator separator + :escape escape) + ((or cl-csv:csv-parse-error type-error) () + nil))))) + (and rows + (every (lambda (row) (= cols (length row))) rows)))) + +(defun guess-csv-params (filename cols &key (sample-size 10)) + "Try a bunch of field separators with LINES and return the first one that + returns COLS number of columns" + + (let ((sample (get-file-sample filename :sample-size sample-size))) + (loop + for sep in *separators* + for esc = (loop + for escape in *escape-quotes* + when (try-csv-params sample cols + :quote #\" + :separator sep + :escape escape) + do (return escape)) + when esc + do (return (list :separator sep :quote #\" :escape esc))))) + diff --git a/src/sources/csv.lisp b/src/sources/csv/csv.lisp similarity index 60% rename from src/sources/csv.lisp rename to src/sources/csv/csv.lisp index 17fc6bc..031fff8 100644 --- a/src/sources/csv.lisp +++ b/src/sources/csv/csv.lisp @@ -4,6 +4,33 @@ (in-package :pgloader.csv) +(defclass csv-connection (fd-connection) + ((specs :initarg :specs :accessor csv-specs))) + +(defmethod initialize-instance :after ((csvconn csv-connection) &key) + "Assign the type slot to sqlite." 
+ (setf (slot-value csvconn 'type) "csv")) + +(defmethod print-object ((csv csv-connection) stream) + (print-unreadable-object (csv stream :type t :identity t) + (let ((specs (if (slot-boundp csv 'specs) (slot-value csv 'specs) + `(:http ,(slot-value csv 'pgloader.connection::uri))))) + (with-slots (type) csv + (format stream "~a://~a:~a" type (first specs) (second specs)))))) + +(defmethod expand :after ((csv csv-connection)) + "Expand the archive for the FD connection." + (when (and (slot-boundp csv 'pgloader.connection::path) + (slot-value csv 'pgloader.connection::path) + (uiop:file-pathname-p (fd-path csv))) + (setf (csv-specs csv) `(:filename ,(fd-path csv))))) + +(defmethod fetch-file :after ((csv csv-connection)) + "When the fd-connection has an URI slot, download its file." + (when (and (slot-boundp csv 'pgloader.connection::path) + (slot-value csv 'pgloader.connection::path)) + (setf (csv-specs csv) `(:filename ,(fd-path csv))))) + ;;; ;;; Implementing the pgloader source API ;;; @@ -35,7 +62,7 @@ (defmethod initialize-instance :after ((csv copy-csv) &key) "Compute the real source definition from the given source parameter, and set the transforms function list as needed too." - (let ((source (slot-value csv 'source))) + (let ((source (csv-specs (slot-value csv 'source)))) (setf (slot-value csv 'source-type) (car source)) (setf (slot-value csv 'source) (get-absolute-pathname source))) @@ -65,9 +92,9 @@ Finally returns how many rows where read and processed." 
(let ((filenames (case (source-type csv) (:stdin (list (source csv))) - (:inline (list (car (source csv)))) - (:regex (source csv)) - (t (list (source csv)))))) + (:inline (list (car (source csv)))) + (:regex (source csv)) + (t (list (source csv)))))) (loop for filename in filenames do (with-open-file-or-stream @@ -121,19 +148,20 @@ (*state* (or *state* (pgloader.utils:make-pgstate))) (lp:*kernel* (make-kernel 2)) (channel (lp:make-channel)) - (queue (lq:make-queue :fixed-capacity *concurrent-batches*)) - (dbname (target-db csv)) - (table-name (target csv))) + (queue (lq:make-queue :fixed-capacity *concurrent-batches*))) - (with-stats-collection (table-name :state *state* :summary summary) + (with-stats-collection ((target csv) + :dbname (db-name (target-db csv)) + :state *state* :summary summary) (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) - (log-message :notice "COPY ~a.~a" dbname table-name) + (log-message :notice "COPY ~a" (target csv)) (lp:submit-task channel #'copy-to-queue csv queue) ;; and start another task to push that data from the queue to PostgreSQL (lp:submit-task channel ;; this function update :rows stats - #'pgloader.pgsql:copy-from-queue dbname table-name queue + #'pgloader.pgsql:copy-from-queue + (target-db csv) (target csv) queue ;; we only are interested into the column names here :columns (mapcar (lambda (col) ;; always double quote column names @@ -144,100 +172,3 @@ ;; now wait until both the tasks are over (loop for tasks below 2 do (lp:receive-result channel) finally (lp:end-kernel)))))) - -;;; -;;; When you exported a whole database as a bunch of CSV files to be found -;;; in the same directory, each file name being the name of the target -;;; table, then this function allows to import them all at once. -;;; -;;; TODO: expose it from the command language, and test it. 
-;;; -(defun import-database (dbname - &key - (csv-path-root *csv-path-root*) - (skip-lines 0) - (separator #\Tab) - (quote cl-csv:*quote*) - (escape cl-csv:*quote-escape*) - (truncate t) - only-tables) - "Export MySQL data and Import it into PostgreSQL" - (let ((*state* (pgloader.utils:make-pgstate))) - (report-header) - (loop - for (table-name . date-columns) in (pgloader.pgsql:list-tables dbname) - for filename = (get-pathname dbname table-name - :csv-path-root csv-path-root) - when (or (null only-tables) - (member table-name only-tables :test #'equal)) - do - (let ((source (make-instance 'copy-csv - :target-db dbname - :source (list :filename filename) - :target table-name - :skip-lines skip-lines - :separator separator - :quote quote - :escape escape))) - (copy-from source :truncate truncate)) - finally - (report-pgstate-stats *state* "Total import time")))) - -;;; -;;; Automatic guess the CSV format parameters -;;; -(defparameter *separators* '(#\Tab #\, #\; #\| #\% #\^ #\! #\$) - "Common CSV separators to try when guessing file parameters.") - -(defparameter *escape-quotes* '("\\\"" "\"\"") - "Common CSV quotes to try when guessing file parameters.") - -(defun get-file-sample (filename &key (sample-size 10)) - "Return the first SAMPLE-SIZE lines in FILENAME (or less), or nil if the - file does not exists." 
- (with-open-file - ;; we just ignore files that don't exist - (input filename - :direction :input - :external-format :utf-8 - :if-does-not-exist nil) - (when input - (loop - for line = (read-line input nil) - while line - repeat sample-size - collect line)))) - -(defun try-csv-params (lines cols &key separator quote escape) - "Read LINES as CSV with SEPARATOR and ESCAPE params, and return T when - each line in LINES then contains exactly COLS columns" - (let ((rows (loop - for line in lines - append - (handler-case - (cl-csv:read-csv line - :quote quote - :separator separator - :escape escape) - ((or cl-csv:csv-parse-error type-error) () - nil))))) - (and rows - (every (lambda (row) (= cols (length row))) rows)))) - -(defun guess-csv-params (filename cols &key (sample-size 10)) - "Try a bunch of field separators with LINES and return the first one that - returns COLS number of columns" - - (let ((sample (get-file-sample filename :sample-size sample-size))) - (loop - for sep in *separators* - for esc = (loop - for escape in *escape-quotes* - when (try-csv-params sample cols - :quote #\" - :separator sep - :escape escape) - do (return escape)) - when esc - do (return (list :separator sep :quote #\" :escape esc))))) - diff --git a/src/sources/db3.lisp b/src/sources/db3.lisp deleted file mode 100644 index e84a5f5..0000000 --- a/src/sources/db3.lisp +++ /dev/null @@ -1,177 +0,0 @@ -;;; -;;; Tools to handle the DBF file format -;;; - -(in-package :pgloader.db3) - -(defvar *db3-pgsql-type-mapping* - '(("C" . "text") ; ignore field-length - ("N" . "numeric") ; handle both integers and floats - ("L" . "boolean") ; PostgreSQL compatible representation - ("D" . "date") ; no TimeZone in DB3 files - ("M" . "text"))) ; not handled yet - -(defstruct (db3-field - (:constructor make-db3-field (name type length))) - name type length) - -(defmethod format-pgsql-column ((col db3-field)) - "Return a string representing the PostgreSQL column definition." 
- (let* ((column-name - (apply-identifier-case (db3-field-name col))) - (type-definition - (cdr (assoc (db3-field-type col) - *db3-pgsql-type-mapping* - :test #'string=)))) - (format nil "~a ~22t ~a" column-name type-definition))) - -(defun list-all-columns (db3-file-name - &optional (table-name (pathname-name db3-file-name))) - "Return the list of columns for the given DB3-FILE-NAME." - (with-open-file (stream db3-file-name - :direction :input - :element-type '(unsigned-byte 8)) - (let ((db3 (make-instance 'db3:db3))) - (db3:load-header db3 stream) - (list - (cons table-name - (loop - for field in (db3::fields db3) - collect (make-db3-field (db3::field-name field) - (db3::field-type field) - (db3::field-length field)))))))) - -(declaim (inline logical-to-boolean - db3-trim-string - db3-date-to-pgsql-date)) - -(defun logical-to-boolean (value) - "Convert a DB3 logical value to a PostgreSQL boolean." - (if (string= value "?") nil value)) - -(defun db3-trim-string (value) - "DB3 Strings a right padded with spaces, fix that." - (string-right-trim '(#\Space) value)) - -(defun db3-date-to-pgsql-date (value) - "Convert a DB3 date to a PostgreSQL date." 
- (let ((year (subseq value 0 4)) - (month (subseq value 4 6)) - (day (subseq value 6 8))) - (format nil "~a-~a-~a" year month day))) - -(defun list-transforms (input) - "Return the list of transforms to apply to each row of data in order to - convert values to PostgreSQL format" - (with-open-file (stream input - :direction :input - :element-type '(unsigned-byte 8)) - (let ((db3 (make-instance 'db3:db3))) - (db3:load-header db3 stream) - (loop - for field in (db3::fields db3) - for type = (db3::field-type field) - collect - (cond ((string= type "L") #'logical-to-boolean) - ((string= type "C") #'db3-trim-string) - ((string= type "D") #'db3-date-to-pgsql-date) - (t nil)))))) - - -;;; -;;; Integration with pgloader -;;; -(defclass copy-db3 (copy) () - (:documentation "pgloader DBF Data Source")) - -(defmethod initialize-instance :after ((db3 copy-db3) &key) - "Add a default value for transforms in case it's not been provided." - (let ((transforms (when (slot-boundp db3 'transforms) - (slot-value db3 'transforms)))) - (unless transforms - (setf (slot-value db3 'transforms) - (list-transforms (source db3)))))) - -(defmethod map-rows ((copy-db3 copy-db3) &key process-row-fn) - "Extract DB3 data and call PROCESS-ROW-FN function with a single - argument (a list of column values) for each row." 
- (with-open-file (stream (source copy-db3) - :direction :input - :element-type '(unsigned-byte 8)) - (let ((db3 (make-instance 'db3:db3))) - (db3:load-header db3 stream) - (loop - with count = (db3:record-count db3) - repeat count - for row-array = (db3:load-record db3 stream) - do (funcall process-row-fn row-array) - finally (return count))))) - -(defmethod copy-to ((db3 copy-db3) pgsql-copy-filename) - "Extract data from DB3 file into a PotgreSQL COPY TEXT formated file" - (with-open-file (text-file pgsql-copy-filename - :direction :output - :if-exists :supersede - :external-format :utf-8) - (let ((transforms (list-transforms (source db3)))) - (map-rows db3 - :process-row-fn - (lambda (row) - (format-vector-row text-file row transforms)))))) - -(defmethod copy-to-queue ((db3 copy-db3) queue) - "Copy data from DB3 file FILENAME into queue DATAQ" - (let ((read (pgloader.queue:map-push-queue db3 queue))) - (pgstate-incf *state* (target db3) :read read))) - -(defmethod copy-from ((db3 copy-db3) - &key - table-name - state-before - (truncate t) - (create-table t)) - "Open the DB3 and stream its content to a PostgreSQL database." 
- (let* ((summary (null *state*)) - (*state* (or *state* (make-pgstate))) - (dbname (target-db db3)) - (table-name (or table-name - (target db3) - (pathname-name (source db3))))) - - ;; fix the table-name in the db3 object - (setf (target db3) table-name) - - (with-stats-collection ("create, truncate" :state state-before :summary summary) - (with-pgsql-transaction () - (when create-table - (log-message :notice "Create table \"~a\"" table-name) - (create-tables (list-all-columns (source db3) table-name) - :if-not-exists t)) - - (when (and truncate (not create-table)) - ;; we don't TRUNCATE a table we just CREATEd - (let ((truncate-sql (format nil "TRUNCATE ~a;" table-name))) - (log-message :notice "~a" truncate-sql) - (pgsql-execute truncate-sql))))) - - (let* ((lp:*kernel* (make-kernel 2)) - (channel (lp:make-channel)) - (queue (lq:make-queue :fixed-capacity *concurrent-batches*))) - - (with-stats-collection (table-name :state *state* :summary summary) - (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) - (log-message :notice "COPY \"~a\" from '~a'" (target db3) (source db3)) - (lp:submit-task channel #'copy-to-queue db3 queue) - - ;; and start another task to push that data from the queue to PostgreSQL - (lp:submit-task channel - #'pgloader.pgsql:copy-from-queue - dbname table-name queue - :truncate truncate) - - ;; now wait until both the tasks are over, and kill the kernel - (loop for tasks below 2 do (lp:receive-result channel) - finally - (log-message :info "COPY \"~a\" done." 
table-name) - (lp:end-kernel))))))) - diff --git a/src/sources/db3/db3-schema.lisp b/src/sources/db3/db3-schema.lisp new file mode 100644 index 0000000..7f397d3 --- /dev/null +++ b/src/sources/db3/db3-schema.lisp @@ -0,0 +1,91 @@ +;;; +;;; Tools to handle the DBF file format +;;; + +(in-package :pgloader.db3) + +(defclass dbf-connection (fd-connection) + ((db3 :initarg db3 :accessor fd-db3)) + (:documentation "pgloader connection parameters for DBF files.")) + +(defmethod initialize-instance :after ((dbfconn dbf-connection) &key) + "Assign the type slot to dbf." + (setf (slot-value dbfconn 'type) "dbf")) + +(defmethod open-connection ((dbfconn dbf-connection) &key) + (setf (conn-handle dbfconn) + (open (fd-path dbfconn) + :direction :input + :element-type '(unsigned-byte 8))) + (let ((db3 (make-instance 'db3:db3))) + (db3:load-header db3 (conn-handle dbfconn)) + (setf (fd-db3 dbfconn) db3)) + dbfconn) + +(defmethod close-connection ((dbfconn dbf-connection)) + (close (conn-handle dbfconn)) + (setf (conn-handle dbfconn) nil + (fd-db3 dbfconn) nil) + dbfconn) + +(defvar *db3-pgsql-type-mapping* + '(("C" . "text") ; ignore field-length + ("N" . "numeric") ; handle both integers and floats + ("L" . "boolean") ; PostgreSQL compatible representation + ("D" . "date") ; no TimeZone in DB3 files + ("M" . "text"))) ; not handled yet + +(defstruct (db3-field + (:constructor make-db3-field (name type length))) + name type length) + +(defmethod format-pgsql-column ((col db3-field)) + "Return a string representing the PostgreSQL column definition." + (let* ((column-name + (apply-identifier-case (db3-field-name col))) + (type-definition + (cdr (assoc (db3-field-type col) + *db3-pgsql-type-mapping* + :test #'string=)))) + (format nil "~a ~22t ~a" column-name type-definition))) + +(defun list-all-columns (db3 table-name) + "Return the list of columns for the given DB3-FILE-NAME." 
+ (list + (cons table-name + (loop + for field in (db3::fields db3) + collect (make-db3-field (db3::field-name field) + (db3::field-type field) + (db3::field-length field)))))) + +(declaim (inline logical-to-boolean + db3-trim-string + db3-date-to-pgsql-date)) + +(defun logical-to-boolean (value) + "Convert a DB3 logical value to a PostgreSQL boolean." + (if (string= value "?") nil value)) + +(defun db3-trim-string (value) + "DB3 strings are right padded with spaces, fix that." + (string-right-trim '(#\Space) value)) + +(defun db3-date-to-pgsql-date (value) + "Convert a DB3 date to a PostgreSQL date." + (let ((year (subseq value 0 4)) + (month (subseq value 4 6)) + (day (subseq value 6 8))) + (format nil "~a-~a-~a" year month day))) + +(defun list-transforms (db3) + "Return the list of transforms to apply to each row of data in order to + convert values to PostgreSQL format" + (loop + for field in (db3::fields db3) + for type = (db3::field-type field) + collect + (cond ((string= type "L") #'logical-to-boolean) + ((string= type "C") #'db3-trim-string) + ((string= type "D") #'db3-date-to-pgsql-date) + (t nil)))) diff --git a/src/sources/db3/db3.lisp b/src/sources/db3/db3.lisp new file mode 100644 index 0000000..9633080 --- /dev/null +++ b/src/sources/db3/db3.lisp @@ -0,0 +1,130 @@ +;;; +;;; Tools to handle the DBF file format +;;; + +(in-package :pgloader.db3) + +;;; +;;; Integration with pgloader +;;; +(defclass copy-db3 (copy) () + (:documentation "pgloader DBF Data Source")) + +(defmethod initialize-instance :after ((db3 copy-db3) &key) + "Add a default value for transforms in case it's not been provided." 
+ (setf (slot-value db3 'source) (pathname-name (fd-path (source-db db3)))) + + (with-connection (conn (source-db db3)) + (unless (and (slot-boundp db3 'columns) (slot-value db3 'columns)) + (setf (slot-value db3 'columns) + (list-all-columns (fd-db3 conn) (source db3)))) + + (let ((transforms (when (slot-boundp db3 'transforms) + (slot-value db3 'transforms)))) + (unless transforms + (setf (slot-value db3 'transforms) + (list-transforms (fd-db3 conn))))))) + +(defmethod map-rows ((copy-db3 copy-db3) &key process-row-fn) + "Extract DB3 data and call PROCESS-ROW-FN function with a single + argument (a list of column values) for each row." + (with-connection (conn (source-db copy-db3)) + (let ((stream (conn-handle (source-db copy-db3))) + (db3 (fd-db3 (source-db copy-db3)))) + (loop + :with count := (db3:record-count db3) + :repeat count + :for row-array := (db3:load-record db3 stream) + :do (funcall process-row-fn row-array) + :finally (return count))))) + +(defmethod copy-to ((db3 copy-db3) pgsql-copy-filename) + "Extract data from DB3 file into a PostgreSQL COPY TEXT formatted file" + (with-open-file (text-file pgsql-copy-filename + :direction :output + :if-exists :supersede + :external-format :utf-8) + (let ((transforms (list-transforms (source db3)))) + (map-rows db3 + :process-row-fn + (lambda (row) + (format-vector-row text-file row transforms)))))) + +(defmethod copy-to-queue ((db3 copy-db3) queue) + "Copy data from DB3 file FILENAME into queue DATAQ" + (let ((read (pgloader.queue:map-push-queue db3 queue))) + (pgstate-incf *state* (target db3) :read read))) + +(defmethod copy-from ((db3 copy-db3) &key (kernel nil k-s-p) truncate) + (let* ((summary (null *state*)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (lp:*kernel* (or kernel (make-kernel 2))) + (channel (lp:make-channel)) + (queue (lq:make-queue :fixed-capacity *concurrent-batches*))) + + (with-stats-collection ((target db3) + :dbname (db-name (target-db db3)) + :state *state* + :summary 
summary) + (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) + (log-message :notice "COPY \"~a\" from '~a'" (target db3) (source db3)) + (lp:submit-task channel #'copy-to-queue db3 queue) + + ;; and start another task to push that data from the queue to PostgreSQL + (lp:submit-task channel + #'pgloader.pgsql:copy-from-queue + (target-db db3) (target db3) queue + :truncate truncate) + + ;; now wait until both the tasks are over, and kill the kernel + (loop for tasks below 2 do (lp:receive-result channel) + finally + (log-message :info "COPY \"~a\" done." (target db3)) + (unless k-s-p (lp:end-kernel))))))) + +(defmethod copy-database ((db3 copy-db3) + &key + table-name + state-before + data-only + schema-only + (truncate t) + (create-tables t) + (include-drop t) + (create-indexes t) + (reset-sequences t)) + "Open the DB3 and stream its content to a PostgreSQL database." + (declare (ignore create-indexes reset-sequences)) + (let* ((summary (null *state*)) + (*state* (or *state* (make-pgstate))) + (table-name (or table-name + (target db3) + (source db3)))) + + ;; fix the table-name in the db3 object + (setf (target db3) table-name) + + (handler-case + (when (and (or create-tables schema-only) (not data-only)) + (with-stats-collection ("create, truncate" + :state state-before + :summary summary) + (with-pgsql-transaction (:pgconn (target-db db3)) + (when create-tables + (log-message :notice "Create table \"~a\"" table-name) + (create-tables (columns db3) + :include-drop include-drop + :if-not-exists t))))) + + (cl-postgres::database-errors (e) + (declare (ignore e)) ; a log has already been printed + (log-message :fatal "Failed to create the schema, see above.") + (return-from copy-database))) + + (unless schema-only + (copy-from db3 :truncate truncate)) + + ;; and report the total time spent on the operation + (when summary + (report-full-summary "Total streaming time" *state* + :before state-before)))) diff --git a/src/sources/fixed.lisp 
b/src/sources/fixed.lisp index 4bea1ca..c413d1c 100644 --- a/src/sources/fixed.lisp +++ b/src/sources/fixed.lisp @@ -4,6 +4,8 @@ (in-package :pgloader.fixed) +(defclass fixed-connection (csv-connection) ()) + (defclass copy-fixed (copy) ((source-type :accessor source-type ; one of :inline, :stdin, :regex :initarg :source-type) ; or :filename @@ -17,7 +19,7 @@ (defmethod initialize-instance :after ((fixed copy-fixed) &key) "Compute the real source definition from the given source parameter, and set the transforms function list as needed too." - (let ((source (slot-value fixed 'source))) + (let ((source (csv-specs (slot-value fixed 'source)))) (setf (slot-value fixed 'source-type) (car source)) (setf (slot-value fixed 'source) (get-absolute-pathname source))) @@ -103,19 +105,21 @@ (*state* (or *state* (pgloader.utils:make-pgstate))) (lp:*kernel* (make-kernel 2)) (channel (lp:make-channel)) - (queue (lq:make-queue :fixed-capacity *concurrent-batches*)) - (dbname (target-db fixed)) - (table-name (target fixed))) + (queue (lq:make-queue :fixed-capacity *concurrent-batches*))) - (with-stats-collection (table-name :state *state* :summary summary) + (with-stats-collection ((target fixed) + :dbname (db-name (target-db fixed)) + :state *state* + :summary summary) (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) - (log-message :notice "COPY ~a.~a" dbname table-name) + (log-message :notice "COPY ~a" (target fixed)) (lp:submit-task channel #'copy-to-queue fixed queue) ;; and start another task to push that data from the queue to PostgreSQL (lp:submit-task channel ;; this function update :rows stats - #'pgloader.pgsql:copy-from-queue dbname table-name queue + #'pgloader.pgsql:copy-from-queue + (target-db fixed) (target fixed) queue ;; we only are interested into the column names here :columns (mapcar (lambda (col) ;; always double quote column names diff --git a/src/sources/ixf.lisp b/src/sources/ixf.lisp deleted file mode 100644 index 3cb3874..0000000 --- 
a/src/sources/ixf.lisp +++ /dev/null @@ -1,157 +0,0 @@ -;;; -;;; Tools to handle IBM PC version of IXF file format -;;; -;;; http://www-01.ibm.com/support/knowledgecenter/SSEPGG_10.5.0/com.ibm.db2.luw.admin.dm.doc/doc/r0004667.html - -(in-package :pgloader.ixf) - -(defvar *ixf-pgsql-type-mapping* - '((#. ixf:+smallint+ . "smallint") - (#. ixf:+integer+ . "integer") - (#. ixf:+bigint+ . "bigint") - - (#. ixf:+decimal+ . "numeric") - (#. ixf:+float+ . "double precision") - - (#. ixf:+timestamp+ . "timestamptz") - (#. ixf:+date+ . "date") - (#. ixf:+time+ . "time") - - (#. ixf:+char+ . "text") - (#. ixf:+varchar+ . "text"))) - -(defun cast-ixf-type (ixf-type) - "Return the PostgreSQL type name for a given IXF type name." - (cdr (assoc ixf-type *ixf-pgsql-type-mapping*))) - -(defmethod format-pgsql-column ((col ixf:ixf-column)) - "Return a string reprensenting the PostgreSQL column definition" - (let* ((column-name (apply-identifier-case (ixf:ixf-column-name col))) - (type-definition - (format nil - "~a~:[ not null~;~]~:[~*~; default ~a~]" - (cast-ixf-type (ixf:ixf-column-type col)) - (ixf:ixf-column-nullable col) - (ixf:ixf-column-has-default col) - (ixf:ixf-column-default col)))) - - (format nil "~a ~22t ~a" column-name type-definition))) - -(defun list-all-columns (ixf-file-name - &optional (table-name (pathname-name ixf-file-name))) - "Return the list of columns for the given IXF-FILE-NAME." - (ixf:with-ixf-file ixf-file-name - (let ((ixf (ixf:read-headers))) - (list (cons table-name - (coerce (ixf:ixf-table-columns (ixf:ixf-file-table ixf)) - 'list)))))) - - -;;; -;;; Integration with pgloader -;;; -(defclass copy-ixf (copy) () - (:documentation "pgloader IXF Data Source")) - -(defmethod initialize-instance :after ((source copy-ixf) &key) - "Add a default value for transforms in case it's not been provided." 
- (let* ((fields (or (and (slot-boundp source 'fields) - (slot-value source 'fields)) - (cdar (list-all-columns (source source))))) - - (transforms (when (slot-boundp source 'transforms) - (slot-value source 'transforms)))) - - (when fields - (unless (slot-boundp source 'fields) - (setf (slot-value source 'fields) fields)) - - (unless transforms - (setf (slot-value source 'transforms) - (loop :for field :in fields - :collect - (let ((coltype (cast-ixf-type (ixf:ixf-column-type field)))) - ;; - ;; The IXF driver we use maps the data type and gets - ;; back proper CL typed objects, where we only want to - ;; deal with text. - ;; - (cond ((or (string-equal "float" coltype) - (string-equal "real" coltype) - (string-equal "double precision" coltype) - (and (<= 7 (length coltype)) - (string-equal "numeric" coltype :end2 7))) - #'pgloader.transforms::float-to-string) - - ((string-equal "text" coltype) - nil) - - ((string-equal "bytea" coltype) - #'pgloader.transforms::byte-vector-to-bytea) - - (t - (compile nil (lambda (c) - (when c - (format nil "~a" c))))))))))))) - -(defmethod map-rows ((copy-ixf copy-ixf) &key process-row-fn) - "Extract IXF data and call PROCESS-ROW-FN function with a single - argument (a list of column values) for each row." - (ixf:with-ixf-file (source copy-ixf) - (let ((ixf (ixf:read-headers)) - (row-fn (lambda (row) - (pgstate-incf *state* (target copy-ixf) :read 1) - (funcall process-row-fn row)))) - (ixf:map-data ixf row-fn)))) - -(defmethod copy-to-queue ((ixf copy-ixf) queue) - "Copy data from IXF file FILENAME into queue DATAQ" - (let ((read (pgloader.queue:map-push-queue ixf queue))) - (pgstate-incf *state* (target ixf) :read read))) - -(defmethod copy-from ((ixf copy-ixf) - &key state-before truncate create-table table-name) - "Open the IXF and stream its content to a PostgreSQL database." 
- (let* ((summary (null *state*)) - (*state* (or *state* (make-pgstate))) - (dbname (target-db ixf)) - (table-name (or table-name - (target ixf) - (pathname-name (source ixf))))) - - ;; fix the table-name in the ixf object - (setf (target ixf) table-name) - - (with-stats-collection ("create, truncate" :state state-before :summary summary) - (with-pgsql-transaction () - (when create-table - (log-message :notice "Create table \"~a\"" table-name) - (create-tables (list (cons table-name (fields ixf))) :if-not-exists t)) - - (when (and truncate (not create-table)) - ;; we don't TRUNCATE a table we just CREATEd - (let ((truncate-sql (format nil "TRUNCATE ~a;" table-name))) - (log-message :notice "~a" truncate-sql) - (pgsql-execute truncate-sql))))) - - (let* ((lp:*kernel* (make-kernel 2)) - (channel (lp:make-channel)) - (queue (lq:make-queue :fixed-capacity *concurrent-batches*))) - - (with-stats-collection (table-name :state *state* :summary summary) - (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) - (log-message :notice "COPY \"~a\" from '~a'" (target ixf) (source ixf)) - (lp:submit-task channel #'copy-to-queue ixf queue) - - ;; and start another task to push that data from the queue to PostgreSQL - (lp:submit-task channel - #'pgloader.pgsql:copy-from-queue - dbname table-name queue - :truncate truncate) - - ;; now wait until both the tasks are over, and kill the kernel - (loop for tasks below 2 do (lp:receive-result channel) - finally - (log-message :info "COPY \"~a\" done." 
table-name) - (lp:end-kernel))))))) - diff --git a/src/sources/ixf/ixf-schema.lisp b/src/sources/ixf/ixf-schema.lisp new file mode 100644 index 0000000..fb37663 --- /dev/null +++ b/src/sources/ixf/ixf-schema.lisp @@ -0,0 +1,65 @@ +;;; +;;; Tools to handle IBM PC version of IXF file format +;;; +;;; http://www-01.ibm.com/support/knowledgecenter/SSEPGG_10.5.0/com.ibm.db2.luw.admin.dm.doc/doc/r0004667.html + +(in-package :pgloader.ixf) + +(defclass ixf-connection (fd-connection) () + (:documentation "pgloader connection parameters for IXF files.")) + +(defmethod initialize-instance :after ((ixfconn ixf-connection) &key) + "Assign the type slot to ixf." + (setf (slot-value ixfconn 'type) "ixf")) + +(defmethod open-connection ((ixfconn ixf-connection) &key) + (setf (conn-handle ixfconn) + (open (fd-path ixfconn) + :direction :input + :element-type '(unsigned-byte 8))) + ixfconn) + +(defmethod close-connection ((ixfconn ixf-connection)) + (close (conn-handle ixfconn)) + (setf (conn-handle ixfconn) nil) + ixfconn) + +(defvar *ixf-pgsql-type-mapping* + '((#. ixf:+smallint+ . "smallint") + (#. ixf:+integer+ . "integer") + (#. ixf:+bigint+ . "bigint") + + (#. ixf:+decimal+ . "numeric") + (#. ixf:+float+ . "double precision") + + (#. ixf:+timestamp+ . "timestamptz") + (#. ixf:+date+ . "date") + (#. ixf:+time+ . "time") + + (#. ixf:+char+ . "text") + (#. ixf:+varchar+ . "text"))) + +(defun cast-ixf-type (ixf-type) + "Return the PostgreSQL type name for a given IXF type name." 
+ (cdr (assoc ixf-type *ixf-pgsql-type-mapping*))) + +(defmethod format-pgsql-column ((col ixf:ixf-column)) + "Return a string representing the PostgreSQL column definition" + (let* ((column-name (apply-identifier-case (ixf:ixf-column-name col))) + (type-definition + (format nil + "~a~:[ not null~;~]~:[~*~; default ~a~]" + (cast-ixf-type (ixf:ixf-column-type col)) + (ixf:ixf-column-nullable col) + (ixf:ixf-column-has-default col) + (ixf:ixf-column-default col)))) + + (format nil "~a ~22t ~a" column-name type-definition))) + +(defun list-all-columns (ixf-stream table-name) + "Return the list of columns for the given IXF-FILE-NAME." + (let ((ixf:*ixf-stream* ixf-stream)) + (let ((ixf (ixf:read-headers))) + (list (cons table-name + (coerce (ixf:ixf-table-columns (ixf:ixf-file-table ixf)) + 'list)))))) diff --git a/src/sources/ixf/ixf.lisp b/src/sources/ixf/ixf.lisp new file mode 100644 index 0000000..da0f04f --- /dev/null +++ b/src/sources/ixf/ixf.lisp @@ -0,0 +1,143 @@ +;;; +;;; Tools to handle IBM PC version of IXF file format +;;; +;;; http://www-01.ibm.com/support/knowledgecenter/SSEPGG_10.5.0/com.ibm.db2.luw.admin.dm.doc/doc/r0004667.html + +(in-package :pgloader.ixf) + +;;; +;;; Integration with pgloader +;;; +(defclass copy-ixf (copy) () + (:documentation "pgloader IXF Data Source")) + +(defmethod initialize-instance :after ((source copy-ixf) &key) + "Add a default value for transforms in case it's not been provided." 
+ (setf (slot-value source 'source) + (pathname-name (fd-path (source-db source)))) + + (with-connection (conn (source-db source)) + (unless (and (slot-boundp source 'columns) (slot-value source 'columns)) + (setf (slot-value source 'columns) + (list-all-columns (conn-handle conn) (source source)))) + + (let ((transforms (when (slot-boundp source 'transforms) + (slot-value source 'transforms)))) + (unless transforms + (setf (slot-value source 'transforms) + (loop :for field :in (cdar (columns source)) + :collect + (let ((coltype (cast-ixf-type (ixf:ixf-column-type field)))) + ;; + ;; The IXF driver we use maps the data type and gets + ;; back proper CL typed objects, where we only want to + ;; deal with text. + ;; + (cond ((or (string-equal "float" coltype) + (string-equal "real" coltype) + (string-equal "double precision" coltype) + (and (<= 7 (length coltype)) + (string-equal "numeric" coltype :end2 7))) + #'pgloader.transforms::float-to-string) + + ((string-equal "text" coltype) + nil) + + ((string-equal "bytea" coltype) + #'pgloader.transforms::byte-vector-to-bytea) + + (t + (compile nil (lambda (c) + (when c + (format nil "~a" c))))))))))))) + +(defmethod map-rows ((copy-ixf copy-ixf) &key process-row-fn) + "Extract IXF data and call PROCESS-ROW-FN function with a single + argument (a list of column values) for each row." 
+ (with-connection (conn (source-db copy-ixf)) + (let ((ixf:*ixf-stream* (conn-handle conn))) + (let ((ixf (ixf:read-headers)) + (row-fn (lambda (row) + (pgstate-incf *state* (target copy-ixf) :read 1) + (funcall process-row-fn row)))) + (ixf:map-data ixf row-fn))))) + +(defmethod copy-to-queue ((ixf copy-ixf) queue) + "Copy data from IXF file FILENAME into queue DATAQ" + (let ((read (pgloader.queue:map-push-queue ixf queue))) + (pgstate-incf *state* (target ixf) :read read))) + +(defmethod copy-from ((ixf copy-ixf) &key (kernel nil k-s-p) truncate) + (let* ((summary (null *state*)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (lp:*kernel* (or kernel (make-kernel 2))) + (channel (lp:make-channel)) + (queue (lq:make-queue :fixed-capacity *concurrent-batches*))) + + (with-stats-collection ((target ixf) + :dbname (db-name (target-db ixf)) + :state *state* + :summary summary) + (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) + (log-message :notice "COPY \"~a\" from '~a'" (target ixf) (source ixf)) + (lp:submit-task channel #'copy-to-queue ixf queue) + + ;; and start another task to push that data from the queue to PostgreSQL + (lp:submit-task channel + #'pgloader.pgsql:copy-from-queue + (target-db ixf) (target ixf) queue + :truncate truncate) + + ;; now wait until both the tasks are over, and kill the kernel + (loop for tasks below 2 do (lp:receive-result channel) + finally + (log-message :info "COPY \"~a\" done." (target ixf)) + (unless k-s-p (lp:end-kernel))))))) + +(defmethod copy-database ((ixf copy-ixf) + &key + table-name + state-before + data-only + schema-only + (truncate t) + (create-tables t) + (include-drop t) + (create-indexes t) + (reset-sequences t)) + "Open the IXF and stream its content to a PostgreSQL database." 
+ (declare (ignore create-indexes reset-sequences)) + (let* ((summary (null *state*)) + (*state* (or *state* (make-pgstate))) + (table-name (or table-name + (target ixf) + (source ixf)))) + + ;; fix the table-name in the ixf object + (setf (target ixf) table-name) + + (handler-case + (when (and (or create-tables schema-only) (not data-only)) + (with-stats-collection ("create, truncate" + :state state-before + :summary summary) + (with-pgsql-transaction (:pgconn (target-db ixf)) + (when create-tables + (log-message :notice "Create table \"~a\"" table-name) + (create-tables (columns ixf) + :include-drop include-drop + :if-not-exists t))))) + + (cl-postgres::database-errors (e) + (declare (ignore e)) ; a log has already been printed + (log-message :fatal "Failed to create the schema, see above.") + (return-from copy-database))) + + (unless schema-only + (copy-from ixf :truncate truncate)) + + ;; and report the total time spent on the operation + (when summary + (report-full-summary "Total streaming time" *state* + :before state-before)))) + diff --git a/src/sources/mssql/mssql-schema.lisp b/src/sources/mssql/mssql-schema.lisp index 8ca1743..57050e6 100644 --- a/src/sources/mssql/mssql-schema.lisp +++ b/src/sources/mssql/mssql-schema.lisp @@ -10,33 +10,29 @@ ;;; ;;; General utility to manage MySQL connection ;;; +(defclass mssql-connection (db-connection) ()) + +(defmethod initialize-instance :after ((msconn mssql-connection) &key) + "Assign the type slot to mssql." 
+ (setf (slot-value msconn 'type) "mssql")) + +(defmethod open-connection ((msconn mssql-connection) &key) + (setf (conn-handle msconn) (mssql:connect (db-name msconn) + (db-user msconn) + (db-pass msconn) + (db-host msconn))) + ;; return the connection object + msconn) + +(defmethod close-connection ((msconn mssql-connection)) + (mssql:disconnect (conn-handle msconn)) + (setf (conn-handle msconn) nil) + msconn) + (defun mssql-query (query) "Execute given QUERY within the current *connection*, and set proper defaults for pgloader." - (mssql:query query :connection *mssql-db*)) - -(defmacro with-mssql-connection ((&optional (dbname *ms-dbname*)) &body forms) - "Connect to MSSQL, use given DBNAME as the current database if provided, - and execute FORMS in a protected way so that we always disconnect when - done. - - Connection parameters are *myconn-host*, *myconn-port*, *myconn-user* and - *myconn-pass*." - `(let* ((dbname (or ,dbname *ms-dbname*)) - (*mssql-db* (handler-case - (mssql:connect dbname - *msconn-user* - *msconn-pass* - *msconn-host*) - (condition (e) - (error 'connection-error - :mesg (format nil "~a" e) - :type "MS SQL" - :host *msconn-host* - :user *msconn-user*))))) - (unwind-protect - (progn ,@forms) - (mssql:disconnect *mssql-db*)))) + (mssql:query query :connection (conn-handle *mssql-db*))) ;;; @@ -86,7 +82,6 @@ table-name-list))) (defun list-all-columns (&key - (dbname *ms-dbname*) (table-type :table) including excluding @@ -127,7 +122,7 @@ ~:[~*~;and (~{~a~^~&~10t and ~})~] order by table_schema, table_name, ordinal_position" - dbname + (db-name *mssql-db*) table-type-name including ; do we print the clause? (filter-list-to-where-clause including @@ -247,7 +242,7 @@ order by SchemaName, (loop :for (schema . 
tables) :in result :collect (cons schema (reverse-indexes-cols tables)))))))) -(defun list-all-fkeys (&key (dbname *ms-dbname*) including excluding) +(defun list-all-fkeys (&key including excluding) "Get the list of MSSQL index definitions per table." (loop :with result := nil @@ -283,7 +278,7 @@ order by SchemaName, ~:[~*~;and (~{~a~^ and ~})~] ORDER BY CONSTRAINT_NAME, KCU1.ORDINAL_POSITION" - dbname dbname + (db-name *mssql-db*) (db-name *mssql-db*) including ; do we print the clause? (filter-list-to-where-clause including nil diff --git a/src/sources/mssql.lisp b/src/sources/mssql/mssql.lisp similarity index 80% rename from src/sources/mssql.lisp rename to src/sources/mssql/mssql.lisp index 68c3071..68973ef 100644 --- a/src/sources/mssql.lisp +++ b/src/sources/mssql/mssql.lisp @@ -12,35 +12,10 @@ (defmethod initialize-instance :after ((source copy-mssql) &key) "Add a default value for transforms in case it's not been provided." - (let* ((source-db (slot-value source 'source-db)) - (table-name (when (slot-boundp source 'source) - (slot-value source 'source))) - (fields (or (and (slot-boundp source 'fields) - (slot-value source 'fields)) - (when table-name - (let* ((all-columns (list-all-columns :dbname source-db))) - (cdr (assoc table-name all-columns - :test #'string=)))))) - (transforms (when (slot-boundp source 'transforms) + (let* ((transforms (when (slot-boundp source 'transforms) (slot-value source 'transforms)))) - - ;; default to using the same database name as source and target - (when (and source-db - (or (not (slot-boundp source 'target-db)) - (not (slot-value source 'target-db)))) - (setf (slot-value source 'target-db) source-db)) - - ;; default to using the same table-name as source and target - (when (and table-name - (or (not (slot-boundp source 'target)) - (not (slot-value source 'target)))) - (setf (slot-value source 'target) table-name)) - - (when fields - (unless (slot-boundp source 'fields) - (setf (slot-value source 'fields) fields)) - - 
(loop :for field :in fields + (when (and (slot-boundp source 'fields) (slot-value source 'fields)) + (loop :for field :in (slot-value source 'fields) :for (column fn) := (multiple-value-bind (column fn) (cast-mssql-column-definition-to-pgsql field) (list column fn)) @@ -53,7 +28,7 @@ (defmethod map-rows ((mssql copy-mssql) &key process-row-fn) "Extract Mssql data and call PROCESS-ROW-FN function with a single argument (a list of column values) for each row." - (with-mssql-connection ((source-db mssql)) + (with-connection (*mssql-db* (source-db mssql)) (let* ((sql (destructuring-bind (schema . table-name) (source mssql) (format nil "SELECT ~{~a~^, ~} FROM [~a].[~a];" @@ -72,7 +47,9 @@ (log-message :error "~a" c) (pgstate-incf *state* (target mssql) :errs 1) (invoke-restart 'mssql::use-nil)))) - (mssql::map-query-results sql :row-fn row-fn :connection *mssql-db*)) + (mssql::map-query-results sql + :row-fn row-fn + :connection (conn-handle *mssql-db*))) (condition (e) (progn (log-message :error "~a" e) @@ -93,7 +70,10 @@ ;; we account stats against the target table-name, because that's all we ;; know on the PostgreSQL thread - (with-stats-collection (table-name :state *state* :summary summary) + (with-stats-collection (table-name + :dbname (db-name (target-db mssql)) + :state *state* + :summary summary) (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) (log-message :notice "COPY ~a" table-name) ;; read data from Mssql @@ -113,33 +93,34 @@ ;; return the copy-mssql object we just did the COPY for mssql)) -(defun complete-pgsql-database (all-columns all-fkeys pkeys +(defun complete-pgsql-database (pgconn all-columns all-fkeys pkeys &key state data-only foreign-keys reset-sequences) - "After loading the data into PostgreSQL, we can now reset the sequences + "After loading the data into PostgreSQL, we can now reset the sequences and declare foreign keys." 
- ;; - ;; Now Reset Sequences, the good time to do that is once the whole data - ;; has been imported and once we have the indexes in place, as max() is - ;; able to benefit from the indexes. In particular avoid doing that step - ;; while CREATE INDEX statements are in flight (avoid locking). - ;; - (when reset-sequences - (let ((table-names (mapcar #'car (qualified-table-name-list all-columns)))) - (reset-sequences table-names :state state))) + ;; + ;; Now Reset Sequences, the good time to do that is once the whole data + ;; has been imported and once we have the indexes in place, as max() is + ;; able to benefit from the indexes. In particular avoid doing that step + ;; while CREATE INDEX statements are in flight (avoid locking). + ;; + (when reset-sequences + (let ((table-names (mapcar #'car (qualified-table-name-list all-columns)))) + (reset-sequences table-names :pgconn pgconn :state state))) - ;; - ;; Turn UNIQUE indexes into PRIMARY KEYS now - ;; - (pgstate-add-table state (pgconn-dbname) "Primary Keys") + ;; + ;; Turn UNIQUE indexes into PRIMARY KEYS now + ;; + (with-pgsql-connection (pgconn) + (pgstate-add-table state (db-name pgconn) "Primary Keys") (loop :for sql :in pkeys :when sql :do (progn (log-message :notice "~a" sql) - (pgsql-execute-with-timing (pgconn-dbname) "Primary Keys" sql state))) + (pgsql-execute-with-timing "Primary Keys" sql state))) ;; ;; Foreign Key Constraints @@ -149,23 +130,23 @@ ;; and indexes are imported before doing that. ;; (when (and foreign-keys (not data-only)) - ;; convert to schema-less list of fkeys - ;; TODO: fix the MySQL support inherited API - (let ((all-fkeys - (loop :for (schema . tables) :in all-fkeys - :append (loop :for (table-name . fkeys) :in tables - :collect (cons table-name (mapcar #'cdr fkeys)))))) - (let ((*identifier-case* :none)) - (create-pgsql-fkeys all-fkeys :state state))))) + (pgstate-add-table state (db-name pgconn) "Foreign Keys") + (loop :for (schema . 
tables) :in all-fkeys + :do (loop :for (table-name . fkeys) :in tables + :do (loop :for fkey :in fkeys + :for sql := (format-pgsql-create-fkey fkey) + :do (progn + (log-message :notice "~a;" sql) + (pgsql-execute-with-timing "Foreign Keys" sql state)))))))) -(defun fetch-mssql-metadata (&key state including excluding) +(defun fetch-mssql-metadata (mssql &key state including excluding) "MS SQL introspection to prepare the migration." (let (all-columns all-indexes all-fkeys) (with-stats-collection ("fetch meta data" :use-result-as-rows t :use-result-as-read t :state state) - (with-mssql-connection () + (with-connection (*mssql-db* (source-db mssql)) (setf all-columns (list-all-columns :including including :excluding excluding)) @@ -201,17 +182,9 @@ (reset-sequences t) (foreign-keys t) (encoding :utf-8) - only-tables including excluding) "Stream the given MS SQL database down to PostgreSQL." - - ;; only-tables is part of the generic lambda list, but we don't use it - ;; here as we didn't implement forcing the schema in the table name, and - ;; splitting the schema name and table name for processing in list-all-* - ;; filtering functions - (declare (ignore only-tables)) - (let* ((summary (null *state*)) (*state* (or *state* (make-pgstate))) (idx-state (or state-indexes (make-pgstate))) @@ -223,7 +196,8 @@ (destructuring-bind (&key all-columns all-indexes all-fkeys pkeys) ;; to prepare the run we need to fetch MS SQL meta-data - (fetch-mssql-metadata :state state-before + (fetch-mssql-metadata mssql + :state state-before :including including :excluding excluding) @@ -245,7 +219,7 @@ (with-stats-collection ("create, truncate" :state state-before :summary summary) - (with-pgsql-transaction () + (with-pgsql-transaction (:pgconn (target-db mssql)) (loop :for (schema . tables) :in all-columns :do (let ((schema (apply-identifier-case schema))) ;; create schema @@ -341,7 +315,8 @@ ;; ;; Complete the PostgreSQL database before handing over. 
;; - (complete-pgsql-database all-columns all-fkeys pkeys + (complete-pgsql-database (new-pgsql-connection (target-db mssql)) + all-columns all-fkeys pkeys :state state-after :data-only data-only :foreign-keys foreign-keys diff --git a/src/sources/mysql/mysql-schema.lisp b/src/sources/mysql/mysql-schema.lisp index afb9b25..ed7d488 100644 --- a/src/sources/mysql/mysql-schema.lisp +++ b/src/sources/mysql/mysql-schema.lisp @@ -46,76 +46,68 @@ ;;; ;;; General utility to manage MySQL connection ;;; +(defclass mysql-connection (db-connection) ()) + +(defmethod initialize-instance :after ((myconn mysql-connection) &key) + "Assign the type slot to mysql." + (setf (slot-value myconn 'type) "mysql")) + +(defmethod open-connection ((myconn mysql-connection) &key) + (setf (conn-handle myconn) + (if (and (consp (db-host myconn)) (eq :unix (car (db-host myconn)))) + (qmynd:mysql-local-connect :path (cdr (db-host myconn)) + :username (db-user myconn) + :password (db-pass myconn) + :database (db-name myconn)) + (qmynd:mysql-connect :host (db-host myconn) + :port (db-port myconn) + :username (db-user myconn) + :password (db-pass myconn) + :database (db-name myconn)))) + ;; return the connection object + myconn) + +(defmethod close-connection ((myconn mysql-connection)) + (qmynd:mysql-disconnect (conn-handle myconn)) + (setf (conn-handle myconn) nil) + myconn) + (defun mysql-query (query &key row-fn (as-text t) (result-type 'list)) "Execute given QUERY within the current *connection*, and set proper defaults for pgloader." - (qmynd:mysql-query *connection* query + (qmynd:mysql-query (conn-handle *connection*) query :row-fn row-fn :as-text as-text :result-type result-type)) -(defmacro with-mysql-connection ((&optional (dbname *my-dbname*)) &body forms) - "Connect to MySQL, use given DBNAME as the current database if provided, - and execute FORMS in a protected way so that we always disconnect when - done. 
- - Connection parameters are *myconn-host*, *myconn-port*, *myconn-user* and - *myconn-pass*." - `(let* ((dbname (or ,dbname *my-dbname*)) - (*connection* - (handler-case - (if (and (consp *myconn-host*) (eq :unix (car *myconn-host*))) - (qmynd:mysql-local-connect :path (cdr *myconn-host*) - :username *myconn-user* - :password *myconn-pass* - :database dbname) - (qmynd:mysql-connect :host *myconn-host* - :port *myconn-port* - :username *myconn-user* - :password *myconn-pass* - :database dbname)) - (condition (e) - (error 'connection-error - :mesg (format nil "~a" e) - :type "MySQL" - :host (if (and (consp *myconn-host*) - (eq :unix (car *myconn-host*))) - (cdr *myconn-host*) - *myconn-host*) - :port *myconn-port* - :user *myconn-user*))))) - (unwind-protect - (progn ,@forms) - (qmynd:mysql-disconnect *connection*)))) - ;;; ;;; Function for accessing the MySQL catalogs, implementing auto-discovery. ;;; ;;; Interactive use only, will create its own database connection. ;;; -(defun list-databases () - "Connect to a local database and get the database list" - (with-mysql-connection () - (mysql-query "show databases"))) +;; (defun list-databases () +;; "Connect to a local database and get the database list" +;; (with-mysql-connection () +;; (mysql-query "show databases"))) -(defun list-tables (dbname) - "Return a flat list of all the tables names known in given DATABASE" - (with-mysql-connection (dbname) - (mysql-query (format nil " - select table_name - from information_schema.tables - where table_schema = '~a' and table_type = 'BASE TABLE' -order by table_name" dbname)))) +;; (defun list-tables (dbname) +;; "Return a flat list of all the tables names known in given DATABASE" +;; (with-mysql-connection (dbname) +;; (mysql-query (format nil " +;; select table_name +;; from information_schema.tables +;; where table_schema = '~a' and table_type = 'BASE TABLE' +;; order by table_name" dbname)))) -(defun list-views (dbname &key only-tables) - "Return a flat list of all the 
view names and definitions known in given DBNAME" - (with-mysql-connection (dbname) - (mysql-query (format nil " - select table_name, view_definition - from information_schema.views - where table_schema = '~a' - ~@[and table_name in (~{'~a'~^,~})~] -order by table_name" dbname only-tables)))) +;; (defun list-views (dbname &key only-tables) +;; "Return a flat list of all the view names and definitions known in given DBNAME" +;; (with-mysql-connection (dbname) +;; (mysql-query (format nil " +;; select table_name, view_definition +;; from information_schema.views +;; where table_schema = '~a' +;; ~@[and table_name in (~{'~a'~^,~})~] +;; order by table_name" dbname only-tables)))) ;;; @@ -180,7 +172,6 @@ order by table_name" dbname only-tables)))) (t default))) (defun list-all-columns (&key - (dbname *my-dbname*) (table-type :table) only-tables including @@ -203,7 +194,7 @@ order by table_name" dbname only-tables)))) ~:[~*~;and (~{table_name ~a~^ or ~})~] ~:[~*~;and (~{table_name ~a~^ and ~})~] order by table_name, ordinal_position" - dbname + (db-name *connection*) table-type-name only-tables ; do we print the clause? only-tables @@ -228,7 +219,6 @@ order by table_name, ordinal_position" collect (cons name (reverse cols)))))) (defun list-all-indexes (&key - (dbname *my-dbname*) only-tables including excluding) @@ -245,12 +235,12 @@ order by table_name, ordinal_position" ~:[~*~;and (~{table_name ~a~^ or ~})~] ~:[~*~;and (~{table_name ~a~^ and ~})~] GROUP BY table_name, index_name;" - dbname + (db-name *connection*) only-tables ; do we print the clause? only-tables - including ; do we print the clause? + including ; do we print the clause? (filter-list-to-where-clause including) - excluding ; do we print the clause? + excluding ; do we print the clause? 
(filter-list-to-where-clause excluding t))) do (let ((entry (assoc table-name schema :test 'equal)) (index @@ -272,7 +262,6 @@ GROUP BY table_name, index_name;" ;;; MySQL Foreign Keys ;;; (defun list-all-fkeys (&key - (dbname *my-dbname*) only-tables including excluding) @@ -301,12 +290,12 @@ GROUP BY table_name, index_name;" ~:[~*~;and (~{table_name ~a~^ and ~})~] GROUP BY table_name, constraint_name, ft;" - dbname dbname + (db-name *connection*) (db-name *connection*) only-tables ; do we print the clause? only-tables - including ; do we print the clause? + including ; do we print the clause? (filter-list-to-where-clause including) - excluding ; do we print the clause? + excluding ; do we print the clause? (filter-list-to-where-clause excluding t))) do (let ((entry (assoc table-name schema :test 'equal)) (fk diff --git a/src/sources/mysql.lisp b/src/sources/mysql/mysql.lisp similarity index 83% rename from src/sources/mysql.lisp rename to src/sources/mysql/mysql.lisp index 761a2b1..ab59ed7 100644 --- a/src/sources/mysql.lisp +++ b/src/sources/mysql/mysql.lisp @@ -18,43 +18,18 @@ (defmethod initialize-instance :after ((source copy-mysql) &key) "Add a default value for transforms in case it's not been provided." 
- (let* ((source-db (slot-value source 'source-db)) - (table-name (when (slot-boundp source 'source) - (slot-value source 'source))) - (fields (or (and (slot-boundp source 'fields) - (slot-value source 'fields)) - (when table-name - (let* ((all-columns (list-all-columns :dbname source-db))) - (cdr (assoc table-name all-columns - :test #'string=)))))) - (transforms (when (slot-boundp source 'transforms) - (slot-value source 'transforms)))) - - ;; default to using the same database name as source and target - (when (and source-db - (or (not (slot-boundp source 'target-db)) - (not (slot-value source 'target-db)))) - (setf (slot-value source 'target-db) source-db)) - - ;; default to using the same table-name as source and target - (when (and table-name - (or (not (slot-boundp source 'target)) - (not (slot-value source 'target)))) - (setf (slot-value source 'target) table-name)) - - (when fields - (unless (slot-boundp source 'fields) - (setf (slot-value source 'fields) fields)) - - (loop for field in fields - for (column fn) = (multiple-value-bind (column fn) - (cast-mysql-column-definition-to-pgsql field) - (list column fn)) - collect column into columns - collect fn into fns - finally (progn (setf (slot-value source 'columns) columns) - (unless transforms - (setf (slot-value source 'transforms) fns))))))) + (let ((transforms (and (slot-boundp source 'transforms) + (slot-value source 'transforms)))) + (when (and (slot-boundp source 'fields) (slot-value source 'fields)) + (loop :for field :in (slot-value source 'fields) + :for (column fn) := (multiple-value-bind (column fn) + (cast-mysql-column-definition-to-pgsql field) + (list column fn)) + :collect column :into columns + :collect fn :into fns + :finally (progn (setf (slot-value source 'columns) columns) + (unless transforms + (setf (slot-value source 'transforms) fns))))))) ;;; @@ -63,18 +38,17 @@ (defmethod map-rows ((mysql copy-mysql) &key process-row-fn) "Extract MySQL data and call PROCESS-ROW-FN function with a 
single argument (a list of column values) for each row." - (let ((dbname (source-db mysql)) - (table-name (source mysql)) + (let ((table-name (source mysql)) (qmynd:*mysql-encoding* (when (encoding mysql) #+sbcl (encoding mysql) #+ccl (ccl:external-format-character-encoding (encoding mysql))))) - (with-mysql-connection (dbname) + (with-connection (*connection* (source-db mysql)) (when qmynd:*mysql-encoding* (log-message :notice "Force encoding to ~a for ~a" qmynd:*mysql-encoding* table-name)) - (let* ((cols (get-column-list dbname table-name)) + (let* ((cols (get-column-list (db-name (source-db mysql)) table-name)) (sql (format nil "SELECT ~{~a~^, ~} FROM `~a`;" cols table-name)) (row-fn (lambda (row) @@ -140,7 +114,10 @@ ;; we account stats against the target table-name, because that's all we ;; know on the PostgreSQL thread - (with-stats-collection (table-name :state *state* :summary summary) + (with-stats-collection (table-name + :dbname (db-name (target-db mysql)) + :state *state* + :summary summary) (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) (log-message :notice "COPY ~a" table-name) ;; read data from MySQL @@ -167,7 +144,8 @@ ;;; ;;; Prepare the PostgreSQL database before streaming the data into it. ;;; -(defun prepare-pgsql-database (all-columns all-indexes all-fkeys +(defun prepare-pgsql-database (pgconn + all-columns all-indexes all-fkeys materialize-views view-columns &key state @@ -186,7 +164,7 @@ (loop for (name . 
idxs) in all-indexes sum (length idxs))) (with-stats-collection ("create, drop" :use-result-as-rows t :state state) - (with-pgsql-transaction () + (with-pgsql-transaction (:pgconn pgconn) ;; we need to first drop the Foreign Key Constraints, so that we ;; can DROP TABLE when asked (when (and foreign-keys include-drop) @@ -205,32 +183,33 @@ (when materialize-views (create-tables view-columns :include-drop include-drop)))))) -(defun complete-pgsql-database (all-columns all-fkeys pkeys +(defun complete-pgsql-database (pgconn all-columns all-fkeys pkeys &key state data-only foreign-keys reset-sequences) - "After loading the data into PostgreSQL, we can now reset the sequences + "After loading the data into PostgreSQL, we can now reset the sequences and declare foreign keys." - ;; - ;; Now Reset Sequences, the good time to do that is once the whole data - ;; has been imported and once we have the indexes in place, as max() is - ;; able to benefit from the indexes. In particular avoid doing that step - ;; while CREATE INDEX statements are in flight (avoid locking). - ;; - (when reset-sequences - (reset-sequences (mapcar #'car all-columns) :state state)) + ;; + ;; Now Reset Sequences, the good time to do that is once the whole data + ;; has been imported and once we have the indexes in place, as max() is + ;; able to benefit from the indexes. In particular avoid doing that step + ;; while CREATE INDEX statements are in flight (avoid locking). 
+ ;; + (when reset-sequences + (reset-sequences (mapcar #'car all-columns) :pgconn pgconn :state state)) - ;; - ;; Turn UNIQUE indexes into PRIMARY KEYS now - ;; - (pgstate-add-table state (pgconn-dbname) "Primary Keys") + ;; + ;; Turn UNIQUE indexes into PRIMARY KEYS now + ;; + (with-pgsql-connection (pgconn) + (pgstate-add-table state (db-name pgconn) "Primary Keys") (loop :for sql :in pkeys :when sql :do (progn (log-message :notice "~a" sql) - (pgsql-execute-with-timing (pgconn-dbname) "Primary Keys" sql state))) + (pgsql-execute-with-timing "Primary Keys" sql state))) ;; ;; Foreign Key Constraints @@ -240,9 +219,16 @@ ;; and indexes are imported before doing that. ;; (when (and foreign-keys (not data-only)) - (create-pgsql-fkeys all-fkeys :state state))) + (pgstate-add-table state (db-name pgconn) "Foreign Keys") + (loop :for (table-name . fkeys) :in all-fkeys + :do (loop :for fkey :in fkeys + :for sql := (format-pgsql-create-fkey fkey) + :do (progn + (log-message :notice "~a;" sql) + (pgsql-execute-with-timing "Foreign Keys" sql state))))))) -(defun fetch-mysql-metadata (&key +(defun fetch-mysql-metadata (mysql + &key state materialize-views only-tables @@ -256,7 +242,7 @@ :use-result-as-rows t :use-result-as-read t :state state) - (with-mysql-connection () + (with-connection (*connection* (source-db mysql)) ;; If asked to MATERIALIZE VIEWS, now is the time to create them in ;; MySQL, when given definitions rather than existing view names. 
(when (and materialize-views (not (eq :all materialize-views))) @@ -332,13 +318,12 @@ (state-before (or state-before (make-pgstate))) (state-after (or state-after (make-pgstate))) (copy-kernel (make-kernel 2)) - (dbname (source-db mysql)) - (pg-dbname (target-db mysql)) idx-kernel idx-channel) (destructuring-bind (&key view-columns all-columns all-fkeys all-indexes pkeys) ;; to prepare the run, we need to fetch MySQL meta-data - (fetch-mysql-metadata :state state-before + (fetch-mysql-metadata mysql + :state state-before :materialize-views materialize-views :only-tables only-tables :including including @@ -359,7 +344,8 @@ ;; if asked, first drop/create the tables on the PostgreSQL side (handler-case (cond ((and (or create-tables schema-only) (not data-only)) - (prepare-pgsql-database all-columns + (prepare-pgsql-database (target-db mysql) + all-columns all-indexes all-fkeys materialize-views @@ -382,7 +368,7 @@ ;; we did already create our Views in the MySQL database, so clean ;; that up now. 
(when materialize-views - (with-mysql-connection () + (with-connection (*connection* (source-db mysql)) (drop-my-views materialize-views))) (return-from copy-database))) @@ -404,8 +390,8 @@ (table-source (make-instance 'copy-mysql - :source-db dbname - :target-db pg-dbname + :source-db (source-db mysql) + :target-db (target-db mysql) :source table-name :target (apply-identifier-case table-name) :fields columns @@ -430,8 +416,8 @@ (cdr (assoc table-name all-indexes :test #'string=)))) (alexandria:appendf pkeys - (create-indexes-in-kernel pg-dbname indexes - idx-kernel idx-channel + (create-indexes-in-kernel (target-db mysql) + indexes idx-kernel idx-channel :state idx-state)))))) ;; now end the kernels @@ -448,13 +434,14 @@ ;; If we created some views for this run, now is the time to DROP'em ;; (when materialize-views - (with-mysql-connection () + (with-connection (*connection* (source-db mysql)) (drop-my-views materialize-views))) ;; ;; Complete the PostgreSQL database before handing over. ;; - (complete-pgsql-database all-columns all-fkeys pkeys + (complete-pgsql-database (new-pgsql-connection (target-db mysql)) + all-columns all-fkeys pkeys :state state-after :data-only data-only :foreign-keys foreign-keys diff --git a/src/sources/sqlite.lisp b/src/sources/sqlite.lisp deleted file mode 100644 index 3d54b1f..0000000 --- a/src/sources/sqlite.lisp +++ /dev/null @@ -1,223 +0,0 @@ -;;; -;;; Tools to handle the SQLite Database -;;; - -(in-package :pgloader.sqlite) - -;;; -;;; Integration with the pgloader Source API -;;; -(defclass copy-sqlite (copy) - ((db :accessor db :initarg :db)) - (:documentation "pgloader SQLite Data Source")) - -(defmethod initialize-instance :after ((source copy-sqlite) &key) - "Add a default value for transforms in case it's not been provided." 
- (let* ((source-db (slot-value source 'source-db)) - (db (sqlite:connect (get-absolute-pathname `(:filename ,source-db)))) - (table-name (when (slot-boundp source 'source) - (slot-value source 'source))) - (fields (or (and (slot-boundp source 'fields) - (slot-value source 'fields)) - (when table-name - (list-columns table-name db)))) - (transforms (when (slot-boundp source 'transforms) - (slot-value source 'transforms)))) - - ;; we will reuse the same SQLite database handler that we just opened - (setf (slot-value source 'db) db) - - ;; default to using the same table-name as source and target - (when (and table-name - (or (not (slot-boundp source 'target)) - (slot-value source 'target))) - (setf (slot-value source 'target) table-name)) - - (when fields - (unless (slot-boundp source 'fields) - (setf (slot-value source 'fields) fields)) - - (loop for field in fields - for (column fn) = (multiple-value-bind (column fn) - (cast-sqlite-column-definition-to-pgsql field) - (list column fn)) - collect column into columns - collect fn into fns - finally (progn (setf (slot-value source 'columns) columns) - (unless transforms - (setf (slot-value source 'transforms) fns))))))) - -;;; Map a function to each row extracted from SQLite -;;; -(defmethod map-rows ((sqlite copy-sqlite) &key process-row-fn) - "Extract SQLite data and call PROCESS-ROW-FN function with a single - argument (a list of column values) for each row" - (let ((sql (format nil "SELECT * FROM ~a" (source sqlite))) - (blobs-p - (coerce (mapcar #'cast-to-bytea-p (fields sqlite)) 'vector))) - (handler-case - (loop - with statement = (sqlite:prepare-statement (db sqlite) sql) - with len = (loop :for name :in (sqlite:statement-column-names statement) - :count name) - while (sqlite:step-statement statement) - for row = (let ((v (make-array len))) - (loop :for x :below len - :for raw := (sqlite:statement-column-value statement x) - :for val := (if (and (aref blobs-p x) (stringp raw)) - 
(base64:base64-string-to-usb8-array raw) - raw) - :do (setf (aref v x) val)) - v) - counting t into rows - do (funcall process-row-fn row) - finally - (sqlite:finalize-statement statement) - (return rows)) - (condition (e) - (progn - (log-message :error "~a" e) - (pgstate-incf *state* (target sqlite) :errs 1)))))) - - -(defmethod copy-to-queue ((sqlite copy-sqlite) queue) - "Copy data from SQLite table TABLE-NAME within connection DB into queue DATAQ" - (let ((read (pgloader.queue:map-push-queue sqlite queue))) - (pgstate-incf *state* (target sqlite) :read read))) - -(defmethod copy-from ((sqlite copy-sqlite) &key (kernel nil k-s-p) truncate) - "Stream the contents from a SQLite database table down to PostgreSQL." - (let* ((summary (null *state*)) - (*state* (or *state* (pgloader.utils:make-pgstate))) - (lp:*kernel* (or kernel (make-kernel 2))) - (channel (lp:make-channel)) - (queue (lq:make-queue :fixed-capacity *concurrent-batches*)) - (table-name (target sqlite)) - (pg-dbname (target-db sqlite))) - - (with-stats-collection (table-name :state *state* :summary summary) - (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) - (log-message :notice "COPY ~a" table-name) - ;; read data from SQLite - (lp:submit-task channel #'copy-to-queue sqlite queue) - - ;; and start another task to push that data from the queue to PostgreSQL - (lp:submit-task channel - #'pgloader.pgsql:copy-from-queue - pg-dbname table-name queue - :truncate truncate) - - ;; now wait until both the tasks are over - (loop for tasks below 2 do (lp:receive-result channel) - finally - (log-message :info "COPY ~a done." table-name) - (unless k-s-p (lp:end-kernel))))))) - -(defmethod copy-database ((sqlite copy-sqlite) - &key - state-before - data-only - schema-only - (truncate nil) - (create-tables t) - (include-drop t) - (create-indexes t) - (reset-sequences t) - only-tables - including - excluding - (encoding :utf-8)) - "Stream the given SQLite database down to PostgreSQL." 
- (let* ((summary (null *state*)) - (*state* (or *state* (make-pgstate))) - (state-before (or state-before (make-pgstate))) - (idx-state (make-pgstate)) - (seq-state (make-pgstate)) - (cffi:*default-foreign-encoding* encoding) - (copy-kernel (make-kernel 2)) - (all-columns (filter-column-list (list-all-columns (db sqlite)) - :only-tables only-tables - :including including - :excluding excluding)) - (all-indexes (filter-column-list (list-all-indexes (db sqlite)) - :only-tables only-tables - :including including - :excluding excluding)) - (max-indexes (loop for (table . indexes) in all-indexes - maximizing (length indexes))) - (idx-kernel (when (and max-indexes (< 0 max-indexes)) - (make-kernel max-indexes))) - (idx-channel (when idx-kernel - (let ((lp:*kernel* idx-kernel)) - (lp:make-channel)))) - (pg-dbname (target-db sqlite))) - - ;; if asked, first drop/create the tables on the PostgreSQL side - (cond ((and (or create-tables schema-only) (not data-only)) - (log-message :notice "~:[~;DROP then ~]CREATE TABLES" include-drop) - (with-stats-collection ("create, truncate" - :state state-before - :summary summary) - (with-pgsql-transaction () - (create-tables all-columns :include-drop include-drop)))) - - (truncate - (truncate-tables *pg-dbname* (mapcar #'car all-columns)))) - - (loop - for (table-name . 
columns) in all-columns - do - (let ((table-source - (make-instance 'copy-sqlite - :db (db sqlite) - :source-db (source-db sqlite) - :target-db pg-dbname - :source table-name - :target table-name - :fields columns))) - ;; first COPY the data from SQLite to PostgreSQL, using copy-kernel - (unless schema-only - (copy-from table-source :kernel copy-kernel)) - - ;; Create the indexes for that table in parallel with the next - ;; COPY, and all at once in concurrent threads to benefit from - ;; PostgreSQL synchronous scan ability - ;; - ;; We just push new index build as they come along, if one - ;; index build requires much more time than the others our - ;; index build might get unsync: indexes for different tables - ;; will get built in parallel --- not a big problem. - (when (and create-indexes (not data-only)) - (let* ((indexes - (cdr (assoc table-name all-indexes :test #'string=)))) - (create-indexes-in-kernel pg-dbname indexes - idx-kernel idx-channel - :state idx-state))))) - - ;; don't forget to reset sequences, but only when we did actually import - ;; the data. - (when reset-sequences - (let ((tables (or only-tables - (mapcar #'car all-columns)))) - (log-message :notice "Reset sequences") - (with-stats-collection ("reset sequences" - :use-result-as-rows t - :state seq-state) - (pgloader.pgsql:reset-all-sequences pg-dbname :tables tables)))) - - ;; now end the kernels - (let ((lp:*kernel* copy-kernel)) (lp:end-kernel)) - (let ((lp:*kernel* idx-kernel)) - ;; wait until the indexes are done being built... - ;; don't forget accounting for that waiting time. 
- (when (and create-indexes (not data-only)) - (with-stats-collection ("index build completion" :state *state*) - (loop for idx in all-indexes do (lp:receive-result idx-channel)))) - (lp:end-kernel)) - - ;; and report the total time spent on the operation - (report-full-summary "Total streaming time" *state* - :before state-before - :finally seq-state - :parallel idx-state))) - diff --git a/src/sources/sqlite/sqlite.lisp b/src/sources/sqlite/sqlite.lisp new file mode 100644 index 0000000..577fd78 --- /dev/null +++ b/src/sources/sqlite/sqlite.lisp @@ -0,0 +1,257 @@ +;;; +;;; Tools to handle the SQLite Database +;;; + +(in-package :pgloader.sqlite) + +;;; +;;; Integration with the pgloader Source API +;;; +(defclass sqlite-connection (fd-connection) ()) + +(defmethod initialize-instance :after ((slconn sqlite-connection) &key) + "Assign the type slot to sqlite." + (setf (slot-value slconn 'type) "sqlite")) + +(defmethod open-connection ((slconn sqlite-connection) &key) + (setf (conn-handle slconn) + (sqlite:connect (fd-path slconn))) + slconn) + +(defmethod close-connection ((slconn sqlite-connection)) + (sqlite:disconnect (conn-handle slconn)) + (setf (conn-handle slconn) nil) + slconn) + +(defclass copy-sqlite (copy) + ((db :accessor db :initarg :db)) + (:documentation "pgloader SQLite Data Source")) + +(defmethod initialize-instance :after ((source copy-sqlite) &key) + "Add a default value for transforms in case it's not been provided." 
+ (let* ((transforms (when (slot-boundp source 'transforms) + (slot-value source 'transforms)))) + (when (and (slot-boundp source 'fields) (slot-value source 'fields)) + (loop for field in (slot-value source 'fields) + for (column fn) = (multiple-value-bind (column fn) + (cast-sqlite-column-definition-to-pgsql field) + (list column fn)) + collect column into columns + collect fn into fns + finally (progn (setf (slot-value source 'columns) columns) + (unless transforms + (setf (slot-value source 'transforms) fns))))))) + +;;; Map a function to each row extracted from SQLite +;;; +(defmethod map-rows ((sqlite copy-sqlite) &key process-row-fn) + "Extract SQLite data and call PROCESS-ROW-FN function with a single + argument (a list of column values) for each row" + (let ((sql (format nil "SELECT * FROM ~a" (source sqlite))) + (blobs-p + (coerce (mapcar #'cast-to-bytea-p (fields sqlite)) 'vector))) + (with-connection (*sqlite-db* (source-db sqlite)) + (let ((db (conn-handle *sqlite-db*))) + (handler-case + (loop + with statement = (sqlite:prepare-statement db sql) + with len = (loop :for name + :in (sqlite:statement-column-names statement) + :count name) + while (sqlite:step-statement statement) + for row = (let ((v (make-array len))) + (loop :for x :below len + :for raw := (sqlite:statement-column-value statement x) + :for val := (if (and (aref blobs-p x) (stringp raw)) + (base64:base64-string-to-usb8-array raw) + raw) + :do (setf (aref v x) val)) + v) + counting t into rows + do (funcall process-row-fn row) + finally + (sqlite:finalize-statement statement) + (return rows)) + (condition (e) + (progn + (log-message :error "~a" e) + (pgstate-incf *state* (target sqlite) :errs 1)))))))) + + +(defmethod copy-to-queue ((sqlite copy-sqlite) queue) + "Copy data from SQLite table TABLE-NAME within connection DB into queue DATAQ" + (let ((read (pgloader.queue:map-push-queue sqlite queue))) + (pgstate-incf *state* (target sqlite) :read read))) + +(defmethod copy-from ((sqlite 
copy-sqlite) &key (kernel nil k-s-p) truncate) + "Stream the contents from a SQLite database table down to PostgreSQL." + (let* ((summary (null *state*)) + (*state* (or *state* (pgloader.utils:make-pgstate))) + (lp:*kernel* (or kernel (make-kernel 2))) + (channel (lp:make-channel)) + (queue (lq:make-queue :fixed-capacity *concurrent-batches*))) + + (with-stats-collection ((target sqlite) + :dbname (db-name (target-db sqlite)) + :state *state* + :summary summary) + (lp:task-handler-bind ((error #'lp:invoke-transfer-error)) + (log-message :notice "COPY ~a" (target sqlite)) + ;; read data from SQLite + (lp:submit-task channel #'copy-to-queue sqlite queue) + + ;; and start another task to push that data from the queue to PostgreSQL + (lp:submit-task channel + #'pgloader.pgsql:copy-from-queue + (target-db sqlite) (target sqlite) queue + :truncate truncate) + + ;; now wait until both the tasks are over + (loop for tasks below 2 do (lp:receive-result channel) + finally + (log-message :info "COPY ~a done." (target sqlite)) + (unless k-s-p (lp:end-kernel))))))) + +(defun fetch-sqlite-metadata (sqlite + &key + state + including + excluding) + "SQLite introspection to prepare the migration." 
+ (let (all-columns all-indexes) + (with-stats-collection ("fetch meta data" + :use-result-as-rows t + :use-result-as-read t + :state state) + (with-connection (conn (source-db sqlite)) + (let ((*sqlite-db* (conn-handle conn))) + (setf all-columns (filter-column-list (list-all-columns *sqlite-db*) + :including including + :excluding excluding) + + all-indexes (filter-column-list (list-all-indexes *sqlite-db*) + :including including + :excluding excluding))) + + ;; return how many objects we're going to deal with in total + ;; for stats collection + (+ (length all-columns) (length all-indexes)))) + + ;; now return a plist to the caller + (list :all-columns all-columns + :all-indexes all-indexes))) + +(defmethod copy-database ((sqlite copy-sqlite) + &key + state-before + data-only + schema-only + (truncate nil) + (create-tables t) + (include-drop t) + (create-indexes t) + (reset-sequences t) + only-tables + including + excluding + (encoding :utf-8)) + "Stream the given SQLite database down to PostgreSQL." + (declare (ignore only-tables)) + (let* ((summary (null *state*)) + (*state* (or *state* (make-pgstate))) + (state-before (or state-before (make-pgstate))) + (idx-state (make-pgstate)) + (seq-state (make-pgstate)) + (cffi:*default-foreign-encoding* encoding) + (copy-kernel (make-kernel 2)) + idx-kernel idx-channel) + + (destructuring-bind (&key all-columns all-indexes pkeys) + (fetch-sqlite-metadata sqlite + :state state-before + :including including + :excluding excluding) + + (let ((max-indexes + (loop for (table . 
indexes) in all-indexes + maximizing (length indexes)))) + + (setf idx-kernel (when (and max-indexes (< 0 max-indexes)) + (make-kernel max-indexes))) + + (setf idx-channel (when idx-kernel + (let ((lp:*kernel* idx-kernel)) + (lp:make-channel))))) + + ;; if asked, first drop/create the tables on the PostgreSQL side + (handler-case + (cond ((and (or create-tables schema-only) (not data-only)) + (log-message :notice "~:[~;DROP then ~]CREATE TABLES" include-drop) + (with-stats-collection ("create, truncate" + :state state-before + :summary summary) + (with-pgsql-transaction (:pgconn (target-db sqlite)) + (create-tables all-columns :include-drop include-drop)))) + + (truncate + (truncate-tables (target-db sqlite) (mapcar #'car all-columns)))) + + (cl-postgres::database-error (e) + (declare (ignore e)) ; a log has already been printed + (log-message :fatal "Failed to create the schema, see above.") + (return-from copy-database))) + + (loop + for (table-name . columns) in all-columns + do + (let ((table-source + (make-instance 'copy-sqlite + :source-db (source-db sqlite) + :target-db (target-db sqlite) + :source table-name + :target table-name + :fields columns))) + ;; first COPY the data from SQLite to PostgreSQL, using copy-kernel + (unless schema-only + (copy-from table-source :kernel copy-kernel)) + + ;; Create the indexes for that table in parallel with the next + ;; COPY, and all at once in concurrent threads to benefit from + ;; PostgreSQL synchronous scan ability + ;; + ;; We just push new index build as they come along, if one + ;; index build requires much more time than the others our + ;; index build might get unsync: indexes for different tables + ;; will get built in parallel --- not a big problem.
+ (when (and create-indexes (not data-only)) + (let* ((indexes + (cdr (assoc table-name all-indexes :test #'string=)))) + (alexandria:appendf + pkeys + (create-indexes-in-kernel (target-db sqlite) indexes + idx-kernel idx-channel + :state idx-state)))))) + + ;; now end the kernels + (let ((lp:*kernel* copy-kernel)) (lp:end-kernel)) + (let ((lp:*kernel* idx-kernel)) + ;; wait until the indexes are done being built... + ;; don't forget accounting for that waiting time. + (when (and create-indexes (not data-only)) + (with-stats-collection ("index build completion" :state *state*) + (loop for idx in all-indexes do (lp:receive-result idx-channel)))) + (lp:end-kernel)) + + ;; don't forget to reset sequences, but only when we did actually import + ;; the data. + (when reset-sequences + (reset-sequences (mapcar #'car all-columns) + :pgconn (target-db sqlite) + :state seq-state)) + + ;; and report the total time spent on the operation + (report-full-summary "Total streaming time" *state* + :before state-before + :finally seq-state + :parallel idx-state)))) + diff --git a/src/utils/archive.lisp b/src/utils/archive.lisp index 6e63cf1..163ad67 100644 --- a/src/utils/archive.lisp +++ b/src/utils/archive.lisp @@ -4,6 +4,12 @@ (in-package #:pgloader.archive) +(defparameter *supported-archive-types* '(:tar :tgz :gz :zip)) + +(defun archivep (archive-file) + "Return non-nil when the ARCHIVE-FILE is something we know how to expand." + (member (archive-type archive-file) *supported-archive-types*)) + (defun http-fetch-file (url &key (tmpdir *default-tmpdir*)) "Download a file from URL into TMPDIR." 
diff --git a/src/utils/report.lisp b/src/utils/report.lisp index 104c24e..bc3e64f 100644 --- a/src/utils/report.lisp +++ b/src/utils/report.lisp @@ -65,7 +65,7 @@ (defmacro with-stats-collection ((table-name &key - (dbname (pgconn-dbname)) + dbname summary use-result-as-read use-result-as-rows diff --git a/src/utils/threads.lisp b/src/utils/threads.lisp index 69c78c0..0b625a1 100644 --- a/src/utils/threads.lisp +++ b/src/utils/threads.lisp @@ -11,16 +11,7 @@ (*copy-batch-rows* . ,*copy-batch-rows*) (*copy-batch-size* . ,*copy-batch-size*) (*concurrent-batches* . ,*concurrent-batches*) - (*pgconn* . ',*pgconn*) (*pg-settings* . ',*pg-settings*) - (*myconn-host* . ',*myconn-host*) - (*myconn-port* . ,*myconn-port*) - (*myconn-user* . ,*myconn-user*) - (*myconn-pass* . ,*myconn-pass*) - (*msconn-host* . ',*msconn-host*) - (*msconn-port* . ,*msconn-port*) - (*msconn-user* . ,*msconn-user*) - (*msconn-pass* . ,*msconn-pass*) (*state* . ,*state*) (*client-min-messages* . ,*client-min-messages*) (*log-min-messages* . ,*log-min-messages*)