diff --git a/pgloader.1.md b/pgloader.1.md index 0288bee..5180757 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -571,7 +571,7 @@ The `database` command accepts the following clauses and options: When loading from a `MySQL` database, the following options are supported: - - *drop table* + - *include drop* When this option is listed, pgloader drop in the PostgreSQL connection all the table whose names have been found in the MySQL @@ -579,10 +579,19 @@ The `database` command accepts the following clauses and options: times in a row until you figure out all the options, starting automatically from a clean environment. + - *include no drop* + + When this option is listed, pgloader will not include any `DROP` + statement when loading the data. + - *truncate* When this option is listed, pgloader issue the `TRUNCATE` command against each PostgreSQL table just before loading data into it. + + - *no truncate* + + When this option is listed, pgloader issues no `TRUNCATE` command. - *create tables* @@ -591,24 +600,48 @@ The `database` command accepts the following clauses and options: fields with their data type. A standard data type conversion from DBF to PostgreSQL is done. + - *create no tables* + + When this option is listed, pgloader skips the creation of tables + before loading data, target tables must then already exist. + - *create indexes* When this option is listed, pgloader gets the definitions of all the indexes found in the MySQL database and create the same set of index definitions against the PostgreSQL database. + - *create no indexes* + + When this option is listed, pgloader skips creating indexes. + - *foreign keys* When this option is listed, pgloader gets the definitions of all the foreign keys found in the MySQL database and create the same set of foreign key definitions against the PostgreSQL database. + - *no foreign keys* + + When this option is listed, pgloader skips creating foreign keys. 
+ - *reset sequences* When this option is listed, at the end of the data loading and after the indexes have all been created, pgloader resets all the PostgreSQL sequences created to the current maximum value of the column they are attached to. + + The options *schema-only* and *data-only* have no effects on this + option. + + - *reset no sequences* + + When this option is listed, pgloader skips resetting sequences after + the load. + + The options *schema-only* and *data-only* have no effects on this + option. - *downcase identifiers* @@ -627,9 +660,14 @@ The `database` command accepts the following clauses and options: - *schema only* - When this option is listed pgloader will refrain from migrating the - data over. Note that the schema in this context includes the indexes - when the option *create indexes* has been listed. + When this option is listed pgloader refrains from migrating the data + over. Note that the schema in this context includes the indexes when + the option *create indexes* has been listed. + + - *data-only* + + When this option is listed pgloader only issues the `COPY` + statements, without doing any other processing. - *SET* @@ -846,7 +884,7 @@ The `sqlite` command accepts the following clauses and options: When loading from a `SQLite` database, the following options are supported: - - *drop table* + - *include drop* When this option is listed, pgloader drop in the PostgreSQL connection all the table whose names have been found in the SQLite @@ -854,11 +892,20 @@ The `sqlite` command accepts the following clauses and options: times in a row until you figure out all the options, starting automatically from a clean environment. + - *include no drop* + + When this option is listed, pgloader will not include any `DROP` + statement when loading the data. + - *truncate* When this option is listed, pgloader issue the `TRUNCATE` command against each PostgreSQL table just before loading data into it. 
+ - *no truncate* + + When this option is listed, pgloader issues no `TRUNCATE` command. + - *create tables* When this option is listed, pgloader creates the table using the @@ -866,12 +913,21 @@ The `sqlite` command accepts the following clauses and options: fields with their data type. A standard data type conversion from DBF to PostgreSQL is done. + - *create no tables* + + When this option is listed, pgloader skips the creation of tables + before loading data, target tables must then already exist. + - *create indexes* When this option is listed, pgloader gets the definitions of all the indexes found in the SQLite database and create the same set of index definitions against the PostgreSQL database. + - *create no indexes* + + When this option is listed, pgloader skips creating indexes. + - *reset sequences* When this option is listed, at the end of the data loading and after @@ -879,12 +935,25 @@ The `sqlite` command accepts the following clauses and options: PostgreSQL sequences created to the current maximum value of the column they are attached to. + - *reset no sequences* + + When this option is listed, pgloader skips resetting sequences after + the load. + + The options *schema-only* and *data-only* have no effects on this + option. + - *schema only* When this option is listed pgloader will refrain from migrating the data over. Note that the schema in this context includes the indexes when the option *create indexes* has been listed. + - *data-only* + + When this option is listed pgloader only issues the `COPY` + statements, without doing any other processing. 
+ - *SET* This clause allows to specify session parameters to be set for all the diff --git a/src/parser.lisp b/src/parser.lisp index 2e0fae3..1825f60 100644 --- a/src/parser.lisp +++ b/src/parser.lisp @@ -74,9 +74,11 @@ (def-keyword-rule "column") (def-keyword-rule "type") (def-keyword-rule "extra") + (def-keyword-rule "include") (def-keyword-rule "drop") (def-keyword-rule "not") (def-keyword-rule "to") + (def-keyword-rule "no") (def-keyword-rule "null") (def-keyword-rule "default") (def-keyword-rule "using") @@ -426,26 +428,33 @@ (declare (ignore w e)) (cons :workers (parse-integer (text nb)))))) -(defrule option-drop-tables (and kw-drop kw-tables) - (:constant (cons :include-drop t))) +(defmacro make-option-rule (name rule &optional option) + "Generates a rule named NAME to parse RULE and return OPTION." + (let* ((bindings + (loop for element in rule + unless (member element '(and or)) + collect (if (and (typep element 'list) + (eq '? (car element))) 'no (gensym)))) + (ignore (loop for b in bindings unless (eq 'no b) collect b)) + (option-name (intern (string-upcase (format nil "option-~a" name)))) + (option (or option (intern (symbol-name name) :keyword)))) + `(defrule ,option-name ,rule + (:destructure ,bindings + (declare (ignore ,@ignore)) + (cons ,option (null no)))))) -(defrule option-truncate (and kw-truncate) - (:constant (cons :truncate t))) +(make-option-rule include-drop (and kw-include (? kw-no) kw-drop)) +(make-option-rule truncate (and (? kw-no) kw-truncate)) +(make-option-rule create-tables (and kw-create (? kw-no) kw-tables)) +(make-option-rule create-indexes (and kw-create (? kw-no) kw-indexes)) +(make-option-rule reset-sequences (and kw-reset (? kw-no) kw-sequences)) +(make-option-rule foreign-keys (and (? 
kw-no) kw-foreign kw-keys)) (defrule option-schema-only (and kw-schema kw-only) (:constant (cons :schema-only t))) -(defrule option-create-tables (and kw-create kw-tables) - (:constant (cons :create-tables t))) - -(defrule option-create-indexes (and kw-create kw-indexes) - (:constant (cons :create-indexes t))) - -(defrule option-reset-sequences (and kw-reset kw-sequences) - (:constant (cons :reset-sequences t))) - -(defrule option-foreign-keys (and kw-foreign kw-keys) - (:constant (cons :foreign-keys t))) +(defrule option-data-only (and kw-data kw-only) + (:constant (cons :data-only t))) (defrule option-identifiers-case (and (or kw-downcase kw-quote) kw-identifiers) (:lambda (id-case) @@ -455,8 +464,9 @@ (defrule mysql-option (or option-workers option-truncate + option-data-only option-schema-only - option-drop-tables + option-include-drop option-create-tables option-create-indexes option-reset-sequences @@ -783,7 +793,7 @@ load database |# (defrule sqlite-option (or option-truncate option-schema-only - option-drop-tables + option-include-drop option-create-tables option-create-indexes option-reset-sequences)) diff --git a/src/pgsql/pgsql.lisp b/src/pgsql/pgsql.lisp index a7f931a..c7b59c1 100644 --- a/src/pgsql/pgsql.lisp +++ b/src/pgsql/pgsql.lisp @@ -66,7 +66,9 @@ ((:state *state*) *state*) transforms) "Fetch data from the QUEUE until we see :end-of-data. 
Update *state*" - (when truncate (truncate-table dbname table-name)) + (when truncate + (log-message :notice "TRUNCATE ~a.~a;" dbname table-name) + (truncate-table dbname table-name)) (log-message :debug "pgsql:copy-from-queue: ~a ~a ~a" dbname table-name columns) diff --git a/src/pgsql/queries.lisp b/src/pgsql/queries.lisp index b5c48f1..115f41c 100644 --- a/src/pgsql/queries.lisp +++ b/src/pgsql/queries.lisp @@ -71,6 +71,7 @@ (defun truncate-table (dbname table-name) "Truncate given TABLE-NAME in database DBNAME" (pomo:with-connection (get-connection-spec dbname) + (set-session-gucs *pg-settings*) (pomo:execute (format nil "truncate ~a;" table-name)))) (defun list-databases (&optional (username "postgres")) diff --git a/src/pgsql/schema.lisp b/src/pgsql/schema.lisp index ac3a6d7..0d4ab45 100644 --- a/src/pgsql/schema.lisp +++ b/src/pgsql/schema.lisp @@ -170,6 +170,7 @@ :if-not-exists if-not-exists :identifier-case identifier-case :include-drop include-drop) + when sql do (pgsql-execute sql :client-min-messages client-min-messages) finally (return nb-tables))) diff --git a/src/sources/mysql.lisp b/src/sources/mysql.lisp index 2c84774..ed98596 100644 --- a/src/sources/mysql.lisp +++ b/src/sources/mysql.lisp @@ -190,6 +190,7 @@ state-after state-indexes truncate + data-only schema-only create-tables include-drop @@ -229,7 +230,7 @@ (lp:make-channel))))) ;; if asked, first drop/create the tables on the PostgreSQL side - (when create-tables + (when (and (or create-tables schema-only) (not data-only)) (log-message :notice "~:[~;DROP then ~]CREATE TABLES" include-drop) (with-stats-collection (pg-dbname "create, drop" :use-result-as-rows t @@ -269,7 +270,7 @@ ;; index build requires much more time than the others our ;; index build might get unsync: indexes for different tables ;; will get built in parallel --- not a big problem. 
- (when create-indexes + (when (and create-indexes (not data-only)) (let* ((indexes (cdr (assoc table-name all-indexes :test #'string=)))) (create-indexes-in-kernel pg-dbname table-name indexes @@ -278,23 +279,28 @@ :include-drop include-drop :identifier-case identifier-case))))) - ;; don't forget to reset sequences, but only when we did actually import - ;; the data. - (when (and (not schema-only) reset-sequences) - (reset-sequences all-columns - :dbname pg-dbname - :state state-after - :identifier-case identifier-case)) - ;; now end the kernels (let ((lp:*kernel* copy-kernel)) (lp:end-kernel)) (let ((lp:*kernel* idx-kernel)) ;; wait until the indexes are done being built... ;; don't forget accounting for that waiting time. - (with-stats-collection (pg-dbname "Index Build Completion" :state *state*) - (loop for idx in all-indexes do (lp:receive-result idx-channel))) + (when (and create-indexes (not data-only)) + (with-stats-collection (pg-dbname "Index Build Completion" :state *state*) + (loop for idx in all-indexes do (lp:receive-result idx-channel)))) (lp:end-kernel)) + ;; + ;; Now Reset Sequences, the good time to do that is once the whole data + ;; has been imported and once we have the indexes in place, as max() is + ;; able to benefit from the indexes. In particular avoid doing that step + ;; while CREATE INDEX statements are in flight (avoid locking). + ;; + (when reset-sequences + (reset-sequences all-columns + :dbname pg-dbname + :state state-after + :identifier-case identifier-case)) + ;; ;; Foreign Key Constraints ;; @@ -302,7 +308,7 @@ ;; tables to be able to build the foreign keys, so wait until all tables ;; and indexes are imported before doing that. 
;; - (when foreign-keys + (when (and foreign-keys (not data-only)) (create-fkeys all-fkeys :dbname pg-dbname :state state-after diff --git a/src/sources/sources.lisp b/src/sources/sources.lisp index cf1d609..394b4f7 100644 --- a/src/sources/sources.lisp +++ b/src/sources/sources.lisp @@ -61,6 +61,7 @@ (defgeneric copy-database (source &key truncate + data-only schema-only create-tables include-drop diff --git a/src/sources/sqlite.lisp b/src/sources/sqlite.lisp index ab81a9c..dc00282 100644 --- a/src/sources/sqlite.lisp +++ b/src/sources/sqlite.lisp @@ -189,6 +189,7 @@ &key state-before truncate + data-only schema-only create-tables include-drop @@ -222,7 +223,7 @@ (pg-dbname (target-db sqlite))) ;; if asked, first drop/create the tables on the PostgreSQL side - (when create-tables + (when (and (or create-tables schema-only) (not data-only)) (log-message :notice "~:[~;DROP then ~]CREATE TABLES" include-drop) (with-stats-collection (pg-dbname "create, truncate" :state state-before @@ -232,8 +233,6 @@ (loop for (table-name . columns) in all-columns - when (or (null only-tables) - (member table-name only-tables :test #'equal)) do (let ((table-source (make-instance 'copy-sqlite @@ -255,7 +254,7 @@ ;; index build requires much more time than the others our ;; index build might get unsync: indexes for different tables ;; will get built in parallel --- not a big problem. - (when create-indexes + (when (and create-indexes (not data-only)) (let* ((indexes (cdr (assoc table-name all-indexes :test #'string=)))) (create-indexes-in-kernel pg-dbname indexes @@ -265,7 +264,7 @@ ;; don't forget to reset sequences, but only when we did actually import ;; the data. - (when (and (not schema-only) reset-sequences) + (when reset-sequences (let ((tables (or only-tables (mapcar #'car all-columns)))) (log-message :notice "Reset sequences") @@ -279,8 +278,9 @@ (let ((lp:*kernel* idx-kernel)) ;; wait until the indexes are done being built... ;; don't forget accounting for that waiting time. 
- (with-stats-collection (pg-dbname "index build completion" :state *state*) - (loop for idx in all-indexes do (lp:receive-result idx-channel))) + (when (and create-indexes (not data-only)) + (with-stats-collection (pg-dbname "index build completion" :state *state*) + (loop for idx in all-indexes do (lp:receive-result idx-channel)))) (lp:end-kernel)) ;; and report the total time spent on the operation diff --git a/src/utils.lisp b/src/utils.lisp index 19bebfc..ed46c78 100644 --- a/src/utils.lisp +++ b/src/utils.lisp @@ -85,7 +85,11 @@ (< 0 days) days (< 0 hours) hours (< 0 mins) mins - (+ secs (- seconds secs))))) + (+ secs (- (multiple-value-bind (r q) + (truncate seconds 60) + (declare (ignore r)) + q) + secs))))) ;;; ;;; Data Structures to maintain information about loading state @@ -192,7 +196,7 @@ ;;; Pretty print a report while doing bulk operations ;;; (defvar *header-line* - "~&------------------------------ --------- --------- --------- ---------") + "~&------------------------------ --------- --------- --------- --------------") (defvar *header-tname-format* "~&~30@a") (defvar *header-stats-format* " ~9@a ~9@a ~9@a ~14@a") @@ -287,6 +291,14 @@ (+ (if before (pgloader.utils::pgstate-secs before) 0) (if finally (pgloader.utils::pgstate-secs finally) 0))) + ;; if the parallel tasks took longer than the rest cumulated, the total + ;; waiting time actually was parallel - before + (when (< (pgloader.utils::pgstate-secs state) + (pgloader.utils::pgstate-secs parallel)) + (setf (pgloader.utils::pgstate-secs state) + (- (pgloader.utils::pgstate-secs parallel) + (pgloader.utils::pgstate-secs before)))) + ;; and report the Grand Total (report-pgstate-stats state legend)) diff --git a/test/sakila.load b/test/sakila.load index 304384b..ffe9552 100644 --- a/test/sakila.load +++ b/test/sakila.load @@ -2,7 +2,8 @@ load database from mysql://root@localhost/sakila into postgresql://localhost:54393/sakila - WITH drop tables, create tables, create indexes, reset sequences, 
foreign keys + WITH include drop, create tables, no truncate, + create indexes, reset sequences, foreign keys SET maintenance_work_mem to '128MB', work_mem to '12MB', search_path to 'sakila' diff --git a/test/sqlite.load b/test/sqlite.load index 89bbac6..1cecc0b 100644 --- a/test/sqlite.load +++ b/test/sqlite.load @@ -1,7 +1,7 @@ load database from sqlite:///Users/dim/Downloads/lastfm_tags.db - into postgresql://127.0.0.1:54393/tags?tids + into postgresql://127.0.0.1:54393/tags - with drop tables, create tables, create indexes, reset sequences + with include drop, create tables, create indexes, reset sequences - set work_mem to '16MB', maintenance_work_mem to '512 MB'; + set work_mem to '16MB', maintenance_work_mem to '512 MB'; \ No newline at end of file