diff --git a/src/load/copy-data.lisp b/src/load/copy-data.lisp index 5b19c11..bb209ea 100644 --- a/src/load/copy-data.lisp +++ b/src/load/copy-data.lisp @@ -9,6 +9,11 @@ (defmethod queue-raw-data ((copy copy) rawq concurrency) "Stream data as read by the map-queue method on the COPY argument into QUEUE, as given." + (log-message :notice "COPY ~a ~@[with ~d rows estimated~] [~a/~a]" + (format-table-name (target copy)) + (table-row-count-estimate (target copy)) + (lp:kernel-worker-index) + (lp:kernel-worker-count)) (log-message :debug "Reader started for ~a" (format-table-name (target copy))) (let* ((start-time (get-internal-real-time)) (row-count 0) @@ -93,7 +98,6 @@ (trivial-backtrace:print-backtrace condition :output nil)) (lp::invoke-transfer-error condition)))) - (log-message :notice "COPY ~a" table-name) ;; Check for Read Concurrency Support from our source (when (and multiple-readers (< 1 concurrency)) diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp index b7203d1..c12e050 100644 --- a/src/load/migrate-database.lisp +++ b/src/load/migrate-database.lisp @@ -255,6 +255,24 @@ (setf (catalog-distribution-rules catalog) (citus-distribute-schema catalog distribute)))) +(defun optimize-table-copy-ordering (catalog) + "Return a list of tables to copy over in optimized order" + (let ((table-list (copy-list (table-list catalog))) + (view-list (copy-list (view-list catalog)))) + ;; when materialized views are not supported, view-list is empty here + (cond + ((notevery #'zerop (mapcar #'table-row-count-estimate table-list)) + (let ((sorted-table-list + (sort table-list #'> :key #'table-row-count-estimate))) + (log-message :notice + "Processing tables in this order: ~{~a: ~d rows~^, ~}" + (loop :for table :in (append table-list view-list) + :collect (format-table-name table) + :collect (table-row-count-estimate table))) + (nconc sorted-table-list view-list))) + (t + (nconc table-list view-list))))) + ;;; ;;; Generic enough implementation of the copy-database method. @@ -414,10 +432,7 @@ (return-from copy-database))) (loop - :for table :in (append (table-list catalog) - ;; when materialized views are not supported, - ;; view-list is empty here - (view-list catalog)) + :for table :in (optimize-table-copy-ordering catalog) :do (let ((table-source (instanciate-table-copy-object copy table))) ;; first COPY the data from source to PostgreSQL, using copy-kernel @@ -472,8 +487,21 @@ (when (and create-indexes (zerop (gethash table writers-count))) - (log-message :notice "DONE copying ~a" - (format-table-name table)) + + (let* ((stats pgloader.monitor::*sections*) + (section (get-state-section stats :data)) + (table-stats (pgstate-get-label section table)) + (pprint-secs + (pgloader.state::format-interval seconds nil))) + ;; in CCL we have access to the *sections* dynamic + ;; binding from another thread, in SBCL we access + ;; an empty copy. + (log-message :notice + "DONE copying ~a in ~a~@[ for ~d rows~]" + (format-table-name table) + pprint-secs + (when table-stats + (pgtable-rows table-stats)))) (alexandria:appendf pkeys (create-indexes-in-kernel (target-db copy) diff --git a/src/package.lisp b/src/package.lisp index 65feb0e..21df0f6 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -96,6 +96,7 @@ #:table-comment #:table-storage-parameter-list #:table-tablespace + #:table-row-count-estimate #:table-field-list #:table-column-list #:table-index-list diff --git a/src/sources/common/api.lisp b/src/sources/common/api.lisp index 51f45a1..2c2ba3d 100644 --- a/src/sources/common/api.lisp +++ b/src/sources/common/api.lisp @@ -153,6 +153,9 @@ (defgeneric fetch-foreign-keys (catalog db-copy &key including excluding) (:documentation "Get the list of foreign keys from the source database.")) +(defgeneric fetch-table-row-count (catalog db-copy &key including excluding) + (:documentation "Retrieve and set the row count estimate for given tables.")) + (defgeneric fetch-comments (catalog db-copy &key including excluding) (:documentation "Get the list of comments from the source database.")) diff --git a/src/sources/mysql/mysql-schema.lisp b/src/sources/mysql/mysql-schema.lisp index 296963d..99603a5 100644 --- a/src/sources/mysql/mysql-schema.lisp +++ b/src/sources/mysql/mysql-schema.lisp @@ -155,6 +155,27 @@ :finally (return schema))) +;;; +;;; MySQL table row count estimate +;;; +(defmethod fetch-table-row-count ((schema schema) + (mysql copy-mysql) + &key + including + excluding) + "Retrieve and set the row count estimate for given MySQL tables." + (loop + :for (table-name count) + :in (mysql-query (sql "/mysql/list-table-rows.sql" + (db-name *connection*) + including ; do we print the clause? + including + excluding ; do we print the clause? + excluding)) + :do (let* ((table (find-table schema table-name))) + (when table + (setf (table-row-count-estimate table) (parse-integer count)))))) + ;;; ;;; Queries to get the MySQL comments. diff --git a/src/sources/mysql/mysql.lisp b/src/sources/mysql/mysql.lisp index 9b2826b..9331ec1 100644 --- a/src/sources/mysql/mysql.lisp +++ b/src/sources/mysql/mysql.lisp @@ -165,6 +165,11 @@ Illegal ~a character starting at position ~a~@[: ~a~].~%" :including including :excluding excluding) + ;; fetch tables row count estimate + (fetch-table-row-count schema mysql + :including including + :excluding excluding) + ;; fetch view (and their columns) metadata, covering comments too (let* ((view-names (unless (eq :all materialize-views) (mapcar #'matview-source-name materialize-views))) diff --git a/src/sources/mysql/sql/list-table-rows.sql b/src/sources/mysql/sql/list-table-rows.sql new file mode 100644 index 0000000..82f58c8 --- /dev/null +++ b/src/sources/mysql/sql/list-table-rows.sql @@ -0,0 +1,12 @@ +-- params: db-name +-- including +-- filter-list-to-where-clause incuding +-- excluding +-- filter-list-to-where-clause excluding + SELECT table_name, + cast(data_length/avg_row_length as integer) + FROM information_schema.tables + WHERE table_schema = '~a' + and table_type = 'BASE TABLE' + ~:[~*~;and (~{table_name ~a~^ or ~})~] + ~:[~*~;and (~{table_name ~a~^ and ~})~]; diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp index ccbb736..a74d676 100644 --- a/src/utils/catalog.lisp +++ b/src/utils/catalog.lisp @@ -49,10 +49,16 @@ (defstruct table source-name name schema oid comment storage-parameter-list tablespace + (row-count-estimate 0 :type fixnum) ;; field is for SOURCE + field-list ;; column is for TARGET + column-list + index-list + fkey-list + trigger-list ;; citus is an extra slot for citus support - field-list column-list index-list fkey-list trigger-list citus-rule) + citus-rule) (defstruct matview source-name name schema definition)