From 187565b181ab13ae6ad1bbcb7f5d5f644e03dd70 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Sun, 11 Oct 2015 21:35:19 +0200 Subject: [PATCH] Add read/write separate stats. Add metrics to devise where the time is spent in current pgloader code so that it's possible to then optimize away the batch processing as we do it today. Given the following extract of the measures, it seems that doing the data transformations in the reader thread isn't so bright an idea. More to come. table name total time read write ----------------- -------------- --------- --------- extract 2.014s before load 0.050s fetch 0.000s ----------------- -------------- --------- --------- geolite.location 16.090s 15.933s 5.732s geolite.blocks 28.896s 28.795s 5.312s ----------------- -------------- --------- --------- after load 37.772s ----------------- -------------- --------- --------- Total import time 1m25.082s 44.728s 11.044s --- src/pgsql/pgsql.lisp | 7 ++++-- src/queue.lisp | 13 +++++++---- src/utils/monitor.lisp | 4 +++- src/utils/report.lisp | 51 +++++++++++++++++++++++++----------------- src/utils/state.lisp | 30 +++++++++++++++++++++---- 5 files changed, 74 insertions(+), 31 deletions(-) diff --git a/src/pgsql/pgsql.lisp b/src/pgsql/pgsql.lisp index 42464a6..10484cc 100644 --- a/src/pgsql/pgsql.lisp +++ b/src/pgsql/pgsql.lisp @@ -60,14 +60,17 @@ (loop for (mesg batch read oversized?) = (lq:pop-queue queue) until (eq mesg :end-of-data) - for rows = (copy-batch unqualified-table-name columns batch read) + for (rows ws) = (multiple-value-bind (result secs) + (timing + (copy-batch unqualified-table-name columns batch read)) + (list result secs)) do (progn ;; The SBCL implementation needs some Garbage Collection ;; decision making help... and now is a pretty good time. #+sbcl (when oversized? (sb-ext:gc :full t)) (log-message :debug "copy-batch ~a ~d row~:p~:[~; [oversized]~]" unqualified-table-name rows oversized?) - (update-stats :data table-name :rows rows))) + (update-stats :data table-name :rows rows :ws ws))) (when disable-triggers (enable-triggers unqualified-table-name))))) diff --git a/src/queue.lisp b/src/queue.lisp index b6cf10e..e66c99e 100644 --- a/src/queue.lisp +++ b/src/queue.lisp @@ -9,6 +9,7 @@ ;;; pushes the data down to PostgreSQL using the COPY protocol. ;;; (defstruct batch + (start (get-internal-real-time) :type fixnum) (data (make-array *copy-batch-rows* :element-type 'simple-string) :type (vector simple-string *)) (count 0 :type fixnum) @@ -33,8 +34,10 @@ (when (or (= (batch-count *current-batch*) *copy-batch-rows*) oversized?) ;; close current batch, prepare next one - (with-slots (data count bytes) *current-batch* - (update-stats :data (target copy) :read count) + (with-slots (start data count bytes) *current-batch* + (update-stats :data (target copy) + :read count + :rs (pgloader.monitor::elapsed-time-since start)) (lq:push-queue (list :batch data count oversized?) queue)) (setf *current-batch* (make-batch)))) @@ -69,10 +72,12 @@ pre-formatted))) ;; we might have the last batch to send over now - (with-slots (data count) *current-batch* + (with-slots (start data count) *current-batch* (when (< 0 count) (log-message :debug "Sending last batch (~d rows)" count) - (update-stats :data (target copy) :read count) + (update-stats :data (target copy) + :read count + :rs (pgloader.monitor::elapsed-time-since start)) (lq:push-queue (list :batch data count nil) queue)))) ;; signal we're done diff --git a/src/utils/monitor.lisp b/src/utils/monitor.lisp index 6f08b1e..aa342cc 100644 --- a/src/utils/monitor.lisp +++ b/src/utils/monitor.lisp @@ -219,7 +219,9 @@ (update-stats-label event) :read (update-stats-read event) :rows (update-stats-rows event) - :secs (update-stats-secs event))) + :secs (update-stats-secs event) + :rs (update-stats-rs event) + :ws (update-stats-ws event))) (bad-row (%process-bad-row (bad-row-label event) diff --git a/src/utils/report.lisp b/src/utils/report.lisp index f3df033..6e63bc2 100644 --- a/src/utils/report.lisp +++ b/src/utils/report.lisp @@ -5,14 +5,14 @@ (in-package :pgloader.state) (defvar *header-line* - "~&~v@{~A~:*~} --------- --------- --------- --------------") + "~&~v@{~A~:*~} --------- --------- --------- -------------- --------- ---------") (defvar *header* "~&") (defvar *footer* "~&") (defvar *end-of-line-format* "~%") (defvar *max-length-table-name* 30) (defvar *header-tname-format* "~&~v@a") -(defvar *header-stats-format* " ~9@a ~9@a ~9@a ~14@a") +(defvar *header-stats-format* " ~9@a ~9@a ~9@a ~14@a ~@[~9@a~] ~@[~9@a~]") (defvar *header-cols-format* (concatenate 'string *header-tname-format* *header-stats-format*)) (defvar *header-cols-names* '("table name" "read" "imported" "errors" "time")) @@ -22,12 +22,13 @@ (:header "~&" :footer "~%" :end-of-line-format "~%" - :header-line "~&~v@{~A~:*~} --------- --------- --------- --------------" + :header-line "~&~v@{~A~:*~} --------- --------- --------- -------------- --------- ---------" :header-tname-format "~&~v@a" - :header-stats-format " ~9@a ~9@a ~9@a ~14@a" - :header-cols-format "~&~v@a ~9@a ~9@a ~9@a ~14@a" - :header-cols-names ("table name" "read" "imported" "errors" "time"))) + :header-stats-format " ~9@a ~9@a ~9@a ~14@a ~@[~9@a~] ~@[~9@a~]" + :header-cols-format "~&~v@a ~9@a ~9@a ~9@a ~14@a ~9@a ~9@a" + :header-cols-names ("table name" "read" "imported" "errors" + "total time" "read" "write"))) (:csv (:header "~&" @@ -35,8 +36,8 @@ :end-of-line-format "~%" :header-line "~*~*" :header-tname-format "~&~*~s;" - :header-stats-format "~s;~s;~s;~s" - :header-cols-format "~&~*~s;~s;~s;~s;~s" + :header-stats-format "~s;~s;~s;~s~*~*" + :header-cols-format "~&~*~s;~s;~s;~s;~s;~s;~s" :header-cols-names ("table name" "read" "imported" "errors" "time"))) (:copy @@ -45,7 +46,7 @@ :end-of-line-format "~%" :header-line "~&~*~*" :header-tname-format "~&~*~a " - :header-stats-format "~s ~s ~s ~s" + :header-stats-format "~s ~s ~s ~s~*~*" :header-cols-format "~*~*~*~*~*~*" ; skip it :header-cols-names ("table name" "read" "imported" "errors" "time"))) @@ -55,7 +56,7 @@ :end-of-line-format ",~%" :header-line "~&~*~*" :header-tname-format "~& {\"table-name\": ~*~s," - :header-stats-format "\"read\":~s,\"imported\":~s,\"errors\":~s,\"time\":~s}" + :header-stats-format "\"read\":~s,\"imported\":~s,\"errors\":~s,\"time\":~s~@[,\"read\":~s~]~@[,\"write\":~s~]}" :header-cols-format "~*~*~*~*~*~*" ; skip it :header-cols-names ("table name" "read" "imported" "errors" "time"))))) @@ -102,31 +103,38 @@ *header-tname-format* *max-length-table-name* table-name)) -(defun report-results (read rows errors seconds &optional (eol t)) - (format *report-stream* *header-stats-format* read rows errors seconds) +(defun report-results (read rows errors seconds rs ws &optional (eol t)) + (format *report-stream* *header-stats-format* read rows errors seconds rs ws) (when eol (format *report-stream* *end-of-line-format*))) -(defun report-footer (legend read rows errors seconds) +(defun report-footer (legend read rows errors seconds &optional rs ws) (format *report-stream* *header-line* *max-length-table-name* "-") (format *report-stream* "~{~}" *header-tname-format* (list* *max-length-table-name* (list legend))) - (report-results read rows errors (format-interval seconds nil) nil) + (report-results read rows errors + (format-interval seconds nil) + (when (and rs (not (= rs 0.0))) (format-interval rs nil)) + (when (and ws (not (= rs 0.0))) (format-interval ws nil)) + nil) (format *report-stream* *footer*)) ;;; ;;; Pretty print a report from a pgtable and pgstats counters ;;; (defun report-pgtable-stats (pgstate name) - (with-slots (read rows errs secs) (pgstate-get-table pgstate name) - (report-results read rows errs (format-interval secs nil)))) + (with-slots (read rows errs secs rs ws) (pgstate-get-label pgstate name) + (report-results read rows errs + (format-interval secs nil) + (when (and rs (not (= rs 0.0))) (format-interval rs nil)) + (when (and ws (not (= ws 0.0))) (format-interval ws nil))))) (defun report-pgstate-stats (pgstate legend) - (with-slots (read rows errs secs) pgstate - (report-footer legend read rows errs secs))) + (with-slots (read rows errs secs rs ws) pgstate + (report-footer legend read rows errs secs rs ws))) ;;; ;;; Pretty print the whole summary from a state @@ -138,12 +146,15 @@ for table-name in (reverse (pgstate-tabnames pgstate)) for pgtable = (gethash table-name (pgstate-tables pgstate)) do - (with-slots (read rows errs secs) pgtable + (with-slots (read rows errs secs rs ws) pgtable (format *report-stream* *header-tname-format* *max-length-table-name* (format-table-name table-name)) - (report-results read rows errs (format-interval secs nil))) + (report-results read rows errs + (format-interval secs nil) + (when (and rs (not (= rs 0.0))) (format-interval rs nil)) + (when (and ws (not (= ws 0.0))) (format-interval ws nil)))) finally (when footer (report-pgstate-stats pgstate footer)))) diff --git a/src/utils/state.lisp b/src/utils/state.lisp index 46232b7..e2a35f2 100644 --- a/src/utils/state.lisp +++ b/src/utils/state.lisp @@ -14,6 +14,8 @@ (rows 0 :type fixnum) ; how many rows did we write (errs 0 :type fixnum) ; how many errors did we see (secs 0.0 :type float) ; how many seconds did it take + (rs 0.0 :type float) ; seconds spent reading + (ws 0.0 :type float) ; seconds spent writing reject-data reject-logs) ; files where to find reject data (defstruct pgstate @@ -22,7 +24,9 @@ (read 0 :type fixnum) (rows 0 :type fixnum) (errs 0 :type fixnum) - (secs 0.0 :type float)) + (secs 0.0 :type float) + (rs 0.0 :type float) + (ws 0.0 :type float)) (defun format-table-name (table-name) "TABLE-NAME might be a CONS of a schema name and a table name." @@ -89,7 +93,7 @@ pgtable))) -(defun pgstate-setf (pgstate name &key read rows errs secs) +(defun pgstate-setf (pgstate name &key read rows errs secs rs ws) (let ((pgtable (pgstate-get-label pgstate name))) (when read (setf (pgtable-read pgtable) read) @@ -103,9 +107,15 @@ (when secs (setf (pgtable-secs pgtable) secs) (incf (pgstate-secs pgstate) secs)) + (when rs + (setf (pgtable-rs pgtable) rs) + (incf (pgstate-rs pgstate) rs)) + (when ws + (setf (pgtable-ws pgtable) ws) + (incf (pgstate-ws pgstate) ws)) pgtable)) -(defun pgstate-incf (pgstate name &key read rows errs secs) +(defun pgstate-incf (pgstate name &key read rows errs secs rs ws) (let ((pgtable (pgstate-get-label pgstate name))) (when read (incf (pgtable-read pgtable) read) @@ -119,9 +129,15 @@ (when secs (incf (pgtable-secs pgtable) secs) (incf (pgstate-secs pgstate) secs)) + (when rs + (incf (pgtable-rs pgtable) rs) + (incf (pgstate-rs pgstate) rs)) + (when ws + (incf (pgtable-ws pgtable) ws) + (incf (pgstate-ws pgstate) ws)) pgtable)) -(defun pgstate-decf (pgstate name &key read rows errs secs) +(defun pgstate-decf (pgstate name &key read rows errs secs rs ws) (let ((pgtable (pgstate-get-label pgstate name))) (when read (decf (pgtable-read pgtable) read) @@ -132,6 +148,12 @@ (when errs (decf (pgtable-errs pgtable) errs) (decf (pgstate-errs pgstate) errs)) + (when rs + (decf (pgtable-rs pgtable) rs) + (decf (pgstate-rs pgstate) rs)) + (when ws + (decf (pgtable-ws pgtable) ws) + (decf (pgstate-ws pgstate) ws)) (when secs (decf (pgtable-secs pgtable) secs) (decf (pgstate-secs pgstate) secs))