Add read/write separate stats.

Add metrics to devise where the time is spent in current pgloader code
so that it's possible to then optimize away the batch processing as we
do it today.

Given the following extract of the measures, it seems that doing the
data transformations in the reader thread isn't so bright an idea. More
to come.

          table name         total time       read     write
   -----------------     --------------  --------- ---------
             extract             2.014s
         before load             0.050s
               fetch             0.000s
   -----------------     --------------  --------- ---------
    geolite.location            16.090s    15.933s    5.732s
      geolite.blocks            28.896s    28.795s    5.312s
   -----------------     --------------  --------- ---------
          after load            37.772s
   -----------------     --------------  --------- ---------
   Total import time          1m25.082s    44.728s   11.044s
This commit is contained in:
Dimitri Fontaine 2015-10-11 21:35:19 +02:00
parent c3726ce07a
commit 187565b181
5 changed files with 74 additions and 31 deletions

View File

@ -60,14 +60,17 @@
(loop
for (mesg batch read oversized?) = (lq:pop-queue queue)
until (eq mesg :end-of-data)
for rows = (copy-batch unqualified-table-name columns batch read)
for (rows ws) = (multiple-value-bind (result secs)
(timing
(copy-batch unqualified-table-name columns batch read))
(list result secs))
do (progn
;; The SBCL implementation needs some Garbage Collection
;; decision making help... and now is a pretty good time.
#+sbcl (when oversized? (sb-ext:gc :full t))
(log-message :debug "copy-batch ~a ~d row~:p~:[~; [oversized]~]"
unqualified-table-name rows oversized?)
(update-stats :data table-name :rows rows)))
(update-stats :data table-name :rows rows :ws ws)))
(when disable-triggers (enable-triggers unqualified-table-name)))))

View File

@ -9,6 +9,7 @@
;;; pushes the data down to PostgreSQL using the COPY protocol.
;;;
(defstruct batch
(start (get-internal-real-time) :type fixnum)
(data (make-array *copy-batch-rows* :element-type 'simple-string)
:type (vector simple-string *))
(count 0 :type fixnum)
@ -33,8 +34,10 @@
(when (or (= (batch-count *current-batch*) *copy-batch-rows*)
oversized?)
;; close current batch, prepare next one
(with-slots (data count bytes) *current-batch*
(update-stats :data (target copy) :read count)
(with-slots (start data count bytes) *current-batch*
(update-stats :data (target copy)
:read count
:rs (pgloader.monitor::elapsed-time-since start))
(lq:push-queue (list :batch data count oversized?) queue))
(setf *current-batch* (make-batch))))
@ -69,10 +72,12 @@
pre-formatted)))
;; we might have the last batch to send over now
(with-slots (data count) *current-batch*
(with-slots (start data count) *current-batch*
(when (< 0 count)
(log-message :debug "Sending last batch (~d rows)" count)
(update-stats :data (target copy) :read count)
(update-stats :data (target copy)
:read count
:rs (pgloader.monitor::elapsed-time-since start))
(lq:push-queue (list :batch data count nil) queue))))
;; signal we're done

View File

@ -219,7 +219,9 @@
(update-stats-label event)
:read (update-stats-read event)
:rows (update-stats-rows event)
:secs (update-stats-secs event)))
:secs (update-stats-secs event)
:rs (update-stats-rs event)
:ws (update-stats-ws event)))
(bad-row
(%process-bad-row (bad-row-label event)

View File

@ -5,14 +5,14 @@
(in-package :pgloader.state)
(defvar *header-line*
"~&~v@{~A~:*~} --------- --------- --------- --------------")
"~&~v@{~A~:*~} --------- --------- --------- -------------- --------- ---------")
(defvar *header* "~&")
(defvar *footer* "~&")
(defvar *end-of-line-format* "~%")
(defvar *max-length-table-name* 30)
(defvar *header-tname-format* "~&~v@a")
(defvar *header-stats-format* " ~9@a ~9@a ~9@a ~14@a")
(defvar *header-stats-format* " ~9@a ~9@a ~9@a ~14@a ~@[~9@a~] ~@[~9@a~]")
(defvar *header-cols-format* (concatenate 'string *header-tname-format*
*header-stats-format*))
(defvar *header-cols-names* '("table name" "read" "imported" "errors" "time"))
@ -22,12 +22,13 @@
(:header "~&"
:footer "~%"
:end-of-line-format "~%"
:header-line "~&~v@{~A~:*~} --------- --------- --------- --------------"
:header-line "~&~v@{~A~:*~} --------- --------- --------- -------------- --------- ---------"
:header-tname-format "~&~v@a"
:header-stats-format " ~9@a ~9@a ~9@a ~14@a"
:header-cols-format "~&~v@a ~9@a ~9@a ~9@a ~14@a"
:header-cols-names ("table name" "read" "imported" "errors" "time")))
:header-stats-format " ~9@a ~9@a ~9@a ~14@a ~@[~9@a~] ~@[~9@a~]"
:header-cols-format "~&~v@a ~9@a ~9@a ~9@a ~14@a ~9@a ~9@a"
:header-cols-names ("table name" "read" "imported" "errors"
"total time" "read" "write")))
(:csv
(:header "~&"
@ -35,8 +36,8 @@
:end-of-line-format "~%"
:header-line "~*~*"
:header-tname-format "~&~*~s;"
:header-stats-format "~s;~s;~s;~s"
:header-cols-format "~&~*~s;~s;~s;~s;~s"
:header-stats-format "~s;~s;~s;~s~*~*"
:header-cols-format "~&~*~s;~s;~s;~s;~s;~s;~s"
:header-cols-names ("table name" "read" "imported" "errors" "time")))
(:copy
@ -45,7 +46,7 @@
:end-of-line-format "~%"
:header-line "~&~*~*"
:header-tname-format "~&~*~a "
:header-stats-format "~s ~s ~s ~s"
:header-stats-format "~s ~s ~s ~s~*~*"
:header-cols-format "~*~*~*~*~*~*" ; skip it
:header-cols-names ("table name" "read" "imported" "errors" "time")))
@ -55,7 +56,7 @@
:end-of-line-format ",~%"
:header-line "~&~*~*"
:header-tname-format "~& {\"table-name\": ~*~s,"
:header-stats-format "\"read\":~s,\"imported\":~s,\"errors\":~s,\"time\":~s}"
:header-stats-format "\"read\":~s,\"imported\":~s,\"errors\":~s,\"time\":~s~@[,\"read\":~s~]~@[,\"write\":~s~]}"
:header-cols-format "~*~*~*~*~*~*" ; skip it
:header-cols-names ("table name" "read" "imported" "errors" "time")))))
@ -102,31 +103,38 @@
*header-tname-format*
*max-length-table-name* table-name))
(defun report-results (read rows errors seconds &optional (eol t))
(format *report-stream* *header-stats-format* read rows errors seconds)
(defun report-results (read rows errors seconds rs ws &optional (eol t))
(format *report-stream* *header-stats-format* read rows errors seconds rs ws)
(when eol
(format *report-stream* *end-of-line-format*)))
(defun report-footer (legend read rows errors seconds)
(defun report-footer (legend read rows errors seconds &optional rs ws)
(format *report-stream* *header-line* *max-length-table-name* "-")
(format *report-stream*
"~{~}"
*header-tname-format*
(list* *max-length-table-name*
(list legend)))
(report-results read rows errors (format-interval seconds nil) nil)
(report-results read rows errors
(format-interval seconds nil)
(when (and rs (not (= rs 0.0))) (format-interval rs nil))
(when (and ws (not (= rs 0.0))) (format-interval ws nil))
nil)
(format *report-stream* *footer*))
;;;
;;; Pretty print a report from a pgtable and pgstats counters
;;;
(defun report-pgtable-stats (pgstate name)
(with-slots (read rows errs secs) (pgstate-get-table pgstate name)
(report-results read rows errs (format-interval secs nil))))
(with-slots (read rows errs secs rs ws) (pgstate-get-label pgstate name)
(report-results read rows errs
(format-interval secs nil)
(when (and rs (not (= rs 0.0))) (format-interval rs nil))
(when (and ws (not (= ws 0.0))) (format-interval ws nil)))))
(defun report-pgstate-stats (pgstate legend)
(with-slots (read rows errs secs) pgstate
(report-footer legend read rows errs secs)))
(with-slots (read rows errs secs rs ws) pgstate
(report-footer legend read rows errs secs rs ws)))
;;;
;;; Pretty print the whole summary from a state
@ -138,12 +146,15 @@
for table-name in (reverse (pgstate-tabnames pgstate))
for pgtable = (gethash table-name (pgstate-tables pgstate))
do
(with-slots (read rows errs secs) pgtable
(with-slots (read rows errs secs rs ws) pgtable
(format *report-stream*
*header-tname-format*
*max-length-table-name*
(format-table-name table-name))
(report-results read rows errs (format-interval secs nil)))
(report-results read rows errs
(format-interval secs nil)
(when (and rs (not (= rs 0.0))) (format-interval rs nil))
(when (and ws (not (= ws 0.0))) (format-interval ws nil))))
finally (when footer
(report-pgstate-stats pgstate footer))))

View File

@ -14,6 +14,8 @@
(rows 0 :type fixnum) ; how many rows did we write
(errs 0 :type fixnum) ; how many errors did we see
(secs 0.0 :type float) ; how many seconds did it take
(rs 0.0 :type float) ; seconds spent reading
(ws 0.0 :type float) ; seconds spent writing
reject-data reject-logs) ; files where to find reject data
(defstruct pgstate
@ -22,7 +24,9 @@
(read 0 :type fixnum)
(rows 0 :type fixnum)
(errs 0 :type fixnum)
(secs 0.0 :type float))
(secs 0.0 :type float)
(rs 0.0 :type float)
(ws 0.0 :type float))
(defun format-table-name (table-name)
"TABLE-NAME might be a CONS of a schema name and a table name."
@ -89,7 +93,7 @@
pgtable)))
(defun pgstate-setf (pgstate name &key read rows errs secs)
(defun pgstate-setf (pgstate name &key read rows errs secs rs ws)
(let ((pgtable (pgstate-get-label pgstate name)))
(when read
(setf (pgtable-read pgtable) read)
@ -103,9 +107,15 @@
(when secs
(setf (pgtable-secs pgtable) secs)
(incf (pgstate-secs pgstate) secs))
(when rs
(setf (pgtable-rs pgtable) rs)
(incf (pgstate-rs pgstate) rs))
(when ws
(setf (pgtable-ws pgtable) ws)
(incf (pgstate-ws pgstate) ws))
pgtable))
(defun pgstate-incf (pgstate name &key read rows errs secs)
(defun pgstate-incf (pgstate name &key read rows errs secs rs ws)
(let ((pgtable (pgstate-get-label pgstate name)))
(when read
(incf (pgtable-read pgtable) read)
@ -119,9 +129,15 @@
(when secs
(incf (pgtable-secs pgtable) secs)
(incf (pgstate-secs pgstate) secs))
(when rs
(incf (pgtable-rs pgtable) rs)
(incf (pgstate-rs pgstate) rs))
(when ws
(incf (pgtable-ws pgtable) ws)
(incf (pgstate-ws pgstate) ws))
pgtable))
(defun pgstate-decf (pgstate name &key read rows errs secs)
(defun pgstate-decf (pgstate name &key read rows errs secs rs ws)
(let ((pgtable (pgstate-get-label pgstate name)))
(when read
(decf (pgtable-read pgtable) read)
@ -132,6 +148,12 @@
(when errs
(decf (pgtable-errs pgtable) errs)
(decf (pgstate-errs pgstate) errs))
(when rs
(decf (pgtable-rs pgtable) rs)
(decf (pgstate-rs pgstate) rs))
(when ws
(decf (pgtable-ws pgtable) ws)
(decf (pgstate-ws pgstate) ws))
(when secs
(decf (pgtable-secs pgtable) secs)
(decf (pgstate-secs pgstate) secs))