pgloader/utils.lisp
2013-10-03 17:42:09 +02:00

315 lines
10 KiB
Common Lisp

;;;
;;; Random utilities
;;;
(in-package :pgloader.utils)
;;;
;;; Logs
;;;
;;; First define the log categories
(defcategory :critical)
(defcategory :error (or :error :critical))
(defcategory :warning (or :warning :error))
(defcategory :notice (or :notice :warning))
(defcategory :info (or :info :notice))
(defcategory :debug (or :debug :info))
;; Now define the Logger
(setf (log-manager)
(make-instance 'log-manager :message-class 'formatted-message))
;; Start a messenger to store our message into
(defun start-logger (&key (log-filename *log-filename*))
"Start the parch log manager and messenger."
(let ((log-pathname (typecase log-filename
(string (pathname log-filename))
(t log-filename))))
(ensure-directories-exist (directory-namestring log-filename))
(cl-log:start-messenger 'text-file-messenger
:filter *log-min-messages*
:filename log-filename)
(cl-log:start-messenger 'text-stream-messenger
:filter *client-min-messages*
:stream *standard-output*)
(format t "~&Now logging in '~a'.~%" log-pathname)
(log-message :notice "Starting pgloader, log system is ready.")))
;; monkey patch the print-object method for cl-log timestamp
(defconstant +nsec+ (* 1000 1000 1000)
"How many nanoseconds do you find in a second")
(defun fraction-to-nsecs (fraction)
"FRACTION is a number of internal-time-units-per-second, return nsecs"
(declare (inline fraction-to-nsecs) (fixnum fraction))
(floor (/ (* fraction +nsec+) internal-time-units-per-second)))
(defmethod print-object ((self cl-log:timestamp) stream)
"we want to print human readable timestamps"
(let ((log-local-time
(local-time:universal-to-timestamp
(cl-log:timestamp-universal-time self)
:nsec (fraction-to-nsecs (cl-log:timestamp-fraction self)))))
(local-time:format-timestring stream log-local-time)))
;;;
;;; Timing Macro
;;;
(defun elapsed-time-since (start)
"Return how many seconds ticked between START and now"
(/ (- (get-internal-real-time) start)
internal-time-units-per-second))
(defmacro timing (&body forms)
"return both how much real time was spend in body and its result"
(let ((start (gensym))
(end (gensym))
(result (gensym)))
`(let* ((,start (get-internal-real-time))
(,result (progn ,@forms))
(,end (get-internal-real-time)))
(values ,result (/ (- ,end ,start) internal-time-units-per-second)))))
;;;
;;; Timing Formating
;;;
(defun format-interval (seconds &optional (stream t))
"Output the number of seconds in a human friendly way"
(multiple-value-bind (years months days hours mins secs millisecs)
(date:decode-interval (date:encode-interval :second seconds))
(format
stream
"~:[~*~;~d years ~]~:[~*~;~d months ~]~:[~*~;~d days ~]~:[~*~;~dh~]~:[~*~;~dm~]~d.~ds"
(< 0 years) years
(< 0 months) months
(< 0 days) days
(< 0 hours) hours
(< 0 mins) mins
secs (truncate millisecs))))
;;;
;;; Data Structures to maintain information about loading state
;;;
(defstruct pgtable
name
(read 0 :type fixnum) ; how many rows did we read
(rows 0 :type fixnum) ; how many rows did we write
(errs 0 :type fixnum) ; how many errors did we see
(secs 0.0 :type float) ; how many seconds did it take
reject-data reject-logs) ; files where to find reject data
(defstruct pgstate
(tables (make-hash-table :test 'equal))
(read 0 :type fixnum)
(rows 0 :type fixnum)
(errs 0 :type fixnum)
(secs 0.0 :type float))
(defun pgstate-get-table (pgstate name)
(gethash name (pgstate-tables pgstate)))
(defun pgstate-add-table (pgstate dbname table-name)
"Instanciate a new pgtable structure to hold our stats, and return it."
(or (pgstate-get-table pgstate table-name)
(let* ((table (setf (gethash table-name (pgstate-tables pgstate))
(make-pgtable :name table-name)))
(reject-dir (pathname-directory
(merge-pathnames
(format nil "~a/" dbname) *reject-path-root*)))
(data-pathname
(make-pathname :directory reject-dir :name table-name :type "dat"))
(logs-pathname
(make-pathname :directory reject-dir :name table-name :type "log")))
;; create the per-database directory if it does not exists yet
(ensure-directories-exist (directory-namestring data-pathname))
;; rename the existing files if there are some
(with-open-file (data data-pathname
:direction :output
:if-exists :rename
:if-does-not-exist nil))
(with-open-file (logs logs-pathname
:direction :output
:if-exists :rename
:if-does-not-exist nil))
;; set the properties to the right pathnames
(setf (pgtable-reject-data table) data-pathname
(pgtable-reject-logs table) logs-pathname)
table)))
(defun pgstate-setf (pgstate name &key read rows errs secs)
(let ((pgtable (pgstate-get-table pgstate name)))
(when read
(setf (pgtable-read pgtable) read)
(incf (pgstate-read pgstate) read))
(when rows
(setf (pgtable-rows pgtable) rows)
(incf (pgstate-rows pgstate) rows))
(when errs
(setf (pgtable-errs pgtable) errs)
(incf (pgstate-errs pgstate) errs))
(when secs
(setf (pgtable-secs pgtable) secs)
(incf (pgstate-secs pgstate) secs))
pgtable))
(defun pgstate-incf (pgstate name &key read rows errs secs)
(let ((pgtable (pgstate-get-table pgstate name)))
(when read
(incf (pgtable-read pgtable) read)
(incf (pgstate-read pgstate) read))
(when rows
(incf (pgtable-rows pgtable) rows)
(incf (pgstate-rows pgstate) rows))
(when errs
(incf (pgtable-errs pgtable) errs)
(incf (pgstate-errs pgstate) errs))
(when secs
(incf (pgtable-secs pgtable) secs)
(incf (pgstate-secs pgstate) secs))
pgtable))
(defun pgstate-decf (pgstate name &key read rows errs secs)
(let ((pgtable (pgstate-get-table pgstate name)))
(when read
(decf (pgtable-read pgtable) read)
(decf (pgstate-read pgstate) read))
(when rows
(decf (pgtable-rows pgtable) rows)
(decf (pgstate-rows pgstate) rows))
(when errs
(decf (pgtable-errs pgtable) errs)
(decf (pgstate-errs pgstate) errs))
(when secs
(decf (pgtable-secs pgtable) secs)
(decf (pgstate-secs pgstate) secs))
pgtable))
;;;
;;; Pretty print a report while doing bulk operations
;;;
(defvar *header-line*
"~&------------------------------ --------- --------- --------- ---------")
(defvar *header-tname-format* "~&~30@a")
(defvar *header-stats-format* " ~9@a ~9@a ~9@a ~9@a")
(defvar *header-cols-format* (concatenate 'string *header-tname-format*
*header-stats-format*))
(defvar *header-cols-names* '("table name" "read" "imported" "errors" "time"))
(defun report-header ()
(apply #'format t *header-cols-format* *header-cols-names*)
(format t *header-line*))
(defun report-table-name (table-name)
(format t *header-tname-format* table-name))
(defun report-results (read rows errors seconds)
(format t *header-stats-format* read rows errors (format-interval seconds nil)))
(defun report-footer (legend read rows errors seconds)
(format t *header-line*)
(apply #'format t *header-cols-format*
(list legend read rows errors (format-interval seconds nil))))
;;;
;;; Pretty print a report from a pgtable and pgstats counters
;;;
(defun report-pgtable-stats (pgstate name)
(with-slots (read rows errs secs) (pgstate-get-table pgstate name)
(report-results read rows errs secs)))
(defun report-pgstate-stats (pgstate legend)
(with-slots (read rows errs secs) pgstate
(report-footer legend read rows errs secs)))
;;;
;;; Pretty print the whole summary from a state
;;;
(defun report-summary (&key ((:state pgstate) *state*) (header t) footer)
"Report a whole summary."
(when header (report-header))
(loop
for table-name being the hash-keys in (pgstate-tables pgstate)
using (hash-value pgtable)
do
(with-slots (read rows errs secs) pgtable
(format t *header-cols-format* table-name read rows errs secs))
finally (when footer
(report-pgstate-stats pgstate footer))))
(defmacro with-stats-collection ((dbname table-name
&key
summary
use-result-as-rows
((:state pgstate) *state*))
&body forms)
"Measure time spent in running BODY into STATE, accounting the seconds to
given DBNAME and TABLE-NAME"
(let ((result (gensym "result"))
(secs (gensym "secs")))
`(prog2
(pgstate-add-table ,pgstate ,dbname ,table-name)
(multiple-value-bind (,result ,secs)
(timing ,@forms)
(if ,use-result-as-rows
(pgstate-incf ,pgstate ,table-name :rows ,result :secs ,secs)
(pgstate-incf ,pgstate ,table-name :secs ,secs))
,result)
(when ,summary (report-summary)))))
;;;
;;; File utils
;;;
(defun slurp-file-into-string (filename)
"Return given filename's whole content as a string."
(with-open-file (stream filename
:direction :input
:external-format :utf-8)
(let ((seq (make-array (file-length stream)
:element-type 'character
:fill-pointer t)))
;; apparently the fastest way at that is read-sequence
;; http://www.ymeme.com/slurping-a-file-common-lisp-83.html
(setf (fill-pointer seq) (read-sequence seq stream))
seq)))
;;;
;;; Camel Case converter
;;;
(defun camelCase-to-colname (string)
"Transform input STRING into a suitable column name.
lahmanID lahman_id
playerID player_id
birthYear birth_year"
(coerce
(loop
for first = t then nil
for char across string
for previous-upper-p = nil then char-upper-p
for char-upper-p = (eq char (char-upcase char))
for new-word = (and (not first) char-upper-p (not previous-upper-p))
when (and new-word (not (char= char #\_))) collect #\_
collect (char-downcase char))
'string))
;;;
;;; lparallel
;;;
(defun make-kernel (worker-count
&key (bindings
`((*pgconn-host* . ,*pgconn-host*)
(*pgconn-port* . ,*pgconn-port*)
(*pgconn-user* . ,*pgconn-user*)
(*pgconn-pass* . ,*pgconn-pass*)
(*pg-settings* . ',*pg-settings*)
(*myconn-host* . ,*myconn-host*)
(*myconn-port* . ,*myconn-port*)
(*myconn-user* . ,*myconn-user*)
(*myconn-pass* . ,*myconn-pass*)
(*state* . ,*state*))))
"Wrapper around lparallel:make-kernel that sets our usual bindings."
(lp:make-kernel worker-count :bindings bindings))