mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-07 23:07:00 +02:00
315 lines
10 KiB
Common Lisp
315 lines
10 KiB
Common Lisp
;;;
|
|
;;; Random utilities
|
|
;;;
|
|
(in-package :pgloader.utils)
|
|
|
|
;;;
|
|
;;; Logs
|
|
;;;
|
|
;;; First define the log categories
|
|
(defcategory :critical)
|
|
(defcategory :error (or :error :critical))
|
|
(defcategory :warning (or :warning :error))
|
|
(defcategory :notice (or :notice :warning))
|
|
(defcategory :info (or :info :notice))
|
|
(defcategory :debug (or :debug :info))
|
|
|
|
;; Now define the Logger
|
|
(setf (log-manager)
|
|
(make-instance 'log-manager :message-class 'formatted-message))
|
|
|
|
;; Start a messenger to store our message into
|
|
(defun start-logger (&key (log-filename *log-filename*))
|
|
"Start the parch log manager and messenger."
|
|
(let ((log-pathname (typecase log-filename
|
|
(string (pathname log-filename))
|
|
(t log-filename))))
|
|
(ensure-directories-exist (directory-namestring log-filename))
|
|
(cl-log:start-messenger 'text-file-messenger
|
|
:filter *log-min-messages*
|
|
:filename log-filename)
|
|
(cl-log:start-messenger 'text-stream-messenger
|
|
:filter *client-min-messages*
|
|
:stream *standard-output*)
|
|
(format t "~&Now logging in '~a'.~%" log-pathname)
|
|
(log-message :notice "Starting pgloader, log system is ready.")))
|
|
|
|
;; monkey patch the print-object method for cl-log timestamp
|
|
(defconstant +nsec+ (* 1000 1000 1000)
|
|
"How many nanoseconds do you find in a second")
|
|
|
|
(defun fraction-to-nsecs (fraction)
|
|
"FRACTION is a number of internal-time-units-per-second, return nsecs"
|
|
(declare (inline fraction-to-nsecs) (fixnum fraction))
|
|
(floor (/ (* fraction +nsec+) internal-time-units-per-second)))
|
|
|
|
(defmethod print-object ((self cl-log:timestamp) stream)
|
|
"we want to print human readable timestamps"
|
|
(let ((log-local-time
|
|
(local-time:universal-to-timestamp
|
|
(cl-log:timestamp-universal-time self)
|
|
:nsec (fraction-to-nsecs (cl-log:timestamp-fraction self)))))
|
|
(local-time:format-timestring stream log-local-time)))
|
|
|
|
;;;
|
|
;;; Timing Macro
|
|
;;;
|
|
(defun elapsed-time-since (start)
|
|
"Return how many seconds ticked between START and now"
|
|
(/ (- (get-internal-real-time) start)
|
|
internal-time-units-per-second))
|
|
|
|
(defmacro timing (&body forms)
|
|
"return both how much real time was spend in body and its result"
|
|
(let ((start (gensym))
|
|
(end (gensym))
|
|
(result (gensym)))
|
|
`(let* ((,start (get-internal-real-time))
|
|
(,result (progn ,@forms))
|
|
(,end (get-internal-real-time)))
|
|
(values ,result (/ (- ,end ,start) internal-time-units-per-second)))))
|
|
|
|
;;;
|
|
;;; Timing Formating
|
|
;;;
|
|
(defun format-interval (seconds &optional (stream t))
|
|
"Output the number of seconds in a human friendly way"
|
|
(multiple-value-bind (years months days hours mins secs millisecs)
|
|
(date:decode-interval (date:encode-interval :second seconds))
|
|
(format
|
|
stream
|
|
"~:[~*~;~d years ~]~:[~*~;~d months ~]~:[~*~;~d days ~]~:[~*~;~dh~]~:[~*~;~dm~]~d.~ds"
|
|
(< 0 years) years
|
|
(< 0 months) months
|
|
(< 0 days) days
|
|
(< 0 hours) hours
|
|
(< 0 mins) mins
|
|
secs (truncate millisecs))))
|
|
|
|
;;;
|
|
;;; Data Structures to maintain information about loading state
|
|
;;;
|
|
(defstruct pgtable
|
|
name
|
|
(read 0 :type fixnum) ; how many rows did we read
|
|
(rows 0 :type fixnum) ; how many rows did we write
|
|
(errs 0 :type fixnum) ; how many errors did we see
|
|
(secs 0.0 :type float) ; how many seconds did it take
|
|
reject-data reject-logs) ; files where to find reject data
|
|
|
|
(defstruct pgstate
|
|
(tables (make-hash-table :test 'equal))
|
|
(read 0 :type fixnum)
|
|
(rows 0 :type fixnum)
|
|
(errs 0 :type fixnum)
|
|
(secs 0.0 :type float))
|
|
|
|
(defun pgstate-get-table (pgstate name)
|
|
(gethash name (pgstate-tables pgstate)))
|
|
|
|
(defun pgstate-add-table (pgstate dbname table-name)
|
|
"Instanciate a new pgtable structure to hold our stats, and return it."
|
|
(or (pgstate-get-table pgstate table-name)
|
|
(let* ((table (setf (gethash table-name (pgstate-tables pgstate))
|
|
(make-pgtable :name table-name)))
|
|
(reject-dir (pathname-directory
|
|
(merge-pathnames
|
|
(format nil "~a/" dbname) *reject-path-root*)))
|
|
(data-pathname
|
|
(make-pathname :directory reject-dir :name table-name :type "dat"))
|
|
(logs-pathname
|
|
(make-pathname :directory reject-dir :name table-name :type "log")))
|
|
|
|
;; create the per-database directory if it does not exists yet
|
|
(ensure-directories-exist (directory-namestring data-pathname))
|
|
|
|
;; rename the existing files if there are some
|
|
(with-open-file (data data-pathname
|
|
:direction :output
|
|
:if-exists :rename
|
|
:if-does-not-exist nil))
|
|
|
|
(with-open-file (logs logs-pathname
|
|
:direction :output
|
|
:if-exists :rename
|
|
:if-does-not-exist nil))
|
|
|
|
;; set the properties to the right pathnames
|
|
(setf (pgtable-reject-data table) data-pathname
|
|
(pgtable-reject-logs table) logs-pathname)
|
|
table)))
|
|
|
|
(defun pgstate-setf (pgstate name &key read rows errs secs)
|
|
(let ((pgtable (pgstate-get-table pgstate name)))
|
|
(when read
|
|
(setf (pgtable-read pgtable) read)
|
|
(incf (pgstate-read pgstate) read))
|
|
(when rows
|
|
(setf (pgtable-rows pgtable) rows)
|
|
(incf (pgstate-rows pgstate) rows))
|
|
(when errs
|
|
(setf (pgtable-errs pgtable) errs)
|
|
(incf (pgstate-errs pgstate) errs))
|
|
(when secs
|
|
(setf (pgtable-secs pgtable) secs)
|
|
(incf (pgstate-secs pgstate) secs))
|
|
pgtable))
|
|
|
|
(defun pgstate-incf (pgstate name &key read rows errs secs)
|
|
(let ((pgtable (pgstate-get-table pgstate name)))
|
|
(when read
|
|
(incf (pgtable-read pgtable) read)
|
|
(incf (pgstate-read pgstate) read))
|
|
(when rows
|
|
(incf (pgtable-rows pgtable) rows)
|
|
(incf (pgstate-rows pgstate) rows))
|
|
(when errs
|
|
(incf (pgtable-errs pgtable) errs)
|
|
(incf (pgstate-errs pgstate) errs))
|
|
(when secs
|
|
(incf (pgtable-secs pgtable) secs)
|
|
(incf (pgstate-secs pgstate) secs))
|
|
pgtable))
|
|
|
|
(defun pgstate-decf (pgstate name &key read rows errs secs)
|
|
(let ((pgtable (pgstate-get-table pgstate name)))
|
|
(when read
|
|
(decf (pgtable-read pgtable) read)
|
|
(decf (pgstate-read pgstate) read))
|
|
(when rows
|
|
(decf (pgtable-rows pgtable) rows)
|
|
(decf (pgstate-rows pgstate) rows))
|
|
(when errs
|
|
(decf (pgtable-errs pgtable) errs)
|
|
(decf (pgstate-errs pgstate) errs))
|
|
(when secs
|
|
(decf (pgtable-secs pgtable) secs)
|
|
(decf (pgstate-secs pgstate) secs))
|
|
pgtable))
|
|
|
|
;;;
|
|
;;; Pretty print a report while doing bulk operations
|
|
;;;
|
|
(defvar *header-line*
|
|
"~&------------------------------ --------- --------- --------- ---------")
|
|
|
|
(defvar *header-tname-format* "~&~30@a")
|
|
(defvar *header-stats-format* " ~9@a ~9@a ~9@a ~9@a")
|
|
(defvar *header-cols-format* (concatenate 'string *header-tname-format*
|
|
*header-stats-format*))
|
|
(defvar *header-cols-names* '("table name" "read" "imported" "errors" "time"))
|
|
|
|
(defun report-header ()
|
|
(apply #'format t *header-cols-format* *header-cols-names*)
|
|
(format t *header-line*))
|
|
|
|
(defun report-table-name (table-name)
|
|
(format t *header-tname-format* table-name))
|
|
|
|
(defun report-results (read rows errors seconds)
|
|
(format t *header-stats-format* read rows errors (format-interval seconds nil)))
|
|
|
|
(defun report-footer (legend read rows errors seconds)
|
|
(format t *header-line*)
|
|
(apply #'format t *header-cols-format*
|
|
(list legend read rows errors (format-interval seconds nil))))
|
|
|
|
;;;
|
|
;;; Pretty print a report from a pgtable and pgstats counters
|
|
;;;
|
|
(defun report-pgtable-stats (pgstate name)
|
|
(with-slots (read rows errs secs) (pgstate-get-table pgstate name)
|
|
(report-results read rows errs secs)))
|
|
|
|
(defun report-pgstate-stats (pgstate legend)
|
|
(with-slots (read rows errs secs) pgstate
|
|
(report-footer legend read rows errs secs)))
|
|
|
|
;;;
|
|
;;; Pretty print the whole summary from a state
|
|
;;;
|
|
(defun report-summary (&key ((:state pgstate) *state*) (header t) footer)
|
|
"Report a whole summary."
|
|
(when header (report-header))
|
|
(loop
|
|
for table-name being the hash-keys in (pgstate-tables pgstate)
|
|
using (hash-value pgtable)
|
|
do
|
|
(with-slots (read rows errs secs) pgtable
|
|
(format t *header-cols-format* table-name read rows errs secs))
|
|
finally (when footer
|
|
(report-pgstate-stats pgstate footer))))
|
|
|
|
(defmacro with-stats-collection ((dbname table-name
|
|
&key
|
|
summary
|
|
use-result-as-rows
|
|
((:state pgstate) *state*))
|
|
&body forms)
|
|
"Measure time spent in running BODY into STATE, accounting the seconds to
|
|
given DBNAME and TABLE-NAME"
|
|
(let ((result (gensym "result"))
|
|
(secs (gensym "secs")))
|
|
`(prog2
|
|
(pgstate-add-table ,pgstate ,dbname ,table-name)
|
|
(multiple-value-bind (,result ,secs)
|
|
(timing ,@forms)
|
|
(if ,use-result-as-rows
|
|
(pgstate-incf ,pgstate ,table-name :rows ,result :secs ,secs)
|
|
(pgstate-incf ,pgstate ,table-name :secs ,secs))
|
|
,result)
|
|
(when ,summary (report-summary)))))
|
|
|
|
;;;
|
|
;;; File utils
|
|
;;;
|
|
(defun slurp-file-into-string (filename)
|
|
"Return given filename's whole content as a string."
|
|
(with-open-file (stream filename
|
|
:direction :input
|
|
:external-format :utf-8)
|
|
(let ((seq (make-array (file-length stream)
|
|
:element-type 'character
|
|
:fill-pointer t)))
|
|
;; apparently the fastest way at that is read-sequence
|
|
;; http://www.ymeme.com/slurping-a-file-common-lisp-83.html
|
|
(setf (fill-pointer seq) (read-sequence seq stream))
|
|
seq)))
|
|
|
|
;;;
|
|
;;; Camel Case converter
|
|
;;;
|
|
(defun camelCase-to-colname (string)
|
|
"Transform input STRING into a suitable column name.
|
|
lahmanID lahman_id
|
|
playerID player_id
|
|
birthYear birth_year"
|
|
(coerce
|
|
(loop
|
|
for first = t then nil
|
|
for char across string
|
|
for previous-upper-p = nil then char-upper-p
|
|
for char-upper-p = (eq char (char-upcase char))
|
|
for new-word = (and (not first) char-upper-p (not previous-upper-p))
|
|
when (and new-word (not (char= char #\_))) collect #\_
|
|
collect (char-downcase char))
|
|
'string))
|
|
|
|
;;;
|
|
;;; lparallel
|
|
;;;
|
|
(defun make-kernel (worker-count
|
|
&key (bindings
|
|
`((*pgconn-host* . ,*pgconn-host*)
|
|
(*pgconn-port* . ,*pgconn-port*)
|
|
(*pgconn-user* . ,*pgconn-user*)
|
|
(*pgconn-pass* . ,*pgconn-pass*)
|
|
(*pg-settings* . ',*pg-settings*)
|
|
(*myconn-host* . ,*myconn-host*)
|
|
(*myconn-port* . ,*myconn-port*)
|
|
(*myconn-user* . ,*myconn-user*)
|
|
(*myconn-pass* . ,*myconn-pass*)
|
|
(*state* . ,*state*))))
|
|
"Wrapper around lparallel:make-kernel that sets our usual bindings."
|
|
(lp:make-kernel worker-count :bindings bindings))
|