mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-08 15:27:00 +02:00
201 lines
6.4 KiB
Common Lisp
201 lines
6.4 KiB
Common Lisp
;;;
|
|
;;; Tools to handle PostgreSQL data format
|
|
;;;
|
|
(in-package :pgloader.pgsql)
|
|
|
|
;;;
|
|
;;; Quick utilities to get rid of later.
|
|
;;;
|
|
(defparameter *pgconn*
  (list "gdb" "none" "localhost" :port 5432)
  "Tail of the connection spec used for the local database; see
   GET-CONNECTION-STRING, which prepends the database name to it.")
|
|
|
|
(defun get-connection-string (dbname)
  "Return a connection spec for DBNAME: DBNAME followed by the elements
   of *PGCONN*.  The result shares its tail with *PGCONN*."
  (list* dbname *pgconn*))
|
|
|
|
;;;
|
|
;;; PostgreSQL Tools connecting to a database
|
|
;;;
|
|
(defun truncate-table (dbname table-name)
  "Connect to DBNAME and run a TRUNCATE statement against TABLE-NAME.

   TABLE-NAME is spliced into the SQL text as-is, without any quoting."
  (let ((sql (format nil "truncate ~a;" table-name)))
    (pomo:with-connection (get-connection-string dbname)
      (pomo:execute sql))))
|
|
|
|
(defun list-databases (&optional (username "postgres"))
  "Connect as USERNAME to the postgres database on localhost and return
   the list of database names whose name does not match
   'postgres|template'."
  (pomo:with-connection
      (list "postgres" username "none" "localhost" :port 5432)
    ;; every result row is a one-element list holding the datname
    (mapcar #'first
            (pomo:query "select datname
from pg_database
where datname !~ 'postgres|template'"))))
|
|
|
|
(defun list-tables (dbname)
  "Return an alist with one entry per ordinary table (relkind 'r') of the
   public schema of DBNAME.  Each entry is (RELNAME . ATTNUMS), where
   ATTNUMS lists the attribute numbers of the table's date and
   timestamptz columns, in ascending order."
  (pomo:with-connection
      (get-connection-string dbname)
    (mapcar
     (lambda (entry)
       ;; ENTRY is (relname colarray).  The CASE expression in the query
       ;; has no ELSE branch, so columns of any other type show up as
       ;; :NULL in colarray and are filtered out here.
       (cons (first entry)
             (coerce (remove :NULL (second entry)) 'list)))
     (pomo:query "
select relname, array_agg(case when typname in ('date', 'timestamptz')
then attnum end
order by attnum)
from pg_class c
join pg_namespace n on n.oid = c.relnamespace
left join pg_attribute a on c.oid = a.attrelid
join pg_type t on t.oid = a.atttypid
where c.relkind = 'r'
and attnum > 0
and n.nspname = 'public'
group by relname
"))))
|
|
|
|
(defun get-date-columns (table-name pgsql-table-list)
  "Given a PGSQL-TABLE-LIST as returned by LIST-TABLES, return the list of
   column numbers of TABLE-NAME that contain dates (those have to be
   reformatted), or NIL when TABLE-NAME has no entry in the list."
  ;; Table names are strings, and two distinct string objects with the
  ;; same contents are not EQL, so the default ASSOC test would never
  ;; find them.  EQUAL still matches everything EQL did.
  (cdr (assoc table-name pgsql-table-list :test #'equal)))
|
|
|
|
;;;
|
|
;;; PostgreSQL formatting tools
|
|
;;;
|
|
(defun fix-zero-date (datestr)
  "Return NIL when DATESTR is NIL, the empty string, or one of the
   all-zero date spellings; return DATESTR itself otherwise."
  (unless (or (null datestr)
              (member datestr
                      '("" "0000-00-00" "0000-00-00 00:00:00")
                      :test #'string=))
    datestr))
|
|
|
|
(defun reformat-null-value (value)
  "Return :NULL when VALUE is NIL and VALUE itself otherwise.

   cl-mysql returns nil for NULL and cl-postgres wants :NULL."
  (or value :NULL))
|
|
|
|
(defun reformat-row (row &key date-columns)
  "Return a fresh list holding the columns of ROW, as given by MySQL,
   converted for cl-postgres: columns whose 1-based position is listed in
   DATE-COLUMNS go through FIX-ZERO-DATE, then every column goes through
   REFORMAT-NULL-VALUE."
  (let ((colnum 0))
    (mapcar (lambda (col)
              (incf colnum)             ; positions are counted from 1
              (reformat-null-value
               (if (member colnum date-columns)
                   (fix-zero-date col)
                   col)))
            row)))
|
|
|
|
;;;
|
|
;;; Format row to PostgreSQL COPY format, the TEXT variant.
|
|
;;;
|
|
(defun format-row (stream row &key date-columns)
  "Write ROW to STREAM as one line of PostgreSQL COPY TEXT data.

   Columns are separated by a Tab character, a NIL column is written as
   the backslash-N marker, and the line ends with a newline.  Columns
   whose 1-based position is listed in DATE-COLUMNS go through
   FIX-ZERO-DATE first.  Non-NIL columns are walked character by
   character, so they are expected to be strings.

   See http://www.postgresql.org/docs/9.2/static/sql-copy.html#AEN66609 for
   details about the format, and format specs."
  (let ((*print-circle* nil)
        (*print-pretty* nil))
    (flet ((emit-escaped (char)
             ;; From the PostgreSQL docs: backslash itself, newline,
             ;; carriage return and the current delimiter character must
             ;; be preceded by a backslash when they appear in a column
             ;; value.  Each escape below writes exactly two characters.
             (case char
               (#\\         (format stream "\\\\"))
               (#\Space     (princ #\Space stream))
               (#\Newline   (format stream "\\n"))
               (#\Return    (format stream "\\r"))
               (#\Tab       (format stream "\\t"))
               (#\Backspace (format stream "\\b"))
               (#\Page      (format stream "\\f"))
               (t           (format stream "~c" char)))))
      (loop
         for colnum from 1
         ;; MORE? is the rest of the row: non-NIL while columns remain
         for (col . more?) on row
         for value = (if (member colnum date-columns)
                         (fix-zero-date col)
                         col)
         do (if value
                (loop for char across value
                   do (emit-escaped char))
                (format stream "~a" "\\N"))
            ;; the delimiter goes between columns, never after the last
            (when more?
              (format stream "~c" #\Tab))))
    (format stream "~%")))
|
|
|
|
;;;
|
|
;;; Read a file in PostgreSQL COPY TEXT format, and call given
|
|
;;; function on each line.
|
|
;;;
|
|
(defun map-rows (dbname table-name filename process-row-fn)
  "Read FILENAME, a text file in PostgreSQL COPY TEXT format, line by line.

   Each line is split on Tab characters into a list of column strings,
   columns spelled backslash-N are replaced with :null, and
   PROCESS-ROW-FN is called with that list as its only argument.

   DBNAME and TABLE-NAME are not used; they are kept so that existing
   callers continue to work.

   Returns how many rows were read and processed, or NIL when FILENAME
   does not exist."
  (declare (ignore dbname table-name))
  (with-open-file
      ;; we just ignore files that don't exist
      (input filename
             :direction :input
             :if-does-not-exist nil)
    (when input
      (loop
         for line = (read-line input nil)
         ;; stop at end of file before touching LINE: the row is only
         ;; built for lines that were actually read
         while line
         counting line into count
         do (funcall process-row-fn
                     (mapcar (lambda (x)
                               ;; we want Postmodern compliant NULLs
                               (if (string= "\\N" x) :null x))
                             ;; splitting is easy, it's always on #\Tab,
                             ;; see format-row for details
                             ;; NOTE(review): backslash escapes other than
                             ;; the NULL marker are passed on undecoded;
                             ;; confirm that is what callers expect
                             (sq:split-sequence #\Tab line)))
         finally (return count)))))
|
|
|
|
;;;
|
|
;;; Read a file in PostgreSQL COPY TEXT format and load it into a PostgreSQL
|
|
;;; table using the COPY protocol.
|
|
;;;
|
|
(defun copy-from-file (dbname table-name filename
                       &key
                         (truncate t))
  "Load the rows of FILENAME, a clean COPY TEXT file, into TABLE-NAME of
   database DBNAME through a cl-postgres db writer.

   When TRUNCATE is true (the default) TABLE-NAME is truncated first.
   Returns the value returned by MAP-ROWS."
  (when truncate
    (truncate-table dbname table-name))

  ;; The writer is given the connection spec with the :port keyword
  ;; removed, which leaves the port number as a plain positional element.
  ;; NOTE(review): presumably open-db-writer wants the port positionally;
  ;; confirm against cl-postgres.
  (let ((writer (cl-postgres:open-db-writer
                 (remove :port (get-connection-string dbname))
                 table-name
                 nil)))
    (flet ((write-row (row)
             (cl-postgres:db-write-row writer row)))
      (unwind-protect
           ;; push every row read from the file straight to the writer
           (map-rows dbname table-name filename #'write-row)
        ;; close the writer on normal return and on non-local exit alike
        (cl-postgres:close-db-writer writer)))))
|
|
|
|
;;;
|
|
;;; Pop data from a lparallel.queue queue instance, reformat it assuming
|
|
;;; data in there are from cl-mysql, and copy it to a PostgreSQL table.
|
|
;;;
|
|
(defun copy-from-queue (dbname table-name dataq
                        &key
                          (truncate t)
                          date-columns)
  "Copy the rows handed over by PGLOADER.QUEUE:MAP-POP-QUEUE for DATAQ into
   TABLE-NAME of database DBNAME through a cl-postgres db writer.

   Each row goes through REFORMAT-ROW with DATE-COLUMNS before being
   written.  When TRUNCATE is true (the default) TABLE-NAME is truncated
   first.  Returns the value returned by MAP-POP-QUEUE."
  (when truncate
    (truncate-table dbname table-name))

  ;; The writer is given the connection spec with the :port keyword
  ;; removed, which leaves the port number as a plain positional element.
  ;; NOTE(review): presumably open-db-writer wants the port positionally;
  ;; confirm against cl-postgres.
  (let ((writer (cl-postgres:open-db-writer
                 (remove :port (get-connection-string dbname))
                 table-name
                 nil)))
    (flet ((write-row (row)
             (cl-postgres:db-write-row
              writer
              (reformat-row row :date-columns date-columns))))
      (unwind-protect
           (pgloader.queue:map-pop-queue dataq #'write-row)
        ;; close the writer on normal return and on non-local exit alike
        (cl-postgres:close-db-writer writer)))))
|
|
|