pgloader/db3.lisp
2013-09-24 11:05:57 +02:00

159 lines
5.3 KiB
Common Lisp
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;;;
;;; Tools to handle the DBF file format
;;;
(in-package :pgloader.db3)
(defvar *db3-pgsql-type-mapping*
  '(("C" . "text")      ; character data; the declared field length is dropped
    ("N" . "numeric")   ; one PostgreSQL type for both integers and floats
    ("L" . "boolean")   ; logical flag, PostgreSQL compatible representation
    ("D" . "date")      ; DB3 dates carry no time zone
    ("M" . "text"))     ; memo field: mapped to text, not handled yet
  "Alist whose keys are DB3 field type codes (one-letter strings) and whose
values are the names of the PostgreSQL column types used for them.")
(defun convert-db3-type-to-pgsql (type length)
  "Return the PostgreSQL type name registered for the DB3 field TYPE in
*DB3-PGSQL-TYPE-MAPPING*, or NIL when TYPE has no entry there.

LENGTH is part of the signature but is never consulted: character fields
are mapped to text, so the declared width is irrelevant."
  (declare (ignore length))
  (let ((entry (assoc type *db3-pgsql-type-mapping* :test #'string=)))
    ;; ENTRY is a (db3-type . pgsql-type) pair, or NIL for an unknown type;
    ;; CDR of NIL is NIL, so no extra guard is needed.
    (cdr entry)))
(defun db3-create-table (input
                         &optional (table-name (pathname-name input)))
  "Read the header of the DB3 file INPUT and return, as a string, a
CREATE TABLE IF NOT EXISTS statement describing its fields.

TABLE-NAME defaults to the name component of the INPUT pathname.  Each DB3
field becomes one column whose type is chosen by CONVERT-DB3-TYPE-TO-PGSQL."
  (with-open-file (stream input
                          :direction :input
                          :element-type '(unsigned-byte 8))
    (let ((db3 (make-instance 'db3:db3)))
      (db3:load-header db3 stream)
      (with-output-to-string (s)
        (format s "create table if not exists ~a (~%" table-name)
        ;; Iterate over the tails of the field list so that REMAINING is
        ;; non-NIL for every column but the last one: it drives the
        ;; trailing comma in the format directive.
        (loop for (field . remaining) on (db3::fields db3)
              for column-name = (db3::field-name field)
              for column-type = (convert-db3-type-to-pgsql
                                 (db3::field-type field)
                                 (db3::field-length field))
              do (format s "~4T~a ~25T~a~:[~;,~]~%"
                         column-name column-type remaining))
        (format s ");")))))
(defun logical-to-boolean (value)
  "Map a DB3 logical VALUE to something PostgreSQL accepts as a boolean.

The \"?\" marker yields NIL; any other value is returned unchanged."
  ;; NOTE(review): this declaration names no function, so it has no effect.
  ;; A (declaim (inline logical-to-boolean)) before the DEFUN is probably
  ;; what was intended -- confirm before changing.
  (declare (inline))
  (unless (string= value "?")
    value))
(defun db3-trim-string (value)
  "DB3 strings are right padded with spaces: return VALUE with its
trailing spaces removed.  Leading spaces are left alone."
  ;; NOTE(review): this declaration names no function, so it has no effect.
  ;; A (declaim (inline db3-trim-string)) before the DEFUN is probably
  ;; what was intended -- confirm before changing.
  (declare (inline))
  ;; A string is a valid character bag; this one holds only #\Space.
  (string-right-trim " " value))
(defun db3-date-to-pgsql-date (value)
  "Convert a DB3 date VALUE, an 8 character \"YYYYMMDD\" string, to the
\"YYYY-MM-DD\" form PostgreSQL reads as a date.

Return NIL when VALUE is NIL, empty, or made only of spaces (an unset date
field), the same way LOGICAL-TO-BOOLEAN returns NIL for an unknown logical
value.  Presumably the PostgreSQL layer emits NIL as NULL -- confirm against
pgloader.pgsql:format-row.

A non-blank VALUE shorter than 8 characters still signals an error from
SUBSEQ, so malformed data is not silently discarded."
  ;; The former (declare (inline)) named no function and was a no-op, so
  ;; it is dropped here.
  (unless (or (null value)
              ;; EVERY is true for the empty string too, which covers "".
              (every (lambda (ch) (char= ch #\Space)) value))
    (format nil "~a-~a-~a"
            (subseq value 0 4)          ; year
            (subseq value 4 6)          ; month
            (subseq value 6 8))))       ; day
(defun transforms (input)
  "Read the header of the DB3 file INPUT and return a list with one element
per field, in field order: the function to apply to that column's values to
convert them to PostgreSQL format, or NIL when the field type has no
transform."
  (flet ((transform-for (field)
           ;; Pick the converter from the field's DB3 type code.
           (let ((type (db3::field-type field)))
             (cond ((string= type "L") #'logical-to-boolean)
                   ((string= type "C") #'db3-trim-string)
                   ((string= type "D") #'db3-date-to-pgsql-date)
                   (t nil)))))
    (with-open-file (stream input
                            :direction :input
                            :element-type '(unsigned-byte 8))
      (let ((db3 (make-instance 'db3:db3)))
        (db3:load-header db3 stream)
        (mapcar #'transform-for (db3::fields db3))))))
;;;
;;; Integration with pgloader
;;;
(defun map-rows (filename &key process-row-fn)
  "Read every record of the DB3 file FILENAME and call PROCESS-ROW-FN on
each of them, passing a single argument: the list of the record's column
values.  Return the record count announced by the file header."
  (with-open-file (stream filename
                          :direction :input
                          :element-type '(unsigned-byte 8))
    (let ((db3 (make-instance 'db3:db3)))
      (db3:load-header db3 stream)
      (let ((total (db3:record-count db3)))
        ;; Load exactly TOTAL records; each one comes back as a sequence
        ;; that is converted to a list before being handed over.
        (dotimes (i total total)
          (declare (ignorable i))
          (funcall process-row-fn
                   (coerce (db3:load-record db3 stream) 'list)))))))
(defun copy-to (db3-filename pgsql-copy-filename)
  "Extract the data of the DB3 file DB3-FILENAME into PGSQL-COPY-FILENAME, a
PostgreSQL COPY TEXT formatted file written as UTF-8.  An existing output
file is superseded.  Return whatever MAP-ROWS returns."
  (with-open-file (text-file pgsql-copy-filename
                             :direction :output
                             :if-exists :supersede
                             :external-format :utf-8)
    ;; The per-column transforms are computed once, from the DB3 header,
    ;; and reused for every row.
    (let ((row-transforms (transforms db3-filename)))
      (flet ((write-row (row)
               (pgloader.pgsql:format-row text-file
                                          row
                                          :transforms row-transforms)))
        (map-rows db3-filename :process-row-fn #'write-row)))))
;;;
;;; Export DB3 data to our lparallel data queue. All the work is done in
;;; other basic layers, so this is a simple enough function.
;;;
(defun copy-to-queue (filename dataq table-name)
  "Push the rows of the DB3 file FILENAME into the queue DATAQ, then add the
value returned by that operation to the :read counter kept for TABLE-NAME in
*STATE*."
  (let ((rows-read
          (pgloader.queue:map-push-queue dataq #'map-rows filename)))
    (pgstate-incf *state* table-name :read rows-read)))
(defun stream-file (filename
                    &key
                      dbname
                      (table-name (pathname-name filename))
                      create-table
                      truncate)
  "Open the DB3 file FILENAME and stream its content to the table TABLE-NAME
of the PostgreSQL database DBNAME.

TABLE-NAME defaults to the name component of FILENAME.  When CREATE-TABLE
is true, a CREATE TABLE IF NOT EXISTS statement derived from the DB3 header
is executed first, for TABLE-NAME.  When TRUNCATE is true and CREATE-TABLE
is not, the table is truncated before loading.

The load itself runs as two tasks sharing a bounded queue: one reads the
DB3 rows into the queue, the other copies them from the queue to PostgreSQL."
  (with-pgsql-transaction (dbname)
    (when create-table
      ;; Pass TABLE-NAME along: without it DB3-CREATE-TABLE falls back to
      ;; the file's base name, which creates the wrong table as soon as
      ;; the caller supplies a :table-name of its own.
      (let ((create-table-sql (db3-create-table filename table-name)))
        (log-message :notice "Create table \"~a\"" table-name)
        (log-message :info "~a" create-table-sql)
        (pgsql-execute create-table-sql)))

    (when (and truncate (not create-table))
      ;; we don't TRUNCATE a table we just CREATEd
      (let ((truncate-sql (format nil "TRUNCATE ~a;" table-name)))
        (log-message :notice "~a" truncate-sql)
        (pgsql-execute truncate-sql))))

  (let* ((*state*     (make-pgstate))
         (lp:*kernel* (make-kernel 2))
         (channel     (lp:make-channel))
         (dataq       (lq:make-queue :fixed-capacity 4096)))

    (with-stats-collection (dbname table-name :state *state* :summary t)
      (log-message :notice "COPY \"~a\" from '~a'" table-name filename)
      (lp:submit-task channel #'copy-to-queue filename dataq table-name)

      ;; and start another task to push that data from the queue to PostgreSQL
      ;; NOTE(review): TRUNCATE is forwarded here as well, whatever the
      ;; value of CREATE-TABLE -- confirm that a second truncation by
      ;; copy-from-queue is intended.
      (lp:submit-task channel
                      #'pgloader.pgsql:copy-from-queue
                      dbname table-name dataq
                      :truncate truncate
                      :transforms (transforms filename))

      ;; now wait until both the tasks are over, and kill the kernel
      (loop for tasks below 2 do (lp:receive-result channel)
            finally
               (log-message :info "COPY \"~a\" done." table-name)
               (lp:end-kernel)))))