;;;
;;; Tools to handle the DBF file format
;;;

(in-package :pgloader.db3)

;;; Association list from DB3 field type codes to PostgreSQL type names.
(defvar *db3-pgsql-type-mapping*
  '(("C" . "text")			; ignore field-length
    ("N" . "numeric")			; handle both integers and floats
    ("L" . "boolean")			; PostgreSQL compatible representation
    ("D" . "date")			; no TimeZone in DB3 files
    ("M" . "text")))			; not handled yet

(defun convert-db3-type-to-pgsql (type length)
  "Convert a DB3 field type into a PostgreSQL data type.

   TYPE is looked up in *DB3-PGSQL-TYPE-MAPPING* using STRING=. Returns the
   PostgreSQL type name as a string, or NIL when TYPE has no entry in the
   mapping. LENGTH is accepted for the caller's convenience and ignored."
  ;; we just ignore the length as we use text here
  (declare (ignore length))
  (cdr (assoc type *db3-pgsql-type-mapping* :test #'string=)))

(defun db3-create-table (input &optional (table-name (pathname-name input)))
  "Return a CREATE TABLE statement suitable for PostgreSQL, built from
   reading the headers of the db3 file INPUT.

   TABLE-NAME defaults to the name component of the INPUT pathname. The
   statement is returned as a string, with one line per field."
  (with-open-file (stream input
			  :direction :input
			  :element-type '(unsigned-byte 8))
    (with-output-to-string (s)
      (let ((db3 (make-instance 'db3:db3)))
	(db3:load-header db3 stream)
	(format s "create table if not exists ~a (~%" table-name)
	(loop
	   ;; MORE? is the rest of the field list: it is non-NIL for every
	   ;; field but the last one, which drives the trailing comma.
	   for (field . more?) on (db3::fields db3)
	   for (name type) = (list (db3::field-name field)
				   (convert-db3-type-to-pgsql
				    (db3::field-type field)
				    (db3::field-length field)))
	   do (format s "~4T~a ~25T~a~:[~;,~]~%" name type more?))
	(format s ");")))))

;;; A bare (declare (inline)) names no function and so declares nothing;
;;; proclaim the helpers inline globally, ahead of their definitions.
(declaim (inline logical-to-boolean
		 db3-trim-string
		 db3-date-to-pgsql-date))

(defun logical-to-boolean (value)
  "Convert a DB3 logical value to a PostgreSQL boolean.

   Returns NIL when VALUE is \"?\" or blank (only spaces, or empty), and
   VALUE itself otherwise."
  (let ((flag (string-trim '(#\Space) value)))
    (if (or (string= flag "") (string= flag "?"))
	nil
	value)))

(defun db3-trim-string (value)
  "DB3 strings are right padded with spaces, fix that."
  (string-right-trim '(#\Space) value))

(defun db3-date-to-pgsql-date (value)
  "Convert a DB3 date to a PostgreSQL date.

   VALUE is expected to be 8 digits, YYYYMMDD, and is returned as a
   \"YYYY-MM-DD\" string. Any other value (blank, too short, too long, not
   all digits) returns NIL rather than producing a malformed date string or
   signaling an out-of-bounds error from SUBSEQ."
  (when (and (= 8 (length value))
	     (every #'digit-char-p value))
    (let ((year  (subseq value 0 4))
	  (month (subseq value 4 6))
	  (day   (subseq value 6 8)))
      (format nil "~a-~a-~a" year month day))))

(defun transforms (input)
  "Return the list of transforms to apply to each row of data in order to
   convert values to PostgreSQL format.

   The list is built from the headers of the db3 file INPUT and has one
   element per field, in field order: a function object for the field
   types \"L\", \"C\" and \"D\", and NIL for any other field type."
  (with-open-file (stream input
			  :direction :input
			  :element-type '(unsigned-byte 8))
    (let ((db3 (make-instance 'db3:db3)))
      (db3:load-header db3 stream)
      (loop
	 for field in (db3::fields db3)
	 for type = (db3::field-type field)
	 collect (cond ((string= type "L") #'logical-to-boolean)
		       ((string= type "C") #'db3-trim-string)
		       ((string= type "D") #'db3-date-to-pgsql-date)
		       (t                  nil))))))

;;;
;;; Integration with pgloader
;;;
(defun map-rows (filename &key process-row-fn)
  "Extract DB3 data and call PROCESS-ROW-FN function with a single
   argument (a list of column values) for each row.

   Returns the record count read from the file header, which is also the
   number of times PROCESS-ROW-FN gets called."
  (with-open-file (stream filename
			  :direction :input
			  :element-type '(unsigned-byte 8))
    (let ((db3 (make-instance 'db3:db3)))
      (db3:load-header db3 stream)
      (loop
	 with count = (db3:record-count db3)
	 ;; the record is loaded in the loop body rather than in a FOR
	 ;; clause: a FOR clause following REPEAT is not conforming LOOP
	 ;; syntax, and this way no record is read when COUNT is zero.
	 repeat count
	 do (funcall process-row-fn
		     (coerce (db3:load-record db3 stream) 'list))
	 finally (return count)))))

(defun copy-to (db3-filename pgsql-copy-filename)
  "Extract data from DB3 file into a PostgreSQL COPY TEXT formatted file.

   PGSQL-COPY-FILENAME is opened for output as UTF-8 and superseded when it
   already exists. Returns what MAP-ROWS returns, the record count."
  (with-open-file (text-file pgsql-copy-filename
			     :direction :output
			     :if-exists :supersede
			     :external-format :utf-8)
    (let ((transforms (transforms db3-filename)))
      (map-rows db3-filename
		:process-row-fn
		(lambda (row)
		  (pgloader.pgsql:format-row text-file row
					     :transforms transforms))))))

;;;
;;; Export DB3 data to our lparallel data queue. All the work is done in
;;; other basic layers, simple enough function.
;;;
(defun copy-to-queue (filename dataq table-name)
  "Copy data from DB3 file FILENAME into queue DATAQ, then add the value
   returned by the queue mapping to the :read counter of TABLE-NAME in
   *STATE*."
  (let ((read
	 (pgloader.queue:map-push-queue dataq #'map-rows filename)))
    (pgstate-incf *state* table-name :read read)))

(defun stream-file (filename
		    &key
		      dbname
		      (table-name (pathname-name filename))
		      create-table
		      truncate)
  "Open the DB3 and stream its content to a PostgreSQL database.

   DBNAME is the target database and TABLE-NAME the target table, which
   defaults to the name component of FILENAME. When CREATE-TABLE is true,
   the CREATE TABLE statement derived from the file headers is executed
   first. When TRUNCATE is true and CREATE-TABLE is false, the table is
   truncated first. The data is then copied by two tasks sharing a queue."
  (with-pgsql-transaction (dbname)
    (when create-table
      ;; pass TABLE-NAME along, so that the table we create is the very
      ;; same one we TRUNCATE and COPY into when the caller overrides the
      ;; default name.
      (let ((create-table-sql (db3-create-table filename table-name)))
	(log-message :notice "Create table \"~a\"" table-name)
	(log-message :info "~a" create-table-sql)
	(pgsql-execute create-table-sql)))

    (when (and truncate (not create-table))
      ;; we don't TRUNCATE a table we just CREATEd
      (let ((truncate-sql (format nil "TRUNCATE ~a;" table-name)))
	(log-message :notice "~a" truncate-sql)
	(pgsql-execute truncate-sql))))

  (let* ((*state*     (make-pgstate))
	 (lp:*kernel* (make-kernel 2))
	 (channel     (lp:make-channel))
	 (dataq       (lq:make-queue :fixed-capacity 4096)))

    (with-stats-collection (dbname table-name :state *state* :summary t)
      ;; end the kernel on every exit path, including a non-local exit
      ;; while waiting for the task results.
      (unwind-protect
	   (progn
	     (log-message :notice "COPY \"~a\" from '~a'" table-name filename)
	     (lp:submit-task channel #'copy-to-queue filename dataq table-name)

	     ;; and start another task to push that data from the queue to
	     ;; PostgreSQL
	     (lp:submit-task channel
			     #'pgloader.pgsql:copy-from-queue
			     dbname table-name dataq
			     :truncate truncate
			     :transforms (transforms filename))

	     ;; now wait until both the tasks are over
	     (loop
		repeat 2
		do (lp:receive-result channel)
		finally (log-message :info "COPY \"~a\" done." table-name)))
	;; and kill the kernel
	(lp:end-kernel)))))