From 39fc78e08ff7ad3e0e814022dc115b742bcc267b Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Sun, 14 Apr 2019 21:46:04 +0200 Subject: [PATCH] Implement user-defined casting rules support for DB3. The casting support for DB3 was hand-crafted and didn't get upgraded to using the current CAST grammar and facilities, for no other reason than lack of time and interest. It so happens that implementing it now fixes two bug reports. Bug #938 is about conversion defaulting to "not null" columns, and that's due to the usage of the internal pgloader catalogs where the target column's nullable field is NIL by default, which doesn't make much sense. With support for user-defined casting rules, the default is nullable columns, so that's kind of a free fix. Fixes #927. Fixes #938. --- pgloader.asd | 1 + src/package.lisp | 4 +- src/parsers/command-dbf.lisp | 10 ++-- src/sources/db3/db3-cast-rules.lisp | 72 +++++++++++++++++++++++++++++ src/sources/db3/db3-schema.lisp | 55 +--------------------- test/dbf.load | 5 +- 6 files changed, 88 insertions(+), 59 deletions(-) create mode 100644 src/sources/db3/db3-cast-rules.lisp diff --git a/pgloader.asd b/pgloader.asd index e120e30..9b1034c 100644 --- a/pgloader.asd +++ b/pgloader.asd @@ -140,6 +140,7 @@ :depends-on ("common" "csv") :components ((:file "db3-schema") + (:file "db3-cast-rules") (:file "db3" :depends-on ("db3-schema")))) (:module "ixf" diff --git a/src/package.lisp b/src/package.lisp index 0de7e96..e8edb71 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -704,6 +704,7 @@ #:create-tables #:format-vector-row) (:export #:dbf-connection + #:*db3-default-cast-rules* #:copy-db3 #:map-rows #:copy-to @@ -850,7 +851,8 @@ #:*sqlite-default-cast-rules*) (:import-from #:pgloader.source.db3 #:copy-db3 - #:dbf-connection) + #:dbf-connection + #:*db3-default-cast-rules*) (:import-from #:pgloader.source.ixf #:copy-ixf #:ixf-connection) diff --git a/src/parsers/command-dbf.lisp b/src/parsers/command-dbf.lisp index 
89d4764..6e1749b 100644 --- a/src/parsers/command-dbf.lisp +++ b/src/parsers/command-dbf.lisp @@ -57,6 +57,7 @@ (defrule load-dbf-optional-clauses (* (or dbf-options gucs + casts before-load after-load)) (:lambda (clauses-list) @@ -93,10 +94,12 @@ &key target-table-name (encoding :ascii) - gucs before after options + gucs casts before after options &allow-other-keys) `(lambda () - (let* (,@(pgsql-connection-bindings pg-db-conn gucs) + (let* ((*default-cast-rules* ',*db3-default-cast-rules*) + (*cast-rules* ',casts) + ,@(pgsql-connection-bindings pg-db-conn gucs) ,@(batch-control-bindings options) ,@(identifier-case-binding options) (on-error-stop (getf ',options :on-error-stop)) @@ -124,7 +127,7 @@ (defrule load-dbf-file load-dbf-command (:lambda (command) (bind (((source encoding pg-db-uri table-name - &key options gucs before after) command)) + &key options gucs casts before after) command)) (cond (*dry-run* (lisp-code-for-dbf-dry-run source pg-db-uri)) (t @@ -132,6 +135,7 @@ :target-table-name table-name :encoding encoding :gucs gucs + :casts casts :before before :after after :options options)))))) diff --git a/src/sources/db3/db3-cast-rules.lisp b/src/sources/db3/db3-cast-rules.lisp new file mode 100644 index 0000000..0ae96cb --- /dev/null +++ b/src/sources/db3/db3-cast-rules.lisp @@ -0,0 +1,72 @@ +;;; +;;; Tools to handle DB3 data type casting rules +;;; + +(in-package :pgloader.source.db3) + +;;; +;;; The default DB3 Type Casting Rules +;;; +(defparameter *db3-default-cast-rules* + `((:source (:type "C") + :target (:type "text") + :using db3-trim-string) + + (:source (:type "N") + :target (:type "numeric") + :using db3-numeric-to-pgsql-numeric) + + (:source (:type "L") + :target (:type "boolean") + :using logical-to-boolean) + + (:source (:type "D") + :target (:type "date") + :using db3-date-to-pgsql-date) + + (:source (:type "M") + :target (:type "text") + :using db3-trim-string)) + "Data Type Casting rules to migrate from DB3 to PostgreSQL") + +(defstruct 
(db3-field + (:constructor make-db3-field (name type length))) + name type length default (nullable t) extra) + +(defmethod cast ((field db3-field) &key table) + "Return the PostgreSQL type definition given the DB3 one." + (let ((table-name (table-name table))) + (with-slots (name type length default nullable extra) field + (apply-casting-rules table-name name type type default nullable extra)))) + +;;; +;;; Transformation functions +;;; +(declaim (inline logical-to-boolean + db3-trim-string + db3-numeric-to-pgsql-numeric + db3-date-to-pgsql-date)) + +(defun logical-to-boolean (value) + "Convert a DB3 logical value to a PostgreSQL boolean." + (if (string= value "?") nil value)) + +(defun db3-trim-string (value) + "DB3 Strings are right padded with spaces, fix that." + (string-right-trim '(#\Space) value)) + +(defun db3-numeric-to-pgsql-numeric (value) + "DB3 numerics should be good to go, but might contain spaces." + (let ((trimmed-string (string-right-trim '(#\Space) value))) + (unless (string= "" trimmed-string) + trimmed-string))) + +(defun db3-date-to-pgsql-date (value) + "Convert a DB3 date to a PostgreSQL date." + (when (and value (string/= "" value) (= 8 (length value))) + (let ((year (parse-integer (subseq value 0 4) :junk-allowed t)) + (month (parse-integer (subseq value 4 6) :junk-allowed t)) + (day (parse-integer (subseq value 6 8) :junk-allowed t))) + (when (and year month day) + (format nil "~4,'0d-~2,'0d-~2,'0d" year month day))))) + diff --git a/src/sources/db3/db3-schema.lisp b/src/sources/db3/db3-schema.lisp index 1be657b..130d992 100644 --- a/src/sources/db3/db3-schema.lisp +++ b/src/sources/db3/db3-schema.lisp @@ -33,63 +33,10 @@ (setf (fd-db3 clone) (fd-db3 c)) clone)) -(defvar *db3-pgsql-type-mapping* - '(("C" . "text") ; ignore field-length - ("N" . "numeric") ; handle both integers and floats - ("L" . "boolean") ; PostgreSQL compatible representation - ("D" . "date") ; no TimeZone in DB3 files - ("M" . 
"text"))) ; not handled yet - -(defstruct (db3-field - (:constructor make-db3-field (name type length))) - name type length) - (defun list-all-columns (db3 table) "Return the list of columns for the given DB3-FILE-NAME." (loop :for field :in (db3::fields db3) :do (add-field table (make-db3-field (db3::field-name field) - (db3::field-type field) + (string (db3::field-type field)) (db3::field-length field))))) - -(defmethod cast ((field db3-field) &key &allow-other-keys) - "Return the PostgreSQL type definition given the DB3 one." - (let* ((type (db3-field-type field)) - (transform - (cond ((string= type "C") #'db3-trim-string) - ((string= type "N") #'db3-numeric-to-pgsql-numeric) - ((string= type "L") #'logical-to-boolean) - ((string= type "D") #'db3-date-to-pgsql-date) - (t nil)))) - (make-column :name (apply-identifier-case (db3-field-name field)) - :type-name (cdr (assoc type *db3-pgsql-type-mapping* - :test #'string=)) - :transform transform))) - -(declaim (inline logical-to-boolean - db3-trim-string - db3-date-to-pgsql-date)) - -(defun logical-to-boolean (value) - "Convert a DB3 logical value to a PostgreSQL boolean." - (if (string= value "?") nil value)) - -(defun db3-trim-string (value) - "DB3 Strings a right padded with spaces, fix that." - (string-right-trim '(#\Space) value)) - -(defun db3-numeric-to-pgsql-numeric (value) - "DB3 numerics should be good to go, but might contain spaces." - (let ((trimmed-string (string-right-trim '(#\Space) value))) - (unless (string= "" trimmed-string) - trimmed-string))) - -(defun db3-date-to-pgsql-date (value) - "Convert a DB3 date to a PostgreSQL date." 
- (when (and value (string/= "" value) (= 8 (length value))) - (let ((year (parse-integer (subseq value 0 4) :junk-allowed t)) - (month (parse-integer (subseq value 4 6) :junk-allowed t)) - (day (parse-integer (subseq value 6 8) :junk-allowed t))) - (when (and year month day) - (format nil "~4,'0d-~2,'0d-~2,'0d" year month day))))) - diff --git a/test/dbf.load b/test/dbf.load index 09d790a..4296d1a 100644 --- a/test/dbf.load +++ b/test/dbf.load @@ -6,4 +6,7 @@ LOAD DBF FROM data/reg2013.dbf with encoding cp850 INTO postgresql:///pgloader?public.reg2013 - WITH truncate, create table, disable triggers; + WITH truncate, create table, disable triggers + + CAST column reg2013.region to integer, + column reg2013.tncc to smallint;