Implement user-defined casting rules support for DB3.

The casting support for DB3 was hand-crafted and didn't get upgraded to
using the current CAST grammar and facilities, for no other reason than
lack of time and interest. It so happens that implementing it now fixes two
bug reports.

Bug #938 is about conversion defaulting to "not null" columns, and that's due
to the usage of the internal pgloader catalogs, where the target column's
nullable field is NIL by default, which doesn't make much sense. With
support for user-defined casting rules, columns are nullable by default, so
that's kind of a free fix.

Fixes #927.
Fixes #938.
This commit is contained in:
Dimitri Fontaine 2019-04-14 21:46:04 +02:00
parent efe70ba3c3
commit 39fc78e08f
6 changed files with 88 additions and 59 deletions

View File

@ -140,6 +140,7 @@
:depends-on ("common" "csv")
:components
((:file "db3-schema")
(:file "db3-cast-rules")
(:file "db3" :depends-on ("db3-schema"))))
(:module "ixf"

View File

@ -704,6 +704,7 @@
#:create-tables
#:format-vector-row)
(:export #:dbf-connection
#:*db3-default-cast-rules*
#:copy-db3
#:map-rows
#:copy-to
@ -850,7 +851,8 @@
#:*sqlite-default-cast-rules*)
(:import-from #:pgloader.source.db3
#:copy-db3
#:dbf-connection)
#:dbf-connection
#:*db3-default-cast-rules*)
(:import-from #:pgloader.source.ixf
#:copy-ixf
#:ixf-connection)

View File

@ -57,6 +57,7 @@
(defrule load-dbf-optional-clauses (* (or dbf-options
gucs
casts
before-load
after-load))
(:lambda (clauses-list)
@ -93,10 +94,12 @@
&key
target-table-name
(encoding :ascii)
gucs before after options
gucs casts before after options
&allow-other-keys)
`(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
(let* ((*default-cast-rules* ',*db3-default-cast-rules*)
(*cast-rules* ',casts)
,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options)
,@(identifier-case-binding options)
(on-error-stop (getf ',options :on-error-stop))
@ -124,7 +127,7 @@
(defrule load-dbf-file load-dbf-command
(:lambda (command)
(bind (((source encoding pg-db-uri table-name
&key options gucs before after) command))
&key options gucs casts before after) command))
(cond (*dry-run*
(lisp-code-for-dbf-dry-run source pg-db-uri))
(t
@ -132,6 +135,7 @@
:target-table-name table-name
:encoding encoding
:gucs gucs
:casts casts
:before before
:after after
:options options))))))

View File

@ -0,0 +1,72 @@
;;;
;;; Tools to handle DB3 data type casting rules
;;;
(in-package :pgloader.source.db3)
;;;
;;; The default DB3 Type Casting Rules
;;;
(defparameter *db3-default-cast-rules*
  ;; One rule per DB3 field type code. Each rule is a plist naming the
  ;; source type code, the PostgreSQL target type, and the transformation
  ;; function (by symbol) applied to the values of matching fields.
  '(;; "C" fields are loaded as text, with the padding removed
    (:source (:type "C")
     :target (:type "text")
     :using db3-trim-string)

    ;; "N" fields are loaded as numeric
    (:source (:type "N")
     :target (:type "numeric")
     :using db3-numeric-to-pgsql-numeric)

    ;; "L" fields are loaded as boolean
    (:source (:type "L")
     :target (:type "boolean")
     :using logical-to-boolean)

    ;; "D" fields are loaded as date
    (:source (:type "D")
     :target (:type "date")
     :using db3-date-to-pgsql-date)

    ;; "M" fields are loaded as text, same treatment as "C"
    (:source (:type "M")
     :target (:type "text")
     :using db3-trim-string))
  "Data Type Casting rules to migrate from DB3 to PostgreSQL")
;; Description of a single DB3 field, as consumed by the CAST method.
;;
;; The constructor is positional: (make-db3-field name type length).
;; Slots that are not constructor arguments take their slot defaults:
;; DEFAULT and EXTRA are NIL, and NULLABLE is T, so that a field is
;; considered nullable unless stated otherwise.
(defstruct (db3-field
(:constructor make-db3-field (name type length)))
name type length default (nullable t) extra)
(defmethod cast ((field db3-field) &key table)
  "Cast the DB3 FIELD of TABLE by handing its description over to
   APPLY-CASTING-RULES, and return whatever that call returns."
  (let ((table-name (table-name table))
        (dbf-type   (db3-field-type field)))
    ;; The DB3 type is passed twice, exactly as the field only carries a
    ;; single type slot -- presumably once as the data type and once as
    ;; the column type; confirm against APPLY-CASTING-RULES.
    (apply-casting-rules table-name
                         (db3-field-name field)
                         dbf-type
                         dbf-type
                         (db3-field-default field)
                         (db3-field-nullable field)
                         (db3-field-extra field))))
;;;
;;; Transformation functions
;;;
;; Proclaim the four transformation functions defined below as INLINE.
;; This must come before their DEFUN forms for the proclamation to apply
;; to those definitions.
(declaim (inline logical-to-boolean
db3-trim-string
db3-numeric-to-pgsql-numeric
db3-date-to-pgsql-date))
(defun logical-to-boolean (value)
  "Convert a DB3 logical VALUE to a PostgreSQL boolean.

   An uninitialized logical field shows up as \"?\", and may also be left
   blank or empty: none of those is a valid boolean literal, so NIL is
   returned for them. Any other VALUE is returned unchanged.

   VALUE is compared with STRING=, so it may be a string or a character."
  (unless (member value '("?" " " "") :test #'string=)
    value))
(defun db3-trim-string (value)
  "DB3 strings are right padded with spaces: return VALUE without that
   trailing padding. Leading and inner spaces are kept."
  (let ((padding " "))
    (string-right-trim padding value)))
(defun db3-numeric-to-pgsql-numeric (value)
  "Return the DB3 numeric VALUE without its space padding, or NIL when
   there is no number to load.

   DB3 numerics are stored as text padded with spaces up to the field
   width; the padding is usually on the left because the digits are right
   justified, so spaces are removed on both sides. A field containing only
   spaces, an empty string, or a NIL VALUE all yield NIL."
  ;; Guard against NIL first: as a string designator NIL would otherwise be
  ;; trimmed as the three-character string "NIL".
  (when value
    (let ((trimmed-string (string-trim '(#\Space) value)))
      (unless (string= "" trimmed-string)
        trimmed-string))))
(defun db3-date-to-pgsql-date (value)
  "Turn the 8 characters DB3 date VALUE, laid out as YYYYMMDD, into a
   YYYY-MM-DD string. Return NIL when VALUE is NIL, is not exactly 8
   characters long, or when any of its three parts has no leading integer."
  (when (and value (string/= "" value) (= 8 (length value)))
    (flet ((date-part (start end)
             ;; :junk-allowed makes unparsable parts read as NIL rather
             ;; than signaling an error
             (parse-integer value :start start :end end :junk-allowed t)))
      (let ((year  (date-part 0 4))
            (month (date-part 4 6))
            (day   (date-part 6 8)))
        (when (and year month day)
          (format nil "~4,'0d-~2,'0d-~2,'0d" year month day))))))

View File

@ -33,63 +33,10 @@
(setf (fd-db3 clone) (fd-db3 c))
clone))
(defvar *db3-pgsql-type-mapping*
'(("C" . "text") ; ignore field-length
("N" . "numeric") ; handle both integers and floats
("L" . "boolean") ; PostgreSQL compatible representation
("D" . "date") ; no TimeZone in DB3 files
("M" . "text"))) ; not handled yet
(defstruct (db3-field
(:constructor make-db3-field (name type length)))
name type length)
(defun list-all-columns (db3 table)
"Return the list of columns for the given DB3-FILE-NAME."
(loop
:for field :in (db3::fields db3)
:do (add-field table (make-db3-field (db3::field-name field)
(db3::field-type field)
(string (db3::field-type field))
(db3::field-length field)))))
(defmethod cast ((field db3-field) &key &allow-other-keys)
"Return the PostgreSQL type definition given the DB3 one."
(let* ((type (db3-field-type field))
(transform
(cond ((string= type "C") #'db3-trim-string)
((string= type "N") #'db3-numeric-to-pgsql-numeric)
((string= type "L") #'logical-to-boolean)
((string= type "D") #'db3-date-to-pgsql-date)
(t nil))))
(make-column :name (apply-identifier-case (db3-field-name field))
:type-name (cdr (assoc type *db3-pgsql-type-mapping*
:test #'string=))
:transform transform)))
(declaim (inline logical-to-boolean
db3-trim-string
db3-date-to-pgsql-date))
(defun logical-to-boolean (value)
"Convert a DB3 logical value to a PostgreSQL boolean."
(if (string= value "?") nil value))
(defun db3-trim-string (value)
"DB3 Strings a right padded with spaces, fix that."
(string-right-trim '(#\Space) value))
(defun db3-numeric-to-pgsql-numeric (value)
"DB3 numerics should be good to go, but might contain spaces."
(let ((trimmed-string (string-right-trim '(#\Space) value)))
(unless (string= "" trimmed-string)
trimmed-string)))
(defun db3-date-to-pgsql-date (value)
"Convert a DB3 date to a PostgreSQL date."
(when (and value (string/= "" value) (= 8 (length value)))
(let ((year (parse-integer (subseq value 0 4) :junk-allowed t))
(month (parse-integer (subseq value 4 6) :junk-allowed t))
(day (parse-integer (subseq value 6 8) :junk-allowed t)))
(when (and year month day)
(format nil "~4,'0d-~2,'0d-~2,'0d" year month day)))))

View File

@ -6,4 +6,7 @@
LOAD DBF
FROM data/reg2013.dbf with encoding cp850
INTO postgresql:///pgloader?public.reg2013
WITH truncate, create table, disable triggers;
WITH truncate, create table, disable triggers
CAST column reg2013.region to integer,
column reg2013.tncc to smallint;