diff --git a/pgloader.1.md b/pgloader.1.md index 954c2e3..00602e6 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -1009,6 +1009,13 @@ The `database` command accepts the following clauses and options: be found un the `pgloader.transforms` Common Lisp package. See above for details. + It's possible to augment a default cast rule (such as the one that + applies to the `ENUM` data type, for example) with a *transformation + function* by entirely omitting the `type` parts of the casting rule, + as in the following example: + + column enumerate.foo using empty-string-to-null + - *MATERIALIZE VIEWS* This clause allows you to implement custom data processing at the data diff --git a/src/parser.lisp b/src/parser.lisp index ebe4ccf..b8c61a6 100644 --- a/src/parser.lisp +++ b/src/parser.lisp @@ -718,7 +718,7 @@ for (key value) on target by #'cddr append (list key (if (eq :type key) (getf source :type) value))))) -(defrule cast-rule (and cast-source cast-def (? cast-function)) +(defrule cast-rule (and cast-source (? cast-def) (? cast-function)) (:lambda (cast) (destructuring-bind (source target function) cast (list :source source diff --git a/src/sources/mysql-cast-rules.lisp b/src/sources/mysql-cast-rules.lisp index 73a2d87..954fa83 100644 --- a/src/sources/mysql-cast-rules.lisp +++ b/src/sources/mysql-cast-rules.lisp @@ -294,41 +294,38 @@ (let* ((typemod (parse-column-typemod dtype ctype)) (not-null (string-equal nullable "NO")) (auto-increment (string= "auto_increment" extra)) - (source (append (list :table-name table-name) - (list :column-name column-name) - (list :type dtype) - (list :ctype ctype) - (when typemod (list :typemod typemod)) - (list :default default) - (list :not-null not-null) - (list :auto-increment auto-increment)))) - (loop - for rule in rules - for target? = (cast-rule-matches rule source) - until target? - finally - (return - (destructuring-bind (&key target using &allow-other-keys) - target? 
- (list :transform-fn using - :pgtype (format-pgsql-type source target using))))))) - -(defun get-transform-function (dtype ctype default nullable extra) - "Apply given RULES and return the tranform function needed for this column" - (destructuring-bind (&key transform-fn &allow-other-keys) - (apply-casting-rules dtype ctype default nullable extra) - transform-fn)) + (source `(:table-name ,table-name + :column-name ,column-name + :type ,dtype + :ctype ,ctype + ,@(when typemod (list :typemod typemod)) + :default ,default + :not-null ,not-null + :auto-increment ,auto-increment))) + (let (first-match-using) + (loop + for rule in rules + for (target using) = (destructuring-bind (&key target using) + (cast-rule-matches rule source) + (list target using)) + do (when (and (null target) using (null first-match-using)) + (setf first-match-using using)) + until target + finally + (return + (list :transform-fn (or first-match-using using) + :pgtype (format-pgsql-type source target using))))))) (defun cast (table-name column-name dtype ctype default nullable extra) "Convert a MySQL datatype to a PostgreSQL datatype. DYTPE is the MySQL data_type and CTYPE the MySQL column_type, for example that would be int and int(7) or varchar and varchar(25)." - (destructuring-bind (&key pgtype &allow-other-keys) + (destructuring-bind (&key pgtype transform-fn &allow-other-keys) (apply-casting-rules dtype ctype default nullable extra :table-name table-name :column-name column-name) - pgtype)) + (values pgtype transform-fn))) (defun list-transforms (columns) "Return the list of transformation functions to apply to a given table." @@ -351,7 +348,11 @@ that would be int and int(7) or varchar and varchar(25)." :using nil) (:source (:type "char" :typemod (= (car typemod) 1)) - :target (:type "char" :drop-typemod nil)))) + :target (:type "char" :drop-typemod nil)) + + (:source (:column ("table" . 
"g")) + :target nil + :using pgloader.transforms::empty-string-to-null))) (columns ;; name dtype ctype default nullable extra @@ -387,7 +388,8 @@ that would be int and int(7) or varchar and varchar(25)." (loop for (name dtype ctype nullable default extra) in columns for mycol = (make-mysql-column "table" name dtype ctype nullable default extra) - for pgtype = (cast "table" name dtype ctype nullable default extra) - for fn = (car (list-transforms (list mycol))) + for (pgtype fn) = (multiple-value-bind (pgcol fn) + (cast "table" name dtype ctype nullable default extra) + (list pgcol fn)) do (format t "~a: ~a~30T~a~65T~:[~;using ~a~]~%" name ctype pgtype fn fn)))) diff --git a/src/sources/mysql.lisp b/src/sources/mysql.lisp index 6b6ba24..8556bbf 100644 --- a/src/sources/mysql.lisp +++ b/src/sources/mysql.lisp @@ -7,6 +7,12 @@ (defclass copy-mysql (copy) () (:documentation "pgloader MySQL Data Source")) +(defun cast-mysql-column-definition-to-pgsql (mysql-column) + "Return the PostgreSQL column definition from the MySQL one." + (with-slots (table-name name dtype ctype default nullable extra) + mysql-column + (cast table-name name dtype ctype default nullable extra))) + (defmethod initialize-instance :after ((source copy-mysql) &key) "Add a default value for transforms in case it's not been provided." 
(let* ((source-db (slot-value source 'source-db)) @@ -37,8 +43,15 @@ (unless (slot-boundp source 'fields) (setf (slot-value source 'fields) fields)) - (unless transforms - (setf (slot-value source 'transforms) (list-transforms fields)))))) + (loop for field in fields + for (column fn) = (multiple-value-bind (column fn) + (cast-mysql-column-definition-to-pgsql field) + (list column fn)) + collect column into columns + collect fn into fns + finally (progn (setf (slot-value source 'columns) columns) + (unless transforms + (setf (slot-value source 'transforms) fns))))))) ;;; diff --git a/src/transforms.lisp b/src/transforms.lisp index a5c59b0..2ef4355 100644 --- a/src/transforms.lisp +++ b/src/transforms.lisp @@ -20,6 +20,7 @@ ip-range convert-mysql-point float-to-string + empty-string-to-null set-to-enum-array right-trim byte-vector-to-bytea)) @@ -140,6 +141,10 @@ "Transform a MySQL SET value into a PostgreSQL ENUM Array" (format nil "{~a}" set-string)) +(defun empty-string-to-null (string) + "MySQL ENUM columns sometimes return an empty string rather than NULL." + (if (string= string "") nil string)) + (defun right-trim (string) "Remove whitespaces at end of STRING." (declare (type simple-string string)) diff --git a/test/parse/hans.goeuro.load b/test/parse/hans.goeuro.load index 9b77e4f..61866d4 100644 --- a/test/parse/hans.goeuro.load +++ b/test/parse/hans.goeuro.load @@ -12,9 +12,9 @@ LOAD DATABASE -- column bools.a to boolean drop typemod using tinyint-to-boolean, -- override char(1) to varchar(1), just use char(1) here. - type char when (= precision 1) to char keep typemod + type char when (= precision 1) to char keep typemod, - -- column enumerate.foo to boolenum using empty-string-to-null + column enumerate.foo using empty-string-to-null MATERIALIZE VIEWS d as $$