From be0738d70a4c5279e0b95e8bca5f3d9f1acf1633 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Thu, 19 Sep 2013 15:02:02 +0200 Subject: [PATCH] Implement casting of MySQL ENUM types. --- mysql-cast-rules.lisp | 108 ++++++++++++++++++++++++++++++++---------- mysql.lisp | 14 +++++- 2 files changed, 96 insertions(+), 26 deletions(-) diff --git a/mysql-cast-rules.lisp b/mysql-cast-rules.lisp index f204161..3a19faa 100644 --- a/mysql-cast-rules.lisp +++ b/mysql-cast-rules.lisp @@ -4,8 +4,39 @@ (in-package :pgloader.mysql) +;;; +;;; Some functions to deal with ENUM types +;;; +(defun explode-mysql-enum (ctype) + "Convert MySQL ENUM expression into a list of labels." + ;; from: "ENUM('small', 'medium', 'large')" + ;; to: ("small" "medium" "large") + (mapcar (lambda (x) (string-trim "' )" x)) + (sq:split-sequence #\, ctype :start (position #\' ctype)))) + +(defun get-enum-type-name (table-name column-name) + "Return the Type Name we're going to use in PostgreSQL." + (format nil "~a_~a" table-name column-name)) + +(defun get-create-enum (table-name column-name ctype) + "Return a PostgreSQL CREATE ENUM TYPE statement from MySQL enum column." + (with-output-to-string (s) + (format s "CREATE TYPE ~a AS ENUM (~{'~a'~^, ~});" + (get-enum-type-name table-name column-name) + (explode-mysql-enum ctype)))) + +(defun cast-enum (table-name column-name type ctype typemod) + "Cast MySQL inline ENUM type to using a PostgreSQL named type. + + The type naming is hardcoded to be table-name_column-name" + (declare (ignore type ctype typemod)) + (format nil "~a_~a" table-name column-name)) + +;;; +;;; The default MySQL Type Casting Rules +;;; (defparameter *default-cast-rules* - '((:source (:type "int" :auto-increment t :typemod (< 10)) + `((:source (:type "int" :auto-increment t :typemod (< 10)) :target (:type "serial")) (:source (:type "int" :auto-increment t :typemod (>= 10)) @@ -28,19 +59,30 @@ (:source (:type "date" :default "0000-00-00") :target (:type "date" :drop-default t)) - (:source (:type "datetime") :target (:type "timestamptz"))) + (:source (:type "datetime") + :target (:type "timestamptz")) + + (:source (:type "enum") + :target (:type ,#'cast-enum))) "Data Type Casting rules to migrate from MySQL to PostgreSQL") (defvar *cast-rules* nil "Specific casting rules added in the command.") -(defun parse-column-typemod (column-type) - "Given int(7), returns the number 7." - (let ((splits (sq:split-sequence-if (lambda (c) (member c '(#\( #\)))) - column-type - :remove-empty-subseqs t))) - (if (= 1 (length splits)) - nil - (parse-integer (nth 1 splits))))) +;;; +;;; Handling typmod in the general case, don't apply to ENUM types +;;; +(defun parse-column-typemod (data-type column-type) + "Given int(7), returns the number 7. + + Beware that some data-type are using a typmod looking definition for + things that are not typmods at all: enum." + (unless (string= "enum" data-type) + (let ((splits (sq:split-sequence-if (lambda (c) (member c '(#\( #\)))) + column-type + :remove-empty-subseqs t))) + (if (= 1 (length splits)) + nil + (parse-integer (nth 1 splits)))))) (defun typemod-expr-matches-p (rule-typemod-expr typemod) "Check if an expression such as (< 10) matches given typemod." @@ -60,14 +102,16 @@ ((:not-null rule-source-not-null) nil n-s-p) ((:auto-increment rule-source-auto-increment) nil ai-s-p)) rule-source - (destructuring-bind (&key type + (destructuring-bind (&key table-name + column-name + type ctype typemod default not-null auto-increment) source - (declare (ignore ctype)) + (declare (ignore table-name column-name ctype)) (when (and (string= type rule-source-type) @@ -79,8 +123,11 @@ (defun format-pgsql-type (source target using) "Returns a string suitable for a PostgreSQL type definition" - (destructuring-bind (&key ((:type source-type)) - ctype + (destructuring-bind (&key ((:table-name source-table-name)) + ((:column-name source-column-name)) + ((:type source-type)) + ((:ctype source-ctype)) + ((:typemod source-typemod)) default not-null &allow-other-keys) @@ -91,7 +138,11 @@ target (format nil "~a~:[~; not null~]~:[~; default '~a'~]" - type + (typecase type + (function (funcall type + source-table-name source-column-name + source-type source-ctype source-typemod)) + (t type)) (and not-null (not drop-not-null)) (and default (not drop-default)) ;; apply the transformation function to the default value @@ -101,16 +152,20 @@ ;; ;; prefer char(24) over just char, that is the column type over the ;; data type. - ctype))) + source-ctype))) (defun apply-casting-rules (dtype ctype default nullable extra - &optional (rules (append *cast-rules* - *default-cast-rules*))) + &key + table-name column-name ; ENUM support + (rules (append *cast-rules* + *default-cast-rules*))) "Apply the given RULES to the MySQL SOURCE type definition" - (let* ((typemod (parse-column-typemod ctype)) + (let* ((typemod (parse-column-typemod dtype ctype)) (not-null (string-equal nullable "NO")) (auto-increment (string= "auto_increment" extra)) - (source (append (list :type dtype) + (source (append (list :table-name table-name) + (list :column-name column-name) + (list :type dtype) (list :ctype ctype) (when typemod (list :typemod typemod)) (list :default default) @@ -133,13 +188,15 @@ (apply-casting-rules dtype ctype default nullable extra) transform-fn)) -(defun cast (dtype ctype default nullable extra) +(defun cast (table-name column-name dtype ctype default nullable extra) "Convert a MySQL datatype to a PostgreSQL datatype. DYTPE is the MySQL data_type and CTYPE the MySQL column_type, for example that would be int and int(7) or varchar and varchar(25)." (destructuring-bind (&key pgtype &allow-other-keys) - (apply-casting-rules dtype ctype default nullable extra) + (apply-casting-rules dtype ctype default nullable extra + :table-name table-name + :column-name column-name) pgtype)) (defun transforms (columns) @@ -184,11 +241,12 @@ that would be int and int(7) or varchar and varchar(25)." ("c" "varchar" "varchar(25)" nil nil nil) ("d" "tinyint" "tinyint(4)" "0" nil nil) ("e" "datetime" "datetime" "0000-00-00 00:00:00" nil nil) - ("f" "date" "date" "0000-00-00" "NO" nil)))) + ("f" "date" "date" "0000-00-00" "NO" nil) + ("g" "enum" "ENUM('a', 'b')" nil nil nil)))) (loop for (name dtype ctype nullable default extra) in columns - for pgtype = (cast dtype ctype nullable default extra) + for pgtype = (cast "table" name dtype ctype nullable default extra) for fn in (transforms columns) do - (format t "~a~14T~a~45T~:[~;using ~a~]~%" ctype pgtype fn fn)))) + (format t "~a~18T~a~45T~:[~;using ~a~]~%" ctype pgtype fn fn)))) diff --git a/mysql.lisp b/mysql.lisp index 3b50f81..3391a07 100644 --- a/mysql.lisp +++ b/mysql.lisp @@ -72,13 +72,23 @@ order by table_name, ordinal_position" dbname))) ;; free resources (cl-mysql:disconnect))) +(defun get-create-type (table-name cols &key include-drop) + "MySQL declares ENUM types inline, PostgreSQL wants explicit CREATE TYPE." + (loop + for (name dtype ctype default nullable extra) in cols + when (string-equal "enum" dtype) + collect (when include-drop + (let ((type-name (get-enum-type-name table-name name))) + (format nil "DROP TYPE IF EXISTS ~a;" type-name))) + and collect (get-create-enum table-name name ctype))) + (defun get-create-table (table-name cols) "Return a PostgreSQL CREATE TABLE statement from MySQL columns" (with-output-to-string (s) (format s "CREATE TABLE ~a ~%(~%" table-name) (loop for ((name dtype ctype default nullable extra) . last?) on cols - for pg-coldef = (cast dtype ctype default nullable extra) + for pg-coldef = (cast table-name name dtype ctype default nullable extra) do (format s " ~a ~22t ~a~:[~;,~]~%" name pg-coldef last?)) (format s ");~%"))) @@ -90,7 +100,9 @@ order by table_name, ordinal_position" dbname))) "Return the list of CREATE TABLE statements to run against PostgreSQL" (loop for (table-name . cols) in all-columns + for extra-types = (get-create-type table-name cols :include-drop include-drop) when include-drop collect (get-drop-table-if-exists table-name) + when extra-types append extra-types collect (get-create-table table-name cols))) (defun pgsql-create-tables (dbname all-columns