diff --git a/pgloader.1 b/pgloader.1 index a5bf238..81e27b6 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -1796,6 +1796,15 @@ This option allows to control which encoding to parse the SQLite text data with\ . .IP "" 0 +. +.IP "\(bu" 4 +\fICAST\fR +. +.IP +The cast clause allows to specify custom casting rules, either to overload the default casting rules or to amend them with special cases\. +. +.IP +Please refer to the MySQL CAST clause for details\. . .IP "\(bu" 4 \fIINCLUDING ONLY TABLE NAMES MATCHING\fR @@ -1833,6 +1842,77 @@ EXCLUDING TABLE NAMES MATCHING ~ . .IP "" 0 +. +.IP "" 0 +. +.SS "DEFAULT SQLite CASTING RULES" +When migrating from SQLite the following Casting Rules are provided: +. +.P +Numbers: +. +.IP "\(bu" 4 +type tinyint to smallint +. +.IP "\(bu" 4 +type float to float using float\-to\-string +. +.IP "\(bu" 4 +type real to real using float\-to\-string +. +.IP "\(bu" 4 +type double to double precision using float\-to\-string +. +.IP "\(bu" 4 +type numeric to numeric using float\-to\-string +. +.IP "" 0 +. +.P +Texts: +. +.IP "\(bu" 4 +type character to text drop typemod +. +.IP "\(bu" 4 +type varchar to text drop typemod +. +.IP "\(bu" 4 +type nvarchar to text drop typemod +. +.IP "\(bu" 4 +type char to text drop typemod +. +.IP "\(bu" 4 +type nchar to text drop typemod +. +.IP "\(bu" 4 +type nvarchar to text drop typemod +. +.IP "\(bu" 4 +type clob to text drop typemod +. +.IP "" 0 +. +.P +Binary: +. +.IP "\(bu" 4 +type blob to bytea +. +.IP "" 0 +. +.P +Date: +. +.IP "\(bu" 4 +type datetime to timestamptz using sqlite\-timestamp\-to\-timestamp +. +.IP "\(bu" 4 +type timestamp to timestamptz using sqlite\-timestamp\-to\-timestamp +. +.IP "\(bu" 4 +type timestamptz to timestamptz using sqlite\-timestamp\-to\-timestamp . .IP "" 0 . diff --git a/pgloader.1.md b/pgloader.1.md index 7fd8dc1..629bf76 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -1438,6 +1438,13 @@ The `sqlite` command accepts the following clauses and options: This option allows to control which encoding to parse the SQLite text data with. Defaults to UTF-8. + - *CAST* + + The cast clause allows to specify custom casting rules, either to + overload the default casting rules or to amend them with special cases. + + Please refer to the MySQL CAST clause for details. + - *INCLUDING ONLY TABLE NAMES MATCHING* Introduce a comma separated list of table names or *regular expression* @@ -1455,6 +1462,40 @@ The `sqlite` command accepts the following clauses and options: EXCLUDING TABLE NAMES MATCHING ~ +### DEFAULT SQLite CASTING RULES + +When migrating from SQLite the following Casting Rules are provided: + +Numbers: + + - type tinyint to smallint + + - type float to float using float-to-string + - type real to real using float-to-string + - type double to double precision using float-to-string + - type numeric to numeric using float-to-string + +Texts: + + - type character to text drop typemod + - type varchar to text drop typemod + - type nvarchar to text drop typemod + - type char to text drop typemod + - type nchar to text drop typemod + - type nvarchar to text drop typemod + - type clob to text drop typemod + +Binary: + + - type blob to bytea + +Date: + + - type datetime to timestamptz using sqlite-timestamp-to-timestamp + - type timestamp to timestamptz using sqlite-timestamp-to-timestamp + - type timestamptz to timestamptz using sqlite-timestamp-to-timestamp + + ## TRANSFORMATION FUNCTIONS Some data types are implemented in a different enough way that a diff --git a/src/package.lisp b/src/package.lisp index 01a2ea8..f3e461e 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -2,6 +2,11 @@ ;;; ;;; To avoid circular files dependencies, define all the packages here ;;; +(defpackage #:pgloader.transforms + (:use #:cl) + (:export #:precision + #:scale + #:intern-symbol)) (defpackage #:pgloader.logs (:use #:cl #:pgloader.params) @@ -55,6 +60,7 @@ #:report-full-summary #:with-stats-collection #:camelCase-to-colname + #:unquote #:make-kernel #:list-encodings-and-aliases #:show-encodings @@ -102,6 +108,7 @@ (defpackage #:pgloader.sources (:use #:cl #:pgloader.params #:pgloader.utils) + (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.parse-date #:parse-date-string #:parse-date-format) @@ -118,12 +125,19 @@ #:copy-to-queue #:copy-to #:copy-database + + ;; file based utils for CSV, fixed etc #:filter-column-list #:with-open-file-or-stream #:get-pathname #:get-absolute-pathname #:project-fields - #:reformat-then-process)) + #:reformat-then-process + + ;; database cast machinery + #:*default-cast-rules* + #:*cast-rules* + #:cast)) (defpackage #:pgloader.queue (:use #:cl #:pgloader.params) @@ -157,17 +171,6 @@ (:use #:cl) (:export #:read-queries)) -(defpackage #:pgloader.parser - (:use #:cl #:esrap #:metabang.bind - #:pgloader.params #:pgloader.utils #:pgloader.sql) - (:import-from #:alexandria #:read-file-into-string) - (:import-from #:pgloader.pgsql - #:with-pgsql-transaction - #:pgsql-execute) - (:export #:parse-commands - #:run-commands - #:with-database-uri)) - ;; ;; Specific source handling @@ -232,6 +235,7 @@ (:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.sources #:pgloader.queue) + (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.pgsql #:with-pgsql-transaction #:pgsql-execute @@ -251,8 +255,7 @@ #:create-indexes-in-kernel #:format-vector-row) (:export #:copy-mysql - #:*cast-rules* - #:*default-cast-rules* + #:*mysql-default-cast-rules* #:with-mysql-connection #:map-rows #:copy-to @@ -267,6 +270,7 @@ (:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.sources #:pgloader.queue) + (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.pgsql #:with-pgsql-transaction #:pgsql-execute @@ -280,6 +284,7 @@ #:format-pgsql-create-index #:create-indexes-in-kernel) (:export #:copy-sqlite + #:*sqlite-default-cast-rules* #:map-rows #:copy-to #:copy-from @@ -307,6 +312,26 @@ #:expand-archive #:get-matching-filenames)) + +;;; +;;; The Command Parser +;;; +(defpackage #:pgloader.parser + (:use #:cl #:esrap #:metabang.bind + #:pgloader.params #:pgloader.utils #:pgloader.sql) + (:import-from #:alexandria #:read-file-into-string) + (:import-from #:pgloader.pgsql + #:with-pgsql-transaction + #:pgsql-execute) + (:import-from #:pgloader.sources + #:*default-cast-rules* + #:*cast-rules*) + (:import-from #:pgloader.mysql #:*mysql-default-cast-rules*) + (:import-from #:pgloader.sqlite #:*sqlite-default-cast-rules*) + (:export #:parse-commands + #:run-commands + #:with-database-uri)) + ;; diff --git a/src/parsers/parser.lisp b/src/parsers/parser.lisp index 762d885..41a8438 100644 --- a/src/parsers/parser.lisp +++ b/src/parsers/parser.lisp @@ -1004,7 +1004,8 @@ (*state* (or *state* (pgloader.utils:make-pgstate))) (state-idx (pgloader.utils:make-pgstate)) (state-after (pgloader.utils:make-pgstate)) - (pgloader.mysql:*cast-rules* ',casts) + (*default-cast-rules* ',*mysql-default-cast-rules*) + (*cast-rules* ',casts) ,@(mysql-connection-bindings my-db-uri) ,@(pgsql-connection-bindings pg-db-uri gucs) ,@(batch-control-bindings options) @@ -1106,6 +1107,7 @@ load database (defrule load-sqlite-optional-clauses (* (or sqlite-options gucs + casts including excluding)) (:lambda (clauses-list) @@ -1122,6 +1124,7 @@ load database (bind (((sqlite-uri pg-db-uri &key gucs + casts ((:sqlite-options options)) ((:including incl)) ((:excluding excl))) source) @@ -1129,6 +1132,8 @@ load database `(lambda () (let* ((state-before (pgloader.utils:make-pgstate)) (*state* (pgloader.utils:make-pgstate)) + (*default-cast-rules* ',*sqlite-default-cast-rules*) + (*cast-rules* ',casts) ,@(pgsql-connection-bindings pg-db-uri gucs) ,@(batch-control-bindings options) (db diff --git a/src/sources.lisp b/src/sources.lisp index a8aa8b1..d6b19e7 100644 --- a/src/sources.lisp +++ b/src/sources.lisp @@ -345,3 +345,185 @@ e))))) (when projected-vector (funcall process-row-fn projected-vector))))))) + + +;;; +;;; Type casting machinery, to share among all database kind sources. +;;; + +;; +;; The special variables *default-cast-rules* and *cast-rules* must be bound +;; by specific database commands with proper values at run-time. +;; +(defvar *default-cast-rules* nil "Default casting rules.") +(defvar *cast-rules* nil "Specific casting rules added in the command.") + +;;; +;;; Handling typmod in the general case, don't apply to ENUM types +;;; +(defun parse-column-typemod (data-type column-type) + "Given int(7), returns the number 7. + + Beware that some data-type are using a typmod looking definition for + things that are not typmods at all: enum." + (unless (or (string= "enum" data-type) + (string= "set" data-type)) + (let ((start-1 (position #\( column-type)) ; just before start position + (end (position #\) column-type))) ; just before end position + (when start-1 + (destructuring-bind (a &optional b) + (mapcar #'parse-integer + (sq:split-sequence #\, column-type + :start (+ 1 start-1) :end end)) + (cons a b)))))) + +(defun typemod-expr-to-function (expr) + "Transform given EXPR into a callable function object." + `(lambda (typemod) + (destructuring-bind (precision &optional (scale 0)) typemod + (declare (ignorable precision scale)) + ,expr))) + +(defun typemod-expr-matches-p (rule-typemod-expr typemod) + "Check if an expression such as (< 10) matches given typemod." + (funcall (compile nil (typemod-expr-to-function rule-typemod-expr)) typemod)) + +(defun cast-rule-matches (rule source) + "Returns the target datatype if the RULE matches the SOURCE, or nil" + (destructuring-bind (&key ((:source rule-source)) + ((:target rule-target)) + using) + rule + (destructuring-bind + ;; it's either :type or :column, just cope with both thanks to + ;; &allow-other-keys + (&key ((:type rule-source-type) nil t-s-p) + ((:column rule-source-column) nil c-s-p) + ((:typemod typemod-expr) nil tm-s-p) + ((:default rule-source-default) nil d-s-p) + ((:not-null rule-source-not-null) nil n-s-p) + ((:auto-increment rule-source-auto-increment) nil ai-s-p) + &allow-other-keys) + rule-source + (destructuring-bind (&key table-name + column-name + type + ctype + typemod + default + not-null + auto-increment) + source + (declare (ignore ctype)) + (when + (and + (or (and t-s-p (string= type rule-source-type)) + (and c-s-p + (string-equal table-name (car rule-source-column)) + (string-equal column-name (cdr rule-source-column)))) + (or (null tm-s-p) (typemod-expr-matches-p typemod-expr typemod)) + (or (null d-s-p) (string= default rule-source-default)) + (or (null n-s-p) (eq not-null rule-source-not-null)) + (or (null ai-s-p) (eq auto-increment rule-source-auto-increment))) + (list :using using :target rule-target)))))) + +(defun format-pgsql-default-value (default &optional using-cast-fn) + "Returns suitably quoted default value for CREATE TABLE command." + (cond + ((null default) "NULL") + ((string= "NULL" default) default) + ((string= "CURRENT_TIMESTAMP" default) default) + (t + ;; apply the transformation function to the default value + (if using-cast-fn (format-pgsql-default-value + (funcall using-cast-fn default)) + (format nil "'~a'" default))))) + +(defun format-pgsql-type (source target using) + "Returns a string suitable for a PostgreSQL type definition" + (destructuring-bind (&key ((:table-name source-table-name)) + ((:column-name source-column-name)) + ((:type source-type)) + ((:ctype source-ctype)) + ((:typemod source-typemod)) + ((:default source-default)) + ((:not-null source-not-null)) + &allow-other-keys) + source + (if target + (destructuring-bind (&key type + drop-default + drop-not-null + (drop-typemod t) + &allow-other-keys) + target + (let ((type-name + (typecase type + (function (funcall type + source-table-name source-column-name + source-type source-ctype source-typemod)) + (t type))) + (pg-typemod + (when source-typemod + (destructuring-bind (a . b) source-typemod + (format nil "(~a~:[~*~;,~a~])" a b b))))) + (format nil + "~a~:[~*~;~a~]~:[~; not null~]~:[~; default ~a~]" + type-name + (and source-typemod (not drop-typemod)) + pg-typemod + (and source-not-null (not drop-not-null)) + (and source-default (not drop-default)) + (format-pgsql-default-value source-default using)))) + + ;; NO MATCH + ;; + ;; prefer char(24) over just char, that is the column type over the + ;; data type. + (format nil "~a~:[~; not null~]~:[~; default ~a~]" + source-ctype + source-not-null + source-default + (format-pgsql-default-value source-default using))))) + +(defun apply-casting-rules (dtype ctype default nullable extra + &key + table-name column-name ; ENUM support + (rules (append *cast-rules* + *default-cast-rules*))) + "Apply the given RULES to the MySQL SOURCE type definition" + (let* ((typemod (parse-column-typemod dtype ctype)) + (not-null (string-equal nullable "NO")) + (auto-increment (string= "auto_increment" extra)) + (source `(:table-name ,table-name + :column-name ,column-name + :type ,dtype + :ctype ,ctype + ,@(when typemod (list :typemod typemod)) + :default ,default + :not-null ,not-null + :auto-increment ,auto-increment))) + (let (first-match-using) + (loop + for rule in rules + for (target using) = (destructuring-bind (&key target using) + (cast-rule-matches rule source) + (list target using)) + do (when (and (null target) using (null first-match-using)) + (setf first-match-using using)) + until target + finally + (return + (list :transform-fn (or first-match-using using) + :pgtype (format-pgsql-type source target using))))))) + +(defun cast (table-name column-name dtype ctype default nullable extra) + "Convert a MySQL datatype to a PostgreSQL datatype. + +DYTPE is the MySQL data_type and CTYPE the MySQL column_type, for example +that would be int and int(7) or varchar and varchar(25)." + (destructuring-bind (&key pgtype transform-fn &allow-other-keys) + (apply-casting-rules dtype ctype default nullable extra + :table-name table-name + :column-name column-name) + (values pgtype transform-fn))) diff --git a/src/sources/ixf.lisp b/src/sources/ixf.lisp index c0daa0d..f608156 100644 --- a/src/sources/ixf.lisp +++ b/src/sources/ixf.lisp @@ -20,7 +20,7 @@ (#. ixf:+char+ . "text") (#. ixf:+varchar+ . "text"))) -(defun cast (ixf-type) +(defun cast-ixf-type (ixf-type) "Return the PostgreSQL type name for a given IXF type name." (cdr (assoc ixf-type *ixf-pgsql-type-mapping*))) @@ -31,7 +31,7 @@ (type-definition (format nil "~a~:[ not null~;~]~:[~*~; default ~a~]" - (cast (ixf:ixf-column-type col)) + (cast-ixf-type (ixf:ixf-column-type col)) (ixf:ixf-column-nullable col) (ixf:ixf-column-has-default col) (ixf:ixf-column-default col)))) @@ -71,7 +71,7 @@ (setf (slot-value source 'transforms) (loop :for field :in fields :collect - (let ((coltype (cast (ixf:ixf-column-type field)))) + (let ((coltype (cast-ixf-type (ixf:ixf-column-type field)))) ;; ;; The IXF driver we use maps the data type and gets ;; back proper CL typed objects, where we only want to diff --git a/src/sources/mysql/mysql-cast-rules.lisp b/src/sources/mysql/mysql-cast-rules.lisp index bf90820..e60ed99 100644 --- a/src/sources/mysql/mysql-cast-rules.lisp +++ b/src/sources/mysql/mysql-cast-rules.lisp @@ -5,7 +5,7 @@ (in-package :pgloader.mysql) ;;; -;;; Some functions to deal with ENUM types +;;; Some functions to deal with ENUM and SET types ;;; (defun explode-mysql-enum (ctype) "Convert MySQL ENUM expression into a list of labels." @@ -44,7 +44,7 @@ ;;; ;;; The default MySQL Type Casting Rules ;;; -(defparameter *default-cast-rules* +(defparameter *mysql-default-cast-rules* `((:source (:type "int" :auto-increment t :typemod (< precision 10)) :target (:type "serial")) @@ -155,186 +155,12 @@ :using pgloader.transforms::convert-mysql-point)) "Data Type Casting rules to migrate from MySQL to PostgreSQL") -(defvar *cast-rules* nil "Specific casting rules added in the command.") - + ;;; -;;; Handling typmod in the general case, don't apply to ENUM types +;;; MySQL specific testing. +;;; +;;; TODO: move that to general testing. ;;; -(defun parse-column-typemod (data-type column-type) - "Given int(7), returns the number 7. - - Beware that some data-type are using a typmod looking definition for - things that are not typmods at all: enum." - (unless (or (string= "enum" data-type) - (string= "set" data-type)) - (let ((start-1 (position #\( column-type)) ; just before start position - (end (position #\) column-type))) ; just before end position - (when start-1 - (destructuring-bind (a &optional b) - (mapcar #'parse-integer - (sq:split-sequence #\, column-type - :start (+ 1 start-1) :end end)) - (cons a b)))))) - -(defun typemod-expr-to-function (expr) - "Transform given EXPR into a callable function object." - `(lambda (typemod) - (destructuring-bind (precision &optional (scale 0)) typemod - (declare (ignorable precision scale)) - ;; - ;; The command parser interns symbols into the pgloader.transforms - ;; package, whereas the default casting rules are defined in the - ;; pgloader.mysql package. Have a compatibility layer here for the - ;; generated code. - ;; - (let ((pgloader.transforms::precision precision)) - (declare (ignorable pgloader.transforms::precision)) - ,expr)))) - -(defun typemod-expr-matches-p (rule-typemod-expr typemod) - "Check if an expression such as (< 10) matches given typemod." - (funcall (compile nil (typemod-expr-to-function rule-typemod-expr)) typemod)) - -(defun cast-rule-matches (rule source) - "Returns the target datatype if the RULE matches the SOURCE, or nil" - (destructuring-bind (&key ((:source rule-source)) - ((:target rule-target)) - using) - rule - (destructuring-bind - ;; it's either :type or :column, just cope with both thanks to - ;; &allow-other-keys - (&key ((:type rule-source-type) nil t-s-p) - ((:column rule-source-column) nil c-s-p) - ((:typemod typemod-expr) nil tm-s-p) - ((:default rule-source-default) nil d-s-p) - ((:not-null rule-source-not-null) nil n-s-p) - ((:auto-increment rule-source-auto-increment) nil ai-s-p) - &allow-other-keys) - rule-source - (destructuring-bind (&key table-name - column-name - type - ctype - typemod - default - not-null - auto-increment) - source - (declare (ignore ctype)) - (when - (and - (or (and t-s-p (string= type rule-source-type)) - (and c-s-p - (string-equal table-name (car rule-source-column)) - (string-equal column-name (cdr rule-source-column)))) - (or (null tm-s-p) (typemod-expr-matches-p typemod-expr typemod)) - (or (null d-s-p) (string= default rule-source-default)) - (or (null n-s-p) (eq not-null rule-source-not-null)) - (or (null ai-s-p) (eq auto-increment rule-source-auto-increment))) - (list :using using :target rule-target)))))) - -(defun format-pgsql-default-value (default &optional using-cast-fn) - "Returns suitably quoted default value for CREATE TABLE command." - (cond - ((null default) "NULL") - ((string= "NULL" default) default) - ((string= "CURRENT_TIMESTAMP" default) default) - (t - ;; apply the transformation function to the default value - (if using-cast-fn (format-pgsql-default-value - (funcall using-cast-fn default)) - (format nil "'~a'" default))))) - -(defun format-pgsql-type (source target using) - "Returns a string suitable for a PostgreSQL type definition" - (destructuring-bind (&key ((:table-name source-table-name)) - ((:column-name source-column-name)) - ((:type source-type)) - ((:ctype source-ctype)) - ((:typemod source-typemod)) - ((:default source-default)) - ((:not-null source-not-null)) - &allow-other-keys) - source - (if target - (destructuring-bind (&key type - drop-default - drop-not-null - (drop-typemod t) - &allow-other-keys) - target - (let ((type-name - (typecase type - (function (funcall type - source-table-name source-column-name - source-type source-ctype source-typemod)) - (t type))) - (pg-typemod - (when source-typemod - (destructuring-bind (a . b) source-typemod - (format nil "(~a~:[~*~;,~a~])" a b b))))) - (format nil - "~a~:[~*~;~a~]~:[~; not null~]~:[~; default ~a~]" - type-name - (and source-typemod (not drop-typemod)) - pg-typemod - (and source-not-null (not drop-not-null)) - (and source-default (not drop-default)) - (format-pgsql-default-value source-default using)))) - - ;; NO MATCH - ;; - ;; prefer char(24) over just char, that is the column type over the - ;; data type. - (format nil "~a~:[~; not null~]~:[~; default ~a~]" - source-ctype - source-not-null - source-default - (format-pgsql-default-value source-default using))))) - -(defun apply-casting-rules (dtype ctype default nullable extra - &key - table-name column-name ; ENUM support - (rules (append *cast-rules* - *default-cast-rules*))) - "Apply the given RULES to the MySQL SOURCE type definition" - (let* ((typemod (parse-column-typemod dtype ctype)) - (not-null (string-equal nullable "NO")) - (auto-increment (string= "auto_increment" extra)) - (source `(:table-name ,table-name - :column-name ,column-name - :type ,dtype - :ctype ,ctype - ,@(when typemod (list :typemod typemod)) - :default ,default - :not-null ,not-null - :auto-increment ,auto-increment))) - (let (first-match-using) - (loop - for rule in rules - for (target using) = (destructuring-bind (&key target using) - (cast-rule-matches rule source) - (list target using)) - do (when (and (null target) using (null first-match-using)) - (setf first-match-using using)) - until target - finally - (return - (list :transform-fn (or first-match-using using) - :pgtype (format-pgsql-type source target using))))))) - -(defun cast (table-name column-name dtype ctype default nullable extra) - "Convert a MySQL datatype to a PostgreSQL datatype. - -DYTPE is the MySQL data_type and CTYPE the MySQL column_type, for example -that would be int and int(7) or varchar and varchar(25)." - (destructuring-bind (&key pgtype transform-fn &allow-other-keys) - (apply-casting-rules dtype ctype default nullable extra - :table-name table-name - :column-name column-name) - (values pgtype transform-fn))) - (defun test-casts () "Just test some cases for the casts" (let ((*cast-rules* diff --git a/src/sources/sqlite.lisp b/src/sources/sqlite.lisp index e4f678d..e1f2ac6 100644 --- a/src/sources/sqlite.lisp +++ b/src/sources/sqlite.lisp @@ -7,83 +7,93 @@ (defvar *sqlite-db* nil "The SQLite database connection handler.") -;; -;; The SQLite drive we use maps the CFFI data type mapping functions and -;; gets back proper CL typed objects, where we only want to deal with text. -;; -(defvar *sqlite-to-pgsql* - '(("float" . pgloader.transforms::float-to-string) - ("real" . pgloader.transforms::float-to-string) - ("double precision" . pgloader.transforms::float-to-string) - ("numeric" . pgloader.transforms::float-to-string) - ("text" . nil) - ("bytea" . pgloader.transforms::byte-vector-to-bytea) - ("timestamp" . pgloader.transforms::sqlite-timestamp-to-timestamp) - ("timestamptz" . pgloader.transforms::sqlite-timestamp-to-timestamp)) - "Transformation functions to use when migrating from SQLite to PostgreSQL.") +(defparameter *sqlite-default-cast-rules* + `((:source (:type "character") :target (:type "text" :drop-typemod t)) + (:source (:type "varchar") :target (:type "text" :drop-typemod t)) + (:source (:type "nvarchar") :target (:type "text" :drop-typemod t)) + (:source (:type "char") :target (:type "text" :drop-typemod t)) + (:source (:type "nchar") :target (:type "text" :drop-typemod t)) + (:source (:type "clob") :target (:type "text" :drop-typemod t)) + + (:source (:type "tinyint") :target (:type "smallint")) + + (:source (:type "float") :target (:type "float") + :using pgloader.transforms::float-to-string) + + (:source (:type "real") :target (:type "real") + :using pgloader.transforms::float-to-string) + + (:source (:type "double") :target (:type "double precision") + :using pgloader.transforms::float-to-string) + + (:source (:type "numeric") :target (:type "numeric") + :using pgloader.transforms::float-to-string) + + (:source (:type "blob") :target (:type "bytea") + :using pgloader.transforms::byte-vector-to-bytea) + + (:source (:type "datetime") :target (:type "timestamptz") + :using pgloader.transforms::sqlite-timestamp-to-timestamp) + + (:source (:type "timestamp") :target (:type "timestamp") + :using pgloader.transforms::sqlite-timestamp-to-timestamp) + + (:source (:type "timestamptz") :target (:type "timestamptz") + :using pgloader.transforms::sqlite-timestamp-to-timestamp)) + "Data Type Casting to migrate from SQLite to PostgreSQL") ;;; ;;; SQLite tools connecting to a database ;;; (defstruct (coldef - (:constructor make-coldef (seq name type nullable default pk-id))) - seq name type nullable default pk-id) + (:constructor make-coldef (table-name + seq name dtype ctype + nullable default pk-id))) + table-name seq name dtype ctype nullable default pk-id) -(defun cast (sqlite-type-name) - "Return the PostgreSQL type name for a given SQLite type name." - (let* ((tokens (remove-if (lambda (token) - (member token '("unsigned" "short") +(defun normalize (sqlite-type-name) + "SQLite only has a notion of what MySQL calls column_type, or ctype in the + CAST machinery. Transform it to the data_type, or dtype." + (let* ((sqlite-type-name (string-downcase sqlite-type-name)) + (tokens (remove-if (lambda (token) + (member token '("unsigned" "short" + "varying" "native") :test #'string-equal)) - (sq:split-sequence #\Space sqlite-type-name))) - (sqlite-type-name (first tokens))) + (sq:split-sequence #\Space sqlite-type-name)))) (assert (= 1 (length tokens))) - (cond ((and (<= 8 (length sqlite-type-name)) - (string-equal sqlite-type-name "nvarchar" :end1 8)) "text") + (first tokens))) - ((string-equal sqlite-type-name "tinyint") "smallint") - ((string-equal sqlite-type-name "datetime") "timestamptz") - ((string-equal sqlite-type-name "double") "double precision") - ((string-equal sqlite-type-name "blob") "bytea") - ((string-equal sqlite-type-name "clob") "text") +(defun ctype-to-dtype (sqlite-type-name) + "In SQLite we only get the ctype, e.g. int(7), but here we want the base + data type behind it, e.g. int." + (let* ((ctype (normalize sqlite-type-name)) + (paren-pos (position #\( ctype))) + (if paren-pos (subseq ctype 0 paren-pos) ctype))) - (t sqlite-type-name)))) +(defun cast-sqlite-column-definition-to-pgsql (sqlite-column) + "Return the PostgreSQL column definition from the MySQL one." + (multiple-value-bind (column fn) + (with-slots (table-name name dtype ctype default nullable) + sqlite-column + (cast table-name name dtype ctype default nullable nil)) + ;; the SQLite driver smartly maps data to the proper CL type, but the + ;; pgloader API only wants to see text representations to send down the + ;; COPY protocol. + (values column (or fn (lambda (val) (if val (format nil "~a" val) :null)))))) -(defun transformation-function (pgsql-type-name) - "Return the transformation function to use to switch a SQLite value to a - PostgreSQL value of type PGSQL-TYPE-NAME." - (let* ((type-name - (cond ((and (<= 7 (length pgsql-type-name)) - (string-equal "numeric" pgsql-type-name :end2 7)) - "numeric") - (t pgsql-type-name))) - (transform (assoc type-name *sqlite-to-pgsql* :test #'string=))) - (if transform - (cdr transform) - (compile nil (lambda (c) (when c (format nil "~a" c))))))) - -(defun format-pgsql-default-value (col) - "Return the PostgreSQL representation for the default value of COL." - (declare (type coldef col)) - (let ((default (coldef-default col))) - (cond - ((null default) "NULL") - ((string= "NULL" default) default) - ((string= "CURRENT_TIMESTAMP" default) default) - (t - ;; apply the transformation function to the default value - (let ((fn (transformation-function (cast (coldef-type col))))) - (if fn (funcall fn default) (format nil "'~a'" default))))))) +(defmethod cast-to-bytea-p ((col coldef)) + "Returns a generalized boolean, non-nil when the column is casted to a + PostgreSQL bytea column." + (string= "bytea" (cast-sqlite-column-definition-to-pgsql col))) (defmethod format-pgsql-column ((col coldef) &key identifier-case) "Return a string representing the PostgreSQL column definition." (let* ((column-name (apply-identifier-case (coldef-name col) identifier-case)) (type-definition - (format nil - "~a~:[~; not null~]~@[ default ~a~]" - (cast (coldef-type col)) - (coldef-nullable col) - (format-pgsql-default-value col)))) + (with-slots (table-name name dtype ctype nullable default) + col + (cast table-name name dtype ctype default nullable nil)))) (format nil "~a ~22t ~a" column-name type-definition))) (defun list-tables (&optional (db *sqlite-db*)) @@ -99,7 +109,14 @@ (let ((sql (format nil "PRAGMA table_info(~a)" table-name))) (loop for (seq name type nullable default pk-id) in (sqlite:execute-to-list db sql) - collect (make-coldef seq name type (= 1 nullable) default pk-id)))) + collect (make-coldef table-name + seq + name + (ctype-to-dtype (normalize type)) + (normalize type) + (= 1 nullable) + (unquote default) + pk-id)))) (defun list-all-columns (&optional (db *sqlite-db*)) "Get the list of SQLite column definitions per table." @@ -168,12 +185,15 @@ (unless (slot-boundp source 'fields) (setf (slot-value source 'fields) fields)) - (unless transforms - (setf (slot-value source 'transforms) - (loop for field in fields - collect - (let ((coltype (cast (coldef-type field)))) - (transformation-function coltype)))))))) + (loop for field in fields + for (column fn) = (multiple-value-bind (column fn) + (cast-sqlite-column-definition-to-pgsql field) + (list column fn)) + collect column into columns + collect fn into fns + finally (progn (setf (slot-value source 'columns) columns) + (unless transforms + (setf (slot-value source 'transforms) fns))))))) ;;; Map a function to each row extracted from SQLite ;;; @@ -182,10 +202,7 @@ argument (a list of column values) for each row" (let ((sql (format nil "SELECT * FROM ~a" (source sqlite))) (blobs-p - (coerce (mapcar (lambda (field) - (string-equal "bytea" (cast (coldef-type field)))) - (fields sqlite)) - 'vector))) + (coerce (mapcar #'cast-to-bytea-p (fields sqlite)) 'vector))) (handler-case (loop with statement = (sqlite:prepare-statement (db sqlite) sql) diff --git a/src/utils/transforms.lisp b/src/utils/transforms.lisp index 51a0e77..ee3be9d 100644 --- a/src/utils/transforms.lisp +++ b/src/utils/transforms.lisp @@ -5,10 +5,6 @@ ;;; up in the pgloader.transforms package, when using the default USING ;;; syntax for transformations. -(defpackage #:pgloader.transforms - (:use #:cl) - (:export #:intern-symbol)) - (in-package :pgloader.transforms) (declaim (inline intern-symbol diff --git a/src/utils/utils.lisp b/src/utils/utils.lisp index 45904b5..46e9a24 100644 --- a/src/utils/utils.lisp +++ b/src/utils/utils.lisp @@ -61,3 +61,15 @@ when (and new-word (not (char= char #\_))) collect #\_ collect (char-downcase char)) 'string)) + +;;; +;;; Unquote SQLite default values, might be useful elsewhere +;;; +(defun unquote (string &optional (quote #\')) + "Given '0', returns 0." + (declare (type (or null simple-string) string)) + (when string + (let ((l (length string))) + (if (char= quote (aref string 0) (aref string (1- l))) + (subseq string 1 (1- l)) + string)))) diff --git a/test/sqlite.load b/test/sqlite.load index c7ffe04..5ff3504 100644 --- a/test/sqlite.load +++ b/test/sqlite.load @@ -4,4 +4,6 @@ load database -- with include drop, create tables, create indexes, reset sequences + cast column character.f1 to text drop typemod + set work_mem to '16MB', maintenance_work_mem to '512 MB'; \ No newline at end of file diff --git a/test/sqlite/sqlite.db b/test/sqlite/sqlite.db index f1f49f0..e33bd3c 100644 Binary files a/test/sqlite/sqlite.db and b/test/sqlite/sqlite.db differ