From 22f4317a30a981aae403578341893fe4106edb4e Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Mon, 13 Oct 2014 00:52:55 +0200 Subject: [PATCH] Add support for the CAST rule to SQLite sources. This allows users to benefit from the same flexible machinery when using SQLite as when using MySQL, and also allows to add some more default cast rules too. --- pgloader.1 | 80 ++++++++++ pgloader.1.md | 41 ++++++ src/package.lisp | 53 +++++-- src/parsers/parser.lisp | 7 +- src/sources.lisp | 182 +++++++++++++++++++++++ src/sources/ixf.lisp | 6 +- src/sources/mysql/mysql-cast-rules.lisp | 186 +----------------------- src/sources/sqlite.lisp | 159 +++++++++++--------- src/utils/transforms.lisp | 4 - src/utils/utils.lisp | 12 ++ test/sqlite.load | 2 + test/sqlite/sqlite.db | Bin 65536 -> 69632 bytes 12 files changed, 459 insertions(+), 273 deletions(-) diff --git a/pgloader.1 b/pgloader.1 index a5bf238..81e27b6 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -1796,6 +1796,15 @@ This option allows to control which encoding to parse the SQLite text data with\ . .IP "" 0 +. +.IP "\(bu" 4 +\fICAST\fR +. +.IP +The cast clause allows to specify custom casting rules, either to overload the default casting rules or to amend them with special cases\. +. +.IP +Please refer to the MySQL CAST clause for details\. . .IP "\(bu" 4 \fIINCLUDING ONLY TABLE NAMES MATCHING\fR @@ -1833,6 +1842,77 @@ EXCLUDING TABLE NAMES MATCHING ~ . .IP "" 0 +. +.IP "" 0 +. +.SS "DEFAULT SQLite CASTING RULES" +When migrating from SQLite the following Casting Rules are provided: +. +.P +Numbers: +. +.IP "\(bu" 4 +type tinyint to smallint +. +.IP "\(bu" 4 +type float to float using float\-to\-string +. +.IP "\(bu" 4 +type real to real using float\-to\-string +. +.IP "\(bu" 4 +type double to double precision using float\-to\-string +. +.IP "\(bu" 4 +type numeric to numeric using float\-to\-string +. +.IP "" 0 +. +.P +Texts: +. +.IP "\(bu" 4 +type character to text drop typemod +. +.IP "\(bu" 4 +type varchar to text drop typemod +. +.IP "\(bu" 4 +type nvarchar to text drop typemod +. +.IP "\(bu" 4 +type char to text drop typemod +. +.IP "\(bu" 4 +type nchar to text drop typemod +. +.IP "\(bu" 4 +type nvarchar to text drop typemod +. +.IP "\(bu" 4 +type clob to text drop typemod +. +.IP "" 0 +. +.P +Binary: +. +.IP "\(bu" 4 +type blob to bytea +. +.IP "" 0 +. +.P +Date: +. +.IP "\(bu" 4 +type datetime to timestamptz using sqlite\-timestamp\-to\-timestamp +. +.IP "\(bu" 4 +type timestamp to timestamptz using sqlite\-timestamp\-to\-timestamp +. +.IP "\(bu" 4 +type timestamptz to timestamptz using sqlite\-timestamp\-to\-timestamp . .IP "" 0 . diff --git a/pgloader.1.md b/pgloader.1.md index 7fd8dc1..629bf76 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -1438,6 +1438,13 @@ The `sqlite` command accepts the following clauses and options: This option allows to control which encoding to parse the SQLite text data with. Defaults to UTF-8. + - *CAST* + + The cast clause allows to specify custom casting rules, either to + overload the default casting rules or to amend them with special cases. + + Please refer to the MySQL CAST clause for details. + - *INCLUDING ONLY TABLE NAMES MATCHING* Introduce a comma separated list of table names or *regular expression* @@ -1455,6 +1462,40 @@ The `sqlite` command accepts the following clauses and options: EXCLUDING TABLE NAMES MATCHING ~ +### DEFAULT SQLite CASTING RULES + +When migrating from SQLite the following Casting Rules are provided: + +Numbers: + + - type tinyint to smallint + + - type float to float using float-to-string + - type real to real using float-to-string + - type double to double precision using float-to-string + - type numeric to numeric using float-to-string + +Texts: + + - type character to text drop typemod + - type varchar to text drop typemod + - type nvarchar to text drop typemod + - type char to text drop typemod + - type nchar to text drop typemod + - type nvarchar to text drop typemod + - type clob to text drop typemod + +Binary: + + - type blob to bytea + +Date: + + - type datetime to timestamptz using sqlite-timestamp-to-timestamp + - type timestamp to timestamptz using sqlite-timestamp-to-timestamp + - type timestamptz to timestamptz using sqlite-timestamp-to-timestamp + + ## TRANSFORMATION FUNCTIONS Some data types are implemented in a different enough way that a diff --git a/src/package.lisp b/src/package.lisp index 01a2ea8..f3e461e 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -2,6 +2,11 @@ ;;; ;;; To avoid circular files dependencies, define all the packages here ;;; +(defpackage #:pgloader.transforms + (:use #:cl) + (:export #:precision + #:scale + #:intern-symbol)) (defpackage #:pgloader.logs (:use #:cl #:pgloader.params) @@ -55,6 +60,7 @@ #:report-full-summary #:with-stats-collection #:camelCase-to-colname + #:unquote #:make-kernel #:list-encodings-and-aliases #:show-encodings @@ -102,6 +108,7 @@ (defpackage #:pgloader.sources (:use #:cl #:pgloader.params #:pgloader.utils) + (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.parse-date #:parse-date-string #:parse-date-format) @@ -118,12 +125,19 @@ #:copy-to-queue #:copy-to #:copy-database + + ;; file based utils for CSV, fixed etc #:filter-column-list #:with-open-file-or-stream #:get-pathname #:get-absolute-pathname #:project-fields - #:reformat-then-process)) + #:reformat-then-process + + ;; database cast machinery + #:*default-cast-rules* + #:*cast-rules* + #:cast)) (defpackage #:pgloader.queue (:use #:cl #:pgloader.params) @@ -157,17 +171,6 @@ (:use #:cl) (:export #:read-queries)) -(defpackage #:pgloader.parser - (:use #:cl #:esrap #:metabang.bind - #:pgloader.params #:pgloader.utils #:pgloader.sql) - (:import-from #:alexandria #:read-file-into-string) - (:import-from #:pgloader.pgsql - #:with-pgsql-transaction - #:pgsql-execute) - (:export #:parse-commands - #:run-commands - #:with-database-uri)) - ;; ;; Specific source handling @@ -232,6 +235,7 @@ (:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.sources #:pgloader.queue) + (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.pgsql #:with-pgsql-transaction #:pgsql-execute @@ -251,8 +255,7 @@ #:create-indexes-in-kernel #:format-vector-row) (:export #:copy-mysql - #:*cast-rules* - #:*default-cast-rules* + #:*mysql-default-cast-rules* #:with-mysql-connection #:map-rows #:copy-to @@ -267,6 +270,7 @@ (:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.sources #:pgloader.queue) + (:import-from #:pgloader.transforms #:precision #:scale) (:import-from #:pgloader.pgsql #:with-pgsql-transaction #:pgsql-execute @@ -280,6 +284,7 @@ #:format-pgsql-create-index #:create-indexes-in-kernel) (:export #:copy-sqlite + #:*sqlite-default-cast-rules* #:map-rows #:copy-to #:copy-from @@ -307,6 +312,26 @@ #:expand-archive #:get-matching-filenames)) + +;;; +;;; The Command Parser +;;; +(defpackage #:pgloader.parser + (:use #:cl #:esrap #:metabang.bind + #:pgloader.params #:pgloader.utils #:pgloader.sql) + (:import-from #:alexandria #:read-file-into-string) + (:import-from #:pgloader.pgsql + #:with-pgsql-transaction + #:pgsql-execute) + (:import-from #:pgloader.sources + #:*default-cast-rules* + #:*cast-rules*) + (:import-from #:pgloader.mysql #:*mysql-default-cast-rules*) + (:import-from #:pgloader.sqlite #:*sqlite-default-cast-rules*) + (:export #:parse-commands + #:run-commands + #:with-database-uri)) + ;; diff --git a/src/parsers/parser.lisp b/src/parsers/parser.lisp index 762d885..41a8438 100644 --- a/src/parsers/parser.lisp +++ b/src/parsers/parser.lisp @@ -1004,7 +1004,8 @@ (*state* (or *state* (pgloader.utils:make-pgstate))) (state-idx (pgloader.utils:make-pgstate)) (state-after (pgloader.utils:make-pgstate)) - (pgloader.mysql:*cast-rules* ',casts) + (*default-cast-rules* ',*mysql-default-cast-rules*) + (*cast-rules* ',casts) ,@(mysql-connection-bindings my-db-uri) ,@(pgsql-connection-bindings pg-db-uri gucs) ,@(batch-control-bindings options) @@ -1106,6 +1107,7 @@ load database (defrule load-sqlite-optional-clauses (* (or sqlite-options gucs + casts including excluding)) (:lambda (clauses-list) @@ -1122,6 +1124,7 @@ load database (bind (((sqlite-uri pg-db-uri &key gucs + casts ((:sqlite-options options)) ((:including incl)) ((:excluding excl))) source) @@ -1129,6 +1132,8 @@ load database `(lambda () (let* ((state-before (pgloader.utils:make-pgstate)) (*state* (pgloader.utils:make-pgstate)) + (*default-cast-rules* ',*sqlite-default-cast-rules*) + (*cast-rules* ',casts) ,@(pgsql-connection-bindings pg-db-uri gucs) ,@(batch-control-bindings options) (db diff --git a/src/sources.lisp b/src/sources.lisp index a8aa8b1..d6b19e7 100644 --- a/src/sources.lisp +++ b/src/sources.lisp @@ -345,3 +345,185 @@ e))))) (when projected-vector (funcall process-row-fn projected-vector))))))) + + +;;; +;;; Type casting machinery, to share among all database kind sources. +;;; + +;; +;; The special variables *default-cast-rules* and *cast-rules* must be bound +;; by specific database commands with proper values at run-time. +;; +(defvar *default-cast-rules* nil "Default casting rules.") +(defvar *cast-rules* nil "Specific casting rules added in the command.") + +;;; +;;; Handling typmod in the general case, don't apply to ENUM types +;;; +(defun parse-column-typemod (data-type column-type) + "Given int(7), returns the number 7. + + Beware that some data-type are using a typmod looking definition for + things that are not typmods at all: enum." + (unless (or (string= "enum" data-type) + (string= "set" data-type)) + (let ((start-1 (position #\( column-type)) ; just before start position + (end (position #\) column-type))) ; just before end position + (when start-1 + (destructuring-bind (a &optional b) + (mapcar #'parse-integer + (sq:split-sequence #\, column-type + :start (+ 1 start-1) :end end)) + (cons a b)))))) + +(defun typemod-expr-to-function (expr) + "Transform given EXPR into a callable function object." + `(lambda (typemod) + (destructuring-bind (precision &optional (scale 0)) typemod + (declare (ignorable precision scale)) + ,expr))) + +(defun typemod-expr-matches-p (rule-typemod-expr typemod) + "Check if an expression such as (< 10) matches given typemod." + (funcall (compile nil (typemod-expr-to-function rule-typemod-expr)) typemod)) + +(defun cast-rule-matches (rule source) + "Returns the target datatype if the RULE matches the SOURCE, or nil" + (destructuring-bind (&key ((:source rule-source)) + ((:target rule-target)) + using) + rule + (destructuring-bind + ;; it's either :type or :column, just cope with both thanks to + ;; &allow-other-keys + (&key ((:type rule-source-type) nil t-s-p) + ((:column rule-source-column) nil c-s-p) + ((:typemod typemod-expr) nil tm-s-p) + ((:default rule-source-default) nil d-s-p) + ((:not-null rule-source-not-null) nil n-s-p) + ((:auto-increment rule-source-auto-increment) nil ai-s-p) + &allow-other-keys) + rule-source + (destructuring-bind (&key table-name + column-name + type + ctype + typemod + default + not-null + auto-increment) + source + (declare (ignore ctype)) + (when + (and + (or (and t-s-p (string= type rule-source-type)) + (and c-s-p + (string-equal table-name (car rule-source-column)) + (string-equal column-name (cdr rule-source-column)))) + (or (null tm-s-p) (typemod-expr-matches-p typemod-expr typemod)) + (or (null d-s-p) (string= default rule-source-default)) + (or (null n-s-p) (eq not-null rule-source-not-null)) + (or (null ai-s-p) (eq auto-increment rule-source-auto-increment))) + (list :using using :target rule-target)))))) + +(defun format-pgsql-default-value (default &optional using-cast-fn) + "Returns suitably quoted default value for CREATE TABLE command." + (cond + ((null default) "NULL") + ((string= "NULL" default) default) + ((string= "CURRENT_TIMESTAMP" default) default) + (t + ;; apply the transformation function to the default value + (if using-cast-fn (format-pgsql-default-value + (funcall using-cast-fn default)) + (format nil "'~a'" default))))) + +(defun format-pgsql-type (source target using) + "Returns a string suitable for a PostgreSQL type definition" + (destructuring-bind (&key ((:table-name source-table-name)) + ((:column-name source-column-name)) + ((:type source-type)) + ((:ctype source-ctype)) + ((:typemod source-typemod)) + ((:default source-default)) + ((:not-null source-not-null)) + &allow-other-keys) + source + (if target + (destructuring-bind (&key type + drop-default + drop-not-null + (drop-typemod t) + &allow-other-keys) + target + (let ((type-name + (typecase type + (function (funcall type + source-table-name source-column-name + source-type source-ctype source-typemod)) + (t type))) + (pg-typemod + (when source-typemod + (destructuring-bind (a . b) source-typemod + (format nil "(~a~:[~*~;,~a~])" a b b))))) + (format nil + "~a~:[~*~;~a~]~:[~; not null~]~:[~; default ~a~]" + type-name + (and source-typemod (not drop-typemod)) + pg-typemod + (and source-not-null (not drop-not-null)) + (and source-default (not drop-default)) + (format-pgsql-default-value source-default using)))) + + ;; NO MATCH + ;; + ;; prefer char(24) over just char, that is the column type over the + ;; data type. + (format nil "~a~:[~; not null~]~:[~; default ~a~]" + source-ctype + source-not-null + source-default + (format-pgsql-default-value source-default using))))) + +(defun apply-casting-rules (dtype ctype default nullable extra + &key + table-name column-name ; ENUM support + (rules (append *cast-rules* + *default-cast-rules*))) + "Apply the given RULES to the MySQL SOURCE type definition" + (let* ((typemod (parse-column-typemod dtype ctype)) + (not-null (string-equal nullable "NO")) + (auto-increment (string= "auto_increment" extra)) + (source `(:table-name ,table-name + :column-name ,column-name + :type ,dtype + :ctype ,ctype + ,@(when typemod (list :typemod typemod)) + :default ,default + :not-null ,not-null + :auto-increment ,auto-increment))) + (let (first-match-using) + (loop + for rule in rules + for (target using) = (destructuring-bind (&key target using) + (cast-rule-matches rule source) + (list target using)) + do (when (and (null target) using (null first-match-using)) + (setf first-match-using using)) + until target + finally + (return + (list :transform-fn (or first-match-using using) + :pgtype (format-pgsql-type source target using))))))) + +(defun cast (table-name column-name dtype ctype default nullable extra) + "Convert a MySQL datatype to a PostgreSQL datatype. + +DYTPE is the MySQL data_type and CTYPE the MySQL column_type, for example +that would be int and int(7) or varchar and varchar(25)." + (destructuring-bind (&key pgtype transform-fn &allow-other-keys) + (apply-casting-rules dtype ctype default nullable extra + :table-name table-name + :column-name column-name) + (values pgtype transform-fn))) diff --git a/src/sources/ixf.lisp b/src/sources/ixf.lisp index c0daa0d..f608156 100644 --- a/src/sources/ixf.lisp +++ b/src/sources/ixf.lisp @@ -20,7 +20,7 @@ (#. ixf:+char+ . "text") (#. ixf:+varchar+ . "text"))) -(defun cast (ixf-type) +(defun cast-ixf-type (ixf-type) "Return the PostgreSQL type name for a given IXF type name." (cdr (assoc ixf-type *ixf-pgsql-type-mapping*))) @@ -31,7 +31,7 @@ (type-definition (format nil "~a~:[ not null~;~]~:[~*~; default ~a~]" - (cast (ixf:ixf-column-type col)) + (cast-ixf-type (ixf:ixf-column-type col)) (ixf:ixf-column-nullable col) (ixf:ixf-column-has-default col) (ixf:ixf-column-default col)))) @@ -71,7 +71,7 @@ (setf (slot-value source 'transforms) (loop :for field :in fields :collect - (let ((coltype (cast (ixf:ixf-column-type field)))) + (let ((coltype (cast-ixf-type (ixf:ixf-column-type field)))) ;; ;; The IXF driver we use maps the data type and gets ;; back proper CL typed objects, where we only want to diff --git a/src/sources/mysql/mysql-cast-rules.lisp b/src/sources/mysql/mysql-cast-rules.lisp index bf90820..e60ed99 100644 --- a/src/sources/mysql/mysql-cast-rules.lisp +++ b/src/sources/mysql/mysql-cast-rules.lisp @@ -5,7 +5,7 @@ (in-package :pgloader.mysql) ;;; -;;; Some functions to deal with ENUM types +;;; Some functions to deal with ENUM and SET types ;;; (defun explode-mysql-enum (ctype) "Convert MySQL ENUM expression into a list of labels." @@ -44,7 +44,7 @@ ;;; ;;; The default MySQL Type Casting Rules ;;; -(defparameter *default-cast-rules* +(defparameter *mysql-default-cast-rules* `((:source (:type "int" :auto-increment t :typemod (< precision 10)) :target (:type "serial")) @@ -155,186 +155,12 @@ :using pgloader.transforms::convert-mysql-point)) "Data Type Casting rules to migrate from MySQL to PostgreSQL") -(defvar *cast-rules* nil "Specific casting rules added in the command.") - + ;;; -;;; Handling typmod in the general case, don't apply to ENUM types +;;; MySQL specific testing. +;;; +;;; TODO: move that to general testing. ;;; -(defun parse-column-typemod (data-type column-type) - "Given int(7), returns the number 7. - - Beware that some data-type are using a typmod looking definition for - things that are not typmods at all: enum." - (unless (or (string= "enum" data-type) - (string= "set" data-type)) - (let ((start-1 (position #\( column-type)) ; just before start position - (end (position #\) column-type))) ; just before end position - (when start-1 - (destructuring-bind (a &optional b) - (mapcar #'parse-integer - (sq:split-sequence #\, column-type - :start (+ 1 start-1) :end end)) - (cons a b)))))) - -(defun typemod-expr-to-function (expr) - "Transform given EXPR into a callable function object." - `(lambda (typemod) - (destructuring-bind (precision &optional (scale 0)) typemod - (declare (ignorable precision scale)) - ;; - ;; The command parser interns symbols into the pgloader.transforms - ;; package, whereas the default casting rules are defined in the - ;; pgloader.mysql package. Have a compatibility layer here for the - ;; generated code. - ;; - (let ((pgloader.transforms::precision precision)) - (declare (ignorable pgloader.transforms::precision)) - ,expr)))) - -(defun typemod-expr-matches-p (rule-typemod-expr typemod) - "Check if an expression such as (< 10) matches given typemod." - (funcall (compile nil (typemod-expr-to-function rule-typemod-expr)) typemod)) - -(defun cast-rule-matches (rule source) - "Returns the target datatype if the RULE matches the SOURCE, or nil" - (destructuring-bind (&key ((:source rule-source)) - ((:target rule-target)) - using) - rule - (destructuring-bind - ;; it's either :type or :column, just cope with both thanks to - ;; &allow-other-keys - (&key ((:type rule-source-type) nil t-s-p) - ((:column rule-source-column) nil c-s-p) - ((:typemod typemod-expr) nil tm-s-p) - ((:default rule-source-default) nil d-s-p) - ((:not-null rule-source-not-null) nil n-s-p) - ((:auto-increment rule-source-auto-increment) nil ai-s-p) - &allow-other-keys) - rule-source - (destructuring-bind (&key table-name - column-name - type - ctype - typemod - default - not-null - auto-increment) - source - (declare (ignore ctype)) - (when - (and - (or (and t-s-p (string= type rule-source-type)) - (and c-s-p - (string-equal table-name (car rule-source-column)) - (string-equal column-name (cdr rule-source-column)))) - (or (null tm-s-p) (typemod-expr-matches-p typemod-expr typemod)) - (or (null d-s-p) (string= default rule-source-default)) - (or (null n-s-p) (eq not-null rule-source-not-null)) - (or (null ai-s-p) (eq auto-increment rule-source-auto-increment))) - (list :using using :target rule-target)))))) - -(defun format-pgsql-default-value (default &optional using-cast-fn) - "Returns suitably quoted default value for CREATE TABLE command." - (cond - ((null default) "NULL") - ((string= "NULL" default) default) - ((string= "CURRENT_TIMESTAMP" default) default) - (t - ;; apply the transformation function to the default value - (if using-cast-fn (format-pgsql-default-value - (funcall using-cast-fn default)) - (format nil "'~a'" default))))) - -(defun format-pgsql-type (source target using) - "Returns a string suitable for a PostgreSQL type definition" - (destructuring-bind (&key ((:table-name source-table-name)) - ((:column-name source-column-name)) - ((:type source-type)) - ((:ctype source-ctype)) - ((:typemod source-typemod)) - ((:default source-default)) - ((:not-null source-not-null)) - &allow-other-keys) - source - (if target - (destructuring-bind (&key type - drop-default - drop-not-null - (drop-typemod t) - &allow-other-keys) - target - (let ((type-name - (typecase type - (function (funcall type - source-table-name source-column-name - source-type source-ctype source-typemod)) - (t type))) - (pg-typemod - (when source-typemod - (destructuring-bind (a . b) source-typemod - (format nil "(~a~:[~*~;,~a~])" a b b))))) - (format nil - "~a~:[~*~;~a~]~:[~; not null~]~:[~; default ~a~]" - type-name - (and source-typemod (not drop-typemod)) - pg-typemod - (and source-not-null (not drop-not-null)) - (and source-default (not drop-default)) - (format-pgsql-default-value source-default using)))) - - ;; NO MATCH - ;; - ;; prefer char(24) over just char, that is the column type over the - ;; data type. - (format nil "~a~:[~; not null~]~:[~; default ~a~]" - source-ctype - source-not-null - source-default - (format-pgsql-default-value source-default using))))) - -(defun apply-casting-rules (dtype ctype default nullable extra - &key - table-name column-name ; ENUM support - (rules (append *cast-rules* - *default-cast-rules*))) - "Apply the given RULES to the MySQL SOURCE type definition" - (let* ((typemod (parse-column-typemod dtype ctype)) - (not-null (string-equal nullable "NO")) - (auto-increment (string= "auto_increment" extra)) - (source `(:table-name ,table-name - :column-name ,column-name - :type ,dtype - :ctype ,ctype - ,@(when typemod (list :typemod typemod)) - :default ,default - :not-null ,not-null - :auto-increment ,auto-increment))) - (let (first-match-using) - (loop - for rule in rules - for (target using) = (destructuring-bind (&key target using) - (cast-rule-matches rule source) - (list target using)) - do (when (and (null target) using (null first-match-using)) - (setf first-match-using using)) - until target - finally - (return - (list :transform-fn (or first-match-using using) - :pgtype (format-pgsql-type source target using))))))) - -(defun cast (table-name column-name dtype ctype default nullable extra) - "Convert a MySQL datatype to a PostgreSQL datatype. - -DYTPE is the MySQL data_type and CTYPE the MySQL column_type, for example -that would be int and int(7) or varchar and varchar(25)." - (destructuring-bind (&key pgtype transform-fn &allow-other-keys) - (apply-casting-rules dtype ctype default nullable extra - :table-name table-name - :column-name column-name) - (values pgtype transform-fn))) - (defun test-casts () "Just test some cases for the casts" (let ((*cast-rules* diff --git a/src/sources/sqlite.lisp b/src/sources/sqlite.lisp index e4f678d..e1f2ac6 100644 --- a/src/sources/sqlite.lisp +++ b/src/sources/sqlite.lisp @@ -7,83 +7,93 @@ (defvar *sqlite-db* nil "The SQLite database connection handler.") -;; -;; The SQLite drive we use maps the CFFI data type mapping functions and -;; gets back proper CL typed objects, where we only want to deal with text. -;; -(defvar *sqlite-to-pgsql* - '(("float" . pgloader.transforms::float-to-string) - ("real" . pgloader.transforms::float-to-string) - ("double precision" . pgloader.transforms::float-to-string) - ("numeric" . pgloader.transforms::float-to-string) - ("text" . nil) - ("bytea" . pgloader.transforms::byte-vector-to-bytea) - ("timestamp" . pgloader.transforms::sqlite-timestamp-to-timestamp) - ("timestamptz" . pgloader.transforms::sqlite-timestamp-to-timestamp)) - "Transformation functions to use when migrating from SQLite to PostgreSQL.") +(defparameter *sqlite-default-cast-rules* + `((:source (:type "character") :target (:type "text" :drop-typemod t)) + (:source (:type "varchar") :target (:type "text" :drop-typemod t)) + (:source (:type "nvarchar") :target (:type "text" :drop-typemod t)) + (:source (:type "char") :target (:type "text" :drop-typemod t)) + (:source (:type "nchar") :target (:type "text" :drop-typemod t)) + (:source (:type "clob") :target (:type "text" :drop-typemod t)) + + (:source (:type "tinyint") :target (:type "smallint")) + + (:source (:type "float") :target (:type "float") + :using pgloader.transforms::float-to-string) + + (:source (:type "real") :target (:type "real") + :using pgloader.transforms::float-to-string) + + (:source (:type "double") :target (:type "double precision") + :using pgloader.transforms::float-to-string) + + (:source (:type "numeric") :target (:type "numeric") + :using pgloader.transforms::float-to-string) + + (:source (:type "blob") :target (:type "bytea") + :using pgloader.transforms::byte-vector-to-bytea) + + (:source (:type "datetime") :target (:type "timestamptz") + :using pgloader.transforms::sqlite-timestamp-to-timestamp) + + (:source (:type "timestamp") :target (:type "timestamp") + :using pgloader.transforms::sqlite-timestamp-to-timestamp) + + (:source (:type "timestamptz") :target (:type "timestamptz") + :using pgloader.transforms::sqlite-timestamp-to-timestamp)) + "Data Type Casting to migrate from SQLite to PostgreSQL") ;;; ;;; SQLite tools connecting to a database ;;; (defstruct (coldef - (:constructor make-coldef (seq name type nullable default pk-id))) - seq name type nullable default pk-id) + (:constructor make-coldef (table-name + seq name dtype ctype + nullable default pk-id))) + table-name seq name dtype ctype nullable default pk-id) -(defun cast (sqlite-type-name) - "Return the PostgreSQL type name for a given SQLite type name." - (let* ((tokens (remove-if (lambda (token) - (member token '("unsigned" "short") +(defun normalize (sqlite-type-name) + "SQLite only has a notion of what MySQL calls column_type, or ctype in the + CAST machinery. Transform it to the data_type, or dtype." + (let* ((sqlite-type-name (string-downcase sqlite-type-name)) + (tokens (remove-if (lambda (token) + (member token '("unsigned" "short" + "varying" "native") :test #'string-equal)) - (sq:split-sequence #\Space sqlite-type-name))) - (sqlite-type-name (first tokens))) + (sq:split-sequence #\Space sqlite-type-name)))) (assert (= 1 (length tokens))) - (cond ((and (<= 8 (length sqlite-type-name)) - (string-equal sqlite-type-name "nvarchar" :end1 8)) "text") + (first tokens))) - ((string-equal sqlite-type-name "tinyint") "smallint") - ((string-equal sqlite-type-name "datetime") "timestamptz") - ((string-equal sqlite-type-name "double") "double precision") - ((string-equal sqlite-type-name "blob") "bytea") - ((string-equal sqlite-type-name "clob") "text") +(defun ctype-to-dtype (sqlite-type-name) + "In SQLite we only get the ctype, e.g. int(7), but here we want the base + data type behind it, e.g. int." + (let* ((ctype (normalize sqlite-type-name)) + (paren-pos (position #\( ctype))) + (if paren-pos (subseq ctype 0 paren-pos) ctype))) - (t sqlite-type-name)))) +(defun cast-sqlite-column-definition-to-pgsql (sqlite-column) + "Return the PostgreSQL column definition from the MySQL one." + (multiple-value-bind (column fn) + (with-slots (table-name name dtype ctype default nullable) + sqlite-column + (cast table-name name dtype ctype default nullable nil)) + ;; the SQLite driver smartly maps data to the proper CL type, but the + ;; pgloader API only wants to see text representations to send down the + ;; COPY protocol. + (values column (or fn (lambda (val) (if val (format nil "~a" val) :null)))))) -(defun transformation-function (pgsql-type-name) - "Return the transformation function to use to switch a SQLite value to a - PostgreSQL value of type PGSQL-TYPE-NAME." - (let* ((type-name - (cond ((and (<= 7 (length pgsql-type-name)) - (string-equal "numeric" pgsql-type-name :end2 7)) - "numeric") - (t pgsql-type-name))) - (transform (assoc type-name *sqlite-to-pgsql* :test #'string=))) - (if transform - (cdr transform) - (compile nil (lambda (c) (when c (format nil "~a" c))))))) - -(defun format-pgsql-default-value (col) - "Return the PostgreSQL representation for the default value of COL." - (declare (type coldef col)) - (let ((default (coldef-default col))) - (cond - ((null default) "NULL") - ((string= "NULL" default) default) - ((string= "CURRENT_TIMESTAMP" default) default) - (t - ;; apply the transformation function to the default value - (let ((fn (transformation-function (cast (coldef-type col))))) - (if fn (funcall fn default) (format nil "'~a'" default))))))) +(defmethod cast-to-bytea-p ((col coldef)) + "Returns a generalized boolean, non-nil when the column is casted to a + PostgreSQL bytea column." + (string= "bytea" (cast-sqlite-column-definition-to-pgsql col))) (defmethod format-pgsql-column ((col coldef) &key identifier-case) "Return a string representing the PostgreSQL column definition." (let* ((column-name (apply-identifier-case (coldef-name col) identifier-case)) (type-definition - (format nil - "~a~:[~; not null~]~@[ default ~a~]" - (cast (coldef-type col)) - (coldef-nullable col) - (format-pgsql-default-value col)))) + (with-slots (table-name name dtype ctype nullable default) + col + (cast table-name name dtype ctype default nullable nil)))) (format nil "~a ~22t ~a" column-name type-definition))) (defun list-tables (&optional (db *sqlite-db*)) @@ -99,7 +109,14 @@ (let ((sql (format nil "PRAGMA table_info(~a)" table-name))) (loop for (seq name type nullable default pk-id) in (sqlite:execute-to-list db sql) - collect (make-coldef seq name type (= 1 nullable) default pk-id)))) + collect (make-coldef table-name + seq + name + (ctype-to-dtype (normalize type)) + (normalize type) + (= 1 nullable) + (unquote default) + pk-id)))) (defun list-all-columns (&optional (db *sqlite-db*)) "Get the list of SQLite column definitions per table." @@ -168,12 +185,15 @@ (unless (slot-boundp source 'fields) (setf (slot-value source 'fields) fields)) - (unless transforms - (setf (slot-value source 'transforms) - (loop for field in fields - collect - (let ((coltype (cast (coldef-type field)))) - (transformation-function coltype)))))))) + (loop for field in fields + for (column fn) = (multiple-value-bind (column fn) + (cast-sqlite-column-definition-to-pgsql field) + (list column fn)) + collect column into columns + collect fn into fns + finally (progn (setf (slot-value source 'columns) columns) + (unless transforms + (setf (slot-value source 'transforms) fns))))))) ;;; Map a function to each row extracted from SQLite ;;; @@ -182,10 +202,7 @@ argument (a list of column values) for each row" (let ((sql (format nil "SELECT * FROM ~a" (source sqlite))) (blobs-p - (coerce (mapcar (lambda (field) - (string-equal "bytea" (cast (coldef-type field)))) - (fields sqlite)) - 'vector))) + (coerce (mapcar #'cast-to-bytea-p (fields sqlite)) 'vector))) (handler-case (loop with statement = (sqlite:prepare-statement (db sqlite) sql) diff --git a/src/utils/transforms.lisp b/src/utils/transforms.lisp index 51a0e77..ee3be9d 100644 --- a/src/utils/transforms.lisp +++ b/src/utils/transforms.lisp @@ -5,10 +5,6 @@ ;;; up in the pgloader.transforms package, when using the default USING ;;; syntax for transformations. -(defpackage #:pgloader.transforms - (:use #:cl) - (:export #:intern-symbol)) - (in-package :pgloader.transforms) (declaim (inline intern-symbol diff --git a/src/utils/utils.lisp b/src/utils/utils.lisp index 45904b5..46e9a24 100644 --- a/src/utils/utils.lisp +++ b/src/utils/utils.lisp @@ -61,3 +61,15 @@ when (and new-word (not (char= char #\_))) collect #\_ collect (char-downcase char)) 'string)) + +;;; +;;; Unquote SQLite default values, might be useful elsewhere +;;; +(defun unquote (string &optional (quote #\')) + "Given '0', returns 0." + (declare (type (or null simple-string) string)) + (when string + (let ((l (length string))) + (if (char= quote (aref string 0) (aref string (1- l))) + (subseq string 1 (1- l)) + string)))) diff --git a/test/sqlite.load b/test/sqlite.load index c7ffe04..5ff3504 100644 --- a/test/sqlite.load +++ b/test/sqlite.load @@ -4,4 +4,6 @@ load database -- with include drop, create tables, create indexes, reset sequences + cast column character.f1 to text drop typemod + set work_mem to '16MB', maintenance_work_mem to '512 MB'; \ No newline at end of file diff --git a/test/sqlite/sqlite.db b/test/sqlite/sqlite.db index f1f49f0227a6ba6162ec28bda11889818c9255a0..e33bd3cd3bff0a38e7457b5a728f3dd35f7388dc 100644 GIT binary patch delta 204 zcmZo@U};#uGC^8Uih+Sa5Qt%bccPB5ycC1(BUN6Y5GT(x2L9drd3>+=Ci2+=Ci2@r1^@s6