Add support for the CAST rule to SQLite sources.

This allows users to benefit from the same flexible machinery when using
SQLite as when using MySQL, and also allows to add some more default
cast rules too.
This commit is contained in:
Dimitri Fontaine 2014-10-13 00:52:55 +02:00
parent 2db4fd89aa
commit 22f4317a30
12 changed files with 459 additions and 273 deletions

View File

@ -1796,6 +1796,15 @@ This option allows to control which encoding to parse the SQLite text data with\
.
.IP "" 0
.
.IP "\(bu" 4
\fICAST\fR
.
.IP
The cast clause allows to specify custom casting rules, either to overload the default casting rules or to amend them with special cases\.
.
.IP
Please refer to the MySQL CAST clause for details\.
.
.IP "\(bu" 4
\fIINCLUDING ONLY TABLE NAMES MATCHING\fR
@ -1833,6 +1842,77 @@ EXCLUDING TABLE NAMES MATCHING ~<ory>
.
.IP "" 0
.
.IP "" 0
.
.SS "DEFAULT SQLite CASTING RULES"
When migrating from SQLite the following Casting Rules are provided:
.
.P
Numbers:
.
.IP "\(bu" 4
type tinyint to smallint
.
.IP "\(bu" 4
type float to float using float\-to\-string
.
.IP "\(bu" 4
type real to real using float\-to\-string
.
.IP "\(bu" 4
type double to double precision using float\-to\-string
.
.IP "\(bu" 4
type numeric to numeric using float\-to\-string
.
.IP "" 0
.
.P
Texts:
.
.IP "\(bu" 4
type character to text drop typemod
.
.IP "\(bu" 4
type varchar to text drop typemod
.
.IP "\(bu" 4
type nvarchar to text drop typemod
.
.IP "\(bu" 4
type char to text drop typemod
.
.IP "\(bu" 4
type nchar to text drop typemod
.
.IP "\(bu" 4
type nvarchar to text drop typemod
.
.IP "\(bu" 4
type clob to text drop typemod
.
.IP "" 0
.
.P
Binary:
.
.IP "\(bu" 4
type blob to bytea
.
.IP "" 0
.
.P
Date:
.
.IP "\(bu" 4
type datetime to timestamptz using sqlite\-timestamp\-to\-timestamp
.
.IP "\(bu" 4
type timestamp to timestamptz using sqlite\-timestamp\-to\-timestamp
.
.IP "\(bu" 4
type timestamptz to timestamptz using sqlite\-timestamp\-to\-timestamp
.
.IP "" 0
.

View File

@ -1438,6 +1438,13 @@ The `sqlite` command accepts the following clauses and options:
This option allows to control which encoding to parse the SQLite
text data with. Defaults to UTF-8.
- *CAST*
The cast clause allows to specify custom casting rules, either to
overload the default casting rules or to amend them with special cases.
Please refer to the MySQL CAST clause for details.
- *INCLUDING ONLY TABLE NAMES MATCHING*
Introduce a comma separated list of table names or *regular expression*
@ -1455,6 +1462,40 @@ The `sqlite` command accepts the following clauses and options:
EXCLUDING TABLE NAMES MATCHING ~<ory>
### DEFAULT SQLite CASTING RULES
When migrating from SQLite the following Casting Rules are provided:
Numbers:
- type tinyint to smallint
- type float to float using float-to-string
- type real to real using float-to-string
- type double to double precision using float-to-string
- type numeric to numeric using float-to-string
Texts:
- type character to text drop typemod
- type varchar to text drop typemod
- type nvarchar to text drop typemod
- type char to text drop typemod
- type nchar to text drop typemod
- type nvarchar to text drop typemod
- type clob to text drop typemod
Binary:
- type blob to bytea
Date:
- type datetime to timestamptz using sqlite-timestamp-to-timestamp
- type timestamp to timestamptz using sqlite-timestamp-to-timestamp
- type timestamptz to timestamptz using sqlite-timestamp-to-timestamp
## TRANSFORMATION FUNCTIONS
Some data types are implemented in a different enough way that a

View File

@ -2,6 +2,11 @@
;;;
;;; To avoid circular files dependencies, define all the packages here
;;;
(defpackage #:pgloader.transforms
(:use #:cl)
(:export #:precision
#:scale
#:intern-symbol))
(defpackage #:pgloader.logs
(:use #:cl #:pgloader.params)
@ -55,6 +60,7 @@
#:report-full-summary
#:with-stats-collection
#:camelCase-to-colname
#:unquote
#:make-kernel
#:list-encodings-and-aliases
#:show-encodings
@ -102,6 +108,7 @@
(defpackage #:pgloader.sources
(:use #:cl #:pgloader.params #:pgloader.utils)
(:import-from #:pgloader.transforms #:precision #:scale)
(:import-from #:pgloader.parse-date
#:parse-date-string
#:parse-date-format)
@ -118,12 +125,19 @@
#:copy-to-queue
#:copy-to
#:copy-database
;; file based utils for CSV, fixed etc
#:filter-column-list
#:with-open-file-or-stream
#:get-pathname
#:get-absolute-pathname
#:project-fields
#:reformat-then-process))
#:reformat-then-process
;; database cast machinery
#:*default-cast-rules*
#:*cast-rules*
#:cast))
(defpackage #:pgloader.queue
(:use #:cl #:pgloader.params)
@ -157,17 +171,6 @@
(:use #:cl)
(:export #:read-queries))
(defpackage #:pgloader.parser
(:use #:cl #:esrap #:metabang.bind
#:pgloader.params #:pgloader.utils #:pgloader.sql)
(:import-from #:alexandria #:read-file-into-string)
(:import-from #:pgloader.pgsql
#:with-pgsql-transaction
#:pgsql-execute)
(:export #:parse-commands
#:run-commands
#:with-database-uri))
;;
;; Specific source handling
@ -232,6 +235,7 @@
(:use #:cl
#:pgloader.params #:pgloader.utils
#:pgloader.sources #:pgloader.queue)
(:import-from #:pgloader.transforms #:precision #:scale)
(:import-from #:pgloader.pgsql
#:with-pgsql-transaction
#:pgsql-execute
@ -251,8 +255,7 @@
#:create-indexes-in-kernel
#:format-vector-row)
(:export #:copy-mysql
#:*cast-rules*
#:*default-cast-rules*
#:*mysql-default-cast-rules*
#:with-mysql-connection
#:map-rows
#:copy-to
@ -267,6 +270,7 @@
(:use #:cl
#:pgloader.params #:pgloader.utils
#:pgloader.sources #:pgloader.queue)
(:import-from #:pgloader.transforms #:precision #:scale)
(:import-from #:pgloader.pgsql
#:with-pgsql-transaction
#:pgsql-execute
@ -280,6 +284,7 @@
#:format-pgsql-create-index
#:create-indexes-in-kernel)
(:export #:copy-sqlite
#:*sqlite-default-cast-rules*
#:map-rows
#:copy-to
#:copy-from
@ -307,6 +312,26 @@
#:expand-archive
#:get-matching-filenames))
;;;
;;; The Command Parser
;;;
(defpackage #:pgloader.parser
(:use #:cl #:esrap #:metabang.bind
#:pgloader.params #:pgloader.utils #:pgloader.sql)
(:import-from #:alexandria #:read-file-into-string)
(:import-from #:pgloader.pgsql
#:with-pgsql-transaction
#:pgsql-execute)
(:import-from #:pgloader.sources
#:*default-cast-rules*
#:*cast-rules*)
(:import-from #:pgloader.mysql #:*mysql-default-cast-rules*)
(:import-from #:pgloader.sqlite #:*sqlite-default-cast-rules*)
(:export #:parse-commands
#:run-commands
#:with-database-uri))
;;

View File

@ -1004,7 +1004,8 @@
(*state* (or *state* (pgloader.utils:make-pgstate)))
(state-idx (pgloader.utils:make-pgstate))
(state-after (pgloader.utils:make-pgstate))
(pgloader.mysql:*cast-rules* ',casts)
(*default-cast-rules* ',*mysql-default-cast-rules*)
(*cast-rules* ',casts)
,@(mysql-connection-bindings my-db-uri)
,@(pgsql-connection-bindings pg-db-uri gucs)
,@(batch-control-bindings options)
@ -1106,6 +1107,7 @@ load database
(defrule load-sqlite-optional-clauses (* (or sqlite-options
gucs
casts
including
excluding))
(:lambda (clauses-list)
@ -1122,6 +1124,7 @@ load database
(bind (((sqlite-uri pg-db-uri
&key
gucs
casts
((:sqlite-options options))
((:including incl))
((:excluding excl))) source)
@ -1129,6 +1132,8 @@ load database
`(lambda ()
(let* ((state-before (pgloader.utils:make-pgstate))
(*state* (pgloader.utils:make-pgstate))
(*default-cast-rules* ',*sqlite-default-cast-rules*)
(*cast-rules* ',casts)
,@(pgsql-connection-bindings pg-db-uri gucs)
,@(batch-control-bindings options)
(db

View File

@ -345,3 +345,185 @@
e)))))
(when projected-vector
(funcall process-row-fn projected-vector)))))))
;;;
;;; Type casting machinery, to share among all database kind sources.
;;;
;;
;; The special variables *default-cast-rules* and *cast-rules* must be bound
;; by specific database commands with proper values at run-time.
;;
(defvar *default-cast-rules* nil "Default casting rules.")
(defvar *cast-rules* nil "Specific casting rules added in the command.")
;;;
;;; Handling typmod in the general case, don't apply to ENUM types
;;;
(defun parse-column-typemod (data-type column-type)
"Given int(7), returns the number 7.
Beware that some data-type are using a typmod looking definition for
things that are not typmods at all: enum."
(unless (or (string= "enum" data-type)
(string= "set" data-type))
(let ((start-1 (position #\( column-type)) ; just before start position
(end (position #\) column-type))) ; just before end position
(when start-1
(destructuring-bind (a &optional b)
(mapcar #'parse-integer
(sq:split-sequence #\, column-type
:start (+ 1 start-1) :end end))
(cons a b))))))
(defun typemod-expr-to-function (expr)
"Transform given EXPR into a callable function object."
`(lambda (typemod)
(destructuring-bind (precision &optional (scale 0)) typemod
(declare (ignorable precision scale))
,expr)))
(defun typemod-expr-matches-p (rule-typemod-expr typemod)
"Check if an expression such as (< 10) matches given typemod."
(funcall (compile nil (typemod-expr-to-function rule-typemod-expr)) typemod))
(defun cast-rule-matches (rule source)
"Returns the target datatype if the RULE matches the SOURCE, or nil"
(destructuring-bind (&key ((:source rule-source))
((:target rule-target))
using)
rule
(destructuring-bind
;; it's either :type or :column, just cope with both thanks to
;; &allow-other-keys
(&key ((:type rule-source-type) nil t-s-p)
((:column rule-source-column) nil c-s-p)
((:typemod typemod-expr) nil tm-s-p)
((:default rule-source-default) nil d-s-p)
((:not-null rule-source-not-null) nil n-s-p)
((:auto-increment rule-source-auto-increment) nil ai-s-p)
&allow-other-keys)
rule-source
(destructuring-bind (&key table-name
column-name
type
ctype
typemod
default
not-null
auto-increment)
source
(declare (ignore ctype))
(when
(and
(or (and t-s-p (string= type rule-source-type))
(and c-s-p
(string-equal table-name (car rule-source-column))
(string-equal column-name (cdr rule-source-column))))
(or (null tm-s-p) (typemod-expr-matches-p typemod-expr typemod))
(or (null d-s-p) (string= default rule-source-default))
(or (null n-s-p) (eq not-null rule-source-not-null))
(or (null ai-s-p) (eq auto-increment rule-source-auto-increment)))
(list :using using :target rule-target))))))
(defun format-pgsql-default-value (default &optional using-cast-fn)
"Returns suitably quoted default value for CREATE TABLE command."
(cond
((null default) "NULL")
((string= "NULL" default) default)
((string= "CURRENT_TIMESTAMP" default) default)
(t
;; apply the transformation function to the default value
(if using-cast-fn (format-pgsql-default-value
(funcall using-cast-fn default))
(format nil "'~a'" default)))))
(defun format-pgsql-type (source target using)
"Returns a string suitable for a PostgreSQL type definition"
(destructuring-bind (&key ((:table-name source-table-name))
((:column-name source-column-name))
((:type source-type))
((:ctype source-ctype))
((:typemod source-typemod))
((:default source-default))
((:not-null source-not-null))
&allow-other-keys)
source
(if target
(destructuring-bind (&key type
drop-default
drop-not-null
(drop-typemod t)
&allow-other-keys)
target
(let ((type-name
(typecase type
(function (funcall type
source-table-name source-column-name
source-type source-ctype source-typemod))
(t type)))
(pg-typemod
(when source-typemod
(destructuring-bind (a . b) source-typemod
(format nil "(~a~:[~*~;,~a~])" a b b)))))
(format nil
"~a~:[~*~;~a~]~:[~; not null~]~:[~; default ~a~]"
type-name
(and source-typemod (not drop-typemod))
pg-typemod
(and source-not-null (not drop-not-null))
(and source-default (not drop-default))
(format-pgsql-default-value source-default using))))
;; NO MATCH
;;
;; prefer char(24) over just char, that is the column type over the
;; data type.
(format nil "~a~:[~; not null~]~:[~; default ~a~]"
source-ctype
source-not-null
source-default
(format-pgsql-default-value source-default using)))))
(defun apply-casting-rules (dtype ctype default nullable extra
&key
table-name column-name ; ENUM support
(rules (append *cast-rules*
*default-cast-rules*)))
"Apply the given RULES to the MySQL SOURCE type definition"
(let* ((typemod (parse-column-typemod dtype ctype))
(not-null (string-equal nullable "NO"))
(auto-increment (string= "auto_increment" extra))
(source `(:table-name ,table-name
:column-name ,column-name
:type ,dtype
:ctype ,ctype
,@(when typemod (list :typemod typemod))
:default ,default
:not-null ,not-null
:auto-increment ,auto-increment)))
(let (first-match-using)
(loop
for rule in rules
for (target using) = (destructuring-bind (&key target using)
(cast-rule-matches rule source)
(list target using))
do (when (and (null target) using (null first-match-using))
(setf first-match-using using))
until target
finally
(return
(list :transform-fn (or first-match-using using)
:pgtype (format-pgsql-type source target using)))))))
(defun cast (table-name column-name dtype ctype default nullable extra)
"Convert a MySQL datatype to a PostgreSQL datatype.
DYTPE is the MySQL data_type and CTYPE the MySQL column_type, for example
that would be int and int(7) or varchar and varchar(25)."
(destructuring-bind (&key pgtype transform-fn &allow-other-keys)
(apply-casting-rules dtype ctype default nullable extra
:table-name table-name
:column-name column-name)
(values pgtype transform-fn)))

View File

@ -20,7 +20,7 @@
(#. ixf:+char+ . "text")
(#. ixf:+varchar+ . "text")))
(defun cast (ixf-type)
(defun cast-ixf-type (ixf-type)
"Return the PostgreSQL type name for a given IXF type name."
(cdr (assoc ixf-type *ixf-pgsql-type-mapping*)))
@ -31,7 +31,7 @@
(type-definition
(format nil
"~a~:[ not null~;~]~:[~*~; default ~a~]"
(cast (ixf:ixf-column-type col))
(cast-ixf-type (ixf:ixf-column-type col))
(ixf:ixf-column-nullable col)
(ixf:ixf-column-has-default col)
(ixf:ixf-column-default col))))
@ -71,7 +71,7 @@
(setf (slot-value source 'transforms)
(loop :for field :in fields
:collect
(let ((coltype (cast (ixf:ixf-column-type field))))
(let ((coltype (cast-ixf-type (ixf:ixf-column-type field))))
;;
;; The IXF driver we use maps the data type and gets
;; back proper CL typed objects, where we only want to

View File

@ -5,7 +5,7 @@
(in-package :pgloader.mysql)
;;;
;;; Some functions to deal with ENUM types
;;; Some functions to deal with ENUM and SET types
;;;
(defun explode-mysql-enum (ctype)
"Convert MySQL ENUM expression into a list of labels."
@ -44,7 +44,7 @@
;;;
;;; The default MySQL Type Casting Rules
;;;
(defparameter *default-cast-rules*
(defparameter *mysql-default-cast-rules*
`((:source (:type "int" :auto-increment t :typemod (< precision 10))
:target (:type "serial"))
@ -155,186 +155,12 @@
:using pgloader.transforms::convert-mysql-point))
"Data Type Casting rules to migrate from MySQL to PostgreSQL")
(defvar *cast-rules* nil "Specific casting rules added in the command.")
;;;
;;; Handling typmod in the general case, don't apply to ENUM types
;;; MySQL specific testing.
;;;
;;; TODO: move that to general testing.
;;;
(defun parse-column-typemod (data-type column-type)
"Given int(7), returns the number 7.
Beware that some data-type are using a typmod looking definition for
things that are not typmods at all: enum."
(unless (or (string= "enum" data-type)
(string= "set" data-type))
(let ((start-1 (position #\( column-type)) ; just before start position
(end (position #\) column-type))) ; just before end position
(when start-1
(destructuring-bind (a &optional b)
(mapcar #'parse-integer
(sq:split-sequence #\, column-type
:start (+ 1 start-1) :end end))
(cons a b))))))
(defun typemod-expr-to-function (expr)
"Transform given EXPR into a callable function object."
`(lambda (typemod)
(destructuring-bind (precision &optional (scale 0)) typemod
(declare (ignorable precision scale))
;;
;; The command parser interns symbols into the pgloader.transforms
;; package, whereas the default casting rules are defined in the
;; pgloader.mysql package. Have a compatibility layer here for the
;; generated code.
;;
(let ((pgloader.transforms::precision precision))
(declare (ignorable pgloader.transforms::precision))
,expr))))
(defun typemod-expr-matches-p (rule-typemod-expr typemod)
"Check if an expression such as (< 10) matches given typemod."
(funcall (compile nil (typemod-expr-to-function rule-typemod-expr)) typemod))
(defun cast-rule-matches (rule source)
"Returns the target datatype if the RULE matches the SOURCE, or nil"
(destructuring-bind (&key ((:source rule-source))
((:target rule-target))
using)
rule
(destructuring-bind
;; it's either :type or :column, just cope with both thanks to
;; &allow-other-keys
(&key ((:type rule-source-type) nil t-s-p)
((:column rule-source-column) nil c-s-p)
((:typemod typemod-expr) nil tm-s-p)
((:default rule-source-default) nil d-s-p)
((:not-null rule-source-not-null) nil n-s-p)
((:auto-increment rule-source-auto-increment) nil ai-s-p)
&allow-other-keys)
rule-source
(destructuring-bind (&key table-name
column-name
type
ctype
typemod
default
not-null
auto-increment)
source
(declare (ignore ctype))
(when
(and
(or (and t-s-p (string= type rule-source-type))
(and c-s-p
(string-equal table-name (car rule-source-column))
(string-equal column-name (cdr rule-source-column))))
(or (null tm-s-p) (typemod-expr-matches-p typemod-expr typemod))
(or (null d-s-p) (string= default rule-source-default))
(or (null n-s-p) (eq not-null rule-source-not-null))
(or (null ai-s-p) (eq auto-increment rule-source-auto-increment)))
(list :using using :target rule-target))))))
(defun format-pgsql-default-value (default &optional using-cast-fn)
"Returns suitably quoted default value for CREATE TABLE command."
(cond
((null default) "NULL")
((string= "NULL" default) default)
((string= "CURRENT_TIMESTAMP" default) default)
(t
;; apply the transformation function to the default value
(if using-cast-fn (format-pgsql-default-value
(funcall using-cast-fn default))
(format nil "'~a'" default)))))
(defun format-pgsql-type (source target using)
"Returns a string suitable for a PostgreSQL type definition"
(destructuring-bind (&key ((:table-name source-table-name))
((:column-name source-column-name))
((:type source-type))
((:ctype source-ctype))
((:typemod source-typemod))
((:default source-default))
((:not-null source-not-null))
&allow-other-keys)
source
(if target
(destructuring-bind (&key type
drop-default
drop-not-null
(drop-typemod t)
&allow-other-keys)
target
(let ((type-name
(typecase type
(function (funcall type
source-table-name source-column-name
source-type source-ctype source-typemod))
(t type)))
(pg-typemod
(when source-typemod
(destructuring-bind (a . b) source-typemod
(format nil "(~a~:[~*~;,~a~])" a b b)))))
(format nil
"~a~:[~*~;~a~]~:[~; not null~]~:[~; default ~a~]"
type-name
(and source-typemod (not drop-typemod))
pg-typemod
(and source-not-null (not drop-not-null))
(and source-default (not drop-default))
(format-pgsql-default-value source-default using))))
;; NO MATCH
;;
;; prefer char(24) over just char, that is the column type over the
;; data type.
(format nil "~a~:[~; not null~]~:[~; default ~a~]"
source-ctype
source-not-null
source-default
(format-pgsql-default-value source-default using)))))
(defun apply-casting-rules (dtype ctype default nullable extra
&key
table-name column-name ; ENUM support
(rules (append *cast-rules*
*default-cast-rules*)))
"Apply the given RULES to the MySQL SOURCE type definition"
(let* ((typemod (parse-column-typemod dtype ctype))
(not-null (string-equal nullable "NO"))
(auto-increment (string= "auto_increment" extra))
(source `(:table-name ,table-name
:column-name ,column-name
:type ,dtype
:ctype ,ctype
,@(when typemod (list :typemod typemod))
:default ,default
:not-null ,not-null
:auto-increment ,auto-increment)))
(let (first-match-using)
(loop
for rule in rules
for (target using) = (destructuring-bind (&key target using)
(cast-rule-matches rule source)
(list target using))
do (when (and (null target) using (null first-match-using))
(setf first-match-using using))
until target
finally
(return
(list :transform-fn (or first-match-using using)
:pgtype (format-pgsql-type source target using)))))))
(defun cast (table-name column-name dtype ctype default nullable extra)
"Convert a MySQL datatype to a PostgreSQL datatype.
DYTPE is the MySQL data_type and CTYPE the MySQL column_type, for example
that would be int and int(7) or varchar and varchar(25)."
(destructuring-bind (&key pgtype transform-fn &allow-other-keys)
(apply-casting-rules dtype ctype default nullable extra
:table-name table-name
:column-name column-name)
(values pgtype transform-fn)))
(defun test-casts ()
"Just test some cases for the casts"
(let ((*cast-rules*

View File

@ -7,83 +7,93 @@
(defvar *sqlite-db* nil
"The SQLite database connection handler.")
;;
;; The SQLite drive we use maps the CFFI data type mapping functions and
;; gets back proper CL typed objects, where we only want to deal with text.
;;
(defvar *sqlite-to-pgsql*
'(("float" . pgloader.transforms::float-to-string)
("real" . pgloader.transforms::float-to-string)
("double precision" . pgloader.transforms::float-to-string)
("numeric" . pgloader.transforms::float-to-string)
("text" . nil)
("bytea" . pgloader.transforms::byte-vector-to-bytea)
("timestamp" . pgloader.transforms::sqlite-timestamp-to-timestamp)
("timestamptz" . pgloader.transforms::sqlite-timestamp-to-timestamp))
"Transformation functions to use when migrating from SQLite to PostgreSQL.")
(defparameter *sqlite-default-cast-rules*
`((:source (:type "character") :target (:type "text" :drop-typemod t))
(:source (:type "varchar") :target (:type "text" :drop-typemod t))
(:source (:type "nvarchar") :target (:type "text" :drop-typemod t))
(:source (:type "char") :target (:type "text" :drop-typemod t))
(:source (:type "nchar") :target (:type "text" :drop-typemod t))
(:source (:type "clob") :target (:type "text" :drop-typemod t))
(:source (:type "tinyint") :target (:type "smallint"))
(:source (:type "float") :target (:type "float")
:using pgloader.transforms::float-to-string)
(:source (:type "real") :target (:type "real")
:using pgloader.transforms::float-to-string)
(:source (:type "double") :target (:type "double precision")
:using pgloader.transforms::float-to-string)
(:source (:type "numeric") :target (:type "numeric")
:using pgloader.transforms::float-to-string)
(:source (:type "blob") :target (:type "bytea")
:using pgloader.transforms::byte-vector-to-bytea)
(:source (:type "datetime") :target (:type "timestamptz")
:using pgloader.transforms::sqlite-timestamp-to-timestamp)
(:source (:type "timestamp") :target (:type "timestamp")
:using pgloader.transforms::sqlite-timestamp-to-timestamp)
(:source (:type "timestamptz") :target (:type "timestamptz")
:using pgloader.transforms::sqlite-timestamp-to-timestamp))
"Data Type Casting to migrate from SQLite to PostgreSQL")
;;;
;;; SQLite tools connecting to a database
;;;
(defstruct (coldef
(:constructor make-coldef (seq name type nullable default pk-id)))
seq name type nullable default pk-id)
(:constructor make-coldef (table-name
seq name dtype ctype
nullable default pk-id)))
table-name seq name dtype ctype nullable default pk-id)
(defun cast (sqlite-type-name)
"Return the PostgreSQL type name for a given SQLite type name."
(let* ((tokens (remove-if (lambda (token)
(member token '("unsigned" "short")
(defun normalize (sqlite-type-name)
"SQLite only has a notion of what MySQL calls column_type, or ctype in the
CAST machinery. Transform it to the data_type, or dtype."
(let* ((sqlite-type-name (string-downcase sqlite-type-name))
(tokens (remove-if (lambda (token)
(member token '("unsigned" "short"
"varying" "native")
:test #'string-equal))
(sq:split-sequence #\Space sqlite-type-name)))
(sqlite-type-name (first tokens)))
(sq:split-sequence #\Space sqlite-type-name))))
(assert (= 1 (length tokens)))
(cond ((and (<= 8 (length sqlite-type-name))
(string-equal sqlite-type-name "nvarchar" :end1 8)) "text")
(first tokens)))
((string-equal sqlite-type-name "tinyint") "smallint")
((string-equal sqlite-type-name "datetime") "timestamptz")
((string-equal sqlite-type-name "double") "double precision")
((string-equal sqlite-type-name "blob") "bytea")
((string-equal sqlite-type-name "clob") "text")
(defun ctype-to-dtype (sqlite-type-name)
"In SQLite we only get the ctype, e.g. int(7), but here we want the base
data type behind it, e.g. int."
(let* ((ctype (normalize sqlite-type-name))
(paren-pos (position #\( ctype)))
(if paren-pos (subseq ctype 0 paren-pos) ctype)))
(t sqlite-type-name))))
(defun cast-sqlite-column-definition-to-pgsql (sqlite-column)
"Return the PostgreSQL column definition from the MySQL one."
(multiple-value-bind (column fn)
(with-slots (table-name name dtype ctype default nullable)
sqlite-column
(cast table-name name dtype ctype default nullable nil))
;; the SQLite driver smartly maps data to the proper CL type, but the
;; pgloader API only wants to see text representations to send down the
;; COPY protocol.
(values column (or fn (lambda (val) (if val (format nil "~a" val) :null))))))
(defun transformation-function (pgsql-type-name)
"Return the transformation function to use to switch a SQLite value to a
PostgreSQL value of type PGSQL-TYPE-NAME."
(let* ((type-name
(cond ((and (<= 7 (length pgsql-type-name))
(string-equal "numeric" pgsql-type-name :end2 7))
"numeric")
(t pgsql-type-name)))
(transform (assoc type-name *sqlite-to-pgsql* :test #'string=)))
(if transform
(cdr transform)
(compile nil (lambda (c) (when c (format nil "~a" c)))))))
(defun format-pgsql-default-value (col)
"Return the PostgreSQL representation for the default value of COL."
(declare (type coldef col))
(let ((default (coldef-default col)))
(cond
((null default) "NULL")
((string= "NULL" default) default)
((string= "CURRENT_TIMESTAMP" default) default)
(t
;; apply the transformation function to the default value
(let ((fn (transformation-function (cast (coldef-type col)))))
(if fn (funcall fn default) (format nil "'~a'" default)))))))
(defmethod cast-to-bytea-p ((col coldef))
"Returns a generalized boolean, non-nil when the column is casted to a
PostgreSQL bytea column."
(string= "bytea" (cast-sqlite-column-definition-to-pgsql col)))
(defmethod format-pgsql-column ((col coldef) &key identifier-case)
"Return a string representing the PostgreSQL column definition."
(let* ((column-name
(apply-identifier-case (coldef-name col) identifier-case))
(type-definition
(format nil
"~a~:[~; not null~]~@[ default ~a~]"
(cast (coldef-type col))
(coldef-nullable col)
(format-pgsql-default-value col))))
(with-slots (table-name name dtype ctype nullable default)
col
(cast table-name name dtype ctype default nullable nil))))
(format nil "~a ~22t ~a" column-name type-definition)))
(defun list-tables (&optional (db *sqlite-db*))
@ -99,7 +109,14 @@
(let ((sql (format nil "PRAGMA table_info(~a)" table-name)))
(loop for (seq name type nullable default pk-id) in
(sqlite:execute-to-list db sql)
collect (make-coldef seq name type (= 1 nullable) default pk-id))))
collect (make-coldef table-name
seq
name
(ctype-to-dtype (normalize type))
(normalize type)
(= 1 nullable)
(unquote default)
pk-id))))
(defun list-all-columns (&optional (db *sqlite-db*))
"Get the list of SQLite column definitions per table."
@ -168,12 +185,15 @@
(unless (slot-boundp source 'fields)
(setf (slot-value source 'fields) fields))
(unless transforms
(setf (slot-value source 'transforms)
(loop for field in fields
collect
(let ((coltype (cast (coldef-type field))))
(transformation-function coltype))))))))
(loop for field in fields
for (column fn) = (multiple-value-bind (column fn)
(cast-sqlite-column-definition-to-pgsql field)
(list column fn))
collect column into columns
collect fn into fns
finally (progn (setf (slot-value source 'columns) columns)
(unless transforms
(setf (slot-value source 'transforms) fns)))))))
;;; Map a function to each row extracted from SQLite
;;;
@ -182,10 +202,7 @@
argument (a list of column values) for each row"
(let ((sql (format nil "SELECT * FROM ~a" (source sqlite)))
(blobs-p
(coerce (mapcar (lambda (field)
(string-equal "bytea" (cast (coldef-type field))))
(fields sqlite))
'vector)))
(coerce (mapcar #'cast-to-bytea-p (fields sqlite)) 'vector)))
(handler-case
(loop
with statement = (sqlite:prepare-statement (db sqlite) sql)

View File

@ -5,10 +5,6 @@
;;; up in the pgloader.transforms package, when using the default USING
;;; syntax for transformations.
(defpackage #:pgloader.transforms
(:use #:cl)
(:export #:intern-symbol))
(in-package :pgloader.transforms)
(declaim (inline intern-symbol

View File

@ -61,3 +61,15 @@
when (and new-word (not (char= char #\_))) collect #\_
collect (char-downcase char))
'string))
;;;
;;; Unquote SQLite default values, might be useful elsewhere
;;;
(defun unquote (string &optional (quote #\'))
"Given '0', returns 0."
(declare (type (or null simple-string) string))
(when string
(let ((l (length string)))
(if (char= quote (aref string 0) (aref string (1- l)))
(subseq string 1 (1- l))
string))))

View File

@ -4,4 +4,6 @@ load database
-- with include drop, create tables, create indexes, reset sequences
cast column character.f1 to text drop typemod
set work_mem to '16MB', maintenance_work_mem to '512 MB';

Binary file not shown.