From 52f13456d901c7ba28c2562e3be3cfa51c4d883e Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Tue, 28 Nov 2017 18:19:12 +0100 Subject: [PATCH] Rewrite the SQLite type name parsing. SQLite being very very liberal in type names (I think it accepts anything and everything actually), our simple approach of tokenizing the input and discarding noise words is not enough. In this patch, we implement a new light parser for the SQLite type names to better cope with noise words and random spacing of the catalog values that SQLite failed to normalize. Well it didn't attempt, apparently. Fix #548. --- pgloader.asd | 1 + src/package.lisp | 1 + src/parsers/parse-sqlite-type-name.lisp | 47 ++++++++++++++++++++++ src/sources/sqlite/sqlite-cast-rules.lisp | 29 +++++-------- src/sources/sqlite/sqlite-schema.lisp | 22 +++++----- test/sqlite/sqlite.db | Bin 110592 -> 114688 bytes 6 files changed, 71 insertions(+), 29 deletions(-) create mode 100644 src/parsers/parse-sqlite-type-name.lisp diff --git a/pgloader.asd b/pgloader.asd index 6c4b10a..1e6daaf 100644 --- a/pgloader.asd +++ b/pgloader.asd @@ -224,6 +224,7 @@ (:file "command-sqlite") (:file "command-archive") (:file "command-parser") + (:file "parse-sqlite-type-name") (:file "date-format"))) ;; the main entry file, used when building a stand-alone diff --git a/src/package.lisp b/src/package.lisp index d92e694..09251e5 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -772,6 +772,7 @@ #:parse-cli-casts #:parse-sql-file #:parse-target-pg-db-uri + #:parse-sqlite-type-name ;; connection types / classes symbols for use in main #:connection diff --git a/src/parsers/parse-sqlite-type-name.lisp b/src/parsers/parse-sqlite-type-name.lisp new file mode 100644 index 0000000..81d9a0a --- /dev/null +++ b/src/parsers/parse-sqlite-type-name.lisp @@ -0,0 +1,47 @@ +;;; +;;; SQLite Type Names might be complex enough to warrant a full blown +;;; parsing activity. +;;; + +(in-package :pgloader.parser) + +(defrule extra-qualifiers (and (? " ") + (or (~ "unsigned") + (~ "short") + (~ "varying") + (~ "native") + (~ "nocase") + (~ "auto_increment")) + (? " ")) + (:lambda (noise) (second noise))) + +(defrule sqlite-single-typemod (and #\( (+ (digit-char-p character)) #\)) + (:lambda (st) (cons (parse-integer (text (second st))) nil))) + +(defrule sqlite-double-typemod (and #\( + (+ (digit-char-p character)) + (* (or #\, #\Space)) + (+ (digit-char-p character)) + #\)) + (:lambda (dt) (cons (parse-integer (text (second dt))) + (parse-integer (text (fourth dt)))))) + +(defrule sqlite-typemod (or sqlite-double-typemod sqlite-single-typemod)) + +(defrule sqlite-type-name (and (* extra-qualifiers) + (+ (alpha-char-p character)) + (* extra-qualifiers) + (* #\Space) + (? sqlite-typemod) + (* #\Space) + (* extra-qualifiers)) + (:lambda (tn) (list (text (second tn)) + (fifth tn) + (remove-if #'null + (append (first tn) (third tn) (seventh tn)))))) + +(defun parse-sqlite-type-name (type-name) + (if (string= type-name "") + ;; yes SQLite allows for empty type names + "text" + (values-list (parse 'sqlite-type-name (string-downcase type-name))))) diff --git a/src/sources/sqlite/sqlite-cast-rules.lisp b/src/sources/sqlite/sqlite-cast-rules.lisp index 3f641b0..beca485 100644 --- a/src/sources/sqlite/sqlite-cast-rules.lisp +++ b/src/sources/sqlite/sqlite-cast-rules.lisp @@ -32,7 +32,7 @@ (:source (:type "double") :target (:type "double precision") :using pgloader.transforms::float-to-string) - (:source (:type "numeric") :target (:type "numeric") + (:source (:type "numeric") :target (:type "numeric" :drop-typemod nil) :using pgloader.transforms::float-to-string) (:source (:type "blob") :target (:type "bytea") @@ -57,28 +57,19 @@ (defun normalize (sqlite-type-name) "SQLite only has a notion of what MySQL calls column_type, or ctype in the CAST machinery. Transform it to the data_type, or dtype." - (if (string= sqlite-type-name "") - ;; yes SQLite allows for empty type names - "text" - (let* ((sqlite-type-name (string-downcase sqlite-type-name)) - (tokens (remove-if (lambda (token) - (or (member token '("unsigned" "short" - "varying" "native" - "nocase" - "auto_increment") - :test #'string-equal) - ;; remove typemod too, as in "integer (8)" - (char= #\( (aref token 0)))) - (sq:split-sequence #\Space sqlite-type-name)))) - (assert (= 1 (length tokens))) - (first tokens)))) + (multiple-value-bind (type-name typmod extra-noise-words) + (pgloader.parser:parse-sqlite-type-name sqlite-type-name) + (declare (ignore extra-noise-words)) + (if typmod + (format nil "~a(~a~@[,~a~])" type-name (car typmod) (cdr typmod)) + type-name))) (defun ctype-to-dtype (sqlite-type-name) "In SQLite we only get the ctype, e.g. int(7), but here we want the base data type behind it, e.g. int." - (let* ((ctype (normalize sqlite-type-name)) - (paren-pos (position #\( ctype))) - (if paren-pos (subseq ctype 0 paren-pos) ctype))) + ;; parse-sqlite-type-name returns multiple values, here we only need the + ;; first one: (type-name typmod extra-noise-words) + (pgloader.parser:parse-sqlite-type-name sqlite-type-name)) (defmethod cast ((col coldef) &key &allow-other-keys) "Return the PostgreSQL type definition from given SQLite column definition." diff --git a/src/sources/sqlite/sqlite-schema.lisp b/src/sources/sqlite/sqlite-schema.lisp index 3135995..32af50e 100644 --- a/src/sources/sqlite/sqlite-schema.lisp +++ b/src/sources/sqlite/sqlite-schema.lisp @@ -78,16 +78,18 @@ "Return the list of columns found in TABLE-NAME." (let* ((table-name (table-source-name table)) (sql (format nil "PRAGMA table_info(`~a`)" table-name))) - (loop :for (ctid name type nullable default pk-id) :in - (sqlite:execute-to-list db sql) - :do (let ((field (make-coldef table-name - ctid - name - (ctype-to-dtype (normalize type)) - (normalize type) - (= 1 nullable) - (unquote default) - pk-id))) + (loop :for (ctid name type nullable default pk-id) + :in (sqlite:execute-to-list db sql) + :do (let* ((ctype (normalize type)) + (dtype (ctype-to-dtype type)) + (field (make-coldef table-name + ctid + name + dtype + ctype + (= 1 nullable) + (unquote default) + pk-id))) (when (and db-has-sequences (not (zerop pk-id)) (string-equal (coldef-ctype field) "integer")) diff --git a/test/sqlite/sqlite.db b/test/sqlite/sqlite.db index 3f3cdf4dae69e00d8d1bb81caa7532f1ca8af49f..bbe1f5fdb7d0bed865d271ae27567d4b6ee1cfa7 100644 GIT binary patch delta 144 zcmZp8z}C>fK0#X0l7WFi28dxmd7_Rnk0pbiz;9mee+(iV+a_kKGjVL&xKN41&qst! zTvU{?wj?nrC$+@X!~%q6oP%5)LtGU?9G!ez6+j{yY5DmId8N6jMVZMOMg}?xW|}$* dNr^>pF+(FA6HU!#7S8P~oQ!{#ElXe&005-pB^&?% delta 59 zcmZo@U~hQ9HbGj@n1O*o8i-*)cA}0kk1>Ouz;9mee+