Rewrite the SQLite type name parsing.

SQLite being very very liberal in type names (I think it accepts anything
and everything actually), our simple approach of tokenizing the input and
discarding noise words is not enough.

In this patch, we implement a new light parser for the SQLite type names to
better cope with noise words and random spacing of the catalog values that
SQLite failed to normalize. Well it didn't attempt, apparently.

Fix #548.
This commit is contained in:
Dimitri Fontaine 2017-11-28 18:19:12 +01:00
parent 2b861a3e96
commit 52f13456d9
6 changed files with 71 additions and 29 deletions

View File

@ -224,6 +224,7 @@
(:file "command-sqlite")
(:file "command-archive")
(:file "command-parser")
(:file "parse-sqlite-type-name")
(:file "date-format")))
;; the main entry file, used when building a stand-alone

View File

@ -772,6 +772,7 @@
#:parse-cli-casts
#:parse-sql-file
#:parse-target-pg-db-uri
#:parse-sqlite-type-name
;; connection types / classes symbols for use in main
#:connection

View File

@ -0,0 +1,47 @@
;;;
;;; SQLite Type Names might be complex enough to warrant a full blown
;;; parsing activity.
;;;
(in-package :pgloader.parser)
(defrule extra-qualifiers (and (? " ")
(or (~ "unsigned")
(~ "short")
(~ "varying")
(~ "native")
(~ "nocase")
(~ "auto_increment"))
(? " "))
(:lambda (noise) (second noise)))
(defrule sqlite-single-typemod (and #\( (+ (digit-char-p character)) #\))
(:lambda (st) (cons (parse-integer (text (second st))) nil)))
(defrule sqlite-double-typemod (and #\(
(+ (digit-char-p character))
(* (or #\, #\Space))
(+ (digit-char-p character))
#\))
(:lambda (dt) (cons (parse-integer (text (second dt)))
(parse-integer (text (fourth dt))))))
(defrule sqlite-typemod (or sqlite-double-typemod sqlite-single-typemod))
(defrule sqlite-type-name (and (* extra-qualifiers)
(+ (alpha-char-p character))
(* extra-qualifiers)
(* #\Space)
(? sqlite-typemod)
(* #\Space)
(* extra-qualifiers))
(:lambda (tn) (list (text (second tn))
(fifth tn)
(remove-if #'null
(append (first tn) (third tn) (seventh tn))))))
(defun parse-sqlite-type-name (type-name)
(if (string= type-name "")
;; yes SQLite allows for empty type names
"text"
(values-list (parse 'sqlite-type-name (string-downcase type-name)))))

View File

@ -32,7 +32,7 @@
(:source (:type "double") :target (:type "double precision")
:using pgloader.transforms::float-to-string)
(:source (:type "numeric") :target (:type "numeric")
(:source (:type "numeric") :target (:type "numeric" :drop-typemod nil)
:using pgloader.transforms::float-to-string)
(:source (:type "blob") :target (:type "bytea")
@ -57,28 +57,19 @@
(defun normalize (sqlite-type-name)
"SQLite only has a notion of what MySQL calls column_type, or ctype in the
CAST machinery. Transform it to the data_type, or dtype."
(if (string= sqlite-type-name "")
;; yes SQLite allows for empty type names
"text"
(let* ((sqlite-type-name (string-downcase sqlite-type-name))
(tokens (remove-if (lambda (token)
(or (member token '("unsigned" "short"
"varying" "native"
"nocase"
"auto_increment")
:test #'string-equal)
;; remove typemod too, as in "integer (8)"
(char= #\( (aref token 0))))
(sq:split-sequence #\Space sqlite-type-name))))
(assert (= 1 (length tokens)))
(first tokens))))
(multiple-value-bind (type-name typmod extra-noise-words)
(pgloader.parser:parse-sqlite-type-name sqlite-type-name)
(declare (ignore extra-noise-words))
(if typmod
(format nil "~a(~a~@[,~a~])" type-name (car typmod) (cdr typmod))
type-name)))
(defun ctype-to-dtype (sqlite-type-name)
"In SQLite we only get the ctype, e.g. int(7), but here we want the base
data type behind it, e.g. int."
(let* ((ctype (normalize sqlite-type-name))
(paren-pos (position #\( ctype)))
(if paren-pos (subseq ctype 0 paren-pos) ctype)))
;; parse-sqlite-type-name returns multiple values, here we only need the
;; first one: (type-name typmod extra-noise-words)
(pgloader.parser:parse-sqlite-type-name sqlite-type-name))
(defmethod cast ((col coldef) &key &allow-other-keys)
"Return the PostgreSQL type definition from given SQLite column definition."

View File

@ -78,16 +78,18 @@
"Return the list of columns found in TABLE-NAME."
(let* ((table-name (table-source-name table))
(sql (format nil "PRAGMA table_info(`~a`)" table-name)))
(loop :for (ctid name type nullable default pk-id) :in
(sqlite:execute-to-list db sql)
:do (let ((field (make-coldef table-name
ctid
name
(ctype-to-dtype (normalize type))
(normalize type)
(= 1 nullable)
(unquote default)
pk-id)))
(loop :for (ctid name type nullable default pk-id)
:in (sqlite:execute-to-list db sql)
:do (let* ((ctype (normalize type))
(dtype (ctype-to-dtype type))
(field (make-coldef table-name
ctid
name
dtype
ctype
(= 1 nullable)
(unquote default)
pk-id)))
(when (and db-has-sequences
(not (zerop pk-id))
(string-equal (coldef-ctype field) "integer"))

Binary file not shown.