From e23de0ce9fc4df15bd8009b96fdc2d0b7cd4baa4 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Sun, 22 Nov 2015 22:10:26 +0100 Subject: [PATCH] Improve SQLite table names filtering. Filter the list of tables we migrate directly in the SQLite query, so that we avoid returning useless data. To do that, use the LIKE pattern matching supported by SQLite, because the REGEXP operator is apparently only available when extra features are loaded. See #310, where filtering out the view still caused errors while loading. --- pgloader.1 | 22 ++++-- pgloader.1.md | 18 ++--- pgloader.asd | 1 + src/parsers/command-mssql.lisp | 23 ++---- src/parsers/command-sqlite.lisp | 16 +++- src/sources/sqlite/sqlite-schema.lisp | 107 +++++++++++--------------- src/sources/sqlite/sqlite.lisp | 12 +-- test/sqlite-chinook.load | 2 + 8 files changed, 99 insertions(+), 102 deletions(-) diff --git a/pgloader.1 b/pgloader.1 index bc31c6b..ff5438a 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -1,7 +1,7 @@ .\" generated with Ronn/v0.7.3 .\" http://github.com/rtomayko/ronn/tree/0.7.3 . -.TH "PGLOADER" "1" "August 2015" "ff" "" +.TH "PGLOADER" "1" "November 2015" "ff" "" . .SH "NAME" \fBpgloader\fR \- PostgreSQL data loader @@ -1450,7 +1450,7 @@ This command instructs pgloader to load data from an IBM \fBIXF\fR file\. Here\' LOAD IXF FROM data/nsitra\.test1\.ixf INTO postgresql:///pgloader?nsitra\.test1 - WITH truncate, create table + WITH truncate, create table, timezone UTC BEFORE LOAD DO $$ create schema if not exists nsitra; $$, @@ -1502,6 +1502,12 @@ When this option is listed, pgloader creates the table using the meta data found .IP This options expects as its value the possibly qualified name of the table to create\. . +.IP "\(bu" 4 +\fItimezone\fR +. +.IP +This options allows to specify which timezone is used when parsing timestamps from an IXF file, and defaults to \fIUTC\fR\. 
Expected values are either \fBUTC\fR, \fBGMT\fR or a single quoted location name such as \fB\'Universal\'\fR or \fB\'Europe/Paris\'\fR\. +. .IP "" 0 . @@ -2367,10 +2373,10 @@ The cast clause allows to specify custom casting rules, either to overload the d Please refer to the MySQL CAST clause for details\. . .IP "\(bu" 4 -\fIINCLUDING ONLY TABLE NAMES MATCHING\fR +\fIINCLUDING ONLY TABLE NAMES LIKE\fR . .IP -Introduce a comma separated list of table names or \fIregular expression\fR used to limit the tables to migrate to a sublist\. +Introduce a comma separated list of table name patterns used to limit the tables to migrate to a sublist\. . .IP Example: @@ -2379,7 +2385,7 @@ Example: . .nf -INCLUDING ONLY TABLE NAMES MATCHING ~/film/, \'actor\' +INCLUDING ONLY TABLE NAMES LIKE \'Invoice%\' . .fi . @@ -2387,16 +2393,16 @@ INCLUDING ONLY TABLE NAMES MATCHING ~/film/, \'actor\' . .IP "\(bu" 4 -\fIEXCLUDING TABLE NAMES MATCHING\fR +\fIEXCLUDING TABLE NAMES LIKE\fR . .IP -Introduce a comma separated list of table names or \fIregular expression\fR used to exclude table names from the migration\. This filter only applies to the result of the \fIINCLUDING\fR filter\. +Introduce a comma separated list of table name patterns used to exclude table names from the migration\. This filter only applies to the result of the \fIINCLUDING\fR filter\. . .IP "" 4 . .nf -EXCLUDING TABLE NAMES MATCHING ~ +EXCLUDING TABLE NAMES LIKE \'appointments\' . .fi . diff --git a/pgloader.1.md b/pgloader.1.md index e391cfa..19315b9 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -1979,22 +1979,22 @@ The `sqlite` command accepts the following clauses and options: Please refer to the MySQL CAST clause for details. - - *INCLUDING ONLY TABLE NAMES MATCHING* + - *INCLUDING ONLY TABLE NAMES LIKE* - Introduce a comma separated list of table names or *regular expression* - used to limit the tables to migrate to a sublist. 
+ Introduce a comma separated list of table name patterns used to limit + the tables to migrate to a sublist. Example: - INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor' + INCLUDING ONLY TABLE NAMES LIKE 'Invoice%' - - *EXCLUDING TABLE NAMES MATCHING* + - *EXCLUDING TABLE NAMES LIKE* - Introduce a comma separated list of table names or *regular expression* - used to exclude table names from the migration. This filter only applies - to the result of the *INCLUDING* filter. + Introduce a comma separated list of table name patterns used to exclude + table names from the migration. This filter only applies to the result + of the *INCLUDING* filter. - EXCLUDING TABLE NAMES MATCHING ~ + EXCLUDING TABLE NAMES LIKE 'appointments' ### DEFAULT SQLite CASTING RULES diff --git a/pgloader.asd b/pgloader.asd index 6d1e9c8..7f0b844 100644 --- a/pgloader.asd +++ b/pgloader.asd @@ -106,6 +106,7 @@ (:file "command-dbf") (:file "command-cast-rules") (:file "command-mysql") + (:file "command-including-like") (:file "command-mssql") (:file "command-sqlite") (:file "command-archive") diff --git a/src/parsers/command-mssql.lisp b/src/parsers/command-mssql.lisp index 987713a..e5259db 100644 --- a/src/parsers/command-mssql.lisp +++ b/src/parsers/command-mssql.lisp @@ -43,19 +43,6 @@ (bind (((_ opts) source)) (cons :mssql-options opts)))) -(defrule like-expression (and "'" (+ (not "'")) "'") - (:lambda (le) - (bind (((_ like _) le)) (text like)))) - -(defrule another-like-expression (and comma like-expression) - (:lambda (source) - (bind (((_ like) source)) like))) - -(defrule filter-list-like (and like-expression (* another-like-expression)) - (:lambda (source) - (destructuring-bind (filter1 filters) source - (list* filter1 filters)))) - (defrule including-in-schema (and kw-including kw-only kw-table kw-names kw-like filter-list-like kw-in kw-schema quoted-namestring) @@ -63,7 +50,8 @@ (bind (((_ _ _ _ _ filter-list _ _ schema) source)) (cons schema filter-list)))) -(defrule 
including-like (and including-in-schema (* including-in-schema)) +(defrule including-like-in-schema + (and including-in-schema (* including-in-schema)) (:lambda (source) (destructuring-bind (inc1 incs) source (cons :including (list* inc1 incs))))) @@ -75,7 +63,8 @@ (bind (((_ _ _ _ filter-list _ _ schema) source)) (cons schema filter-list)))) -(defrule excluding-like (and excluding-in-schema (* excluding-in-schema)) +(defrule excluding-like-in-schema + (and excluding-in-schema (* excluding-in-schema)) (:lambda (source) (destructuring-bind (excl1 excls) source (cons :excluding (list* excl1 excls))))) @@ -89,8 +78,8 @@ casts before-load after-load - including-like - excluding-like)) + including-like-in-schema + excluding-like-in-schema)) (:lambda (clauses-list) (alexandria:alist-plist clauses-list))) diff --git a/src/parsers/command-sqlite.lisp b/src/parsers/command-sqlite.lisp index e38776f..0db2b37 100644 --- a/src/parsers/command-sqlite.lisp +++ b/src/parsers/command-sqlite.lisp @@ -49,6 +49,18 @@ load database (bind (((_ opts) source)) (cons :sqlite-options opts)))) +(defrule including-like + (and kw-including kw-only kw-table kw-names kw-like filter-list-like) + (:lambda (source) + (bind (((_ _ _ _ _ filter-list) source)) + (cons :including filter-list)))) + +(defrule excluding-like + (and kw-excluding kw-table kw-names kw-like filter-list-like) + (:lambda (source) + (bind (((_ _ _ _ filter-list) source)) + (cons :excluding filter-list)))) + (defrule sqlite-db-uri (and "sqlite://" filename) (:lambda (source) (bind (((_ filename) source)) filename))) @@ -77,8 +89,8 @@ load database (defrule load-sqlite-optional-clauses (* (or sqlite-options gucs casts - including-matching - excluding-matching)) + including-like + excluding-like)) (:lambda (clauses-list) (alexandria:alist-plist clauses-list))) diff --git a/src/sources/sqlite/sqlite-schema.lisp b/src/sources/sqlite/sqlite-schema.lisp index 3ef6005..1ab22e7 100644 --- a/src/sources/sqlite/sqlite-schema.lisp +++ 
b/src/sources/sqlite/sqlite-schema.lisp @@ -6,11 +6,32 @@ (defvar *sqlite-db* nil "The SQLite database connection handler.") -(defun list-tables (&optional (db *sqlite-db*)) +(defun filter-list-to-where-clause (filter-list + &optional + not + (table-col "tbl_name")) + "Given an INCLUDING or EXCLUDING clause, turn it into a SQLite WHERE clause." + (mapcar (lambda (table-name) + (format nil "(~a ~:[~;NOT ~]LIKE '~a')" + table-col not table-name)) + filter-list)) + +(defun list-tables (&key + (db *sqlite-db*) + including + excluding) "Return the list of tables found in SQLITE-DB." - (let ((sql "SELECT tbl_name + (let ((sql (format nil "SELECT tbl_name FROM sqlite_master - WHERE type='table' AND tbl_name <> 'sqlite_sequence'")) + WHERE type='table' + AND tbl_name <> 'sqlite_sequence' + ~:[~*~;AND (~{~a~^~&~10t or ~})~] + ~:[~*~;AND (~{~a~^~&~10t and ~})~]" + including ; do we print the clause? + (filter-list-to-where-clause including nil) + excluding ; do we print the clause? + (filter-list-to-where-clause excluding t)))) + (log-message :info "~a" sql) (loop for (name) in (sqlite:execute-to-list db sql) collect name))) @@ -28,10 +49,15 @@ (unquote default) pk-id)))) -(defun list-all-columns (&optional (db *sqlite-db*)) +(defun list-all-columns (&key + (db *sqlite-db*) + including + excluding) "Get the list of SQLite column definitions per table." - (loop for table-name in (list-tables db) - collect (cons table-name (list-columns table-name db)))) + (loop :for table-name :in (list-tables :db db + :including including + :excluding excluding) + :collect (cons table-name (list-columns table-name db)))) (defstruct sqlite-idx name table-name sql) @@ -42,11 +68,22 @@ "Generate the PostgresQL statement to build the given SQLite index definition." (sqlite-idx-sql index)) -(defun list-all-indexes (&optional (db *sqlite-db*)) +(defun list-all-indexes (&key + (db *sqlite-db*) + including + excluding) "Get the list of SQLite index definitions per table." 
- (let ((sql "SELECT name, tbl_name, replace(replace(sql, '[', ''), ']', '') - FROM sqlite_master - WHERE type='index'")) + (let ((sql (format nil + "SELECT name, tbl_name, replace(replace(sql, '[', ''), ']', '') + FROM sqlite_master + WHERE type='index' + ~:[~*~;AND (~{~a~^~&~10t or ~})~] + ~:[~*~;AND (~{~a~^~&~10t and ~})~]" + including ; do we print the clause? + (filter-list-to-where-clause including nil) + excluding ; do we print the clause? + (filter-list-to-where-clause excluding t)))) + (log-message :info "~a" sql) (loop :with schema := nil :for (index-name table-name sql) :in (sqlite:execute-to-list db sql) :when sql @@ -59,53 +96,3 @@ (push-to-end (cons table-name (list idxdef)) schema))) :finally (return schema)))) - -;;; -;;; Filtering lists of columns and indexes -;;; -;;; A list of columns is expected to be an alist of table-name associated -;;; with a list of objects (clos or structures) that define the generic API -;;; described in src/pgsql/schema.lisp -;;; -(defun filter-column-list (all-columns &key only-tables including excluding) - "Apply the filtering defined by the arguments: - - - keep only tables listed in ONLY-TABLES, or all of them if ONLY-TABLES - is nil, - - - then unless EXCLUDING is nil, filter out the resulting list by - applying the EXCLUDING regular expression list to table names in the - all-columns list: we only keep the table names that match none of the - regex in the EXCLUDING list - - - then unless INCLUDING is nil, only keep remaining elements that - matches at least one of the INCLUDING regular expression list." 
- - (labels ((apply-filtering-rule (rule) - (declare (special table-name)) - (typecase rule - (string (string-equal rule table-name)) - (list (destructuring-bind (type val) rule - (ecase type - (:regex (cl-ppcre:scan val table-name))))))) - - (only (entry) - (let ((table-name (first entry))) - (or (null only-tables) - (member table-name only-tables :test #'equal)))) - - (exclude (entry) - (let ((table-name (first entry))) - (declare (special table-name)) - (or (null excluding) - (notany #'apply-filtering-rule excluding)))) - - (include (entry) - (let ((table-name (first entry))) - (declare (special table-name)) - (or (null including) - (some #'apply-filtering-rule including))))) - - (remove-if-not #'include - (remove-if-not #'exclude - (remove-if-not #'only all-columns))))) diff --git a/src/sources/sqlite/sqlite.lisp b/src/sources/sqlite/sqlite.lisp index 8108d1f..0838e34 100644 --- a/src/sources/sqlite/sqlite.lisp +++ b/src/sources/sqlite/sqlite.lisp @@ -125,13 +125,13 @@ :section :pre) (with-connection (conn (source-db sqlite)) (let ((*sqlite-db* (conn-handle conn))) - (setf all-columns (filter-column-list (list-all-columns *sqlite-db*) - :including including - :excluding excluding) + (setf all-columns (list-all-columns :db *sqlite-db* + :including including + :excluding excluding) - all-indexes (filter-column-list (list-all-indexes *sqlite-db*) - :including including - :excluding excluding))) + all-indexes (list-all-indexes :db *sqlite-db* + :including including + :excluding excluding))) ;; return how many objects we're going to deal with in total ;; for stats collection diff --git a/test/sqlite-chinook.load b/test/sqlite-chinook.load index cfe3662..febf85b 100644 --- a/test/sqlite-chinook.load +++ b/test/sqlite-chinook.load @@ -2,6 +2,8 @@ load database from 'sqlite/Chinook_Sqlite_AutoIncrementPKs.sqlite' into postgresql:///pgloader + -- including only table names like 'Invoice%' + with include drop, create tables, create indexes, reset sequences set work_mem 
to '16MB', maintenance_work_mem to '512 MB'; \ No newline at end of file