Improve SQLite table names filtering.

Filter the list of tables we migrate directly in the SQLite query, so
that we avoid fetching useless data. To do that, use the LIKE pattern
matching supported by SQLite, because the REGEXP operator is apparently
only available when extra features are loaded.

See #310, where filtering out the view still caused errors while
loading.
This commit is contained in:
Dimitri Fontaine 2015-11-22 22:10:26 +01:00
parent a81f017222
commit e23de0ce9f
8 changed files with 99 additions and 102 deletions

View File

@ -1,7 +1,7 @@
.\" generated with Ronn/v0.7.3
.\" http://github.com/rtomayko/ronn/tree/0.7.3
.
.TH "PGLOADER" "1" "August 2015" "ff" ""
.TH "PGLOADER" "1" "November 2015" "ff" ""
.
.SH "NAME"
\fBpgloader\fR \- PostgreSQL data loader
@ -1450,7 +1450,7 @@ This command instructs pgloader to load data from an IBM \fBIXF\fR file\. Here\'
LOAD IXF
FROM data/nsitra\.test1\.ixf
INTO postgresql:///pgloader?nsitra\.test1
WITH truncate, create table
WITH truncate, create table, timezone UTC
BEFORE LOAD DO
$$ create schema if not exists nsitra; $$,
@ -1502,6 +1502,12 @@ When this option is listed, pgloader creates the table using the meta data found
.IP
This option expects as its value the possibly qualified name of the table to create\.
.
.IP "\(bu" 4
\fItimezone\fR
.
.IP
This option allows specifying which timezone is used when parsing timestamps from an IXF file, and defaults to \fIUTC\fR\. Expected values are either \fBUTC\fR, \fBGMT\fR or a single quoted location name such as \fB\'Universal\'\fR or \fB\'Europe/Paris\'\fR\.
.
.IP "" 0
.
@ -2367,10 +2373,10 @@ The cast clause allows to specify custom casting rules, either to overload the d
Please refer to the MySQL CAST clause for details\.
.
.IP "\(bu" 4
\fIINCLUDING ONLY TABLE NAMES MATCHING\fR
\fIINCLUDING ONLY TABLE NAMES LIKE\fR
.
.IP
Introduce a comma separated list of table names or \fIregular expression\fR used to limit the tables to migrate to a sublist\.
Introduce a comma separated list of table name patterns used to limit the tables to migrate to a sublist\.
.
.IP
Example:
@ -2379,7 +2385,7 @@ Example:
.
.nf
INCLUDING ONLY TABLE NAMES MATCHING ~/film/, \'actor\'
INCLUDING ONLY TABLE NAMES LIKE \'Invoice%\'
.
.fi
.
@ -2387,16 +2393,16 @@ INCLUDING ONLY TABLE NAMES MATCHING ~/film/, \'actor\'
.
.IP "\(bu" 4
\fIEXCLUDING TABLE NAMES MATCHING\fR
\fIEXCLUDING TABLE NAMES LIKE\fR
.
.IP
Introduce a comma separated list of table names or \fIregular expression\fR used to exclude table names from the migration\. This filter only applies to the result of the \fIINCLUDING\fR filter\.
Introduce a comma separated list of table name patterns used to exclude table names from the migration\. This filter only applies to the result of the \fIINCLUDING\fR filter\.
.
.IP "" 4
.
.nf
EXCLUDING TABLE NAMES MATCHING ~<ory>
EXCLUDING TABLE NAMES LIKE \'appointments\'
.
.fi
.

View File

@ -1979,22 +1979,22 @@ The `sqlite` command accepts the following clauses and options:
Please refer to the MySQL CAST clause for details.
- *INCLUDING ONLY TABLE NAMES MATCHING*
- *INCLUDING ONLY TABLE NAMES LIKE*
Introduce a comma separated list of table names or *regular expression*
used to limit the tables to migrate to a sublist.
Introduce a comma separated list of table name patterns used to limit
the tables to migrate to a sublist.
Example:
INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor'
INCLUDING ONLY TABLE NAMES LIKE 'Invoice%'
- *EXCLUDING TABLE NAMES MATCHING*
- *EXCLUDING TABLE NAMES LIKE*
Introduce a comma separated list of table names or *regular expression*
used to exclude table names from the migration. This filter only applies
to the result of the *INCLUDING* filter.
Introduce a comma separated list of table name patterns used to exclude
table names from the migration. This filter only applies to the result
of the *INCLUDING* filter.
EXCLUDING TABLE NAMES MATCHING ~<ory>
EXCLUDING TABLE NAMES LIKE 'appointments'
### DEFAULT SQLite CASTING RULES

View File

@ -106,6 +106,7 @@
(:file "command-dbf")
(:file "command-cast-rules")
(:file "command-mysql")
(:file "command-including-like")
(:file "command-mssql")
(:file "command-sqlite")
(:file "command-archive")

View File

@ -43,19 +43,6 @@
(bind (((_ opts) source))
(cons :mssql-options opts))))
(defrule like-expression (and "'" (+ (not "'")) "'")
(:lambda (le)
(bind (((_ like _) le)) (text like))))
(defrule another-like-expression (and comma like-expression)
(:lambda (source)
(bind (((_ like) source)) like)))
(defrule filter-list-like (and like-expression (* another-like-expression))
(:lambda (source)
(destructuring-bind (filter1 filters) source
(list* filter1 filters))))
(defrule including-in-schema
(and kw-including kw-only kw-table kw-names kw-like filter-list-like
kw-in kw-schema quoted-namestring)
@ -63,7 +50,8 @@
(bind (((_ _ _ _ _ filter-list _ _ schema) source))
(cons schema filter-list))))
(defrule including-like (and including-in-schema (* including-in-schema))
(defrule including-like-in-schema
(and including-in-schema (* including-in-schema))
(:lambda (source)
(destructuring-bind (inc1 incs) source
(cons :including (list* inc1 incs)))))
@ -75,7 +63,8 @@
(bind (((_ _ _ _ filter-list _ _ schema) source))
(cons schema filter-list))))
(defrule excluding-like (and excluding-in-schema (* excluding-in-schema))
(defrule excluding-like-in-schema
(and excluding-in-schema (* excluding-in-schema))
(:lambda (source)
(destructuring-bind (excl1 excls) source
(cons :excluding (list* excl1 excls)))))
@ -89,8 +78,8 @@
casts
before-load
after-load
including-like
excluding-like))
including-like-in-schema
excluding-like-in-schema))
(:lambda (clauses-list)
(alexandria:alist-plist clauses-list)))

View File

@ -49,6 +49,18 @@ load database
(bind (((_ opts) source))
(cons :sqlite-options opts))))
(defrule including-like
(and kw-including kw-only kw-table kw-names kw-like filter-list-like)
(:lambda (source)
(bind (((_ _ _ _ _ filter-list) source))
(cons :including filter-list))))
(defrule excluding-like
(and kw-excluding kw-table kw-names kw-like filter-list-like)
(:lambda (source)
(bind (((_ _ _ _ filter-list) source))
(cons :excluding filter-list))))
(defrule sqlite-db-uri (and "sqlite://" filename)
(:lambda (source)
(bind (((_ filename) source)) filename)))
@ -77,8 +89,8 @@ load database
(defrule load-sqlite-optional-clauses (* (or sqlite-options
gucs
casts
including-matching
excluding-matching))
including-like
excluding-like))
(:lambda (clauses-list)
(alexandria:alist-plist clauses-list)))

View File

@ -6,11 +6,32 @@
(defvar *sqlite-db* nil
"The SQLite database connection handler.")
(defun list-tables (&optional (db *sqlite-db*))
(defun filter-list-to-where-clause (filter-list
&optional
not
(table-col "tbl_name"))
"Given an INCLUDING or EXCLUDING clause, turn it into a SQLite WHERE clause."
(mapcar (lambda (table-name)
(format nil "(~a ~:[~;NOT ~]LIKE '~a')"
table-col not table-name))
filter-list))
(defun list-tables (&key
(db *sqlite-db*)
including
excluding)
"Return the list of tables found in SQLITE-DB."
(let ((sql "SELECT tbl_name
(let ((sql (format nil "SELECT tbl_name
FROM sqlite_master
WHERE type='table' AND tbl_name <> 'sqlite_sequence'"))
WHERE type='table'
AND tbl_name <> 'sqlite_sequence'
~:[~*~;AND (~{~a~^~&~10t or ~})~]
~:[~*~;AND (~{~a~^~&~10t and ~})~]"
including ; do we print the clause?
(filter-list-to-where-clause including nil)
excluding ; do we print the clause?
(filter-list-to-where-clause excluding t))))
(log-message :info "~a" sql)
(loop for (name) in (sqlite:execute-to-list db sql)
collect name)))
@ -28,10 +49,15 @@
(unquote default)
pk-id))))
(defun list-all-columns (&optional (db *sqlite-db*))
(defun list-all-columns (&key
(db *sqlite-db*)
including
excluding)
"Get the list of SQLite column definitions per table."
(loop for table-name in (list-tables db)
collect (cons table-name (list-columns table-name db))))
(loop :for table-name :in (list-tables :db db
:including including
:excluding excluding)
:collect (cons table-name (list-columns table-name db))))
(defstruct sqlite-idx name table-name sql)
@ -42,11 +68,22 @@
"Generate the PostgresQL statement to build the given SQLite index definition."
(sqlite-idx-sql index))
(defun list-all-indexes (&optional (db *sqlite-db*))
(defun list-all-indexes (&key
(db *sqlite-db*)
including
excluding)
"Get the list of SQLite index definitions per table."
(let ((sql "SELECT name, tbl_name, replace(replace(sql, '[', ''), ']', '')
FROM sqlite_master
WHERE type='index'"))
(let ((sql (format nil
"SELECT name, tbl_name, replace(replace(sql, '[', ''), ']', '')
FROM sqlite_master
WHERE type='index'
~:[~*~;AND (~{~a~^~&~10t or ~})~]
~:[~*~;AND (~{~a~^~&~10t and ~})~]"
including ; do we print the clause?
(filter-list-to-where-clause including nil)
excluding ; do we print the clause?
(filter-list-to-where-clause excluding t))))
(log-message :info "~a" sql)
(loop :with schema := nil
:for (index-name table-name sql) :in (sqlite:execute-to-list db sql)
:when sql
@ -59,53 +96,3 @@
(push-to-end (cons table-name (list idxdef)) schema)))
:finally (return schema))))
;;;
;;; Filtering lists of columns and indexes
;;;
;;; A list of columns is expected to be an alist of table-name associated
;;; with a list of objects (clos or structures) that define the generic API
;;; described in src/pgsql/schema.lisp
;;;
(defun filter-column-list (all-columns &key only-tables including excluding)
"Apply the filtering defined by the arguments:
- keep only tables listed in ONLY-TABLES, or all of them if ONLY-TABLES
is nil,
- then unless EXCLUDING is nil, filter out the resulting list by
applying the EXCLUDING regular expression list to table names in the
all-columns list: we only keep the table names that match none of the
regex in the EXCLUDING list
- then unless INCLUDING is nil, only keep remaining elements that
matches at least one of the INCLUDING regular expression list."
(labels ((apply-filtering-rule (rule)
(declare (special table-name))
(typecase rule
(string (string-equal rule table-name))
(list (destructuring-bind (type val) rule
(ecase type
(:regex (cl-ppcre:scan val table-name)))))))
(only (entry)
(let ((table-name (first entry)))
(or (null only-tables)
(member table-name only-tables :test #'equal))))
(exclude (entry)
(let ((table-name (first entry)))
(declare (special table-name))
(or (null excluding)
(notany #'apply-filtering-rule excluding))))
(include (entry)
(let ((table-name (first entry)))
(declare (special table-name))
(or (null including)
(some #'apply-filtering-rule including)))))
(remove-if-not #'include
(remove-if-not #'exclude
(remove-if-not #'only all-columns)))))

View File

@ -125,13 +125,13 @@
:section :pre)
(with-connection (conn (source-db sqlite))
(let ((*sqlite-db* (conn-handle conn)))
(setf all-columns (filter-column-list (list-all-columns *sqlite-db*)
:including including
:excluding excluding)
(setf all-columns (list-all-columns :db *sqlite-db*
:including including
:excluding excluding)
all-indexes (filter-column-list (list-all-indexes *sqlite-db*)
:including including
:excluding excluding)))
all-indexes (list-all-indexes :db *sqlite-db*
:including including
:excluding excluding)))
;; return how many objects we're going to deal with in total
;; for stats collection

View File

@ -2,6 +2,8 @@ load database
from 'sqlite/Chinook_Sqlite_AutoIncrementPKs.sqlite'
into postgresql:///pgloader
-- including only table names like 'Invoice%'
with include drop, create tables, create indexes, reset sequences
set work_mem to '16MB', maintenance_work_mem to '512 MB';