Implement a Materialize Views option for MySQL.

This commit is contained in:
Dimitri Fontaine 2013-11-16 21:35:17 +01:00
parent eb468f92fd
commit 718ac80560
6 changed files with 151 additions and 10 deletions

View File

@ -720,7 +720,9 @@ Here's an example:
type date drop not null drop default using zero-dates-to-null,
-- type tinyint to boolean using tinyint-to-boolean,
type year to integer
MATERIALIZE VIEWS film_list, staff_list
-- INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor'
-- EXCLUDING TABLE NAMES MATCHING ~<ory>
@ -882,6 +884,24 @@ The `database` command accepts the following clauses and options:
be found in the `pgloader.transforms` Common Lisp package. See above
for details.
- *MATERIALIZE VIEWS*
This clause allows you to implement custom data processing at the data
source by providing a *view definition* against which pgloader will
query the data. It's not possible to just allow for plain `SQL` because
we want to know a lot about the exact data types of each column involved
in the query output.
This clause expects a comma-separated list of view definitions, each one
being either the name of an existing view in your database or the
following expression:
*name* `AS` `$$` *sql query* `$$`
The *name* and the *sql query* will be used in a `CREATE VIEW` statement
at the beginning of the data loading, and the resulting view will then
be dropped at the end of the data loading.
- *INCLUDING ONLY TABLE NAMES MATCHING*
Introduce a comma separated list of table names or *regular expression*

View File

@ -106,6 +106,7 @@
(def-keyword-rule "header")
(def-keyword-rule "null")
(def-keyword-rule "if")
(def-keyword-rule "as")
(def-keyword-rule "blanks")
(def-keyword-rule "date")
(def-keyword-rule "format")
@ -117,11 +118,13 @@
(def-keyword-rule "only")
(def-keyword-rule "drop")
(def-keyword-rule "create")
(def-keyword-rule "materialize")
(def-keyword-rule "reset")
(def-keyword-rule "table")
(def-keyword-rule "name")
(def-keyword-rule "names")
(def-keyword-rule "tables")
(def-keyword-rule "views")
(def-keyword-rule "indexes")
(def-keyword-rule "sequences")
(def-keyword-rule "foreign")
@ -674,6 +677,39 @@
(destructuring-bind (c casts) source
(declare (ignore c))
casts)))
;;;
;;; Materialize views by copying their data over, allows for doing advanced
;;; ETL processing by having parts of the processing happen on the MySQL
;;; query side.
;;;
;; A view name is one alphabetic character followed by any number of
;; alphabetic characters, digits or underscores. The production is the
;; matched text, as a string.
(defrule view-name (and (alpha-char-p character)
                        (* (or #\_
                               (digit-char-p character)
                               (alpha-char-p character))))
  (:text t))
;; The definition part of a view: the AS keyword followed by a dollar-quoted
;; query. Only the production of the dollar-quoted rule is kept.
(defrule view-sql (and kw-as dollar-quoted)
  (:destructure (keyword query)
    (declare (ignore keyword))
    query))
;; A view is given by its name and, optionally, its SQL definition. The
;; production is a (NAME . QUERY) cons, where QUERY is NIL when the AS part
;; has been omitted.
(defrule view-definition (and view-name (? view-sql))
  (:destructure (view query)
    (cons view query)))
;; Any view definition after the first one: a comma, whitespace, then the
;; definition itself, which is the only part kept in the production.
(defrule another-view-definition (and #\, ignore-whitespace view-definition)
  (:destructure (separator blanks definition)
    (declare (ignore separator blanks))
    definition))
;; One view definition followed by zero or more comma-prefixed ones; the
;; production is the flat list of all the definitions, in source order.
(defrule views-list (and view-definition (* another-view-definition))
  (:destructure (head tail)
    (cons head tail)))
;; The MATERIALIZE VIEWS clause: both keywords are dropped and the production
;; is the list built by the views-list rule.
(defrule materialize-views (and kw-materialize kw-views views-list)
  (:destructure (materialize views-kw definitions)
    (declare (ignore materialize views-kw))
    definitions))
;;;
;;; Including only some tables or excluding some others
@ -713,13 +749,14 @@
(? mysql-options)
(? gucs)
(? casts)
(? materialize-views)
(? including)
(? excluding)
(? before-load-do)
(? after-load-do))
(:lambda (source)
(destructuring-bind (my-db-uri pg-db-uri options
gucs casts
gucs casts views
incl excl
before after)
source
@ -767,6 +804,7 @@
`(:only-tables ',(list table-name)))
:including ',incl
:excluding ',excl
:materialize-views ',views
:state-before state-before
:state-after state-after
:state-indexes state-idx

View File

@ -96,16 +96,66 @@ order by table_name" dbname only-tables))))
;; free resources
(cl-mysql:disconnect)))
(defun create-my-views (dbname views-alist
                        &key
                          (host *myconn-host*)
                          (user *myconn-user*)
                          (pass *myconn-pass*))
  "VIEWS-ALIST associates view names with their SQL definition, which might
   be empty for already existing views. Create only the views for which we
   have an SQL definition, in database DBNAME, connecting to MySQL with
   HOST, USER and PASS.

   When creating one of the views fails, the views this call did already
   create are dropped again before the error propagates, so that a failed
   run leaves no partial set of views behind. Returns NIL."
  (let ((views     (remove-if #'null views-alist :key #'cdr))
        (created   '())                 ; names of views created so far
        (completed nil))                ; T once every view is in place
    (when views
      (cl-mysql:connect :host host :user user :password pass)
      (unwind-protect
           (progn
             (cl-mysql:use dbname)
             (loop for (name . def) in views
                for sql = (format nil "CREATE VIEW ~a AS ~a" name def)
                do
                  (log-message :info "MySQL: ~a" sql)
                  (cl-mysql:query sql)
                  (push name created))
             (setf completed t)
             nil)
        (unwind-protect
             ;; We get here with COMPLETED still NIL only when unwinding
             ;; from a failure: undo the views we managed to create.
             (when (and created (not completed))
               (let ((sql (format nil "DROP VIEW ~{~a~^, ~};"
                                  (reverse created))))
                 (log-message :info "MySQL: ~a" sql)
                 ;; Best effort only: a failure to clean up must not
                 ;; replace the error that got us here in the first place.
                 (handler-case (cl-mysql:query sql)
                   (error (e)
                     (log-message :info "MySQL: could not drop views: ~a" e)))))
          ;; free resources
          (cl-mysql:disconnect))))))
(defun drop-my-views (dbname views-alist
                      &key
                        (host *myconn-host*)
                        (user *myconn-user*)
                        (pass *myconn-pass*))
  "Drop from database DBNAME the views of VIEWS-ALIST that come with an SQL
   definition, all in a single DROP VIEW statement. Entries without a
   definition are left alone. VIEWS-ALIST has the same format as in
   `create-my-views'."
  (let ((names (loop for (name . def) in views-alist
                  when def collect name)))
    (unless (null names)
      (cl-mysql:connect :host host :user user :password pass)
      (unwind-protect
           (let ((statement (format nil "DROP VIEW ~{~a~^, ~};" names)))
             (cl-mysql:use dbname)
             (log-message :info "MySQL: ~a" statement)
             (cl-mysql:query statement))
        ;; whatever happens, close the MySQL connection
        (cl-mysql:disconnect)))))
;;;
;;; Tools to get MySQL table and columns definitions and transform them to
;;; PostgreSQL CREATE TABLE statements, and run those.
;;;
(defvar *table-type*
  '((:table . "BASE TABLE")
    (:view  . "VIEW"))
  "Alist from our internal table type keyword to the matching value of the
   MySQL information_schema.tables.table_type column.")
(defun list-all-columns (dbname
&key
only-tables
(host *myconn-host*)
(user *myconn-user*)
(pass *myconn-pass*))
(pass *myconn-pass*)
(table-type :table)
&aux
(table-type-name (cdr (assoc table-type *table-type*))))
"Get the list of MySQL column names per table."
(cl-mysql:connect :host host :user user :password pass)
@ -121,9 +171,9 @@ order by table_name" dbname only-tables))))
c.is_nullable, c.extra
from information_schema.columns c
join information_schema.tables t using(table_schema, table_name)
where c.table_schema = '~a' and t.table_type = 'BASE TABLE'
where c.table_schema = '~a' and t.table_type = '~a'
~@[and table_name in (~{'~a'~^,~})~]
order by table_name, ordinal_position" dbname only-tables)))
order by table_name, ordinal_position" dbname table-type-name only-tables)))
do
(let ((entry (assoc table-name schema :test 'equal))
(column

View File

@ -167,7 +167,8 @@
(identifier-case :downcase) ; or :quote
only-tables
including
excluding)
excluding
materialize-views)
"Export MySQL data and Import it into PostgreSQL"
(let* ((summary (null *state*))
(*state* (or *state* (make-pgstate)))
@ -177,6 +178,8 @@
(copy-kernel (make-kernel 2))
(dbname (source-db mysql))
(pg-dbname (target-db mysql))
(view-names (mapcar #'car materialize-views))
view-columns ; must wait until we created the views
(all-columns (filter-column-list (list-all-columns dbname)
:only-tables only-tables
:including including
@ -225,20 +228,35 @@
;; MySQL allows the same index name being used against several
;; tables, so we add the PostgreSQL table OID in the index name,
;; to differentiate. Set the table oids now.
(set-table-oids all-indexes))
(set-table-oids all-indexes)
;; If asked to materialize views, now is the time to create
;; the target tables for them
(when materialize-views
(create-my-views dbname materialize-views)
(setf view-columns (list-all-columns dbname
:only-tables view-names
:table-type :view))
(create-tables view-columns
:identifier-case identifier-case
:include-drop include-drop)))
;;
;; In case some error happens in the preparatory transaction, we
;; need to stop now and refrain from trying to load the data into an
;; incomplete schema.
;;
(cl-mysql-system:mysql-error (e)
(log-message :fatal "~a" e)
(return-from copy-database))
(cl-postgres:database-error (e)
(declare (ignore e)) ; a log has already been printed
(log-message :critical "Failed to create the schema, see above.")
(log-message :fatal "Failed to create the schema, see above.")
(return-from copy-database)))))
(loop
for (table-name . columns) in all-columns
for (table-name . columns) in (append all-columns view-columns)
do
(let ((table-source
(make-instance 'copy-mysql
@ -278,6 +296,11 @@
(lp:end-kernel))
;;
;; If we created some views for this run, now is the time to DROP'em
;;
(when materialize-views
(drop-my-views dbname materialize-views))
;;
;; Now Reset Sequences, the good time to do that is once the whole data
;; has been imported and once we have the indexes in place, as max() is
;; able to benefit from the indexes. In particular avoid doing that step

View File

@ -8,4 +8,12 @@ LOAD DATABASE
type date drop not null drop default using zero-dates-to-null,
type tinyint to boolean using tinyint-to-boolean,
type year to integer,
type timestamp to timestamptz drop not null using zero-dates-to-null;
type timestamp to timestamptz drop not null using zero-dates-to-null
MATERIALIZE VIEWS
d as $$
select cast(d as date) as d, count(*) as n
from plop
where d > '2013-10-02'
group by cast(d as date);
$$;

View File

@ -12,6 +12,8 @@ load database
-- type tinyint to boolean using tinyint-to-boolean,
type year to integer
MATERIALIZE VIEWS film_list, staff_list
-- INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor'
-- EXCLUDING TABLE NAMES MATCHING ~<ory>