mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-05 02:46:10 +02:00
Implement a Materialize Views option for MySQL.
This commit is contained in:
parent
eb468f92fd
commit
718ac80560
@ -720,7 +720,9 @@ Here's an example:
|
||||
type date drop not null drop default using zero-dates-to-null,
|
||||
-- type tinyint to boolean using tinyint-to-boolean,
|
||||
type year to integer
|
||||
|
||||
|
||||
MATERIALIZE VIEWS film_list, staff_list
|
||||
|
||||
-- INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor'
|
||||
-- EXCLUDING TABLE NAMES MATCHING ~<ory>
|
||||
|
||||
@ -882,6 +884,24 @@ The `database` command accepts the following clauses and options:
|
||||
be found in the `pgloader.transforms` Common Lisp package. See above
|
||||
for details.
|
||||
|
||||
- *MATERIALIZE VIEWS*
|
||||
|
||||
This clause allows you to implement custom data processing at the data
|
||||
source by providing a *view definition* against which pgloader will
|
||||
query the data. It's not possible to just allow for plain `SQL` because
|
||||
we want to know a lot about the exact data types of each column involved
|
||||
in the query output.
|
||||
|
||||
This clause expects a comma separated list of view definitions, each one
|
||||
being either the name of an existing view in your database or the
|
||||
following expression:
|
||||
|
||||
*name* `AS` `$$` *sql query* `$$`
|
||||
|
||||
The *name* and the *sql query* will be used in a `CREATE VIEW` statement
|
||||
at the beginning of the data loading, and the resulting view will then
|
||||
be dropped at the end of the data loading.
|
||||
|
||||
- *INCLUDING ONLY TABLE NAMES MATCHING*
|
||||
|
||||
Introduce a comma separated list of table names or *regular expression*
|
||||
|
||||
@ -106,6 +106,7 @@
|
||||
(def-keyword-rule "header")
|
||||
(def-keyword-rule "null")
|
||||
(def-keyword-rule "if")
|
||||
(def-keyword-rule "as")
|
||||
(def-keyword-rule "blanks")
|
||||
(def-keyword-rule "date")
|
||||
(def-keyword-rule "format")
|
||||
@ -117,11 +118,13 @@
|
||||
(def-keyword-rule "only")
|
||||
(def-keyword-rule "drop")
|
||||
(def-keyword-rule "create")
|
||||
(def-keyword-rule "materialize")
|
||||
(def-keyword-rule "reset")
|
||||
(def-keyword-rule "table")
|
||||
(def-keyword-rule "name")
|
||||
(def-keyword-rule "names")
|
||||
(def-keyword-rule "tables")
|
||||
(def-keyword-rule "views")
|
||||
(def-keyword-rule "indexes")
|
||||
(def-keyword-rule "sequences")
|
||||
(def-keyword-rule "foreign")
|
||||
@ -674,6 +677,39 @@
|
||||
(destructuring-bind (c casts) source
|
||||
(declare (ignore c))
|
||||
casts)))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Materialize views by copying their data over, allows for doing advanced
|
||||
;;; ETL processing by having parts of the processing happen on the MySQL
|
||||
;;; query side.
|
||||
;;;
|
||||
;;; A view name starts with an alphabetic character and continues with any
;;; number of alphabetic characters, decimal digits or underscores. The
;;; matched characters are returned as a single string.
(defrule view-name (and (alpha-char-p character)
                        (* (or (alphanumericp character) #\_)))
  (:text t))
|
||||
|
||||
;;; The SQL part of a view definition: the AS keyword followed by a
;;; dollar-quoted body. The keyword is discarded and only the production of
;;; the dollar-quoted rule is returned.
(defrule view-sql (and kw-as dollar-quoted)
  (:lambda (source)
    (destructuring-bind (as-keyword query) source
      (declare (ignore as-keyword))
      query)))
|
||||
|
||||
;;; A view definition is a view name optionally followed by its SQL. The
;;; result is a (NAME . SQL) pair, where SQL is NIL when no AS part was given.
(defrule view-definition (and view-name (? view-sql))
  (:lambda (source)
    (destructuring-bind (view definition) source
      (cons view definition))))
|
||||
|
||||
;;; Any view definition after the first one: a comma, then whatever
;;; ignore-whitespace accepts, then the definition itself. Only the
;;; definition is kept.
(defrule another-view-definition (and #\, ignore-whitespace view-definition)
  (:destructure (comma whitespace definition)
    (declare (ignore comma whitespace))
    definition))
|
||||
|
||||
;;; One view definition followed by zero or more comma-introduced ones,
;;; returned as a flat list of definitions in source order.
(defrule views-list (and view-definition (* another-view-definition))
  (:destructure (first-view more-views)
    (cons first-view more-views)))
|
||||
|
||||
;;; The MATERIALIZE VIEWS clause: both keywords are dropped and the parsed
;;; list of view definitions is returned.
(defrule materialize-views (and kw-materialize kw-views views-list)
  (:lambda (source)
    (destructuring-bind (materialize-kw views-kw definitions) source
      (declare (ignore materialize-kw views-kw))
      definitions)))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Including only some tables or excluding some others
|
||||
@ -713,13 +749,14 @@
|
||||
(? mysql-options)
|
||||
(? gucs)
|
||||
(? casts)
|
||||
(? materialize-views)
|
||||
(? including)
|
||||
(? excluding)
|
||||
(? before-load-do)
|
||||
(? after-load-do))
|
||||
(:lambda (source)
|
||||
(destructuring-bind (my-db-uri pg-db-uri options
|
||||
gucs casts
|
||||
gucs casts views
|
||||
incl excl
|
||||
before after)
|
||||
source
|
||||
@ -767,6 +804,7 @@
|
||||
`(:only-tables ',(list table-name)))
|
||||
:including ',incl
|
||||
:excluding ',excl
|
||||
:materialize-views ',views
|
||||
:state-before state-before
|
||||
:state-after state-after
|
||||
:state-indexes state-idx
|
||||
|
||||
@ -96,16 +96,66 @@ order by table_name" dbname only-tables))))
|
||||
;; free resources
|
||||
(cl-mysql:disconnect)))
|
||||
|
||||
(defun create-my-views (dbname views-alist
                        &key
                          (host *myconn-host*)
                          (user *myconn-user*)
                          (pass *myconn-pass*))
  "Run a CREATE VIEW statement in the MySQL database DBNAME for each entry
   of VIEWS-ALIST that carries an SQL definition.

   VIEWS-ALIST entries are (NAME . SQL) pairs. Entries whose SQL is NIL are
   skipped, as they are expected to name views that already exist. When no
   entry has a definition, no connection is opened at all."
  (let ((to-create (remove nil views-alist :key #'cdr)))
    (unless (null to-create)
      (cl-mysql:connect :host host :user user :password pass)
      (unwind-protect
           (progn
             (cl-mysql:use dbname)
             (dolist (view to-create)
               (destructuring-bind (name . def) view
                 (let ((sql (format nil "CREATE VIEW ~a AS ~a" name def)))
                   (log-message :info "MySQL: ~a" sql)
                   (cl-mysql:query sql)))))
        ;; disconnect whether or not the statements above succeeded
        (cl-mysql:disconnect)))))
|
||||
|
||||
(defun drop-my-views (dbname views-alist
                      &key
                        (host *myconn-host*)
                        (user *myconn-user*)
                        (pass *myconn-pass*))
  "Drop from the MySQL database DBNAME the views of VIEWS-ALIST that carry an
   SQL definition, using a single DROP VIEW statement.

   VIEWS-ALIST entries are (NAME . SQL) pairs. Entries whose SQL is NIL are
   left untouched. When no entry has a definition, no connection is opened."
  (let ((names (mapcar #'car (remove nil views-alist :key #'cdr))))
    (when names
      (cl-mysql:connect :host host :user user :password pass)
      (unwind-protect
           (let ((sql (format nil "DROP VIEW ~{~a~^, ~};" names)))
             (cl-mysql:use dbname)
             (log-message :info "MySQL: ~a" sql)
             (cl-mysql:query sql))
        ;; disconnect whether or not the statement above succeeded
        (cl-mysql:disconnect)))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Tools to get MySQL table and columns definitions and transform them to
|
||||
;;; PostgreSQL CREATE TABLE statements, and run those.
|
||||
;;;
|
||||
(defvar *table-type*
  (list (cons :table "BASE TABLE")
        (cons :view  "VIEW"))
  "Alist mapping the internal table type keyword (:table or :view) to the
   string stored in the table_type column of MySQL
   information_schema.tables.")
|
||||
|
||||
(defun list-all-columns (dbname
|
||||
&key
|
||||
only-tables
|
||||
(host *myconn-host*)
|
||||
(user *myconn-user*)
|
||||
(pass *myconn-pass*))
|
||||
(pass *myconn-pass*)
|
||||
(table-type :table)
|
||||
&aux
|
||||
(table-type-name (cdr (assoc table-type *table-type*))))
|
||||
"Get the list of MySQL column names per table."
|
||||
(cl-mysql:connect :host host :user user :password pass)
|
||||
|
||||
@ -121,9 +171,9 @@ order by table_name" dbname only-tables))))
|
||||
c.is_nullable, c.extra
|
||||
from information_schema.columns c
|
||||
join information_schema.tables t using(table_schema, table_name)
|
||||
where c.table_schema = '~a' and t.table_type = 'BASE TABLE'
|
||||
where c.table_schema = '~a' and t.table_type = '~a'
|
||||
~@[and table_name in (~{'~a'~^,~})~]
|
||||
order by table_name, ordinal_position" dbname only-tables)))
|
||||
order by table_name, ordinal_position" dbname table-type-name only-tables)))
|
||||
do
|
||||
(let ((entry (assoc table-name schema :test 'equal))
|
||||
(column
|
||||
|
||||
@ -167,7 +167,8 @@
|
||||
(identifier-case :downcase) ; or :quote
|
||||
only-tables
|
||||
including
|
||||
excluding)
|
||||
excluding
|
||||
materialize-views)
|
||||
"Export MySQL data and Import it into PostgreSQL"
|
||||
(let* ((summary (null *state*))
|
||||
(*state* (or *state* (make-pgstate)))
|
||||
@ -177,6 +178,8 @@
|
||||
(copy-kernel (make-kernel 2))
|
||||
(dbname (source-db mysql))
|
||||
(pg-dbname (target-db mysql))
|
||||
(view-names (mapcar #'car materialize-views))
|
||||
view-columns ; must wait until we created the views
|
||||
(all-columns (filter-column-list (list-all-columns dbname)
|
||||
:only-tables only-tables
|
||||
:including including
|
||||
@ -225,20 +228,35 @@
|
||||
;; MySQL allows the same index name being used against several
|
||||
;; tables, so we add the PostgreSQL table OID in the index name,
|
||||
;; to differentiate. Set the table oids now.
|
||||
(set-table-oids all-indexes))
|
||||
(set-table-oids all-indexes)
|
||||
|
||||
;; If asked to materialize views, now is the time to create
|
||||
;; the target tables for them
|
||||
(when materialize-views
|
||||
(create-my-views dbname materialize-views)
|
||||
(setf view-columns (list-all-columns dbname
|
||||
:only-tables view-names
|
||||
:table-type :view))
|
||||
(create-tables view-columns
|
||||
:identifier-case identifier-case
|
||||
:include-drop include-drop)))
|
||||
|
||||
;;
|
||||
;; In case some error happens in the preparatory transaction, we
|
||||
;; need to stop now and refrain from trying to load the data into an
|
||||
;; incomplete schema.
|
||||
;;
|
||||
(cl-mysql-system:mysql-error (e)
|
||||
(log-message :fatal "~a" e)
|
||||
(return-from copy-database))
|
||||
|
||||
(cl-postgres:database-error (e)
|
||||
(declare (ignore e)) ; a log has already been printed
|
||||
(log-message :critical "Failed to create the schema, see above.")
|
||||
(log-message :fatal "Failed to create the schema, see above.")
|
||||
(return-from copy-database)))))
|
||||
|
||||
(loop
|
||||
for (table-name . columns) in all-columns
|
||||
for (table-name . columns) in (append all-columns view-columns)
|
||||
do
|
||||
(let ((table-source
|
||||
(make-instance 'copy-mysql
|
||||
@ -278,6 +296,11 @@
|
||||
(lp:end-kernel))
|
||||
|
||||
;;
|
||||
;; If we created some views for this run, now is the time to DROP'em
|
||||
;;
|
||||
(when materialize-views
|
||||
(drop-my-views dbname materialize-views))
|
||||
;;
|
||||
;; Now Reset Sequences, the good time to do that is once the whole data
|
||||
;; has been imported and once we have the indexes in place, as max() is
|
||||
;; able to benefit from the indexes. In particular avoid doing that step
|
||||
|
||||
@ -8,4 +8,12 @@ LOAD DATABASE
|
||||
type date drop not null drop default using zero-dates-to-null,
|
||||
type tinyint to boolean using tinyint-to-boolean,
|
||||
type year to integer,
|
||||
type timestamp to timestamptz drop not null using zero-dates-to-null;
|
||||
type timestamp to timestamptz drop not null using zero-dates-to-null
|
||||
|
||||
MATERIALIZE VIEWS
|
||||
d as $$
|
||||
select cast(d as date) as d, count(*) as n
|
||||
from plop
|
||||
where d > '2013-10-02'
|
||||
group by cast(d as date);
|
||||
$$;
|
||||
|
||||
@ -12,6 +12,8 @@ load database
|
||||
-- type tinyint to boolean using tinyint-to-boolean,
|
||||
type year to integer
|
||||
|
||||
MATERIALIZE VIEWS film_list, staff_list
|
||||
|
||||
-- INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor'
|
||||
-- EXCLUDING TABLE NAMES MATCHING ~<ory>
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user