Fix #40 by providing a per-table forced-encoding option.

This patch takes advantage of the recent patch
62fc85a1cf,
so you will need to refresh your local Qmynd copy if you want to
test from sources.
This commit is contained in:
Dimitri Fontaine 2014-03-03 23:39:22 +01:00
parent 1461cda1c0
commit 46fd6632f2
4 changed files with 80 additions and 17 deletions

View File

@ -816,7 +816,8 @@ Here's an example:
-- INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor'
-- EXCLUDING TABLE NAMES MATCHING ~<ory>
-- DECODING TABLE NAMES MATCHING ~/messed/, ~/encoding/ AS utf8
BEFORE LOAD DO
$$ create schema if not exists sakila; $$;
@ -1063,12 +1064,24 @@ The `database` command accepts the following clauses and options:
- *EXCLUDING TABLE NAMES MATCHING*
Introduce a comma separated list of table names or *rugular expression*
Introduce a comma separated list of table names or *regular expression*
used to exclude table names from the migration. This filter only applies
to the result of the *INCLUDING* filter.
EXCLUDING TABLE NAMES MATCHING ~<ory>
- *DECODING TABLE NAMES MATCHING*
Introduce a comma separated list of table names or *regular expressions*
used to select the tables whose data must be decoded with a forced
encoding when read from MySQL. If the encoding you know the data to be in
differs from what MySQL believes it is, this is the option to use.
DECODING TABLE NAMES MATCHING ~/messed/, ~/encoding/ AS utf8
You can use as many such rules as you need, all with possibly different
encodings.
- *BEFORE LOAD DO*
You can run SQL queries against the database before loading the data

View File

@ -93,6 +93,7 @@
(def-keyword-rule "log")
(def-keyword-rule "level")
(def-keyword-rule "encoding")
(def-keyword-rule "decoding")
(def-keyword-rule "truncate")
(def-keyword-rule "lines")
(def-keyword-rule "fields")
@ -804,6 +805,20 @@
(declare (ignore e table n m))
filter-list)))
;;;
;;; Per table encoding options, because MySQL is so bad at encoding...
;;;
(defrule decoding-table-as (and kw-decoding kw-table kw-names kw-matching
                                filter-list
                                kw-as encoding)
  ;; The match is a 7 elements list, one per sub-rule above. Only the filter
  ;; list (5th) and the encoding (7th) carry information, the keywords are
  ;; dropped. The result is the cons (encoding . filter-list).
  (:lambda (clause)
    (let ((table-filters   (fifth clause))
          (forced-encoding (seventh clause)))
      (cons forced-encoding table-filters))))
;; Any number of DECODING clauses in a row; each parsed clause is an
;; (encoding . filter-list) entry as built by decoding-table-as.
(defrule decoding-tables-as (* decoding-table-as))
;;; LOAD DATABASE FROM mysql://
(defrule load-mysql-database (and database-source target
@ -813,12 +828,13 @@
(? materialize-views)
(? including)
(? excluding)
(? decoding-tables-as)
(? before-load-do)
(? after-load-do))
(:lambda (source)
(destructuring-bind (my-db-uri pg-db-uri options
gucs casts views
incl excl
incl excl decoding-as
before after)
source
(destructuring-bind (&key ((:host myhost))
@ -867,6 +883,7 @@
`(:only-tables ',(list table-name)))
:including ',incl
:excluding ',excl
:decoding-as ',decoding-as
:materialize-views ',views
:state-before state-before
:state-after state-after

View File

@ -4,7 +4,10 @@
(in-package :pgloader.mysql)
(defclass copy-mysql (copy) ()
(defclass copy-mysql (copy)
((encoding :accessor encoding ; encoding to force when reading this table's data
:initarg :encoding
;; NIL by default, meaning no encoding is forced for this source
:initform nil))
(:documentation "pgloader MySQL Data Source"))
(defun cast-mysql-column-definition-to-pgsql (mysql-column)
@ -60,10 +63,14 @@
(defmethod map-rows ((mysql copy-mysql) &key process-row-fn)
"Extract MySQL data and call PROCESS-ROW-FN function with a single
argument (a list of column values) for each row."
(let ((dbname (source-db mysql))
(table-name (source mysql)))
(let ((dbname (source-db mysql))
(table-name (source mysql))
(qmynd:*mysql-encoding* (encoding mysql)))
(with-mysql-connection (dbname)
(when qmynd:*mysql-encoding*
(log-message :notice "Force encoding to ~a for ~a"
qmynd:*mysql-encoding* table-name))
(let* ((cols (get-column-list dbname table-name))
(sql (format nil "SELECT ~{~a~^, ~} FROM `~a`;" cols table-name))
(row-fn
@ -269,6 +276,18 @@
:all-indexes all-indexes
:view-columns view-columns)))
(defun apply-decoding-as-filters (table-name filters)
  "Return a generalized boolean, true when at least one entry of FILTERS
   matches TABLE-NAME.

   A filter is either a string, compared to TABLE-NAME with STRING-EQUAL
   (so ignoring case), or a list (:regex PATTERN) in which case PATTERN is
   searched in TABLE-NAME with CL-PPCRE:SCAN. Any other list type signals
   an error from ECASE."
  (some (lambda (filter)
          ;; table-name is captured from the enclosing function
          (cond ((stringp filter)
                 (string-equal filter table-name))
                ((listp filter)
                 (destructuring-bind (kind pattern) filter
                   (ecase kind
                     (:regex (cl-ppcre:scan pattern table-name)))))))
        filters))
;;;
;;; Work on all tables for given database
;;;
@ -289,6 +308,7 @@
only-tables
including
excluding
decoding-as
materialize-views)
"Export MySQL data and Import it into PostgreSQL"
(let* ((summary (null *state*))
@ -346,15 +366,25 @@
(loop
for (table-name . columns) in (append all-columns view-columns)
do
(let ((table-source
(make-instance 'copy-mysql
:source-db dbname
:target-db pg-dbname
:source table-name
:target (apply-identifier-case table-name
identifier-case)
:fields columns)))
(let* ((encoding
;; force the data encoding when asked to
(when decoding-as
(loop :for (encoding . filters) :in decoding-as
:when (apply-decoding-as-filters table-name filters)
:return encoding)))
(table-source
(make-instance 'copy-mysql
:source-db dbname
:target-db pg-dbname
:source table-name
:target (apply-identifier-case table-name
identifier-case)
:fields columns
:encoding encoding)))
(log-message :debug "TARGET: ~a" (target table-source))
;; first COPY the data from MySQL to PostgreSQL, using copy-kernel
(unless schema-only
(copy-from table-source :kernel copy-kernel :truncate truncate))

View File

@ -16,12 +16,15 @@ LOAD DATABASE
column enumerate.foo using empty-string-to-null
-- INCLUDING ONLY TABLE NAMES MATCHING ~/encoding/;
MATERIALIZE VIEWS
d as $$
select cast(d as date) as d, count(*) as n
from plop
where d > '2013-10-02'
group by cast(d as date);
$$;
$$
-- INCLUDING ONLY TABLE NAMES MATCHING ~/encoding/
DECODING TABLE NAMES MATCHING ~/messed/, ~/encoding/ AS utf8;