mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-04 10:31:02 +02:00
Fix #40 by providing a per-table forced-encoding option.
This patch takes advantage of the recent patch
62fc85a1cf
so you will need to refresh your local Qmynd copy if you want to
test from sources.
This commit is contained in:
parent
1461cda1c0
commit
46fd6632f2
@ -816,7 +816,8 @@ Here's an example:
|
||||
|
||||
-- INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor'
|
||||
-- EXCLUDING TABLE NAMES MATCHING ~<ory>
|
||||
|
||||
-- DECODING TABLE NAMES MATCHING ~/messed/, ~/encoding/ AS utf8
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create schema if not exists sakila; $$;
|
||||
|
||||
@ -1063,12 +1064,24 @@ The `database` command accepts the following clauses and options:
|
||||
|
||||
- *EXCLUDING TABLE NAMES MATCHING*
|
||||
|
||||
Introduce a comma separated list of table names or *rugular expression*
|
||||
Introduce a comma separated list of table names or *regular expressions*
|
||||
used to exclude table names from the migration. This filter only applies
|
||||
to the result of the *INCLUDING* filter.
|
||||
|
||||
EXCLUDING TABLE NAMES MATCHING ~<ory>
|
||||
|
||||
- *DECODING TABLE NAMES MATCHING*
|
||||
|
||||
Introduce a comma separated list of table names or *regular expressions*
|
||||
used to force the encoding to use when processing data from MySQL. If
|
||||
the data encoding known to you is different from MySQL's idea about it,
|
||||
this is the option to use.
|
||||
|
||||
DECODING TABLE NAMES MATCHING ~/messed/, ~/encoding/ AS utf8
|
||||
|
||||
You can use as many such rules as you need, all with possibly different
|
||||
encodings.
|
||||
|
||||
- *BEFORE LOAD DO*
|
||||
|
||||
You can run SQL queries against the database before loading the data
|
||||
|
||||
@ -93,6 +93,7 @@
|
||||
(def-keyword-rule "log")
|
||||
(def-keyword-rule "level")
|
||||
(def-keyword-rule "encoding")
|
||||
(def-keyword-rule "decoding")
|
||||
(def-keyword-rule "truncate")
|
||||
(def-keyword-rule "lines")
|
||||
(def-keyword-rule "fields")
|
||||
@ -804,6 +805,20 @@
|
||||
(declare (ignore e table n m))
|
||||
filter-list)))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Per table encoding options, because MySQL is so bad at encoding...
|
||||
;;;
|
||||
(defrule decoding-table-as (and kw-decoding kw-table kw-names kw-matching
|
||||
filter-list
|
||||
kw-as encoding)
|
||||
(:lambda (source)
|
||||
(destructuring-bind (d table n m filter-list as encoding) source
|
||||
(declare (ignore d table n m as))
|
||||
(cons encoding filter-list))))
|
||||
|
||||
(defrule decoding-tables-as (* decoding-table-as))
|
||||
|
||||
|
||||
;;; LOAD DATABASE FROM mysql://
|
||||
(defrule load-mysql-database (and database-source target
|
||||
@ -813,12 +828,13 @@
|
||||
(? materialize-views)
|
||||
(? including)
|
||||
(? excluding)
|
||||
(? decoding-tables-as)
|
||||
(? before-load-do)
|
||||
(? after-load-do))
|
||||
(:lambda (source)
|
||||
(destructuring-bind (my-db-uri pg-db-uri options
|
||||
gucs casts views
|
||||
incl excl
|
||||
incl excl decoding-as
|
||||
before after)
|
||||
source
|
||||
(destructuring-bind (&key ((:host myhost))
|
||||
@ -867,6 +883,7 @@
|
||||
`(:only-tables ',(list table-name)))
|
||||
:including ',incl
|
||||
:excluding ',excl
|
||||
:decoding-as ',decoding-as
|
||||
:materialize-views ',views
|
||||
:state-before state-before
|
||||
:state-after state-after
|
||||
|
||||
@ -4,7 +4,10 @@
|
||||
|
||||
(in-package :pgloader.mysql)
|
||||
|
||||
(defclass copy-mysql (copy) ()
|
||||
(defclass copy-mysql (copy)
|
||||
((encoding :accessor encoding ; allows forcing encoding
|
||||
:initarg :encoding
|
||||
:initform nil))
|
||||
(:documentation "pgloader MySQL Data Source"))
|
||||
|
||||
(defun cast-mysql-column-definition-to-pgsql (mysql-column)
|
||||
@ -60,10 +63,14 @@
|
||||
(defmethod map-rows ((mysql copy-mysql) &key process-row-fn)
|
||||
"Extract MySQL data and call PROCESS-ROW-FN function with a single
|
||||
argument (a list of column values) for each row."
|
||||
(let ((dbname (source-db mysql))
|
||||
(table-name (source mysql)))
|
||||
(let ((dbname (source-db mysql))
|
||||
(table-name (source mysql))
|
||||
(qmynd:*mysql-encoding* (encoding mysql)))
|
||||
|
||||
(with-mysql-connection (dbname)
|
||||
(when qmynd:*mysql-encoding*
|
||||
(log-message :notice "Force encoding to ~a for ~a"
|
||||
qmynd:*mysql-encoding* table-name))
|
||||
(let* ((cols (get-column-list dbname table-name))
|
||||
(sql (format nil "SELECT ~{~a~^, ~} FROM `~a`;" cols table-name))
|
||||
(row-fn
|
||||
@ -269,6 +276,18 @@
|
||||
:all-indexes all-indexes
|
||||
:view-columns view-columns)))
|
||||
|
||||
(defun apply-decoding-as-filters (table-name filters)
|
||||
"Return a generalized boolean which is non-nil only if TABLE-NAME matches
|
||||
one of the FILTERS."
|
||||
(flet ((apply-filter (filter)
|
||||
;; we close over table-name here.
|
||||
(typecase filter
|
||||
(string (string-equal filter table-name))
|
||||
(list (destructuring-bind (type val) filter
|
||||
(ecase type
|
||||
(:regex (cl-ppcre:scan val table-name))))))))
|
||||
(some #'apply-filter filters)))
|
||||
|
||||
;;;
|
||||
;;; Work on all tables for given database
|
||||
;;;
|
||||
@ -289,6 +308,7 @@
|
||||
only-tables
|
||||
including
|
||||
excluding
|
||||
decoding-as
|
||||
materialize-views)
|
||||
"Export MySQL data and Import it into PostgreSQL"
|
||||
(let* ((summary (null *state*))
|
||||
@ -346,15 +366,25 @@
|
||||
(loop
|
||||
for (table-name . columns) in (append all-columns view-columns)
|
||||
do
|
||||
(let ((table-source
|
||||
(make-instance 'copy-mysql
|
||||
:source-db dbname
|
||||
:target-db pg-dbname
|
||||
:source table-name
|
||||
:target (apply-identifier-case table-name
|
||||
identifier-case)
|
||||
:fields columns)))
|
||||
(let* ((encoding
|
||||
;; force the data encoding when asked to
|
||||
(when decoding-as
|
||||
(loop :for (encoding . filters) :in decoding-as
|
||||
:when (apply-decoding-as-filters table-name filters)
|
||||
:return encoding)))
|
||||
|
||||
(table-source
|
||||
(make-instance 'copy-mysql
|
||||
:source-db dbname
|
||||
:target-db pg-dbname
|
||||
:source table-name
|
||||
:target (apply-identifier-case table-name
|
||||
identifier-case)
|
||||
:fields columns
|
||||
:encoding encoding)))
|
||||
|
||||
(log-message :debug "TARGET: ~a" (target table-source))
|
||||
|
||||
;; first COPY the data from MySQL to PostgreSQL, using copy-kernel
|
||||
(unless schema-only
|
||||
(copy-from table-source :kernel copy-kernel :truncate truncate))
|
||||
|
||||
@ -16,12 +16,15 @@ LOAD DATABASE
|
||||
|
||||
column enumerate.foo using empty-string-to-null
|
||||
|
||||
-- INCLUDING ONLY TABLE NAMES MATCHING ~/encoding/;
|
||||
|
||||
MATERIALIZE VIEWS
|
||||
d as $$
|
||||
select cast(d as date) as d, count(*) as n
|
||||
from plop
|
||||
where d > '2013-10-02'
|
||||
group by cast(d as date);
|
||||
$$;
|
||||
$$
|
||||
|
||||
-- INCLUDING ONLY TABLE NAMES MATCHING ~/encoding/
|
||||
|
||||
DECODING TABLE NAMES MATCHING ~/messed/, ~/encoding/ AS utf8;
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user