mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-05 10:56:10 +02:00
Merge branch 'master' into debian
This commit is contained in:
commit
de38a4473a
@ -1,3 +1,5 @@
|
||||
.git
|
||||
.vagrant
|
||||
build
|
||||
Dockerfile
|
||||
Dockerfile.ccl
|
||||
59
Dockerfile
59
Dockerfile
@ -1,20 +1,47 @@
|
||||
FROM debian:stretch
|
||||
MAINTAINER Dimitri Fontaine <dim@tapoueh.org>
|
||||
FROM debian:stable-slim as builder
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
wget curl make git bzip2 time \
|
||||
ca-certificates \
|
||||
libzip-dev libssl1.1 openssl \
|
||||
patch unzip libsqlite3-dev gawk \
|
||||
freetds-dev sbcl && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
bzip2 \
|
||||
ca-certificates \
|
||||
curl \
|
||||
freetds-dev \
|
||||
gawk \
|
||||
git \
|
||||
libsqlite3-dev \
|
||||
libssl1.1 \
|
||||
libzip-dev \
|
||||
make \
|
||||
openssl \
|
||||
patch \
|
||||
sbcl \
|
||||
time \
|
||||
unzip \
|
||||
wget \
|
||||
cl-ironclad \
|
||||
cl-babel \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ADD ./ /opt/src/pgloader
|
||||
WORKDIR /opt/src/pgloader
|
||||
COPY ./ /opt/src/pgloader
|
||||
|
||||
# build/ is in the .dockerignore file, but we actually need it now
|
||||
RUN mkdir -p build/bin
|
||||
RUN make
|
||||
RUN mkdir -p /opt/src/pgloader/build/bin \
|
||||
&& cd /opt/src/pgloader \
|
||||
&& make
|
||||
|
||||
RUN cp /opt/src/pgloader/build/bin/pgloader /usr/local/bin
|
||||
FROM debian:stable-slim
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
freetds-dev \
|
||||
gawk \
|
||||
libsqlite3-dev \
|
||||
libzip-dev \
|
||||
make \
|
||||
sbcl \
|
||||
unzip \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
|
||||
|
||||
LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"
|
||||
|
||||
@ -1,25 +1,51 @@
|
||||
FROM debian:stretch
|
||||
MAINTAINER Dimitri Fontaine <dim@tapoueh.org>
|
||||
FROM debian:stable-slim as builder
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
wget curl make git bzip2 time \
|
||||
ca-certificates \
|
||||
libzip-dev libssl1.1 openssl \
|
||||
patch unzip libsqlite3-dev gawk \
|
||||
freetds-dev sbcl && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
bzip2 \
|
||||
ca-certificates \
|
||||
curl \
|
||||
freetds-dev \
|
||||
gawk \
|
||||
git \
|
||||
libsqlite3-dev \
|
||||
libssl1.1 \
|
||||
libzip-dev \
|
||||
make \
|
||||
openssl \
|
||||
patch \
|
||||
sbcl \
|
||||
time \
|
||||
unzip \
|
||||
wget \
|
||||
cl-ironclad \
|
||||
cl-babel \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /usr/local/src
|
||||
RUN curl --location -O https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz
|
||||
RUN tar xf ccl-1.11.5-linuxx86.tar.gz
|
||||
RUN cp /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl
|
||||
RUN curl -SL https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz \
|
||||
| tar xz -C /usr/local/src/ \
|
||||
&& mv /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl
|
||||
|
||||
ADD ./ /opt/src/pgloader
|
||||
WORKDIR /opt/src/pgloader
|
||||
COPY ./ /opt/src/pgloader
|
||||
|
||||
# build/ is in the .dockerignore file, but we actually need it now
|
||||
RUN mkdir -p build/bin
|
||||
RUN make CL=ccl DYNSIZE=256
|
||||
RUN mkdir -p /opt/src/pgloader/build/bin \
|
||||
&& cd /opt/src/pgloader \
|
||||
&& make CL=ccl DYNSIZE=256
|
||||
|
||||
RUN cp /opt/src/pgloader/build/bin/pgloader /usr/local/bin
|
||||
FROM debian:stable-slim
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
freetds-dev \
|
||||
gawk \
|
||||
libsqlite3-dev \
|
||||
libzip-dev \
|
||||
make \
|
||||
sbcl \
|
||||
unzip \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
|
||||
|
||||
LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"
|
||||
|
||||
26
Makefile
26
Makefile
@ -1,6 +1,6 @@
|
||||
# pgloader build tool
|
||||
APP_NAME = pgloader
|
||||
VERSION = 3.5.2
|
||||
VERSION = 3.6.1
|
||||
|
||||
# use either sbcl or ccl
|
||||
CL = sbcl
|
||||
@ -24,7 +24,7 @@ QLDIR = $(BUILDDIR)/quicklisp
|
||||
MANIFEST = $(BUILDDIR)/manifest.ql
|
||||
LATEST = $(BUILDDIR)/pgloader-latest.tgz
|
||||
|
||||
BUNDLEDIST = 2018-04-30
|
||||
BUNDLEDIST = 2019-01-07
|
||||
BUNDLENAME = pgloader-bundle-$(VERSION)
|
||||
BUNDLEDIR = $(BUILDDIR)/bundle/$(BUNDLENAME)
|
||||
BUNDLE = $(BUILDDIR)/$(BUNDLENAME).tgz
|
||||
@ -99,8 +99,11 @@ clones: $(QLDIR)/local-projects/cl-ixf \
|
||||
$(QLDIR)/local-projects/cl-csv \
|
||||
$(QLDIR)/local-projects/qmynd ;
|
||||
|
||||
$(LIBS): $(QLDIR)/setup.lisp clones
|
||||
$(LIBS): $(QLDIR)/setup.lisp
|
||||
$(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp \
|
||||
--eval '(push :pgloader-image *features*)' \
|
||||
--eval '(setf *print-circle* t *print-pretty* t)' \
|
||||
--eval '(ql:quickload "pgloader")' \
|
||||
--eval '(push "$(PWD)/" ql:*local-project-directories*)' \
|
||||
--eval '(ql:quickload "pgloader")' \
|
||||
--eval '(quit)'
|
||||
@ -141,8 +144,11 @@ $(PGLOADER): $(MANIFEST) $(BUILDAPP) $(LISP_SRC)
|
||||
--manifest-file $(MANIFEST) \
|
||||
--asdf-tree $(QLDIR)/dists \
|
||||
--asdf-path . \
|
||||
--load-system $(APP_NAME) \
|
||||
--load-system cffi \
|
||||
--load-system cl+ssl \
|
||||
--load-system mssql \
|
||||
--load src/hooks.lisp \
|
||||
--load-system $(APP_NAME) \
|
||||
--entry pgloader:main \
|
||||
--dynamic-space-size $(DYNSIZE) \
|
||||
$(COMPRESS_CORE_OPT) \
|
||||
@ -164,6 +170,12 @@ pgloader-standalone:
|
||||
test: $(PGLOADER)
|
||||
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
|
||||
|
||||
save: ./src/save.lisp $(LISP_SRC)
|
||||
$(CL) $(CL_OPTS) --load ./src/save.lisp
|
||||
|
||||
check-saved: save
|
||||
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
|
||||
|
||||
clean-bundle:
|
||||
rm -rf $(BUNDLEDIR)
|
||||
rm -rf $(BUNDLETESTD)/$(BUNDLENAME)/*
|
||||
@ -179,8 +191,12 @@ $(BUNDLEDIR):
|
||||
--eval '(defvar *ql-dist* "$(BUNDLEDIST)")' \
|
||||
--load bundle/ql.lisp
|
||||
|
||||
$(BUNDLE): $(BUNDLEDIR)
|
||||
$(BUNDLEDIR)/version.sexp: $(BUNDLEDIR)
|
||||
echo "\"$(VERSION)\"" > $@
|
||||
|
||||
$(BUNDLE): $(BUNDLEDIR) $(BUNDLEDIR)/version.sexp
|
||||
cp bundle/README.md $(BUNDLEDIR)
|
||||
cp bundle/save.lisp $(BUNDLEDIR)
|
||||
sed -e s/%VERSION%/$(VERSION)/ < bundle/Makefile > $(BUNDLEDIR)/Makefile
|
||||
git archive --format=tar --prefix=pgloader-$(VERSION)/ master \
|
||||
| tar -C $(BUNDLEDIR)/local-projects/ -xf -
|
||||
|
||||
10
README.md
10
README.md
@ -117,6 +117,16 @@ pgloader:
|
||||
|
||||
<https://github.com/dimitri/pgloader/issues?utf8=✓&q=label%3A%22Windows%20support%22%20>
|
||||
|
||||
### Building Docker image from sources
|
||||
|
||||
You can build a Docker image from source using SBCL by default:
|
||||
|
||||
$ docker build .
|
||||
|
||||
Or Clozure CL (CCL):
|
||||
|
||||
$ docker build -f Dockerfile.ccl .
|
||||
|
||||
## More options when building from source
|
||||
|
||||
The `Makefile` target `pgloader` knows how to produce a Self Contained
|
||||
|
||||
@ -48,9 +48,12 @@ $(PGLOADER): $(BUILDAPP)
|
||||
$(BUILDAPP_OPTS) \
|
||||
--sbcl $(CL) \
|
||||
--asdf-tree . \
|
||||
--load-system cffi \
|
||||
--load-system cl+ssl \
|
||||
--load-system mssql \
|
||||
--load $(SRCDIR)/src/hooks.lisp \
|
||||
--load-system $(APP_NAME) \
|
||||
--eval '(setf pgloader.params::*version-string* "$(VERSION)")' \
|
||||
--load $(SRCDIR)/src/hooks.lisp \
|
||||
--entry pgloader:main \
|
||||
--dynamic-space-size $(DYNSIZE) \
|
||||
$(COMPRESS_CORE_OPT) \
|
||||
@ -61,4 +64,7 @@ $(PGLOADER): $(BUILDAPP)
|
||||
test: $(PGLOADER)
|
||||
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) -C $(SRCDIR)/test regress
|
||||
|
||||
save:
|
||||
sbcl --no-userinit --load ./save.lisp
|
||||
|
||||
check: test ;
|
||||
|
||||
47
bundle/save.lisp
Normal file
47
bundle/save.lisp
Normal file
@ -0,0 +1,47 @@
|
||||
;;;
|
||||
;;; Create a build/bin/pgloader executable from the source code, using
|
||||
;;; Quicklisp to load pgloader and its dependencies.
|
||||
;;;
|
||||
|
||||
(in-package #:cl-user)
|
||||
|
||||
(require :asdf) ; should work in SBCL and CCL
|
||||
|
||||
(let* ((cwd (uiop:getcwd))
|
||||
(bundle.lisp (uiop:merge-pathnames* "bundle.lisp" cwd))
|
||||
(version-file (uiop:merge-pathnames* "version.sexp" cwd))
|
||||
(version-string (uiop:read-file-form version-file))
|
||||
(asdf:*central-registry* (list cwd)))
|
||||
|
||||
(format t "Loading bundle.lisp~%")
|
||||
(load bundle.lisp)
|
||||
|
||||
(format t "Loading system pgloader ~a~%" version-string)
|
||||
(asdf:load-system :pgloader :verbose nil)
|
||||
(load (asdf:system-relative-pathname :pgloader "src/hooks.lisp"))
|
||||
|
||||
(let* ((pgl (find-package "PGLOADER"))
|
||||
(version-symbol (find-symbol "*VERSION-STRING*" pgl)))
|
||||
(setf (symbol-value version-symbol) version-string)))
|
||||
|
||||
(defun pgloader-image-main ()
|
||||
(let ((argv #+sbcl sb-ext:*posix-argv*
|
||||
#+ccl ccl:*command-line-argument-list*))
|
||||
(pgloader::main argv)))
|
||||
|
||||
(let* ((cwd (uiop:getcwd))
|
||||
(bin-dir (uiop:merge-pathnames* "bin/" cwd))
|
||||
(bin-filename (uiop:merge-pathnames* "pgloader" bin-dir)))
|
||||
|
||||
(ensure-directories-exist bin-dir)
|
||||
|
||||
#+ccl
|
||||
(ccl:save-application bin-filename
|
||||
:toplevel-function #'cl-user::pgloader-image-main
|
||||
:prepend-kernel t)
|
||||
#+sbcl
|
||||
(sb-ext:save-lisp-and-die bin-filename
|
||||
:toplevel #'cl-user::pgloader-image-main
|
||||
:executable t
|
||||
:save-runtime-options t
|
||||
:compression t))
|
||||
@ -83,7 +83,8 @@ todo_include_todos = False
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = 'alabaster'
|
||||
#html_theme = 'alabaster'
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
|
||||
234
docs/index.rst
234
docs/index.rst
@ -6,11 +6,242 @@
|
||||
Welcome to pgloader's documentation!
|
||||
====================================
|
||||
|
||||
pgloader loads data from various sources into PostgreSQL. It can transform
|
||||
the data it reads on the fly and submit raw SQL before and after the
|
||||
loading. It uses the `COPY` PostgreSQL protocol to stream the data into the
|
||||
server, and manages errors by filling a pair of *reject.dat* and
|
||||
*reject.log* files.
|
||||
|
||||
Thanks to being able to load data directly from a database source, pgloader
|
||||
also supports migrations from other products to PostgreSQL. In this
|
||||
mode of operations, pgloader handles both the schema and data parts of the
|
||||
migration, in a single unmanned command, allowing to implement **Continuous
|
||||
Migration**.
|
||||
|
||||
Features Overview
|
||||
=================
|
||||
|
||||
pgloader has two modes of operation: loading from files, migrating
|
||||
databases. In both cases, pgloader uses the PostgreSQL COPY protocol which
|
||||
implements a **streaming** to send data in a very efficient way.
|
||||
|
||||
Loading file content in PostgreSQL
|
||||
----------------------------------
|
||||
|
||||
When loading from files, pgloader implements the following features:
|
||||
|
||||
Many source formats supported
|
||||
Support for a wide variety of file based formats are included in
|
||||
pgloader: the CSV family, fixed columns formats, dBase files (``db3``),
|
||||
and IBM IXF files.
|
||||
|
||||
The SQLite database engine is accounted for in the next section:
|
||||
pgloader considers SQLite as a database source and implements schema
|
||||
discovery from SQLite catalogs.
|
||||
|
||||
On the fly data transformation
|
||||
Often enough the data as read from a CSV file (or another format) needs
|
||||
some tweaking and clean-up before being sent to PostgreSQL.
|
||||
|
||||
For instance in the `geolite
|
||||
<https://github.com/dimitri/pgloader/blob/master/test/archive.load>`_
|
||||
example we can see that integer values are being rewritten as IP address
|
||||
ranges, allowing to target an ``ip4r`` column directly.
|
||||
|
||||
Full Field projections
|
||||
pgloader supports loading data into less fields than found on file, or
|
||||
more, doing some computation on the data read before sending it to
|
||||
PostgreSQL.
|
||||
|
||||
Reading files from an archive
|
||||
Archive formats *zip*, *tar*, and *gzip* are supported by pgloader: the
|
||||
archive is extracted in a temporary directory and expanded files are then
|
||||
loaded.
|
||||
|
||||
HTTP(S) support
|
||||
pgloader knows how to download a source file or a source archive using
|
||||
HTTP directly. It might be better to use ``curl -O- http://... |
|
||||
pgloader`` and read the data from *standard input*, then allowing for
|
||||
streaming of the data from its source down to PostgreSQL.
|
||||
|
||||
Target schema discovery
|
||||
When loading in an existing table, pgloader takes into account the
|
||||
existing columns and may automatically guess the CSV format for you.
|
||||
|
||||
On error stop / On error resume next
|
||||
In some cases the source data is so damaged as to be impossible to
|
||||
migrate in full, and when loading from a file then the default for
|
||||
pgloader is to use ``on error resume next`` option, where the rows
|
||||
rejected by PostgreSQL are saved away and the migration continues with
|
||||
the other rows.
|
||||
|
||||
In other cases loading only a part of the input data might not be a
|
||||
great idea, and in such cases it's possible to use the ``on error stop``
|
||||
option.
|
||||
|
||||
Pre/Post SQL commands
|
||||
This feature allows pgloader commands to include SQL commands to run
|
||||
before and after loading a file. It might be about creating a table
|
||||
first, then loading the data into it, and then doing more processing
|
||||
on-top of the data (implementing an *ELT* pipeline then), or creating
|
||||
specific indexes as soon as the data has been made ready.
|
||||
|
||||
One-command migration to PostgreSQL
|
||||
-----------------------------------
|
||||
|
||||
When migrating a full database in a single command, pgloader implements the
|
||||
following features:
|
||||
|
||||
One-command migration
|
||||
The whole migration is started with a single command line and then runs
|
||||
unattended. pgloader is meant to be integrated in a fully automated
|
||||
tooling that you can repeat as many times as needed.
|
||||
|
||||
Schema discovery
|
||||
The source database is introspected using its SQL catalogs to get the
|
||||
list of tables, attributes (with data types, default values, not null
|
||||
constraints, etc), primary key constraints, foreign key constraints,
|
||||
indexes, comments, etc. This feeds an internal database catalog of all
|
||||
the objects to migrate from the source database to the target database.
|
||||
|
||||
User defined casting rules
|
||||
Some source database have ideas about their data types that might not be
|
||||
compatible with the PostgreSQL implementation of equivalent data types.
|
||||
|
||||
For instance, SQLite since version 3 has a `Dynamic Type System
|
||||
<https://www.sqlite.org/datatype3.html>`_ which of course isn't
|
||||
compatible with the idea of a `Relation
|
||||
<https://en.wikipedia.org/wiki/Relation_(database)>`_. Or MySQL accepts
|
||||
datetime for year zero, which doesn't exist in our calendar, and
|
||||
doesn't have a boolean data type.
|
||||
|
||||
When migrating from another source database technology to PostgreSQL,
|
||||
data type casting choices must be made. pgloader implements solid
|
||||
defaults that you can rely upon, and a facility for **user defined data
|
||||
type casting rules** for specific cases. The idea is to allow users to
|
||||
specify how the migration should be done, in order for it to be
|
||||
repeatable and included in a *Continuous Migration* process.
|
||||
|
||||
On the fly data transformations
|
||||
The user defined casting rules come with on the fly rewrite of the data.
|
||||
For instance zero dates (it's not just the year, MySQL accepts
|
||||
``0000-00-00`` as a valid datetime) are rewritten to NULL values by
|
||||
default.
|
||||
|
||||
Partial Migrations
|
||||
It is possible to include only a partial list of the source database
|
||||
tables in the migration, or to exclude some of the tables on the source
|
||||
database.
|
||||
|
||||
Schema only, Data only
|
||||
This is the **ORM compatibility** feature of pgloader, where it is
|
||||
possible to create the schema using your ORM and then have pgloader
|
||||
migrate the data targeting this already created schema.
|
||||
|
||||
When doing this, it is possible for pgloader to *reindex* the target
|
||||
schema: before loading the data from the source database into PostgreSQL
|
||||
using COPY, pgloader DROPs the indexes and constraints, and reinstalls
|
||||
the exact same definitions of them once the data has been loaded.
|
||||
|
||||
The reason for operating that way is of course data load performance.
|
||||
|
||||
Repeatable (DROP+CREATE)
|
||||
By default, pgloader issues DROP statements in the target PostgreSQL
|
||||
database before issuing any CREATE statement, so that you can repeat the
|
||||
migration as many times as necessary until migration specifications and
|
||||
rules are bug free.
|
||||
|
||||
Then schedule the data migration to run every night (or even more often!)
|
||||
for the whole duration of the code migration project. See the
|
||||
`Continuous Migration <https://pgloader.io/blog/continuous-migration/>`_
|
||||
methodology for more details about the approach.
|
||||
|
||||
On error stop / On error resume next The default behavior of pgloader when
|
||||
migrating from a database is ``on error stop``. The idea is to let the
|
||||
user fix either the migration specifications or the source data, and run
|
||||
the process again, until it works.
|
||||
|
||||
In some cases the source data is so damaged as to be impossible to
|
||||
migrate in full, and it might be necessary to then resort to the ``on
|
||||
error resume next`` option, where the rows rejected by PostgreSQL are
|
||||
saved away and the migration continues with the other rows.
|
||||
|
||||
Pre/Post SQL commands, Post-Schema SQL commands
|
||||
While pgloader takes care of rewriting the schema to PostgreSQL
|
||||
expectations, and even provides *user-defined data type casting rules*
|
||||
support to that end, sometimes it is necessary to add some specific SQL
|
||||
commands around the migration. It's of course supported right from
|
||||
pgloader itself, without having to script around it.
|
||||
|
||||
Online ALTER schema
|
||||
At times migrating to PostgreSQL is also a good opportunity to review
|
||||
and fix bad decisions that were made in the past, or simply that are not
|
||||
relevant to PostgreSQL.
|
||||
|
||||
The pgloader command syntax allows to ALTER pgloader's internal
|
||||
representation of the target catalogs so that the target schema can be
|
||||
created a little different from the source one. Changes supported
|
||||
include target a different *schema* or *table* name.
|
||||
|
||||
Materialized Views, or schema rewrite on-the-fly
|
||||
In some cases the schema rewriting goes deeper than just renaming the
|
||||
SQL objects to being a full normalization exercise. Because PostgreSQL
|
||||
is great at running a normalized schema in production under most
|
||||
workloads.
|
||||
|
||||
pgloader implements full flexibility in on-the-fly schema rewriting, by
|
||||
making it possible to migrate from a view definition. The view attribute
|
||||
list becomes a table definition in PostgreSQL, and the data is fetched
|
||||
by querying the view on the source system.
|
||||
|
||||
A SQL view allows to implement both content filtering at the column
|
||||
level using the SELECT projection clause, and at the row level using the
|
||||
WHERE restriction clause. And backfilling from reference tables thanks
|
||||
to JOINs.
|
||||
|
||||
Distribute to Citus
|
||||
When migrating from PostgreSQL to Citus, an important part of the process
|
||||
consists of adjusting the schema to the distribution key. Read
|
||||
`Preparing Tables and Ingesting Data
|
||||
<https://docs.citusdata.com/en/v8.0/use_cases/multi_tenant.html>`_ in
|
||||
the Citus documentation for a complete example showing how to do that.
|
||||
|
||||
When using pgloader it's possible to specify the distribution keys and
|
||||
reference tables and let pgloader take care of adjusting the table,
|
||||
indexes, primary keys and foreign key definitions all by itself.
|
||||
|
||||
Encoding Overrides
|
||||
MySQL doesn't actually enforce the encoding of the data in the database
|
||||
to match the encoding known in the metadata, defined at the database,
|
||||
table, or attribute level. Sometimes, it's necessary to override the
|
||||
metadata in order to make sense of the text, and pgloader makes it easy
|
||||
to do so.
|
||||
|
||||
|
||||
Continuous Migration
|
||||
--------------------
|
||||
|
||||
pgloader is meant to migrate a whole database in a single command line and
|
||||
without any manual intervention. The goal is to be able to setup a
|
||||
*Continuous Integration* environment as described in the `Project
|
||||
Methodology <http://mysqltopgsql.com/project/>`_ document of the `MySQL to
|
||||
PostgreSQL <http://mysqltopgsql.com/project/>`_ webpage.
|
||||
|
||||
1. Setup your target PostgreSQL Architecture
|
||||
2. Fork a Continuous Integration environment that uses PostgreSQL
|
||||
3. Migrate the data over and over again every night, from production
|
||||
4. As soon as the CI is all green using PostgreSQL, schedule the D-Day
|
||||
5. Migrate without surprise and enjoy!
|
||||
|
||||
In order to be able to follow this great methodology, you need tooling to
|
||||
implement the third step in a fully automated way. That's pgloader.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Table Of Contents:
|
||||
|
||||
intro
|
||||
quickstart
|
||||
tutorial/tutorial
|
||||
pgloader
|
||||
ref/csv
|
||||
@ -22,6 +253,9 @@ Welcome to pgloader's documentation!
|
||||
ref/mysql
|
||||
ref/sqlite
|
||||
ref/mssql
|
||||
ref/pgsql
|
||||
ref/pgsql-citus-target
|
||||
ref/pgsql-redshift
|
||||
ref/transforms
|
||||
bugreport
|
||||
|
||||
|
||||
@ -10,13 +10,24 @@ the data into the server, and manages errors by filling a pair of
|
||||
pgloader knows how to read data from different kind of sources:
|
||||
|
||||
* Files
|
||||
|
||||
* CSV
|
||||
* Fixed Format
|
||||
* DBF
|
||||
|
||||
* Databases
|
||||
|
||||
* SQLite
|
||||
* MySQL
|
||||
* MS SQL Server
|
||||
* PostgreSQL
|
||||
* Redshift
|
||||
|
||||
pgloader knows how to target different products using the PostgreSQL Protocol:
|
||||
|
||||
* PostgreSQL
|
||||
* `Citus <https://www.citusdata.com>`_
|
||||
* Redshift
|
||||
|
||||
The level of automation provided by pgloader depends on the data source
|
||||
type. In the case of CSV and Fixed Format files, a full description of the
|
||||
@ -24,23 +35,47 @@ expected input properties must be given to pgloader. In the case of a
|
||||
database, pgloader connects to the live service and knows how to fetch the
|
||||
metadata it needs directly from it.
|
||||
|
||||
Continuous Migration
|
||||
--------------------
|
||||
Features Matrix
|
||||
---------------
|
||||
|
||||
pgloader is meant to migrate a whole database in a single command line and
|
||||
without any manual intervention. The goal is to be able to setup a
|
||||
*Continuous Integration* environment as described in the `Project
|
||||
Methodology <http://mysqltopgsql.com/project/>`_ document of the `MySQL to
|
||||
PostgreSQL <http://mysqltopgsql.com/project/>`_ webpage.
|
||||
Here's a comparison of the features supported depending on the source
|
||||
database engine. Some features that are not supported can be added to
|
||||
pgloader, it's just that nobody had the need to do so yet. Those features
|
||||
are marked with ✗. Empty cells are used when the feature doesn't make sense
|
||||
for the selected source database.
|
||||
|
||||
1. Setup your target PostgreSQL Architecture
|
||||
2. Fork a Continuous Integration environment that uses PostgreSQL
|
||||
3. Migrate the data over and over again every night, from production
|
||||
4. As soon as the CI is all green using PostgreSQL, schedule the D-Day
|
||||
5. Migrate without surprise and enjoy!
|
||||
========================== ======= ====== ====== =========== =========
|
||||
Feature SQLite MySQL MS SQL PostgreSQL Redshift
|
||||
========================== ======= ====== ====== =========== =========
|
||||
One-command migration ✓ ✓ ✓ ✓ ✓
|
||||
Continuous Migration ✓ ✓ ✓ ✓ ✓
|
||||
Schema discovery ✓ ✓ ✓ ✓ ✓
|
||||
Partial Migrations ✓ ✓ ✓ ✓ ✓
|
||||
Schema only ✓ ✓ ✓ ✓ ✓
|
||||
Data only ✓ ✓ ✓ ✓ ✓
|
||||
Repeatable (DROP+CREATE) ✓ ✓ ✓ ✓ ✓
|
||||
User defined casting rules ✓ ✓ ✓ ✓ ✓
|
||||
Encoding Overrides ✓
|
||||
On error stop ✓ ✓ ✓ ✓ ✓
|
||||
On error resume next ✓ ✓ ✓ ✓ ✓
|
||||
Pre/Post SQL commands ✓ ✓ ✓ ✓ ✓
|
||||
Post-Schema SQL commands ✗ ✓ ✓ ✓ ✓
|
||||
Primary key support ✓ ✓ ✓ ✓ ✓
|
||||
Foreign key support ✓ ✓ ✓ ✓
|
||||
Online ALTER schema ✓ ✓ ✓ ✓ ✓
|
||||
Materialized views ✗ ✓ ✓ ✓ ✓
|
||||
Distribute to Citus ✗ ✓ ✓ ✓ ✓
|
||||
========================== ======= ====== ====== =========== =========
|
||||
|
||||
For more details about what the features are about, see the specific
|
||||
reference pages for your database source.
|
||||
|
||||
For some of the features, missing support only means that the feature is not
|
||||
needed for the other sources, such as the capability to override MySQL
|
||||
encoding metadata about a table or a column. Only MySQL in this list is left
|
||||
completely unable to guarantee text encoding. Or Redshift not having foreign
|
||||
keys.
|
||||
|
||||
In order to be able to follow this great methodology, you need tooling to
|
||||
implement the third step in a fully automated way. That's pgloader.
|
||||
|
||||
Commands
|
||||
--------
|
||||
|
||||
@ -154,6 +154,18 @@ Those options are meant to tweak `pgloader` behavior when loading data.
|
||||
machine code) another version of itself, usually a newer one like a very
|
||||
recent git checkout.
|
||||
|
||||
* `--no-ssl-cert-verification`
|
||||
|
||||
Uses the OpenSSL option to accept a locally issued server-side
|
||||
certificate, avoiding the following error message::
|
||||
|
||||
SSL verify error: 20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY
|
||||
|
||||
The right way to fix the SSL issue is to use a trusted certificate, of
|
||||
course. Sometimes though it's useful to make progress with the pgloader
|
||||
setup while the certificate chain of trust is being fixed, maybe by
|
||||
another team. That's when this option is useful.
|
||||
|
||||
Command Line Only Operations
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
@ -552,6 +564,22 @@ queries from a SQL file. Implements support for PostgreSQL dollar-quoting
|
||||
and the `\i` and `\ir` include facilities as in `psql` batch mode (where
|
||||
they are the same thing).
|
||||
|
||||
AFTER CREATE SCHEMA DO
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that
|
||||
section are executed once the schema has been created by pgloader, and
|
||||
before the data is loaded. It's the right time to ALTER TABLE or do some
|
||||
custom implementation on-top of what pgloader does, like maybe partitioning.
|
||||
|
||||
AFTER CREATE SCHEMA EXECUTE
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Same behaviour as in the *AFTER CREATE SCHEMA DO* clause. Allows you to read
|
||||
the SQL queries from a SQL file. Implements support for PostgreSQL
|
||||
dollar-quoting and the `\i` and `\ir` include facilities as in `psql` batch
|
||||
mode (where they are the same thing).
|
||||
|
||||
Connection String
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
PgLoader Quick Start
|
||||
--------------------
|
||||
Pgloader Quick Start
|
||||
====================
|
||||
|
||||
In simple cases, pgloader is very easy to use.
|
||||
|
||||
CSV
|
||||
^^^
|
||||
---
|
||||
|
||||
Load data from a CSV file into a pre-existing table in your database::
|
||||
|
||||
@ -26,7 +26,7 @@ For documentation about the available syntaxes for the `--field` and
|
||||
Note also that the PostgreSQL URI includes the target *tablename*.
|
||||
|
||||
Reading from STDIN
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
------------------
|
||||
|
||||
File based pgloader sources can be loaded from the standard input, as in the
|
||||
following example::
|
||||
@ -46,7 +46,7 @@ pgloader with this technique, using the Unix pipe::
|
||||
gunzip -c source.gz | pgloader --type csv ... - pgsql:///target?foo
|
||||
|
||||
Loading from CSV available through HTTP
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
---------------------------------------
|
||||
|
||||
The same command as just above can also be run if the CSV file happens to be
|
||||
found on a remote HTTP location::
|
||||
@ -84,7 +84,7 @@ Also notice that the same command will work against an archived version of
|
||||
the same data.
|
||||
|
||||
Streaming CSV data from an HTTP compressed file
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
-----------------------------------------------
|
||||
|
||||
Finally, it's important to note that pgloader first fetches the content from
|
||||
the HTTP URL into a local file, then expands the archive when it's
|
||||
@ -110,7 +110,7 @@ and the commands and pgloader will take care of streaming the data down to
|
||||
PostgreSQL.
|
||||
|
||||
Migrating from SQLite
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
---------------------
|
||||
|
||||
The following command will open the SQLite database, discover its tables
|
||||
definitions including indexes and foreign keys, migrate those definitions
|
||||
@ -121,7 +121,7 @@ and then migrate the data over::
|
||||
pgloader ./test/sqlite/sqlite.db postgresql:///newdb
|
||||
|
||||
Migrating from MySQL
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
--------------------
|
||||
|
||||
Just create a database where to host the MySQL data and definitions and have
|
||||
pgloader do the migration for you in a single command line::
|
||||
@ -130,7 +130,7 @@ pgloader do the migration for you in a single command line::
|
||||
pgloader mysql://user@localhost/sakila postgresql:///pagila
|
||||
|
||||
Fetching an archived DBF file from a HTTP remote location
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
---------------------------------------------------------
|
||||
|
||||
It's possible for pgloader to download a file from HTTP, unarchive it, and
|
||||
only then open it to discover the schema then load the data::
|
||||
@ -31,7 +31,7 @@ MS SQL Database Migration Options: WITH
|
||||
---------------------------------------
|
||||
|
||||
When loading from a `MS SQL` database, the same options as when loading a
|
||||
`MySQL` database are supported. Please refer to the MySQL section. The
|
||||
`MS SQL` database are supported. Please refer to the MS SQL section. The
|
||||
following options are added:
|
||||
|
||||
- *create schemas*
|
||||
@ -53,7 +53,39 @@ CAST
|
||||
The cast clause allows to specify custom casting rules, either to overload
|
||||
the default casting rules or to amend them with special cases.
|
||||
|
||||
Please refer to the MySQL CAST clause for details.
|
||||
Please refer to the MS SQL CAST clause for details.
|
||||
|
||||
MS SQL Views Support
|
||||
--------------------
|
||||
|
||||
MS SQL views support allows pgloader to migrate view as if they were base
|
||||
tables. This feature then allows for on-the-fly transformation from MS SQL
|
||||
to PostgreSQL, as the view definition is used rather than the base data.
|
||||
|
||||
MATERIALIZE VIEWS
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
This clause allows you to implement custom data processing at the data
|
||||
source by providing a *view definition* against which pgloader will query
|
||||
the data. It's not possible to just allow for plain `SQL` because we want to
|
||||
know a lot about the exact data types of each column involved in the query
|
||||
output.
|
||||
|
||||
This clause expects a comma separated list of view definitions, each one
|
||||
being either the name of an existing view in your database or the following
|
||||
expression::
|
||||
|
||||
*name* `AS` `$$` *sql query* `$$`
|
||||
|
||||
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
|
||||
the beginning of the data loading, and the resulting view will then be
|
||||
dropped at the end of the data loading.
|
||||
|
||||
MATERIALIZE ALL VIEWS
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
|
||||
returned by MS SQL rather than asking the user to specify the list.
|
||||
|
||||
MS SQL Partial Migration
|
||||
------------------------
|
||||
@ -96,9 +128,35 @@ schema 'public' in the target database with this command::
|
||||
ALTER TABLE NAMES MATCHING ... IN SCHEMA '...'
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
See the MySQL explanation for this clause above. It works the same in the
|
||||
context of migrating from MS SQL, only with the added option to specify the
|
||||
name of the schema where to find the definition of the target tables.
|
||||
Introduce a comma separated list of table names or *regular expressions*
|
||||
that you want to target in the pgloader *ALTER TABLE* command. Available
|
||||
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
|
||||
IN SCHEMA 'dbo'
|
||||
SET SCHEMA 'mv'
|
||||
|
||||
ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'dbo' RENAME TO 'films'
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET (fillfactor='40')
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET TABLESPACE 'tlbspc'
|
||||
|
||||
You can use as many such rules as you need. The list of tables to be
|
||||
migrated is searched in pgloader memory against the *ALTER TABLE* matching
|
||||
rules, and for each command pgloader stops at the first matching criteria
|
||||
(regexp or string).
|
||||
|
||||
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
|
||||
the level of the pgloader in-memory representation of your source database
|
||||
schema. In case of a name change, the mapping is kept and reused in the
|
||||
*foreign key* and *index* support.
|
||||
|
||||
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
|
||||
command that pgloader will run when it has to create a table.
|
||||
|
||||
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
|
||||
`CREATE TABLE` command that pgloader will run when it has to create a table.
|
||||
|
||||
The matching is done in pgloader itself, with a Common Lisp regular
|
||||
expression lib, so doesn't depend on the *LIKE* implementation of MS SQL,
|
||||
|
||||
@ -1,10 +1,9 @@
|
||||
Migrating a MySQL Database to PostgreSQL
|
||||
========================================
|
||||
|
||||
This command instructs pgloader to load data from a database connection. The
|
||||
only supported database source is currently *MySQL*, and pgloader supports
|
||||
dynamically converting the schema of the source database and the indexes
|
||||
building.
|
||||
This command instructs pgloader to load data from a database connection.
|
||||
pgloader supports dynamically converting the schema of the source database
|
||||
and the indexes building.
|
||||
|
||||
A default set of casting rules are provided and might be overloaded and
|
||||
appended to by the command.
|
||||
@ -500,9 +499,8 @@ ALTER TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expressions*
|
||||
that you want to target in the pgloader *ALTER TABLE* command. The only two
|
||||
available actions are *SET SCHEMA* and *RENAME TO*, both take a quoted
|
||||
string as parameter::
|
||||
that you want to target in the pgloader *ALTER TABLE* command. Available
|
||||
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
|
||||
SET SCHEMA 'mv'
|
||||
@ -511,6 +509,8 @@ string as parameter::
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40')
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ SET TABLESPACE 'pg_default'
|
||||
|
||||
You can use as many such rules as you need. The list of tables to be
|
||||
migrated is searched in pgloader memory against the *ALTER TABLE* matching
|
||||
rules, and for each command pgloader stops at the first matching criteria
|
||||
@ -524,6 +524,9 @@ schema. In case of a name change, the mapping is kept and reused in the
|
||||
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
|
||||
command that pgloader will run when it has to create a table.
|
||||
|
||||
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
|
||||
`CREATE TABLE` command that pgloader will run when it has to create a table.
|
||||
|
||||
MySQL Migration: limitations
|
||||
----------------------------
|
||||
|
||||
@ -556,7 +559,7 @@ Numbers::
|
||||
type int with extra auto_increment to serial when (< precision 10)
|
||||
type int with extra auto_increment to bigserial when (<= 10 precision)
|
||||
type int to int when (< precision 10)
|
||||
type int to bigint when (<= 10 precision)
|
||||
type int to bigint when (>= 10 precision)
|
||||
type tinyint with extra auto_increment to serial
|
||||
type smallint with extra auto_increment to serial
|
||||
type mediumint with extra auto_increment to serial
|
||||
@ -609,6 +612,14 @@ Date::
|
||||
to timestamptz drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type datetime with extra on update current timestamp when not null
|
||||
to timestamptz drop not null drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type datetime with extra on update current timestamp
|
||||
to timestamptz drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type timestamp when default "0000-00-00 00:00:00" and not null
|
||||
to timestamptz drop not null drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
196
docs/ref/pgsql-citus-target.rst
Normal file
196
docs/ref/pgsql-citus-target.rst
Normal file
@ -0,0 +1,196 @@
|
||||
Migrating a PostgreSQL Database to Citus
|
||||
========================================
|
||||
|
||||
This command instructs pgloader to load data from a database connection.
|
||||
Automatic discovery of the schema is supported, including build of the
|
||||
indexes, primary and foreign keys constraints. A default set of casting
|
||||
rules are provided and might be overloaded and appended to by the command.
|
||||
|
||||
Automatic distribution column backfilling is supported, either from commands
|
||||
that specify what is the distribution column in every table, or only in the
|
||||
main table, then relying on foreign key constraints to discover the other
|
||||
distribution keys.
|
||||
|
||||
Here's a short example of migrating a database from a PostgreSQL server to
|
||||
another:
|
||||
|
||||
::
|
||||
|
||||
load database
|
||||
from pgsql:///hackathon
|
||||
into pgsql://localhost:9700/dim
|
||||
|
||||
with include drop, reset no sequences
|
||||
|
||||
cast column impressions.seen_at to "timestamp with time zone"
|
||||
|
||||
distribute companies using id
|
||||
-- distribute campaigns using company_id
|
||||
-- distribute ads using company_id from campaigns
|
||||
-- distribute clicks using company_id from ads, campaigns
|
||||
-- distribute impressions using company_id from ads, campaigns
|
||||
;
|
||||
|
||||
Everything works exactly the same way as when doing a PostgreSQL to
|
||||
PostgreSQL migration, with the added functionality of this new `distribute`
|
||||
command.
|
||||
|
||||
Distribute Command
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The distribute command syntax is as following::
|
||||
|
||||
distribute <table name> using <column name>
|
||||
distribute <table name> using <column name> from <table> [, <table>, ...]
|
||||
distribute <table name> as reference table
|
||||
|
||||
When using the distribute command, the following steps are added to pgloader
|
||||
operations when migrating the schema:
|
||||
|
||||
- if the distribution column does not exist in the table, it is added as
|
||||
the first column of the table
|
||||
|
||||
- if the distribution column does not exist in the primary key of the
|
||||
table, it is added as the first column of the primary key of the table
|
||||
|
||||
- all the foreign keys that point to the table are added the distribution
|
||||
key automatically too, including the source tables of the foreign key
|
||||
constraints
|
||||
|
||||
- once the schema has been created on the target database, pgloader then
|
||||
issues Citus specific command `create_reference_table()
|
||||
<http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-reference-table>`_
|
||||
and `create_distributed_table()
|
||||
<http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-distributed-table>`_
|
||||
to make the tables distributed
|
||||
|
||||
Those operations are done in the schema section of pgloader, before the data
|
||||
is loaded. When the data is loaded, the newly added columns need to be
|
||||
backfilled from referenced data. pgloader knows how to do that by generating
|
||||
a query like the following and importing the result set of such a query
|
||||
rather than the raw data from the source table.
|
||||
|
||||
Citus Migration Example
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
With the migration command as above, pgloader adds the column ``company_id``
|
||||
to the tables that have a direct or indirect foreign key reference to the
|
||||
``companies`` table.
|
||||
|
||||
We run pgloader using the following command, where the file
|
||||
`./test/citus/company.load
|
||||
<https://github.com/dimitri/pgloader/blob/master/test/citus/company.load>`_
|
||||
contains the pgloader command as shown above.
|
||||
|
||||
::
|
||||
|
||||
$ pgloader --client-min-messages sql ./test/citus/company.load
|
||||
|
||||
The following SQL statements are all extracted from the log messages that
|
||||
the pgloader command outputs. We are going to have a look at the
|
||||
`impressions` table. It gets created with a new column `company_id` in the
|
||||
first position, as follows:
|
||||
|
||||
::
|
||||
|
||||
CREATE TABLE "public"."impressions"
|
||||
(
|
||||
company_id bigint,
|
||||
"id" bigserial,
|
||||
"ad_id" bigint default NULL,
|
||||
"seen_at" timestamp with time zone default NULL,
|
||||
"site_url" text default NULL,
|
||||
"cost_per_impression_usd" numeric(20,10) default NULL,
|
||||
"user_ip" inet default NULL,
|
||||
"user_data" jsonb default NULL
|
||||
);
|
||||
|
||||
The original schema for this table does not have the `company_id` column,
|
||||
which means pgloader now needs to change the primary key definition, the
|
||||
foreign keys constraints definitions from and to this table, and also to
|
||||
*backfill* the `company_id` data to this table when doing the COPY phase of
|
||||
the migration.
|
||||
|
||||
Then once the tables have been created, pgloader executes the following SQL
|
||||
statements::
|
||||
|
||||
SELECT create_distributed_table('"public"."companies"', 'id');
|
||||
SELECT create_distributed_table('"public"."campaigns"', 'company_id');
|
||||
SELECT create_distributed_table('"public"."ads"', 'company_id');
|
||||
SELECT create_distributed_table('"public"."clicks"', 'company_id');
|
||||
SELECT create_distributed_table('"public"."impressions"', 'company_id');
|
||||
|
||||
Then when copying the data from the source PostgreSQL database to the new
|
||||
Citus tables, the new column (here ``company_id``) needs to be backfilled
|
||||
from the source tables. Here's the SQL query that pgloader uses as a data
|
||||
source for the ``ads`` table in our example:
|
||||
|
||||
::
|
||||
|
||||
SELECT "campaigns".company_id::text, "ads".id::text, "ads".campaign_id::text,
|
||||
"ads".name::text, "ads".image_url::text, "ads".target_url::text,
|
||||
"ads".impressions_count::text, "ads".clicks_count::text,
|
||||
"ads".created_at::text, "ads".updated_at::text
|
||||
|
||||
FROM "public"."ads"
|
||||
JOIN "public"."campaigns"
|
||||
ON ads.campaign_id = campaigns.id
|
||||
|
||||
The ``impressions`` table has an indirect foreign key reference to the
|
||||
``company`` table, which is the table where the distribution key is
|
||||
specified. pgloader will discover that itself from walking the PostgreSQL
|
||||
catalogs, and you may also use the following specification in the pgloader
|
||||
command to explicitly add the indirect dependency:
|
||||
|
||||
::
|
||||
|
||||
distribute impressions using company_id from ads, campaigns
|
||||
|
||||
Given this schema, the SQL query used by pgloader to fetch the data for the
|
||||
`impressions` table is the following, implementing online backfilling of the
|
||||
data:
|
||||
|
||||
::
|
||||
|
||||
SELECT "campaigns".company_id::text, "impressions".id::text,
|
||||
"impressions".ad_id::text, "impressions".seen_at::text,
|
||||
"impressions".site_url::text,
|
||||
"impressions".cost_per_impression_usd::text,
|
||||
"impressions".user_ip::text,
|
||||
"impressions".user_data::text
|
||||
|
||||
FROM "public"."impressions"
|
||||
|
||||
JOIN "public"."ads"
|
||||
ON impressions.ad_id = ads.id
|
||||
|
||||
JOIN "public"."campaigns"
|
||||
ON ads.campaign_id = campaigns.id
|
||||
|
||||
When the data copying is done, then pgloader also has to install the indexes
|
||||
supporting the primary keys, and add the foreign key definitions to the
|
||||
schema. Those definitions are not the same as in the source schema, because
|
||||
of the adding of the distribution column to the table: we need to also add
|
||||
the column to the primary key and the foreign key constraints.
|
||||
|
||||
Here are the commands issued by pgloader for the ``impressions`` table:
|
||||
|
||||
::
|
||||
|
||||
CREATE UNIQUE INDEX "impressions_pkey"
|
||||
ON "public"."impressions" (company_id, id);
|
||||
|
||||
ALTER TABLE "public"."impressions"
|
||||
ADD CONSTRAINT "impressions_ad_id_fkey"
|
||||
FOREIGN KEY(company_id,ad_id)
|
||||
REFERENCES "public"."ads"(company_id,id)
|
||||
|
||||
Given a single line of specification ``distribute companies using id`` then
|
||||
pgloader implements all the necessary schema changes on the fly when
|
||||
migrating to Citus, and also dynamically backfills the data.
|
||||
|
||||
Citus Migration: Limitations
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The way pgloader implements *reset sequence* does not work with Citus at
|
||||
this point, so sequences need to be taken care of separately.
|
||||
70
docs/ref/pgsql-redshift.rst
Normal file
70
docs/ref/pgsql-redshift.rst
Normal file
@ -0,0 +1,70 @@
|
||||
Support for Redshift in pgloader
|
||||
================================
|
||||
|
||||
The command and behavior are the same as when migrating from a PostgreSQL
|
||||
database source. pgloader automatically discovers that it's talking to a
|
||||
Redshift database by parsing the output of the `SELECT version()` SQL query.
|
||||
|
||||
Redshift as a data source
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Redshift is a variant of PostgreSQL version 8.0.2, which allows pgloader to
|
||||
work with only a very small amount of adaptation in the catalog queries
|
||||
used. In other words, migrating from Redshift to PostgreSQL works just the
|
||||
same as when migrating from a PostgreSQL data source, including the
|
||||
connection string specification.
|
||||
|
||||
Redshift as a data destination
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The Redshift variant of PostgreSQL 8.0.2 does not have support for the
|
||||
``COPY FROM STDIN`` feature that pgloader normally relies upon. To use COPY
|
||||
with Redshift, the data must first be made available in an S3 bucket.
|
||||
|
||||
First, pgloader must authenticate to Amazon S3. pgloader uses the following
|
||||
setup for that:
|
||||
|
||||
- ``~/.aws/config``
|
||||
|
||||
This INI formatted file contains sections with your default region and
|
||||
other global values relevant to using the S3 API. pgloader parses it to
|
||||
get the region when it's setup in the ``default`` INI section.
|
||||
|
||||
The environment variable ``AWS_DEFAULT_REGION`` can be used to override
|
||||
the configuration file value.
|
||||
|
||||
- ``~/.aws/credentials``
|
||||
|
||||
The INI formatted file contains your authentication setup to Amazon,
|
||||
with the properties ``aws_access_key_id`` and ``aws_secret_access_key``
|
||||
in the section ``default``. pgloader parses this file for those keys,
|
||||
and uses their values when communicating with Amazon S3.
|
||||
|
||||
The environment variables ``AWS_ACCESS_KEY_ID`` and
|
||||
``AWS_SECRET_ACCESS_KEY`` can be used to override the configuration file values.
|
||||
|
||||
- ``AWS_S3_BUCKET_NAME``
|
||||
|
||||
Finally, the value of the environment variable ``AWS_S3_BUCKET_NAME`` is
|
||||
used by pgloader as the name of the S3 bucket where to upload the files
|
||||
to COPY to the Redshift database. The bucket name defaults to
|
||||
``pgloader``.
|
||||
|
||||
Then pgloader works as usual, see the other sections of the documentation
|
||||
for the details, depending on the data source (files, other databases, etc).
|
||||
When preparing the data for PostgreSQL, pgloader now uploads each batch into
|
||||
a single CSV file, and then issues a COPY command such as the following, for each batch:
|
||||
|
||||
::
|
||||
|
||||
COPY <target_table_name>
|
||||
FROM 's3://<s3 bucket>/<s3-filename-just-uploaded>'
|
||||
FORMAT CSV
|
||||
TIMEFORMAT 'auto'
|
||||
REGION '<aws-region>'
|
||||
ACCESS_KEY_ID '<aws-access-key-id>'
|
||||
SECRET_ACCESS_KEY '<aws-secret-access-key>'
|
||||
|
||||
This is the only difference with a PostgreSQL core version, where pgloader
|
||||
can rely on the classic ``COPY FROM STDIN`` command, which allows to send
|
||||
data through the already established connection to PostgreSQL.
|
||||
408
docs/ref/pgsql.rst
Normal file
408
docs/ref/pgsql.rst
Normal file
@ -0,0 +1,408 @@
|
||||
Migrating a PostgreSQL Database to PostgreSQL
|
||||
=============================================
|
||||
|
||||
This command instructs pgloader to load data from a database connection.
|
||||
Automatic discovery of the schema is supported, including build of the
|
||||
indexes, primary and foreign keys constraints. A default set of casting
|
||||
rules are provided and might be overloaded and appended to by the command.
|
||||
|
||||
Here's a short example of migrating a database from a PostgreSQL server to
|
||||
another:
|
||||
|
||||
::
|
||||
|
||||
load database
|
||||
from pgsql://localhost/pgloader
|
||||
into pgsql://localhost/copy
|
||||
|
||||
including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
|
||||
including only table names matching ~/geolocations/ in schema 'public'
|
||||
;
|
||||
|
||||
PostgreSQL Database Source Specification: FROM
|
||||
----------------------------------------------
|
||||
|
||||
Must be a connection URL pointing to a PostgreSQL database.
|
||||
|
||||
See the `SOURCE CONNECTION STRING` section above for details on how to write
|
||||
the connection string.
|
||||
|
||||
::
|
||||
|
||||
pgsql://[user[:password]@][netloc][:port][/dbname][?option=value&...]
|
||||
|
||||
|
||||
PostgreSQL Database Migration Options: WITH
|
||||
-------------------------------------------
|
||||
|
||||
When loading from a `PostgreSQL` database, the following options are
|
||||
supported, and the default *WITH* clause is: *no truncate*, *create schema*,
|
||||
*create tables*, *include drop*, *create indexes*, *reset sequences*,
|
||||
*foreign keys*, *downcase identifiers*, *uniquify index names*, *reindex*.
|
||||
|
||||
- *include drop*
|
||||
|
||||
When this option is listed, pgloader drops all the tables in the target
|
||||
PostgreSQL database whose names appear in the source database. This
|
||||
option allows for using the same command several times in a row until
|
||||
you figure out all the options, starting automatically from a clean
|
||||
environment. Please note that `CASCADE` is used to ensure that tables
|
||||
are dropped even if there are foreign keys pointing to them. This is
|
||||
precisely what `include drop` is intended to do: drop all target tables
|
||||
and recreate them.
|
||||
|
||||
Great care needs to be taken when using `include drop`, as it will
|
||||
cascade to *all* objects referencing the target tables, possibly
|
||||
including other tables that are not being loaded from the source DB.
|
||||
|
||||
- *include no drop*
|
||||
|
||||
When this option is listed, pgloader will not include any `DROP`
|
||||
statement when loading the data.
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues the `TRUNCATE` command
|
||||
against each PostgreSQL table just before loading data into it.
|
||||
|
||||
- *no truncate*
|
||||
|
||||
When this option is listed, pgloader issues no `TRUNCATE` command.
|
||||
|
||||
- *disable triggers*
|
||||
|
||||
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
|
||||
TRIGGER ALL` command against the PostgreSQL target table before copying
|
||||
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
|
||||
`COPY` is done.
|
||||
|
||||
This option allows loading data into a pre-existing table ignoring the
|
||||
*foreign key constraints* and user defined triggers and may result in
|
||||
invalid *foreign key constraints* once the data is loaded. Use with
|
||||
care.
|
||||
|
||||
- *create tables*
|
||||
|
||||
When this option is listed, pgloader creates the table using the meta
|
||||
data found in the source database, which must contain a list of fields with
|
||||
their data type. A standard data type conversion from DBF to PostgreSQL
|
||||
is done.
|
||||
|
||||
- *create no tables*
|
||||
|
||||
When this option is listed, pgloader skips the creation of table before
|
||||
loading data, target tables must then already exist.
|
||||
|
||||
Also, when using *create no tables* pgloader fetches the metadata from
|
||||
the current target database and checks type casting, then will remove
|
||||
constraints and indexes prior to loading the data and install them back
|
||||
again once the loading is done.
|
||||
|
||||
- *create indexes*
|
||||
|
||||
When this option is listed, pgloader gets the definitions of all the
|
||||
indexes found in the source database and creates the same set of index
|
||||
definitions against the PostgreSQL database.
|
||||
|
||||
- *create no indexes*
|
||||
|
||||
When this option is listed, pgloader skips the creating indexes.
|
||||
|
||||
- *drop indexes*
|
||||
|
||||
When this option is listed, pgloader drops the indexes in the target
|
||||
database before loading the data, and creates them again at the end
|
||||
of the data copy.
|
||||
|
||||
- *reindex*
|
||||
|
||||
When this option is used, pgloader does both *drop indexes* before
|
||||
loading the data and *create indexes* once data is loaded.
|
||||
|
||||
- *drop schema*
|
||||
|
||||
When this option is listed, pgloader drops the target schema in the
|
||||
target PostgreSQL database before creating it again and all the objects
|
||||
it contains. The default behavior doesn't drop the target schemas.
|
||||
|
||||
- *foreign keys*
|
||||
|
||||
When this option is listed, pgloader gets the definitions of all the
|
||||
foreign keys found in the source database and creates the same set of
|
||||
foreign key definitions against the PostgreSQL database.
|
||||
|
||||
- *no foreign keys*
|
||||
|
||||
When this option is listed, pgloader skips creating foreign keys.
|
||||
|
||||
- *reset sequences*
|
||||
|
||||
When this option is listed, at the end of the data loading and after the
|
||||
indexes have all been created, pgloader resets all the PostgreSQL
|
||||
sequences created to the current maximum value of the column they are
|
||||
attached to.
|
||||
|
||||
The options *schema only* and *data only* have no effects on this
|
||||
option.
|
||||
|
||||
- *reset no sequences*
|
||||
|
||||
When this option is listed, pgloader skips resetting sequences after the
|
||||
load.
|
||||
|
||||
The options *schema only* and *data only* have no effects on this
|
||||
option.
|
||||
|
||||
- *downcase identifiers*
|
||||
|
||||
When this option is listed, pgloader converts all MySQL identifiers
|
||||
(table names, index names, column names) to *downcase*, except for
|
||||
PostgreSQL *reserved* keywords.
|
||||
|
||||
The PostgreSQL *reserved* keywords are determined dynamically by using
|
||||
the system function `pg_get_keywords()`.
|
||||
|
||||
- *quote identifiers*
|
||||
|
||||
When this option is listed, pgloader quotes all MySQL identifiers so
|
||||
that their case is respected. Note that you will then have to do the
|
||||
same thing in your application code queries.
|
||||
|
||||
- *schema only*
|
||||
|
||||
When this option is listed pgloader refrains from migrating the data
|
||||
over. Note that the schema in this context includes the indexes when the
|
||||
option *create indexes* has been listed.
|
||||
|
||||
- *data only*
|
||||
|
||||
When this option is listed pgloader only issues the `COPY` statements,
|
||||
without doing any other processing.
|
||||
|
||||
- *rows per range*
|
||||
|
||||
How many rows are fetched per `SELECT` query when using *multiple
|
||||
readers per thread*, see above for details.
|
||||
|
||||
PostgreSQL Database Casting Rules
|
||||
---------------------------------
|
||||
|
||||
The command *CAST* introduces user-defined casting rules.
|
||||
|
||||
The cast clause allows to specify custom casting rules, either to overload
|
||||
the default casting rules or to amend them with special cases.
|
||||
|
||||
A casting rule is expected to follow one of the forms::
|
||||
|
||||
type <type-name> [ <guard> ... ] to <pgsql-type-name> [ <option> ... ]
|
||||
column <table-name>.<column-name> [ <guards> ] to ...
|
||||
|
||||
It's possible for a *casting rule* to either match against a PostgreSQL data
|
||||
type or against a given *column name* in a given *table name*. So it's
|
||||
possible to migrate a table from a PostgreSQL database while changing an
|
||||
`int` column to a `bigint` one, automatically.
|
||||
|
||||
The *casting rules* are applied in order, the first match prevents following
|
||||
rules to be applied, and user defined rules are evaluated first.
|
||||
|
||||
The supported guards are:
|
||||
|
||||
- *when default 'value'*
|
||||
|
||||
The casting rule is only applied against MySQL columns of the source
|
||||
type that have given *value*, which must be a single-quoted or a
|
||||
double-quoted string.
|
||||
|
||||
- *when typemod expression*
|
||||
|
||||
The casting rule is only applied against MySQL columns of the source
|
||||
type that have a *typemod* value matching the given *typemod
|
||||
expression*. The *typemod* is separated into its *precision* and *scale*
|
||||
components.
|
||||
|
||||
Example of a cast rule using a *typemod* guard::
|
||||
|
||||
type char when (= precision 1) to char keep typemod
|
||||
|
||||
This expression casts MySQL `char(1)` column to a PostgreSQL column of
|
||||
type `char(1)` while allowing for the general case `char(N)` will be
|
||||
converted by the default cast rule into a PostgreSQL type `varchar(N)`.
|
||||
|
||||
- *with extra auto_increment*
|
||||
|
||||
The casting rule is only applied against PostgreSQL columns attached to a
|
||||
sequence. This can be the result of doing that manually, using a
|
||||
`serial` or a `bigserial` data type, or an `identity` column.
|
||||
|
||||
|
||||
The supported casting options are:
|
||||
|
||||
- *drop default*, *keep default*
|
||||
|
||||
When the option *drop default* is listed, pgloader drops any
|
||||
existing default expression in the MySQL database for columns of the
|
||||
source type from the `CREATE TABLE` statement it generates.
|
||||
|
||||
The spelling *keep default* explicitly prevents that behaviour and
|
||||
can be used to overload the default casting rules.
|
||||
|
||||
- *drop not null*, *keep not null*, *set not null*
|
||||
|
||||
When the option *drop not null* is listed, pgloader drops any
|
||||
existing `NOT NULL` constraint associated with the given source
|
||||
MySQL datatype when it creates the tables in the PostgreSQL
|
||||
database.
|
||||
|
||||
The spelling *keep not null* explicitly prevents that behaviour and
|
||||
can be used to overload the default casting rules.
|
||||
|
||||
When the option *set not null* is listed, pgloader sets a `NOT NULL`
|
||||
constraint on the target column regardless whether it has been set
|
||||
in the source MySQL column.
|
||||
|
||||
- *drop typemod*, *keep typemod*
|
||||
|
||||
When the option *drop typemod* is listed, pgloader drops any
|
||||
existing *typemod* definition (e.g. *precision* and *scale*) from
|
||||
the datatype definition found in the MySQL columns of the source
|
||||
type when it created the tables in the PostgreSQL database.
|
||||
|
||||
The spelling *keep typemod* explicitly prevents that behaviour and
|
||||
can be used to overload the default casting rules.
|
||||
|
||||
- *using*
|
||||
|
||||
This option takes as its single argument the name of a function to
|
||||
be found in the `pgloader.transforms` Common Lisp package. See above
|
||||
for details.
|
||||
|
||||
It's possible to augment a default cast rule (such as one that
|
||||
applies against `ENUM` data type for example) with a *transformation
|
||||
function* by omitting entirely the `type` parts of the casting rule,
|
||||
as in the following example::
|
||||
|
||||
column enumerate.foo using empty-string-to-null
|
||||
|
||||
PostgreSQL Views Support
|
||||
------------------------
|
||||
|
||||
PostgreSQL views support allows pgloader to migrate view as if they were
|
||||
base tables. This feature then allows for on-the-fly transformation of the
|
||||
source schema, as the view definition is used rather than the base data.
|
||||
|
||||
MATERIALIZE VIEWS
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
This clause allows you to implement custom data processing at the data
|
||||
source by providing a *view definition* against which pgloader will query
|
||||
the data. It's not possible to just allow for plain `SQL` because we want to
|
||||
know a lot about the exact data types of each column involved in the query
|
||||
output.
|
||||
|
||||
This clause expect a comma separated list of view definitions, each one
|
||||
being either the name of an existing view in your database or the following
|
||||
expression::
|
||||
|
||||
*name* `AS` `$$` *sql query* `$$`
|
||||
|
||||
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
|
||||
the beginning of the data loading, and the resulting view will then be
|
||||
dropped at the end of the data loading.
|
||||
|
||||
MATERIALIZE ALL VIEWS
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
|
||||
returned by PostgreSQL rather than asking the user to specify the list.
|
||||
|
||||
PostgreSQL Partial Migration
|
||||
----------------------------
|
||||
|
||||
INCLUDING ONLY TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expression* used
|
||||
to limit the tables to migrate to a sublist.
|
||||
|
||||
Example::
|
||||
|
||||
including only table names matching ~/film/, 'actor' in schema 'public'
|
||||
|
||||
EXCLUDING TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expression* used
|
||||
to exclude table names from the migration. This filter only applies to the
|
||||
result of the *INCLUDING* filter.
|
||||
|
||||
::
|
||||
|
||||
excluding table names matching ~<ory> in schema 'public'
|
||||
|
||||
PostgreSQL Schema Transformations
|
||||
---------------------------------
|
||||
|
||||
ALTER TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expressions*
|
||||
that you want to target in the pgloader *ALTER TABLE* command. Available
|
||||
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
|
||||
IN SCHEMA 'public'
|
||||
SET SCHEMA 'mv'
|
||||
|
||||
ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'public' RENAME TO 'films'
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET (fillfactor='40')
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET TABLESPACE 'pg_default'
|
||||
|
||||
You can use as many such rules as you need. The list of tables to be
|
||||
migrated is searched in pgloader memory against the *ALTER TABLE* matching
|
||||
rules, and for each command pgloader stops at the first matching criteria
|
||||
(regexp or string).
|
||||
|
||||
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
|
||||
the level of the pgloader in-memory representation of your source database
|
||||
schema. In case of a name change, the mapping is kept and reused in the
|
||||
*foreign key* and *index* support.
|
||||
|
||||
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
|
||||
command that pgloader will run when it has to create a table.
|
||||
|
||||
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
|
||||
`CREATE TABLE` command that pgloader will run when it has to create a table.
|
||||
|
||||
PostgreSQL Migration: limitations
|
||||
---------------------------------
|
||||
|
||||
The only PostgreSQL objects supported at this time in pgloader are
|
||||
extensions, schema, tables, indexes and constraints. Anything else is ignored.
|
||||
|
||||
- Views are not migrated,
|
||||
|
||||
Supporting views might require implementing a full SQL parser for the
|
||||
MySQL dialect with a porting engine to rewrite the SQL against
|
||||
PostgreSQL, including renaming functions and changing some constructs.
|
||||
|
||||
While it's not theoretically impossible, don't hold your breath.
|
||||
|
||||
- Triggers are not migrated
|
||||
|
||||
The difficulty of doing so is not yet assessed.
|
||||
|
||||
- Stored Procedures and Functions are not migrated.
|
||||
|
||||
|
||||
Default PostgreSQL Casting Rules
|
||||
--------------------------------
|
||||
|
||||
When migrating from PostgreSQL the following Casting Rules are provided::
|
||||
|
||||
type int with extra auto_increment to serial
|
||||
type bigint with extra auto_increment to bigserial
|
||||
type "character varying" to text drop typemod
|
||||
|
||||
|
||||
@ -127,7 +127,8 @@ It's possible to use the *MATERIALIZE VIEWS* clause and give both the name
|
||||
and the SQL (in MySQL dialect) definition of view, then pgloader creates the
|
||||
view before loading the data, then drops it again at the end.
|
||||
|
||||
## Loading the data
|
||||
Loading the data
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Let's start the `pgloader` command with our `sakila.load` command file::
|
||||
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
PgLoader Tutorial
|
||||
Pgloader Tutorial
|
||||
=================
|
||||
|
||||
.. include:: quickstart.rst
|
||||
.. include:: csv.rst
|
||||
.. include:: fixed.rst
|
||||
.. include:: geolite.rst
|
||||
|
||||
41
pgloader.asd
41
pgloader.asd
@ -69,6 +69,7 @@
|
||||
(:file "quoting" :depends-on ("utils"))
|
||||
(:file "catalog" :depends-on ("quoting"))
|
||||
(:file "alter-table" :depends-on ("catalog"))
|
||||
(:file "citus" :depends-on ("catalog"))
|
||||
|
||||
;; State, monitoring, reporting
|
||||
(:file "reject" :depends-on ("state"))
|
||||
@ -95,6 +96,7 @@
|
||||
:components
|
||||
((:file "connection")
|
||||
(:file "pgsql-ddl")
|
||||
(:file "pgsql-ddl-citus")
|
||||
(:file "pgsql-schema")
|
||||
(:file "merge-catalogs" :depends-on ("pgsql-schema"))
|
||||
(:file "pgsql-trigger")
|
||||
@ -149,40 +151,37 @@
|
||||
;(:file "syslog") ; experimental...
|
||||
|
||||
(:module "sqlite"
|
||||
:serial t
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "sqlite-cast-rules")
|
||||
(:file "sqlite-schema"
|
||||
:depends-on ("sqlite-cast-rules"))
|
||||
(:file "sqlite"
|
||||
:depends-on ("sqlite-cast-rules"
|
||||
"sqlite-schema"))))
|
||||
(:file "sqlite-schema")
|
||||
(:file "sqlite")))
|
||||
|
||||
(:module "mssql"
|
||||
:serial t
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "mssql-cast-rules")
|
||||
(:file "mssql-schema"
|
||||
:depends-on ("mssql-cast-rules"))
|
||||
(:file "mssql"
|
||||
:depends-on ("mssql-cast-rules"
|
||||
"mssql-schema"))
|
||||
(:file "mssql-index-filters"
|
||||
:depends-on ("mssql"))))
|
||||
(:file "mssql-schema")
|
||||
(:file "mssql")
|
||||
(:file "mssql-index-filters")))
|
||||
|
||||
(:module "mysql"
|
||||
:serial t
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "mysql-cast-rules")
|
||||
(:file "mysql-connection")
|
||||
(:file "mysql-schema"
|
||||
:depends-on ("mysql-connection"
|
||||
"mysql-cast-rules"))
|
||||
;; (:file "mysql-csv"
|
||||
;; :depends-on ("mysql-schema"))
|
||||
(:file "mysql"
|
||||
:depends-on ("mysql-cast-rules"
|
||||
"mysql-schema"))))))
|
||||
(:file "mysql-schema")
|
||||
(:file "mysql")))
|
||||
|
||||
(:module "pgsql"
|
||||
:serial t
|
||||
:depends-on ("common")
|
||||
:components ((:file "pgsql-cast-rules")
|
||||
(:file "pgsql-schema")
|
||||
(:file "pgsql")))))
|
||||
|
||||
;; package pgloader.copy
|
||||
(:module "pg-copy"
|
||||
@ -243,10 +242,12 @@
|
||||
(:file "command-cast-rules")
|
||||
(:file "command-materialize-views")
|
||||
(:file "command-alter-table")
|
||||
(:file "command-distribute")
|
||||
(:file "command-mysql")
|
||||
(:file "command-including-like")
|
||||
(:file "command-mssql")
|
||||
(:file "command-sqlite")
|
||||
(:file "command-pgsql")
|
||||
(:file "command-archive")
|
||||
(:file "command-parser")
|
||||
(:file "parse-sqlite-type-name")
|
||||
|
||||
@ -9,10 +9,16 @@
|
||||
;;; :cl+ssl in its system definition.
|
||||
;;;
|
||||
|
||||
(in-package #:cl-user)
|
||||
|
||||
;; So that we can #+pgloader-image some code away, see main.lisp
|
||||
(push :pgloader-image *features*)
|
||||
|
||||
(in-package #:cl-user)
|
||||
;;;
|
||||
;;; We need to support *print-circle* for the debug traces of the catalogs,
|
||||
;;; and while at it let's enforce *print-pretty* too.
|
||||
;;;
|
||||
(setf *print-circle* t *print-pretty* t)
|
||||
|
||||
(defun close-foreign-libs ()
|
||||
"Close Foreign libs in use by pgloader at application save time."
|
||||
@ -30,10 +36,8 @@
|
||||
;; handles some context and things around loading with CFFI.
|
||||
(cl+ssl:reload)))
|
||||
|
||||
#|
|
||||
#+ccl (push #'open-foreign-libs *lisp-startup-functions*)
|
||||
#+sbcl (push #'open-foreign-libs sb-ext:*init-hooks*)
|
||||
|#
|
||||
|
||||
#+ccl (push #'close-foreign-libs *save-exit-functions*)
|
||||
#+sbcl (push #'close-foreign-libs sb-ext:*save-hooks*)
|
||||
@ -42,6 +46,10 @@
|
||||
;;; Register all loaded systems in the image, so that ASDF don't search for
|
||||
;;; them again when doing --self-upgrade
|
||||
;;;
|
||||
|
||||
;;; FIXME: this idea kept failing.
|
||||
|
||||
#|
|
||||
(defun register-preloaded-system (system)
|
||||
(unless (string= "pgloader" (asdf::coerce-name system))
|
||||
(let ((version (slot-value system 'asdf::version)))
|
||||
@ -59,3 +67,4 @@
|
||||
(asdf:find-system system-name)))
|
||||
when (typep o 'asdf:load-source-op)
|
||||
append (asdf:input-files o c)))
|
||||
|#
|
||||
|
||||
@ -74,13 +74,16 @@
|
||||
(incf task-count)))
|
||||
|
||||
(lp:task-handler-bind
|
||||
((copy-init-error
|
||||
(#+pgloader-image
|
||||
(copy-init-error
|
||||
#'(lambda (condition)
|
||||
;; everything has been handled already
|
||||
;; stop the other tasks and then transfer the control
|
||||
(log-message :log "COPY INIT ERROR")
|
||||
(lp:invoke-transfer-error condition)))
|
||||
(on-error-stop
|
||||
#'(lambda (condition)
|
||||
;; everything has been handled already
|
||||
(log-message :log "ON ERROR STOP")
|
||||
(lp:kill-tasks :default)
|
||||
(lp:invoke-transfer-error condition)))
|
||||
#+pgloader-image
|
||||
(error
|
||||
|
||||
@ -42,7 +42,10 @@
|
||||
(handler-case
|
||||
(with-pgsql-connection (pgconn)
|
||||
(setf pgsql-catalog
|
||||
(fetch-pgsql-catalog (db-name pgconn) :table (target copy)))
|
||||
(fetch-pgsql-catalog (db-name pgconn)
|
||||
:table (target copy)
|
||||
:variant (pgconn-variant pgconn)
|
||||
:pgversion (pgconn-major-version pgconn)))
|
||||
|
||||
;; if the user didn't tell us the column list of the table, now is
|
||||
;; a proper time to set it in the copy object
|
||||
@ -95,6 +98,8 @@
|
||||
(loop :for path-spec :in path-list
|
||||
:count t
|
||||
:do (let ((table-source (clone-copy-for copy path-spec)))
|
||||
(when (and (header table-source) (null (fields table-source)))
|
||||
(parse-header table-source))
|
||||
(incf task-count
|
||||
(copy-from table-source
|
||||
:concurrency concurrency
|
||||
|
||||
@ -46,6 +46,12 @@
|
||||
(with-stats-collection ("Create SQL Types" :section :pre
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
;; some SQL types come from extensions (ip4r, hstore, etc)
|
||||
(create-extensions catalog
|
||||
:include-drop include-drop
|
||||
:if-not-exists t
|
||||
:client-min-messages :error)
|
||||
|
||||
(create-sqltypes catalog
|
||||
:include-drop include-drop
|
||||
:client-min-messages :error))
|
||||
@ -64,9 +70,11 @@
|
||||
;;
|
||||
;; to be able to do that properly, get the constraints from
|
||||
;; the pre-existing target database catalog
|
||||
(let ((pgsql-catalog
|
||||
(fetch-pgsql-catalog (db-name (target-db copy))
|
||||
:source-catalog catalog)))
|
||||
(let* ((pgversion (pgconn-major-version (target-db copy)))
|
||||
(pgsql-catalog
|
||||
(fetch-pgsql-catalog (db-name (target-db copy))
|
||||
:source-catalog catalog
|
||||
:pgversion pgversion)))
|
||||
(merge-catalogs catalog pgsql-catalog))
|
||||
|
||||
;; now the foreign keys and only then the indexes, because a
|
||||
@ -110,6 +118,20 @@
|
||||
:include-drop include-drop
|
||||
:client-min-messages :error))))
|
||||
|
||||
;; Citus Support
|
||||
;;
|
||||
;; We need a separate transaction here in some cases, because of the
|
||||
;; distributed DDL support from Citus, to avoid the following error:
|
||||
;;
|
||||
;; ERROR Database error 25001: cannot establish a new connection for
|
||||
;; placement 2299, since DDL has been executed on a connection that is in
|
||||
;; use
|
||||
;;
|
||||
(when (catalog-distribution-rules catalog)
|
||||
(with-pgsql-transaction (:pgconn (target-db copy))
|
||||
(with-stats-collection ("Citus Distribute Tables" :section :pre)
|
||||
(create-distributed-table (catalog-distribution-rules catalog)))))
|
||||
|
||||
;; log the catalog we just fetched and (maybe) merged
|
||||
(log-message :data "CATALOG: ~s" catalog))
|
||||
|
||||
@ -207,9 +229,11 @@
|
||||
:reset-sequences reset-sequences))))
|
||||
|
||||
|
||||
(defun process-catalog (copy catalog &key alter-table alter-schema)
|
||||
(defun process-catalog (copy catalog &key alter-table alter-schema distribute)
|
||||
"Do all the PostgreSQL catalog tweaking here: casts, index WHERE clause
|
||||
rewriting, pgloader level alter schema and alter table commands."
|
||||
(log-message :info "Processing source catalogs")
|
||||
|
||||
;; cast the catalog into something PostgreSQL can work on
|
||||
(cast catalog)
|
||||
|
||||
@ -223,7 +247,13 @@
|
||||
;; if asked, now alter the catalog with given rules: the alter-table
|
||||
;; keyword parameter actually contains a set of alter table rules.
|
||||
(when alter-table
|
||||
(alter-table catalog alter-table)))
|
||||
(alter-table catalog alter-table))
|
||||
|
||||
;; we also support schema changes necessary for Citus distribution
|
||||
(when distribute
|
||||
(log-message :info "Applying distribution rules")
|
||||
(setf (catalog-distribution-rules catalog)
|
||||
(citus-distribute-schema catalog distribute))))
|
||||
|
||||
|
||||
;;;
|
||||
@ -249,6 +279,8 @@
|
||||
(reset-sequences t)
|
||||
(foreign-keys t)
|
||||
(reindex nil)
|
||||
(after-schema nil)
|
||||
distribute
|
||||
only-tables
|
||||
including
|
||||
excluding
|
||||
@ -289,19 +321,33 @@
|
||||
|
||||
(copy-kernel (make-kernel worker-count))
|
||||
(copy-channel (let ((lp:*kernel* copy-kernel)) (lp:make-channel)))
|
||||
(catalog (fetch-metadata
|
||||
copy
|
||||
(make-catalog
|
||||
:name (typecase (source-db copy)
|
||||
(db-connection (db-name (source-db copy)))
|
||||
(fd-connection (pathname-name
|
||||
(fd-path (source-db copy))))))
|
||||
:materialize-views materialize-views
|
||||
:create-indexes create-indexes
|
||||
:foreign-keys foreign-keys
|
||||
:only-tables only-tables
|
||||
:including including
|
||||
:excluding excluding))
|
||||
(catalog (handler-case
|
||||
(fetch-metadata
|
||||
copy
|
||||
(make-catalog
|
||||
:name (typecase (source-db copy)
|
||||
(db-connection
|
||||
(db-name (source-db copy)))
|
||||
(fd-connection
|
||||
(pathname-name
|
||||
(fd-path (source-db copy))))))
|
||||
:materialize-views materialize-views
|
||||
:create-indexes create-indexes
|
||||
:foreign-keys foreign-keys
|
||||
:only-tables only-tables
|
||||
:including including
|
||||
:excluding excluding)
|
||||
(mssql::mssql-error (e)
|
||||
(log-message :error "MSSQL ERROR: ~a" e)
|
||||
(log-message :log "You might need to review the FreeTDS protocol version in your freetds.conf file, see http://www.freetds.org/userguide/choosingtdsprotocol.htm")
|
||||
(return-from copy-database))
|
||||
#+pgloader-image
|
||||
(condition (e)
|
||||
(log-message :error
|
||||
"~a: ~a"
|
||||
(conn-type (source-db copy))
|
||||
e)
|
||||
(return-from copy-database))))
|
||||
pkeys
|
||||
(writers-count (make-hash-table :size (count-tables catalog)))
|
||||
(max-indexes (when create-indexes
|
||||
@ -317,23 +363,44 @@
|
||||
|
||||
;; apply catalog level transformations to support the database migration
|
||||
;; that's CAST rules, index WHERE clause rewriting and ALTER commands
|
||||
(process-catalog copy catalog
|
||||
:alter-table alter-table
|
||||
:alter-schema alter-schema)
|
||||
(handler-case
|
||||
(process-catalog copy catalog
|
||||
:alter-table alter-table
|
||||
:alter-schema alter-schema
|
||||
:distribute distribute)
|
||||
|
||||
#+pgloader-image
|
||||
((or citus-rule-table-not-found citus-rule-is-missing-from-list) (e)
|
||||
(log-message :fatal "~a" e)
|
||||
(return-from copy-database))
|
||||
|
||||
#+pgloader-image
|
||||
(condition (e)
|
||||
(log-message :fatal "Failed to process catalogs: ~a" e)
|
||||
(return-from copy-database)))
|
||||
|
||||
;; if asked, first drop/create the tables on the PostgreSQL side
|
||||
(handler-case
|
||||
(prepare-pgsql-database copy
|
||||
catalog
|
||||
:truncate truncate
|
||||
:create-tables create-tables
|
||||
:create-schemas create-schemas
|
||||
:drop-indexes drop-indexes
|
||||
:drop-schema drop-schema
|
||||
:include-drop include-drop
|
||||
:foreign-keys foreign-keys
|
||||
:set-table-oids set-table-oids
|
||||
:materialize-views materialize-views)
|
||||
(progn
|
||||
(prepare-pgsql-database copy
|
||||
catalog
|
||||
:truncate truncate
|
||||
:create-tables create-tables
|
||||
:create-schemas create-schemas
|
||||
:drop-indexes drop-indexes
|
||||
:drop-schema drop-schema
|
||||
:include-drop include-drop
|
||||
:foreign-keys foreign-keys
|
||||
:set-table-oids set-table-oids
|
||||
:materialize-views materialize-views)
|
||||
|
||||
;; if there's an AFTER SCHEMA DO/EXECUTE command, now is the time
|
||||
;; to run it.
|
||||
(when after-schema
|
||||
(pgloader.parser::execute-sql-code-block (target-db copy)
|
||||
:pre
|
||||
after-schema
|
||||
"after schema")))
|
||||
;;
|
||||
;; In case some error happens in the preparatory transaction, we
|
||||
;; need to stop now and refrain from trying to load the data into
|
||||
|
||||
@ -51,6 +51,10 @@
|
||||
("on-error-stop" :type boolean
|
||||
:documentation "Refrain from handling errors properly.")
|
||||
|
||||
("no-ssl-cert-verification"
|
||||
:type boolean
|
||||
:documentation "Instruct OpenSSL to bypass verifying certificates.")
|
||||
|
||||
(("context" #\C) :type string :documentation "Command Context Variables")
|
||||
|
||||
(("with") :type string :list t :optional t
|
||||
@ -197,6 +201,7 @@
|
||||
client-min-messages log-min-messages summary
|
||||
root-dir self-upgrade
|
||||
with set field cast type encoding before after
|
||||
no-ssl-cert-verification
|
||||
regress)
|
||||
options
|
||||
|
||||
@ -238,6 +243,11 @@
|
||||
|
||||
;; Then process options
|
||||
(when debug
|
||||
(format t "pgloader version ~a~%" *version-string*)
|
||||
#+pgloader-image
|
||||
(format t "compiled with ~a ~a~%"
|
||||
(lisp-implementation-type)
|
||||
(lisp-implementation-version))
|
||||
#+sbcl
|
||||
(format t "sb-impl::*default-external-format* ~s~%"
|
||||
sb-impl::*default-external-format*)
|
||||
@ -249,11 +259,15 @@
|
||||
(lisp-implementation-type)
|
||||
(lisp-implementation-version)))
|
||||
|
||||
(when help
|
||||
(when (or help)
|
||||
(usage argv))
|
||||
|
||||
(when (or help version) (uiop:quit +os-code-success+))
|
||||
|
||||
(when (null arguments)
|
||||
(usage argv)
|
||||
(uiop:quit +os-code-error-usage+))
|
||||
|
||||
(when list-encodings
|
||||
(show-encodings)
|
||||
(uiop:quit +os-code-success+))
|
||||
@ -316,6 +330,9 @@
|
||||
(uiop:native-namestring *log-filename*))
|
||||
(log-message :log "Data errors in '~a'~%" *root-dir*)
|
||||
|
||||
(when no-ssl-cert-verification
|
||||
(setf cl+ssl:*make-ssl-client-stream-verify-default* nil))
|
||||
|
||||
(cond
|
||||
((and regress (= 1 (length arguments)))
|
||||
(process-regression-test (first arguments)))
|
||||
|
||||
@ -93,8 +93,9 @@
|
||||
(:syb-int2 (unsigned-to-signed (mem-ref data :unsigned-int) 2))
|
||||
(:syb-int4 (unsigned-to-signed (mem-ref data :unsigned-int) 4))
|
||||
(:syb-int8 (mem-ref data :int8))
|
||||
(:syb-real (mem-ref data :float))
|
||||
(:syb-flt8 (mem-ref data :double))
|
||||
((:syb-datetime :syb-datetime4 :syb-msdate)
|
||||
((:syb-datetime :syb-datetime4 :syb-msdate :syb-mstime)
|
||||
(with-foreign-pointer (%buf +numeric-buf-sz+)
|
||||
(let ((count
|
||||
(%dbconvert %dbproc
|
||||
|
||||
@ -49,8 +49,9 @@
|
||||
|
||||
#:catalog
|
||||
#:schema
|
||||
#:table
|
||||
#:extension
|
||||
#:sqltype
|
||||
#:table
|
||||
#:column
|
||||
#:index
|
||||
#:fkey
|
||||
@ -76,12 +77,15 @@
|
||||
#:catalog-name
|
||||
#:catalog-schema-list
|
||||
#:catalog-types-without-btree
|
||||
#:catalog-distribution-rules
|
||||
|
||||
#:schema-name
|
||||
#:schema-catalog
|
||||
#:schema-source-name
|
||||
#:schema-table-list
|
||||
#:schema-view-list
|
||||
#:schema-extension-list
|
||||
#:schema-sqltype-list
|
||||
#:schema-in-search-path
|
||||
|
||||
#:table-name
|
||||
@ -90,17 +94,23 @@
|
||||
#:table-oid
|
||||
#:table-comment
|
||||
#:table-storage-parameter-list
|
||||
#:table-tablespace
|
||||
#:table-field-list
|
||||
#:table-column-list
|
||||
#:table-index-list
|
||||
#:table-fkey-list
|
||||
#:table-trigger-list
|
||||
#:table-citus-rule
|
||||
|
||||
#:extension-name
|
||||
#:extension-schema
|
||||
|
||||
#:sqltype-name
|
||||
#:sqltype-schema
|
||||
#:sqltype-type
|
||||
#:sqltype-source-def
|
||||
#:sqltype-extra
|
||||
#:sqltype-extension
|
||||
|
||||
#:column-name
|
||||
#:column-type-name
|
||||
@ -110,6 +120,7 @@
|
||||
#:column-comment
|
||||
#:column-transform
|
||||
#:column-extra
|
||||
#:column-transform-default
|
||||
|
||||
#:index-name
|
||||
#:index-type
|
||||
@ -152,9 +163,15 @@
|
||||
|
||||
#:table-list
|
||||
#:view-list
|
||||
#:extension-list
|
||||
#:sqltype-list
|
||||
#:add-schema
|
||||
#:find-schema
|
||||
#:maybe-add-schema
|
||||
#:add-extension
|
||||
#:find-extension
|
||||
#:maybe-add-extension
|
||||
#:add-sqltype
|
||||
#:add-table
|
||||
#:find-table
|
||||
#:maybe-add-table
|
||||
@ -174,6 +191,7 @@
|
||||
#:count-indexes
|
||||
#:count-fkeys
|
||||
#:max-indexes-per-table
|
||||
#:field-name
|
||||
|
||||
#:push-to-end
|
||||
#:with-schema
|
||||
@ -194,6 +212,17 @@
|
||||
#:match-rule-action
|
||||
#:match-rule-args
|
||||
|
||||
#:citus-reference-rule
|
||||
#:citus-distributed-rule
|
||||
#:make-citus-reference-rule
|
||||
#:make-citus-distributed-rule
|
||||
#:citus-reference-rule-rule
|
||||
#:citus-distributed-rule-table
|
||||
#:citus-distributed-rule-using
|
||||
#:citus-distributed-rule-from
|
||||
#:citus-format-sql-select
|
||||
#:citus-backfill-table-p
|
||||
|
||||
#:format-table-name))
|
||||
|
||||
(defpackage #:pgloader.state
|
||||
@ -260,7 +289,30 @@
|
||||
(defpackage #:pgloader.queries
|
||||
(:use #:cl #:pgloader.params)
|
||||
(:export #:*queries*
|
||||
#:sql))
|
||||
#:sql
|
||||
#:sql-url-for-variant))
|
||||
|
||||
(defpackage #:pgloader.citus
|
||||
(:use #:cl
|
||||
#:pgloader.params
|
||||
#:pgloader.catalog
|
||||
#:pgloader.quoting
|
||||
#:pgloader.monitor)
|
||||
(:export #:citus-distribute-schema
|
||||
#:citus-format-sql-select
|
||||
#:citus-backfill-table-p
|
||||
#:citus-rule-table-not-found
|
||||
#:citus-rule-is-missing-from-list
|
||||
|
||||
#:citus-reference-rule
|
||||
#:citus-reference-rule-p
|
||||
#:citus-reference-rule-table
|
||||
|
||||
#:citus-distributed-rule
|
||||
#:citus-distributed-rule-p
|
||||
#:citus-distributed-rule-table
|
||||
#:citus-distributed-rule-using
|
||||
#:citus-distributed-rule-from))
|
||||
|
||||
(defpackage #:pgloader.utils
|
||||
(:use #:cl
|
||||
@ -269,7 +321,8 @@
|
||||
#:pgloader.quoting
|
||||
#:pgloader.catalog
|
||||
#:pgloader.monitor
|
||||
#:pgloader.state)
|
||||
#:pgloader.state
|
||||
#:pgloader.citus)
|
||||
(:import-from #:alexandria
|
||||
#:appendf
|
||||
#:read-file-into-string)
|
||||
@ -300,7 +353,8 @@
|
||||
(cl-user::export-inherited-symbols "pgloader.quoting" "pgloader.utils")
|
||||
(cl-user::export-inherited-symbols "pgloader.catalog" "pgloader.utils")
|
||||
(cl-user::export-inherited-symbols "pgloader.monitor" "pgloader.utils")
|
||||
(cl-user::export-inherited-symbols "pgloader.state" "pgloader.utils"))
|
||||
(cl-user::export-inherited-symbols "pgloader.state" "pgloader.utils")
|
||||
(cl-user::export-inherited-symbols "pgloader.citus" "pgloader.utils"))
|
||||
|
||||
|
||||
;;
|
||||
@ -389,6 +443,7 @@
|
||||
#:truncate-tables
|
||||
#:set-table-oids
|
||||
|
||||
#:create-extensions
|
||||
#:create-sqltypes
|
||||
#:create-schemas
|
||||
#:add-to-search-path
|
||||
@ -408,6 +463,11 @@
|
||||
#:reset-sequences
|
||||
#:comment-on-tables-and-columns
|
||||
|
||||
#:create-distributed-table
|
||||
|
||||
#:make-including-expr-from-catalog
|
||||
#:make-including-expr-from-view-names
|
||||
|
||||
;; finalizing catalogs support (redshift and other variants)
|
||||
#:finalize-catalogs
|
||||
#:adjust-data-types
|
||||
@ -417,6 +477,7 @@
|
||||
#:process-index-definitions
|
||||
|
||||
;; postgresql introspection queries
|
||||
#:list-all-sqltypes
|
||||
#:list-all-columns
|
||||
#:list-all-indexes
|
||||
#:list-all-fkeys
|
||||
@ -674,6 +735,14 @@
|
||||
#:*mysql-default-cast-rules*
|
||||
#:with-mysql-connection))
|
||||
|
||||
(defpackage #:pgloader.source.pgsql
|
||||
(:use #:cl
|
||||
#:pgloader.params #:pgloader.utils #:pgloader.connection
|
||||
#:pgloader.sources #:pgloader.pgsql #:pgloader.catalog)
|
||||
(:import-from #:pgloader.transforms #:precision #:scale)
|
||||
(:export #:copy-pgsql
|
||||
#:*pgsql-default-cast-rules*))
|
||||
|
||||
(defpackage #:pgloader.source.sqlite
|
||||
(:use #:cl
|
||||
#:pgloader.params #:pgloader.utils #:pgloader.connection
|
||||
@ -763,6 +832,9 @@
|
||||
(:import-from #:pgloader.source.copy
|
||||
#:copy-copy
|
||||
#:copy-connection)
|
||||
(:import-from #:pgloader.source.pgsql
|
||||
#:copy-pgsql
|
||||
#:*pgsql-default-cast-rules*)
|
||||
(:import-from #:pgloader.source.mysql
|
||||
#:copy-mysql
|
||||
#:mysql-connection
|
||||
@ -785,6 +857,7 @@
|
||||
(:export #:parse-commands
|
||||
#:parse-commands-from-file
|
||||
#:initialize-context
|
||||
#:execute-sql-code-block
|
||||
|
||||
;; tools to enable complete cli parsing in main.lisp
|
||||
#:process-relative-pathnames
|
||||
|
||||
@ -40,11 +40,11 @@
|
||||
|
||||
(in-package :pgloader.params)
|
||||
|
||||
(defparameter *release* nil
|
||||
(defparameter *release* t
|
||||
"non-nil when this build is a release build.")
|
||||
|
||||
(defparameter *major-version* "3.5")
|
||||
(defparameter *minor-version* "2")
|
||||
(defparameter *major-version* "3.6")
|
||||
(defparameter *minor-version* "1")
|
||||
|
||||
(defun git-hash ()
|
||||
"Return the current abbreviated git hash of the development tree."
|
||||
|
||||
@ -47,9 +47,14 @@
|
||||
(bind (((_ _ parameters _) stmt))
|
||||
(list #'pgloader.catalog::alter-table-set-storage-parameters parameters))))
|
||||
|
||||
(defrule set-tablespace (and kw-set kw-tablespace quoted-namestring)
|
||||
(:lambda (stmt)
|
||||
(list #'pgloader.catalog::alter-table-set-tablespace (third stmt))))
|
||||
|
||||
(defrule alter-table-action (or rename-to
|
||||
set-schema
|
||||
set-storage-parameters))
|
||||
set-storage-parameters
|
||||
set-tablespace))
|
||||
|
||||
(defrule alter-table-command (and alter-table-names-matching
|
||||
(? in-schema)
|
||||
|
||||
@ -134,7 +134,8 @@
|
||||
option-fields-terminated-by
|
||||
option-trim-unquoted-blanks
|
||||
option-keep-unquoted-blanks
|
||||
option-csv-escape-mode))
|
||||
option-csv-escape-mode
|
||||
option-null-if))
|
||||
|
||||
(defrule csv-options (and kw-with
|
||||
(and csv-option (* (and comma csv-option))))
|
||||
@ -231,11 +232,6 @@
|
||||
(destructuring-bind (field1 fields) source
|
||||
(list* field1 fields))))
|
||||
|
||||
(defrule open-paren (and ignore-whitespace #\( ignore-whitespace)
|
||||
(:constant :open-paren))
|
||||
(defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
|
||||
(:constant :close-paren))
|
||||
|
||||
(defrule having-fields (and kw-having kw-fields) (:constant nil))
|
||||
|
||||
(defrule csv-source-field-list (and (? having-fields)
|
||||
@ -434,26 +430,35 @@
|
||||
(progn
|
||||
,(sql-code-block pg-db-conn :pre before "before load")
|
||||
|
||||
(let ((on-error-stop (getf ',options :on-error-stop))
|
||||
(truncate (getf ',options :truncate))
|
||||
(disable-triggers (getf ',options :disable-triggers))
|
||||
(drop-indexes (getf ',options :drop-indexes))
|
||||
(max-parallel-create-index (getf ',options :max-parallel-create-index))
|
||||
(source
|
||||
(make-instance 'copy-csv
|
||||
:target-db ,pg-db-conn
|
||||
:source source-db
|
||||
:target (create-table ',target-table-name)
|
||||
:encoding ,encoding
|
||||
:fields ',fields
|
||||
:columns ',columns
|
||||
,@(remove-batch-control-option
|
||||
options :extras '(:worker-count
|
||||
:concurrency
|
||||
:truncate
|
||||
:drop-indexes
|
||||
:disable-triggers
|
||||
:max-parallel-create-index)))))
|
||||
(let* ((on-error-stop (getf ',options :on-error-stop))
|
||||
(truncate (getf ',options :truncate))
|
||||
(disable-triggers (getf ',options :disable-triggers))
|
||||
(drop-indexes (getf ',options :drop-indexes))
|
||||
(max-parallel-create-index (getf ',options :max-parallel-create-index))
|
||||
(fields
|
||||
',(let ((null-as (getf options :null-as)))
|
||||
(if null-as
|
||||
(mapcar (lambda (field)
|
||||
(if (member :null-as field) field
|
||||
(append field (list :null-as null-as))))
|
||||
fields)
|
||||
fields)))
|
||||
(source
|
||||
(make-instance 'copy-csv
|
||||
:target-db ,pg-db-conn
|
||||
:source source-db
|
||||
:target (create-table ',target-table-name)
|
||||
:encoding ,encoding
|
||||
:fields fields
|
||||
:columns ',columns
|
||||
,@(remove-batch-control-option
|
||||
options :extras '(:null-as
|
||||
:worker-count
|
||||
:concurrency
|
||||
:truncate
|
||||
:drop-indexes
|
||||
:disable-triggers
|
||||
:max-parallel-create-index)))))
|
||||
(copy-database source
|
||||
,@ (when worker-count
|
||||
(list :worker-count worker-count))
|
||||
|
||||
@ -25,7 +25,7 @@
|
||||
(defrule doubled-at-sign (and "@@") (:constant "@"))
|
||||
(defrule doubled-colon (and "::") (:constant ":"))
|
||||
(defrule password (+ (or (not "@") doubled-at-sign)) (:text t))
|
||||
(defrule username (and (or #\_ (alpha-char-p character))
|
||||
(defrule username (and (or #\_ (alpha-char-p character) (digit-char-p character))
|
||||
(* (or (alpha-char-p character)
|
||||
(digit-char-p character)
|
||||
#\.
|
||||
@ -87,10 +87,11 @@
|
||||
(append (list :host (when host (process-hostname host)))
|
||||
port))))
|
||||
|
||||
(defrule dsn-dbname (and "/" (? maybe-quoted-namestring))
|
||||
(:destructure (slash dbname)
|
||||
(declare (ignore slash))
|
||||
(list :dbname dbname)))
|
||||
(defrule dsn-dbname (and "/" (? (* (or (alpha-char-p character)
|
||||
(digit-char-p character)
|
||||
punct))))
|
||||
(:lambda (dbn)
|
||||
(list :dbname (text (second dbn)))))
|
||||
|
||||
(defrule dsn-option-ssl-disable "disable" (:constant :no))
|
||||
(defrule dsn-option-ssl-allow "allow" (:constant :try))
|
||||
|
||||
73
src/parsers/command-distribute.lisp
Normal file
73
src/parsers/command-distribute.lisp
Normal file
@ -0,0 +1,73 @@
|
||||
#|
|
||||
distribute billers using id
|
||||
distribute bills using biller_id
|
||||
distribute receivable_accounts using biller_id
|
||||
distribute payments using biller_id
|
||||
|
||||
distribute splits using biller_id
|
||||
from receivable_accounts
|
||||
|
||||
distribute ach_accounts as reference table
|
||||
|#
|
||||
|
||||
(in-package :pgloader.parser)
|
||||
|
||||
(defun create-table-from-dsn-table-name (dsn-table-name
|
||||
&optional (schema-name "public"))
|
||||
(let ((table (create-table (cdr (second dsn-table-name)))))
|
||||
(unless (table-schema table)
|
||||
(setf (table-schema table)
|
||||
(make-schema :catalog nil
|
||||
:source-name schema-name
|
||||
:name (apply-identifier-case schema-name))))
|
||||
table))
|
||||
|
||||
(defrule distribute-reference (and kw-distribute dsn-table-name
|
||||
kw-as kw-reference kw-table)
|
||||
(:lambda (d-r)
|
||||
(make-citus-reference-rule :table (create-table-from-dsn-table-name d-r))))
|
||||
|
||||
(defrule distribute-using (and kw-distribute dsn-table-name
|
||||
kw-using maybe-quoted-namestring)
|
||||
(:lambda (d-u)
|
||||
(make-citus-distributed-rule :table (create-table-from-dsn-table-name d-u)
|
||||
:using (make-column :name (fourth d-u)))))
|
||||
|
||||
;;;
|
||||
;;; The namestring rule allows for commas and we use them as a separator
|
||||
;;; here, so we need to have our own table name parsing. That's a bummer,
|
||||
;;; maybe we should revisit the whole table names parsing code?
|
||||
;;;
|
||||
(defrule distribute-from-tablename
|
||||
(or double-quoted-namestring
|
||||
quoted-namestring
|
||||
(and (or #\_ (alpha-char-p character))
|
||||
(* (or (alpha-char-p character)
|
||||
(digit-char-p character)))))
|
||||
(:text t))
|
||||
|
||||
(defrule maybe-qualified-dist-from-table-name
|
||||
(and distribute-from-tablename (? (and "." distribute-from-tablename)))
|
||||
(:lambda (name)
|
||||
(if (second name)
|
||||
(cons (first name) (second (second name)))
|
||||
(cons "public" (first name)))))
|
||||
|
||||
(defrule distribute-from-list (+ (and maybe-qualified-dist-from-table-name
|
||||
(? (and "," ignore-whitespace))))
|
||||
(:lambda (from-list)
|
||||
(mapcar #'first from-list)))
|
||||
|
||||
(defrule distribute-using-from (and kw-distribute dsn-table-name
|
||||
kw-using maybe-quoted-namestring
|
||||
kw-from distribute-from-list)
|
||||
(:lambda (d-u-f)
|
||||
(make-citus-distributed-rule :table (create-table-from-dsn-table-name d-u-f)
|
||||
:using (make-column :name (fourth d-u-f))
|
||||
:from (mapcar #'create-table (sixth d-u-f)))))
|
||||
|
||||
(defrule distribute-commands (+ (or distribute-using-from
|
||||
distribute-using
|
||||
distribute-reference))
|
||||
(:lambda (commands)
|
||||
(cons :distribute commands)))
|
||||
@ -26,6 +26,7 @@
|
||||
(def-keyword-rule "with")
|
||||
(def-keyword-rule "when")
|
||||
(def-keyword-rule "set")
|
||||
(def-keyword-rule "tablespace")
|
||||
(def-keyword-rule "database")
|
||||
(def-keyword-rule "messages")
|
||||
(def-keyword-rule "matches")
|
||||
@ -103,6 +104,9 @@
|
||||
(def-keyword-rule "trim")
|
||||
(def-keyword-rule "unquoted")
|
||||
(def-keyword-rule "delimiter")
|
||||
;; option for Citus support
|
||||
(def-keyword-rule "distribute")
|
||||
(def-keyword-rule "reference")
|
||||
;; option for MySQL imports
|
||||
(def-keyword-rule "schema")
|
||||
(def-keyword-rule "schemas")
|
||||
|
||||
@ -6,11 +6,11 @@
|
||||
;;;
|
||||
(in-package #:pgloader.parser)
|
||||
|
||||
(defrule view-name (and (alpha-char-p character)
|
||||
(* (or (alpha-char-p character)
|
||||
(digit-char-p character)
|
||||
#\_)))
|
||||
(:text t))
|
||||
(defrule view-name (or qualified-table-name maybe-quoted-namestring)
|
||||
(:lambda (vn)
|
||||
(etypecase vn
|
||||
(cons vn)
|
||||
(string (cons nil vn)))))
|
||||
|
||||
(defrule view-sql (and kw-as dollar-quoted)
|
||||
(:destructure (as sql) (declare (ignore as)) sql))
|
||||
@ -18,7 +18,7 @@
|
||||
(defrule view-definition (and view-name (? view-sql))
|
||||
(:destructure (name sql) (cons name sql)))
|
||||
|
||||
(defrule another-view-definition (and comma view-definition)
|
||||
(defrule another-view-definition (and comma-separator view-definition)
|
||||
(:lambda (source)
|
||||
(bind (((_ view) source)) view)))
|
||||
|
||||
|
||||
@ -83,6 +83,8 @@
|
||||
casts
|
||||
alter-schema
|
||||
alter-table
|
||||
materialize-views
|
||||
distribute-commands
|
||||
before-load
|
||||
after-load
|
||||
including-like-in-schema
|
||||
@ -139,7 +141,8 @@
|
||||
(defun lisp-code-for-loading-from-mssql (ms-db-conn pg-db-conn
|
||||
&key
|
||||
gucs mssql-gucs
|
||||
casts before after options
|
||||
casts before after
|
||||
options distribute views
|
||||
alter-schema alter-table
|
||||
including excluding
|
||||
&allow-other-keys)
|
||||
@ -167,6 +170,8 @@
|
||||
:excluding ',excluding
|
||||
:alter-schema ',alter-schema
|
||||
:alter-table ',alter-table
|
||||
:materialize-views ',views
|
||||
:distribute ',distribute
|
||||
:set-table-oids t
|
||||
:on-error-stop on-error-stop
|
||||
,@(remove-batch-control-option options))
|
||||
@ -177,8 +182,8 @@
|
||||
(:lambda (source)
|
||||
(bind (((ms-db-uri pg-db-uri
|
||||
&key
|
||||
gucs mssql-gucs casts before after
|
||||
alter-schema alter-table
|
||||
gucs mssql-gucs casts views before after
|
||||
alter-schema alter-table distribute
|
||||
including excluding options)
|
||||
source))
|
||||
(cond (*dry-run*
|
||||
@ -188,10 +193,12 @@
|
||||
:gucs gucs
|
||||
:mssql-gucs mssql-gucs
|
||||
:casts casts
|
||||
:views views
|
||||
:before before
|
||||
:after after
|
||||
:alter-schema alter-schema
|
||||
:alter-table alter-table
|
||||
:distribute distribute
|
||||
:options options
|
||||
:including including
|
||||
:excluding excluding))))))
|
||||
|
||||
@ -89,15 +89,13 @@
|
||||
excluding-matching
|
||||
decoding-tables-as
|
||||
before-load
|
||||
after-load))
|
||||
after-load
|
||||
distribute-commands))
|
||||
(:lambda (clauses-list)
|
||||
(alexandria:alist-plist clauses-list)))
|
||||
|
||||
(defrule mysql-prefix "mysql://" (:constant (list :type :mysql)))
|
||||
|
||||
(defrule mysql-dsn-dbname (and "/" maybe-quoted-namestring)
|
||||
(:lambda (m-d-d) (list :dbname (text (second m-d-d)))))
|
||||
|
||||
(defrule mysql-dsn-option-usessl-true "true" (:constant :yes))
|
||||
(defrule mysql-dsn-option-usessl-false "false" (:constant :no))
|
||||
|
||||
@ -123,7 +121,7 @@
|
||||
(defrule mysql-uri (and mysql-prefix
|
||||
(? dsn-user-password)
|
||||
(? dsn-hostname)
|
||||
mysql-dsn-dbname
|
||||
dsn-dbname
|
||||
(? mysql-dsn-options))
|
||||
(:lambda (uri)
|
||||
(destructuring-bind (&key type
|
||||
@ -167,7 +165,7 @@
|
||||
&key
|
||||
gucs mysql-gucs
|
||||
casts views before after options
|
||||
alter-table alter-schema
|
||||
alter-table alter-schema distribute
|
||||
((:including incl))
|
||||
((:excluding excl))
|
||||
((:decoding decoding-as))
|
||||
@ -194,6 +192,7 @@
|
||||
:materialize-views ',views
|
||||
:alter-table ',alter-table
|
||||
:alter-schema ',alter-schema
|
||||
:distribute ',distribute
|
||||
:set-table-oids t
|
||||
:on-error-stop on-error-stop
|
||||
,@(remove-batch-control-option options))
|
||||
@ -206,7 +205,7 @@
|
||||
pg-db-uri
|
||||
&key
|
||||
gucs mysql-gucs casts views before after options
|
||||
alter-table alter-schema
|
||||
alter-table alter-schema distribute
|
||||
including excluding decoding)
|
||||
source
|
||||
(cond (*dry-run*
|
||||
@ -222,6 +221,7 @@
|
||||
:options options
|
||||
:alter-table alter-table
|
||||
:alter-schema alter-schema
|
||||
:distribute distribute
|
||||
:including including
|
||||
:excluding excluding
|
||||
:decoding decoding))))))
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
load-copy-file
|
||||
load-dbf-file
|
||||
load-ixf-file
|
||||
load-pgsql-database
|
||||
load-mysql-database
|
||||
load-mssql-database
|
||||
load-sqlite-database
|
||||
@ -160,12 +161,12 @@
|
||||
(declare (ignore abs paths no-path-p))
|
||||
(let ((dotted-parts (reverse (sq:split-sequence #\. filename))))
|
||||
(when (<= 2 (length dotted-parts))
|
||||
(destructuring-bind (extension name-or-ext &rest parts)
|
||||
(destructuring-bind (ext name-or-ext &rest parts)
|
||||
dotted-parts
|
||||
(declare (ignore parts))
|
||||
(if (string-equal "tar" name-or-ext) :archive
|
||||
(loop :for (type . extensions) :in *data-source-filename-extensions*
|
||||
:when (member extension extensions :test #'string-equal)
|
||||
:when (member ext extensions :test #'string-equal)
|
||||
:return type)))))))
|
||||
|
||||
(defvar *parse-rule-for-source-types*
|
||||
@ -266,6 +267,7 @@
|
||||
(:dbf 'dbf-option)
|
||||
(:ixf 'ixf-option)
|
||||
(:sqlite 'sqlite-option)
|
||||
(:pgsql 'pgsql-option)
|
||||
(:mysql 'mysql-option)
|
||||
(:mssql 'mysql-option))
|
||||
option))))
|
||||
|
||||
171
src/parsers/command-pgsql.lisp
Normal file
171
src/parsers/command-pgsql.lisp
Normal file
@ -0,0 +1,171 @@
|
||||
;;;
|
||||
;;; Parse the pgloader commands grammar
|
||||
;;;
|
||||
|
||||
(in-package :pgloader.parser)
|
||||
|
||||
;;;
|
||||
;;; PostgreSQL options
|
||||
;;;
|
||||
(defrule pgsql-option (or option-on-error-stop
|
||||
option-on-error-resume-next
|
||||
option-workers
|
||||
option-concurrency
|
||||
option-batch-rows
|
||||
option-batch-size
|
||||
option-prefetch-rows
|
||||
option-max-parallel-create-index
|
||||
option-reindex
|
||||
option-truncate
|
||||
option-disable-triggers
|
||||
option-data-only
|
||||
option-schema-only
|
||||
option-include-drop
|
||||
option-drop-schema
|
||||
option-create-tables
|
||||
option-create-indexes
|
||||
option-index-names
|
||||
option-reset-sequences
|
||||
option-foreign-keys
|
||||
option-identifiers-case))
|
||||
|
||||
(defrule pgsql-options (and kw-with
|
||||
(and pgsql-option (* (and comma pgsql-option))))
|
||||
(:function flatten-option-list))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Including only some tables or excluding some others
|
||||
;;;
|
||||
(defrule including-matching-in-schema-filter
|
||||
(and kw-including kw-only kw-table kw-names kw-matching filter-list-matching
|
||||
kw-in kw-schema quoted-namestring)
|
||||
(:lambda (source)
|
||||
(bind (((_ _ _ _ _ filter-list _ _ schema) source))
|
||||
(cons schema filter-list))))
|
||||
|
||||
(defrule including-matching-in-schema
|
||||
(and including-matching-in-schema-filter
|
||||
(* including-matching-in-schema-filter))
|
||||
(:lambda (source)
|
||||
(destructuring-bind (inc1 incs) source
|
||||
(cons :including (list* inc1 incs)))))
|
||||
|
||||
(defrule excluding-matching-in-schema-filter
|
||||
(and kw-excluding kw-table kw-names kw-matching filter-list-matching
|
||||
kw-in kw-schema quoted-namestring)
|
||||
(:lambda (source)
|
||||
(bind (((_ _ _ _ filter-list _ _ schema) source))
|
||||
(cons schema filter-list))))
|
||||
|
||||
(defrule excluding-matching-in-schema
|
||||
(and excluding-matching-in-schema-filter
|
||||
(* excluding-matching-in-schema-filter))
|
||||
(:lambda (source)
|
||||
(destructuring-bind (excl1 excls) source
|
||||
(cons :excluding (list* excl1 excls)))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Allow clauses to appear in any order
|
||||
;;;
|
||||
(defrule load-pgsql-optional-clauses (* (or pgsql-options
|
||||
gucs
|
||||
casts
|
||||
alter-table
|
||||
alter-schema
|
||||
materialize-views
|
||||
including-matching-in-schema
|
||||
excluding-matching-in-schema
|
||||
decoding-tables-as
|
||||
before-load
|
||||
after-schema
|
||||
after-load
|
||||
distribute-commands))
|
||||
(:lambda (clauses-list)
|
||||
(alexandria:alist-plist clauses-list)))
|
||||
|
||||
(defrule pgsql-source (and kw-load kw-database kw-from pgsql-uri)
|
||||
(:lambda (source) (bind (((_ _ _ uri) source)) uri)))
|
||||
|
||||
(defrule load-pgsql-command (and pgsql-source target
|
||||
load-pgsql-optional-clauses)
|
||||
(:lambda (command)
|
||||
(destructuring-bind (source target clauses) command
|
||||
`(,source ,target ,@clauses))))
|
||||
|
||||
|
||||
;;; LOAD DATABASE FROM pgsql://
|
||||
(defun lisp-code-for-pgsql-dry-run (pg-src-db-conn pg-dst-db-conn)
|
||||
`(lambda ()
|
||||
(log-message :log "DRY RUN, only checking connections.")
|
||||
(check-connection ,pg-src-db-conn)
|
||||
(check-connection ,pg-dst-db-conn)))
|
||||
|
||||
(defun lisp-code-for-loading-from-pgsql (pg-src-db-conn pg-dst-db-conn
|
||||
&key
|
||||
gucs
|
||||
casts options
|
||||
before after after-schema
|
||||
alter-table alter-schema
|
||||
((:including incl))
|
||||
((:excluding excl))
|
||||
views
|
||||
distribute
|
||||
&allow-other-keys)
|
||||
`(lambda ()
|
||||
(let* ((*default-cast-rules* ',*pgsql-default-cast-rules*)
|
||||
(*cast-rules* ',casts)
|
||||
(*identifier-case* :quote)
|
||||
(on-error-stop (getf ',options :on-error-stop t))
|
||||
,@(pgsql-connection-bindings pg-dst-db-conn gucs)
|
||||
,@(batch-control-bindings options)
|
||||
(source
|
||||
(make-instance 'copy-pgsql
|
||||
:target-db ,pg-dst-db-conn
|
||||
:source-db ,pg-src-db-conn)))
|
||||
|
||||
,(sql-code-block pg-dst-db-conn :pre before "before load")
|
||||
|
||||
(copy-database source
|
||||
:including ',incl
|
||||
:excluding ',excl
|
||||
:materialize-views ',views
|
||||
:alter-table ',alter-table
|
||||
:alter-schema ',alter-schema
|
||||
:index-names :preserve
|
||||
:set-table-oids t
|
||||
:on-error-stop on-error-stop
|
||||
:after-schema ',after-schema
|
||||
:distribute ',distribute
|
||||
,@(remove-batch-control-option options))
|
||||
|
||||
,(sql-code-block pg-dst-db-conn :post after "after load"))))
|
||||
|
||||
(defrule load-pgsql-database load-pgsql-command
|
||||
(:lambda (source)
|
||||
(destructuring-bind (pg-src-db-uri
|
||||
pg-dst-db-uri
|
||||
&key
|
||||
gucs casts before after after-schema options
|
||||
alter-table alter-schema views distribute
|
||||
including excluding decoding)
|
||||
source
|
||||
(cond (*dry-run*
|
||||
(lisp-code-for-pgsql-dry-run pg-src-db-uri pg-dst-db-uri))
|
||||
(t
|
||||
(lisp-code-for-loading-from-pgsql pg-src-db-uri pg-dst-db-uri
|
||||
:gucs gucs
|
||||
:casts casts
|
||||
:views views
|
||||
:before before
|
||||
:after after
|
||||
:after-schema after-schema
|
||||
:options options
|
||||
:alter-table alter-table
|
||||
:alter-schema alter-schema
|
||||
:distribute distribute
|
||||
:including including
|
||||
:excluding excluding
|
||||
:decoding decoding))))))
|
||||
|
||||
@ -58,17 +58,26 @@
|
||||
(bind (((_ _ sql-list-of-list) after))
|
||||
(cons :after (apply #'append sql-list-of-list)))))
|
||||
|
||||
(defrule after-schema (and kw-after kw-create kw-schema
|
||||
(+ (or load-do load-execute)))
|
||||
(:lambda (after)
|
||||
(bind (((_ _ _ sql-list-of-list) after))
|
||||
(cons :after-schema (apply #'append sql-list-of-list)))))
|
||||
|
||||
(defun sql-code-block (pgconn section commands label)
|
||||
"Return lisp code to run COMMANDS against DBNAME, updating STATE."
|
||||
(when commands
|
||||
`(with-stats-collection (,label
|
||||
:dbname ,(db-name pgconn)
|
||||
:section ,section
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(log-message :notice "Executing SQL block for ~a" ,label)
|
||||
(with-pgsql-transaction (:pgconn ,pgconn)
|
||||
(loop for command in ',commands
|
||||
do
|
||||
(pgsql-execute command :client-min-messages :error)
|
||||
counting command)))))
|
||||
`(execute-sql-code-block ,pgconn ,section ',commands ,label)))
|
||||
|
||||
(defun execute-sql-code-block (pgconn section commands label)
|
||||
"Exceute given SQL commands."
|
||||
(with-stats-collection (label
|
||||
:dbname (db-name pgconn)
|
||||
:section section
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(log-message :notice "Executing SQL block for ~a" label)
|
||||
(with-pgsql-transaction (:pgconn pgconn)
|
||||
(loop :for command :in commands
|
||||
:do (pgsql-execute command :client-min-messages :error)
|
||||
:counting command))))
|
||||
|
||||
@ -30,7 +30,7 @@
|
||||
(defrule ignore-whitespace (* whitespace)
|
||||
(:constant nil))
|
||||
|
||||
(defrule punct (or #\, #\- #\_ #\$ #\%)
|
||||
(defrule punct (or #\- #\_ #\$ #\%)
|
||||
(:text t))
|
||||
|
||||
(defrule namestring (and (or #\_ (alpha-char-p character))
|
||||
@ -57,3 +57,11 @@
|
||||
quoted-namestring
|
||||
namestring))
|
||||
|
||||
(defrule open-paren (and ignore-whitespace #\( ignore-whitespace)
|
||||
(:constant :open-paren))
|
||||
|
||||
(defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
|
||||
(:constant :close-paren))
|
||||
|
||||
(defrule comma-separator (and ignore-whitespace #\, ignore-whitespace)
|
||||
(:constant ","))
|
||||
|
||||
@ -38,11 +38,12 @@
|
||||
:for ragged-end := (when end
|
||||
(cond ((member name '(:msecs :usecs))
|
||||
;; take any number of digits up to
|
||||
;; the specified field lenght
|
||||
;; the specified field length
|
||||
;; (less digits are allowed)
|
||||
(min end (length date-string)))
|
||||
(when (<= start (length date-string))
|
||||
(min end (length date-string))))
|
||||
(t end)))
|
||||
:when (and start end)
|
||||
:when (and start ragged-end)
|
||||
:append (list name (subseq date-string start ragged-end)))
|
||||
(if (or (string= year "0000")
|
||||
(string= month "00")
|
||||
|
||||
@ -14,16 +14,22 @@
|
||||
(defrule pgpass-escaped-char (and #\\ (or #\\ #\:))
|
||||
(:lambda (c) (second c)))
|
||||
|
||||
(defrule pgpass-ipv6-hostname (and #\[
|
||||
(+ (or (digit-char-p character) ":"))
|
||||
#\])
|
||||
(:lambda (ipv6) (text (second ipv6))))
|
||||
|
||||
(defrule pgpass-entry (or "*"
|
||||
(+ (or pgpass-escaped-char
|
||||
(+ (or pgpass-ipv6-hostname
|
||||
pgpass-escaped-char
|
||||
(pgpass-char-p character))))
|
||||
(:lambda (e) (text e)))
|
||||
|
||||
(defrule pgpass-line (and pgpass-entry #\: pgpass-entry #\:
|
||||
(defrule pgpass-line (and (? pgpass-entry) #\: pgpass-entry #\:
|
||||
pgpass-entry #\: pgpass-entry #\:
|
||||
(? pgpass-entry))
|
||||
(:lambda (pl)
|
||||
(make-pgpass :hostname (first pl)
|
||||
(make-pgpass :hostname (or (first pl) "localhost")
|
||||
:port (third pl)
|
||||
:database (fifth pl)
|
||||
:username (seventh pl)
|
||||
|
||||
@ -15,14 +15,16 @@
|
||||
(? " "))
|
||||
(:lambda (noise) (second noise)))
|
||||
|
||||
(defrule sqlite-single-typemod (and #\( (+ (digit-char-p character)) #\))
|
||||
(defrule sqlite-single-typemod (and open-paren
|
||||
(+ (digit-char-p character))
|
||||
close-paren)
|
||||
(:lambda (st) (cons (parse-integer (text (second st))) nil)))
|
||||
|
||||
(defrule sqlite-double-typemod (and #\(
|
||||
(defrule sqlite-double-typemod (and open-paren
|
||||
(+ (digit-char-p character))
|
||||
(* (or #\, #\Space))
|
||||
comma-separator
|
||||
(+ (digit-char-p character))
|
||||
#\))
|
||||
close-paren)
|
||||
(:lambda (dt) (cons (parse-integer (text (second dt)))
|
||||
(parse-integer (text (fourth dt))))))
|
||||
|
||||
@ -31,9 +33,9 @@
|
||||
(defrule sqlite-type-name (and (* extra-qualifiers)
|
||||
(+ (alpha-char-p character))
|
||||
(* extra-qualifiers)
|
||||
(* #\Space)
|
||||
ignore-whitespace
|
||||
(? sqlite-typemod)
|
||||
(* #\Space)
|
||||
ignore-whitespace
|
||||
(* extra-qualifiers))
|
||||
(:lambda (tn) (list (text (second tn))
|
||||
(fifth tn)
|
||||
|
||||
@ -118,7 +118,19 @@
|
||||
(uiop:native-namestring crt-file)))
|
||||
(pomo::*ssl-key-file* (when (and (ssl-enable-p pgconn)
|
||||
(probe-file key-file))
|
||||
(uiop:native-namestring key-file))))
|
||||
(uiop:native-namestring key-file)))
|
||||
;;
|
||||
;; It's ok to set :verify-mode to NONE here because
|
||||
;; cl+ssl:*make-ssl-client-stream-verify-default* defaults to
|
||||
;; :require and takes precedence.
|
||||
;;
|
||||
;; Only when --no-ssl-cert-verification is passed as a command line
|
||||
;; option do we set cl+ssl:*make-ssl-client-stream-verify-default*
|
||||
;; to NIL, then allowing the NONE behaviour set here.
|
||||
;;
|
||||
(ssl-context
|
||||
(CL+SSL:MAKE-CONTEXT :disabled-protocols nil
|
||||
:verify-mode CL+SSL:+SSL-VERIFY-NONE+)))
|
||||
(flet ((connect (pgconn username)
|
||||
(handler-case
|
||||
;; in some cases (client_min_messages set to debug5
|
||||
@ -128,20 +140,29 @@
|
||||
#'(lambda (w)
|
||||
(log-message :warning "~a" w)
|
||||
(muffle-warning))))
|
||||
(pomo:connect (db-name pgconn)
|
||||
(or username (db-user pgconn))
|
||||
(db-pass pgconn)
|
||||
(let ((host (db-host pgconn)))
|
||||
(if (and (consp host) (eq :unix (car host)))
|
||||
:unix
|
||||
host))
|
||||
:port (db-port pgconn)
|
||||
:use-ssl (or (pgconn-use-ssl pgconn) :no)))
|
||||
(CL+SSL:WITH-GLOBAL-CONTEXT (ssl-context :auto-free-p t)
|
||||
(pomo:connect (db-name pgconn)
|
||||
(or username (db-user pgconn))
|
||||
(db-pass pgconn)
|
||||
(let ((host (db-host pgconn)))
|
||||
(if (and (consp host) (eq :unix (car host)))
|
||||
:unix
|
||||
host))
|
||||
:port (db-port pgconn)
|
||||
:use-ssl (or (pgconn-use-ssl pgconn) :no))))
|
||||
|
||||
((or too-many-connections configuration-limit-exceeded) (e)
|
||||
(log-message :error
|
||||
"Failed to connect to ~a: ~a; will try again in ~fs"
|
||||
pgconn e *retry-connect-delay*)
|
||||
(sleep *retry-connect-delay*)))))
|
||||
(sleep *retry-connect-delay*))
|
||||
|
||||
(CL+SSL:SSL-ERROR-VERIFY (e)
|
||||
(log-message :error
|
||||
"Connecting to PostgreSQL ~a: ~a"
|
||||
(db-host pgconn) e)
|
||||
(log-message :log "You may try --no-ssl-cert-verification")
|
||||
(error e)))))
|
||||
(loop :while (null (conn-handle pgconn))
|
||||
:repeat *retry-connect-times*
|
||||
:do (setf (conn-handle pgconn) (connect pgconn username))))
|
||||
@ -389,10 +410,11 @@
|
||||
;;;
|
||||
;;; PostgreSQL 8.0.2 on i686-pc-linux-gnu, compiled by GCC gcc (GCC) 3.4.2 20041017 (Red Hat 3.4.2-6.fc3), Redshift 1.0.2058
|
||||
;;; PostgreSQL 10.1 on x86_64-apple-darwin14.5.0, compiled by Apple LLVM version 7.0.0 (clang-700.1.76), 64-bit
|
||||
;;; PostgreSQL 10.6 (Ubuntu 10.6-1.pgdg14.04+1) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 4.8.4-2ubuntu1~14.04.4) 4.8.4, 64-bit
|
||||
(defun parse-postgresql-version-string (version-string)
|
||||
"Parse PostgreSQL select version() output."
|
||||
(cl-ppcre:register-groups-bind (full-version maybe-variant)
|
||||
("PostgreSQL ([0-9.]+) on .*, [^,]+, (.*)" version-string)
|
||||
("PostgreSQL ([0-9.]+) [^,]+, [^,]+, (.*)" version-string)
|
||||
(let* ((version-dots (split-sequence:split-sequence #\. full-version))
|
||||
(major-version (if (= 3 (length version-dots))
|
||||
(format nil "~a.~a"
|
||||
|
||||
@ -13,17 +13,7 @@
|
||||
include-drop
|
||||
(client-min-messages :notice))
|
||||
"Create the needed data types for given CATALOG."
|
||||
(let ((sqltype-list))
|
||||
;; build the sqltype list
|
||||
(loop :for table :in (append (table-list catalog)
|
||||
(view-list catalog))
|
||||
:do (loop :for column :in (table-column-list table)
|
||||
:do (when (typep (column-type-name column) 'sqltype)
|
||||
(pushnew (column-type-name column) sqltype-list
|
||||
:test #'string-equal
|
||||
:key #'sqltype-name))))
|
||||
|
||||
;; now create the types
|
||||
(let ((sqltype-list (sqltype-list catalog)))
|
||||
(loop :for sqltype :in sqltype-list
|
||||
:when include-drop
|
||||
:count t
|
||||
@ -114,6 +104,19 @@
|
||||
:log-level log-level
|
||||
:client-min-messages client-min-messages)))))
|
||||
|
||||
(defun create-extensions (catalog
|
||||
&key
|
||||
if-not-exists
|
||||
include-drop
|
||||
(client-min-messages :notice))
|
||||
"Create all extensions from the given database CATALOG."
|
||||
(let ((sql
|
||||
(loop :for extension :in (extension-list catalog)
|
||||
:when include-drop
|
||||
:collect (format-drop-sql extension :if-exists t :cascade t)
|
||||
:collect (format-create-sql extension :if-not-exists if-not-exists))))
|
||||
(pgsql-execute sql :client-min-messages client-min-messages)))
|
||||
|
||||
(defun create-tables (catalog
|
||||
&key
|
||||
if-not-exists
|
||||
@ -150,7 +153,7 @@
|
||||
:collect (format-create-sql (trigger-procedure trigger))
|
||||
:collect (format-create-sql trigger)))))
|
||||
(pgsql-execute-with-timing section label sql-list
|
||||
:log-level :log
|
||||
:log-level :sql
|
||||
:client-min-messages client-min-messages)))
|
||||
|
||||
|
||||
@ -462,3 +465,14 @@ $$; " tables)))
|
||||
(column-name column)
|
||||
quote (column-comment column) quote)))))
|
||||
(pgsql-execute-with-timing section label sql-list)))
|
||||
|
||||
|
||||
|
||||
;;;
|
||||
;;; Citus Disitribution support
|
||||
;;;
|
||||
(defun create-distributed-table (distribute-rules)
|
||||
(let ((citus-sql
|
||||
(loop :for rule :in distribute-rules
|
||||
:collect (format-create-sql rule))))
|
||||
(pgsql-execute citus-sql)))
|
||||
|
||||
20
src/pgsql/pgsql-ddl-citus.lisp
Normal file
20
src/pgsql/pgsql-ddl-citus.lisp
Normal file
@ -0,0 +1,20 @@
|
||||
;;;
|
||||
;;; PostgreSQL Citus support for calling functions.
|
||||
;;;
|
||||
|
||||
(in-package :pgloader.pgsql)
|
||||
|
||||
(defmethod format-create-sql ((rule citus-reference-rule)
|
||||
&key (stream nil) if-not-exists)
|
||||
(declare (ignore if-not-exists))
|
||||
(format stream "SELECT create_reference_table('~a');"
|
||||
(format-table-name (citus-reference-rule-table rule))))
|
||||
|
||||
(defmethod format-create-sql ((rule citus-distributed-rule)
|
||||
&key (stream nil) if-not-exists)
|
||||
(declare (ignore if-not-exists))
|
||||
(let* ((rule-table (citus-distributed-rule-table rule))
|
||||
(rule-col-name (column-name (citus-distributed-rule-using rule))))
|
||||
(format stream "SELECT create_distributed_table('~a', '~a');"
|
||||
(format-table-name rule-table)
|
||||
(apply-identifier-case rule-col-name))))
|
||||
@ -38,6 +38,25 @@
|
||||
(sqltype-name sqltype)
|
||||
cascade))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Extensions
|
||||
;;;
|
||||
(defmethod format-create-sql ((extension extension)
|
||||
&key (stream nil) if-not-exists)
|
||||
(format stream "CREATE EXTENSION~:[~; IF NOT EXISTS~] ~a WITH SCHEMA ~a;"
|
||||
if-not-exists
|
||||
(extension-name extension)
|
||||
(schema-name (extension-schema extension))))
|
||||
|
||||
(defmethod format-drop-sql ((extension extension)
|
||||
&key (stream nil) cascade if-exists)
|
||||
(format stream "DROP EXTENSION~:[~; IF EXISTS~] ~a~@[ CASCADE~];"
|
||||
if-exists
|
||||
(extension-name extension)
|
||||
cascade))
|
||||
|
||||
|
||||
|
||||
;;;
|
||||
;;; Tables
|
||||
@ -73,6 +92,9 @@
|
||||
(alexandria:alist-plist
|
||||
(table-storage-parameter-list table))))
|
||||
|
||||
(when (table-tablespace table)
|
||||
(format s "~%TABLESPACE ~a" (table-tablespace table)))
|
||||
|
||||
(format s ";~%"))))
|
||||
|
||||
(defmethod format-drop-sql ((table table) &key (stream nil) cascade (if-exists t))
|
||||
@ -126,26 +148,30 @@
|
||||
"Common normalized default values and their PostgreSQL spelling.")
|
||||
|
||||
(defmethod format-default-value ((column column) &key (stream nil))
|
||||
(let* ((default (column-default column))
|
||||
(clean-default (cdr (assoc default *pgsql-default-values*)))
|
||||
(transform (column-transform column)))
|
||||
(or clean-default
|
||||
(if transform
|
||||
(let* ((transformed-default
|
||||
(handler-case
|
||||
(funcall transform default)
|
||||
(condition (c)
|
||||
(log-message :warning
|
||||
"Failed to transform default value ~s: ~a"
|
||||
default c)
|
||||
;; can't transform: return nil
|
||||
nil)))
|
||||
(transformed-column
|
||||
(make-column :default transformed-default)))
|
||||
(format-default-value transformed-column))
|
||||
(if default
|
||||
(ensure-quoted default #\')
|
||||
(format stream "NULL"))))))
|
||||
(if (column-transform-default column)
|
||||
(let* ((default (column-default column))
|
||||
(clean-default (cdr (assoc default *pgsql-default-values*)))
|
||||
(transform (column-transform column)))
|
||||
(or clean-default
|
||||
(if transform
|
||||
(let* ((transformed-default
|
||||
(handler-case
|
||||
(funcall transform default)
|
||||
(condition (c)
|
||||
(log-message :warning
|
||||
"Failed to transform default value ~s: ~a"
|
||||
default c)
|
||||
;; can't transform: return nil
|
||||
nil)))
|
||||
(transformed-column
|
||||
(make-column :default transformed-default)))
|
||||
(format-default-value transformed-column))
|
||||
(if default
|
||||
(ensure-quoted default #\')
|
||||
(format stream "NULL")))))
|
||||
|
||||
;; else, when column-transform-default is nil:
|
||||
(column-default column)))
|
||||
|
||||
|
||||
;;;
|
||||
@ -181,8 +207,9 @@
|
||||
;; don't use the index schema name here, PostgreSQL doesn't
|
||||
;; like it, might be implicit from the table's schema
|
||||
;; itself...
|
||||
"ALTER TABLE ~a ADD ~a USING INDEX ~a;"
|
||||
"ALTER TABLE ~a ADD~@[ CONSTRAINT ~a~] ~a USING INDEX ~a;"
|
||||
(format-table-name table)
|
||||
(index-conname index)
|
||||
(cond ((index-primary index) "PRIMARY KEY")
|
||||
((index-unique index) "UNIQUE"))
|
||||
index-name)))
|
||||
|
||||
@ -15,6 +15,8 @@
|
||||
(in-package #:pgloader.pgsql)
|
||||
|
||||
(defun finalize-catalogs (catalog variant)
|
||||
"Finalize the target PostgreSQL catalogs, dumbing down datatypes when the
|
||||
target actually is Redshift rather than core PostgreSQL."
|
||||
;;
|
||||
;; For Core PostgreSQL, we also want to find data types names that have
|
||||
;; no Btree support and fetch alternatives. This allows for supporting
|
||||
@ -30,7 +32,9 @@
|
||||
;;
|
||||
(adjust-data-types catalog variant))
|
||||
|
||||
(defgeneric adjust-data-types (catalog variant))
|
||||
(defgeneric adjust-data-types (catalog variant)
|
||||
(:documentation
|
||||
"Adjust PostgreSQL data types depending on the variant we target."))
|
||||
|
||||
;;;
|
||||
;;; Nothing needs to be done for PostgreSQL variant :pgdg, of course.
|
||||
|
||||
@ -5,7 +5,13 @@
|
||||
(in-package :pgloader.pgsql)
|
||||
|
||||
(defun fetch-pgsql-catalog (dbname
|
||||
&key table source-catalog including excluding)
|
||||
&key
|
||||
table
|
||||
source-catalog
|
||||
including
|
||||
excluding
|
||||
(variant :pgdg)
|
||||
pgversion)
|
||||
"Fetch PostgreSQL catalogs for the target database. A PostgreSQL
|
||||
connection must be opened."
|
||||
(let* ((*identifier-case* :quote)
|
||||
@ -18,6 +24,10 @@
|
||||
|
||||
(t
|
||||
including))))
|
||||
(when (eq :pgdg variant)
|
||||
(list-all-sqltypes catalog
|
||||
:including including
|
||||
:excluding excluding))
|
||||
|
||||
(list-all-columns catalog
|
||||
:table-type :table
|
||||
@ -25,17 +35,19 @@
|
||||
:excluding excluding)
|
||||
|
||||
(list-all-indexes catalog
|
||||
:including including
|
||||
:excluding excluding
|
||||
:pgversion pgversion)
|
||||
|
||||
(when (eq :pgdg variant)
|
||||
(list-all-fkeys catalog
|
||||
:including including
|
||||
:excluding excluding)
|
||||
|
||||
(list-all-fkeys catalog
|
||||
:including including
|
||||
:excluding excluding)
|
||||
|
||||
;; fetch fkey we depend on with UNIQUE indexes but that have been
|
||||
;; excluded from the target list, we still need to take care of them to
|
||||
;; be able to DROP then CREATE those indexes again
|
||||
(list-missing-fk-deps catalog)
|
||||
;; fetch fkey we depend on with UNIQUE indexes but that have been
|
||||
;; excluded from the target list, we still need to take care of them to
|
||||
;; be able to DROP then CREATE those indexes again
|
||||
(list-missing-fk-deps catalog))
|
||||
|
||||
(log-message :debug "fetch-pgsql-catalog: ~d tables, ~d indexes, ~d+~d fkeys"
|
||||
(count-tables catalog)
|
||||
@ -96,7 +108,7 @@
|
||||
(defun format-table-name-as-including-exp (table)
|
||||
"Return a table name suitable for a catalog lookup using ~ operator."
|
||||
(let ((table-name (table-name table)))
|
||||
(format nil "^~a$" (ensure-unquoted table-name))))
|
||||
(make-string-match-rule :target (ensure-unquoted table-name))))
|
||||
|
||||
(defun query-table-schema (table)
|
||||
"Get PostgreSQL schema name where to locate TABLE-NAME by following the
|
||||
@ -107,6 +119,27 @@
|
||||
(table-name table))
|
||||
:single)))
|
||||
|
||||
(defun make-including-expr-from-view-names (view-names)
|
||||
"Turn MATERIALIZING VIEWs list of view names into an INCLUDING parameter."
|
||||
(let (including current-schema)
|
||||
(loop :for (schema-name . view-name) :in view-names
|
||||
:do (let* ((schema-name
|
||||
(if schema-name
|
||||
(ensure-unquoted schema-name)
|
||||
(or
|
||||
current-schema
|
||||
(setf current-schema
|
||||
(pomo:query "select current_schema()" :single)))))
|
||||
(table-expr
|
||||
(make-string-match-rule :target (ensure-unquoted view-name)))
|
||||
(schema-entry
|
||||
(or (assoc schema-name including :test #'string=)
|
||||
(progn (push (cons schema-name nil) including)
|
||||
(assoc schema-name including :test #'string=)))))
|
||||
(push-to-end table-expr (cdr schema-entry))))
|
||||
;; return the including alist
|
||||
including))
|
||||
|
||||
|
||||
(defvar *table-type*
|
||||
'((:table . ("r" "f" "p")) ; ordinary, foreign and partitioned
|
||||
@ -116,18 +149,34 @@
|
||||
"Associate internal table type symbol with what's found in PostgreSQL
|
||||
pg_class.relkind column.")
|
||||
|
||||
(defun filter-list-to-where-clause (filter-list
|
||||
(defun filter-list-to-where-clause (schema-filter-list
|
||||
&optional
|
||||
not
|
||||
(schema-col "table_schema")
|
||||
(table-col "table_name"))
|
||||
"Given an INCLUDING or EXCLUDING clause, turn it into a PostgreSQL WHERE
|
||||
clause."
|
||||
(loop :for (schema . table-name-list) :in filter-list
|
||||
:append (mapcar (lambda (table-name)
|
||||
(format nil "(~a = '~a' and ~a ~:[~;NOT ~]~~ '~a')"
|
||||
schema-col schema table-col not table-name))
|
||||
table-name-list)))
|
||||
(loop :for (schema . filter-list) :in schema-filter-list
|
||||
:append (mapcar (lambda (filter)
|
||||
(typecase filter
|
||||
(string-match-rule
|
||||
(format nil "(~a = '~a' and ~a ~:[~;!~]= '~a')"
|
||||
schema-col
|
||||
schema
|
||||
table-col
|
||||
not
|
||||
(string-match-rule-target filter)))
|
||||
(regex-match-rule
|
||||
(format nil "(~a = '~a' and ~a ~:[~;NOT ~]~~ '~a')"
|
||||
schema-col
|
||||
schema
|
||||
table-col
|
||||
not
|
||||
(regex-match-rule-target filter)))))
|
||||
filter-list)))
|
||||
|
||||
(defun normalize-extra (extra)
|
||||
(cond ((string= "auto_increment" extra) :auto-increment)))
|
||||
|
||||
(defun list-all-columns (catalog
|
||||
&key
|
||||
@ -137,7 +186,8 @@
|
||||
&aux
|
||||
(table-type-name (cdr (assoc table-type *table-type*))))
|
||||
"Get the list of PostgreSQL column names per table."
|
||||
(loop :for (schema-name table-name table-oid name type typmod notnull default)
|
||||
(loop :for (schema-name table-name table-oid
|
||||
name type typmod notnull default extra)
|
||||
:in
|
||||
(query nil
|
||||
(format nil
|
||||
@ -156,23 +206,28 @@
|
||||
:do
|
||||
(let* ((schema (maybe-add-schema catalog schema-name))
|
||||
(table (maybe-add-table schema table-name :oid table-oid))
|
||||
(field (make-column :name name
|
||||
(field (make-column :table table
|
||||
:name name
|
||||
:type-name type
|
||||
:type-mod typmod
|
||||
:nullable (not notnull)
|
||||
:default default)))
|
||||
:default default
|
||||
:transform-default nil
|
||||
:extra (normalize-extra extra))))
|
||||
(add-field table field))
|
||||
:finally (return catalog)))
|
||||
|
||||
(defun list-all-indexes (catalog &key including excluding)
|
||||
(defun list-all-indexes (catalog &key including excluding pgversion)
|
||||
"Get the list of PostgreSQL index definitions per table."
|
||||
(loop
|
||||
:for (schema-name name oid
|
||||
table-schema table-name
|
||||
primary unique sql conname condef)
|
||||
primary unique cols sql conname condef)
|
||||
:in (query nil
|
||||
(format nil
|
||||
(sql "/pgsql/list-all-indexes.sql")
|
||||
(sql (sql-url-for-variant "pgsql"
|
||||
"list-all-indexes.sql"
|
||||
pgversion))
|
||||
including ; do we print the clause?
|
||||
(filter-list-to-where-clause including
|
||||
nil
|
||||
@ -186,17 +241,20 @@
|
||||
:do (let* ((schema (find-schema catalog schema-name))
|
||||
(tschema (find-schema catalog table-schema))
|
||||
(table (find-table tschema table-name))
|
||||
(columns (parse-index-column-names cols sql))
|
||||
(pg-index
|
||||
(make-index :name name
|
||||
(make-index :name (ensure-quoted name)
|
||||
:oid oid
|
||||
:schema schema
|
||||
:table table
|
||||
:primary primary
|
||||
:unique unique
|
||||
:columns nil
|
||||
:columns columns
|
||||
:sql sql
|
||||
:conname (unless (eq :null conname) conname)
|
||||
:condef (unless (eq :null condef) condef))))
|
||||
:conname (unless (eq :null conname)
|
||||
(ensure-quoted conname))
|
||||
:condef (unless (eq :null condef)
|
||||
condef))))
|
||||
(maybe-add-index table name pg-index :key #'index-name))
|
||||
:finally (return catalog)))
|
||||
|
||||
@ -204,7 +262,7 @@
|
||||
"Get the list of PostgreSQL index definitions per table."
|
||||
(loop
|
||||
:for (schema-name table-name fschema-name ftable-name
|
||||
conoid conname condef
|
||||
conoid pkeyoid conname condef
|
||||
cols fcols
|
||||
updrule delrule mrule deferrable deferred)
|
||||
:in (query nil
|
||||
@ -246,9 +304,13 @@
|
||||
(table (find-table schema table-name))
|
||||
(fschema (find-schema catalog fschema-name))
|
||||
(ftable (find-table fschema ftable-name))
|
||||
(pkey (find pkeyoid (table-index-list ftable)
|
||||
:test #'=
|
||||
:key #'index-oid))
|
||||
(fk
|
||||
(make-fkey :name conname
|
||||
(make-fkey :name (ensure-quoted conname)
|
||||
:oid conoid
|
||||
:pkey pkey
|
||||
:condef condef
|
||||
:table table
|
||||
:columns (split-sequence:split-sequence #\, cols)
|
||||
@ -259,6 +321,13 @@
|
||||
:match-rule (pg-fk-match-rule-to-match-clause mrule)
|
||||
:deferrable deferrable
|
||||
:initially-deferred deferred)))
|
||||
;; add the fkey reference to the pkey index too
|
||||
(unless (find conoid
|
||||
(index-fk-deps pkey)
|
||||
:test #'=
|
||||
:key #'fkey-oid)
|
||||
(push-to-end fk (index-fk-deps pkey)))
|
||||
;; check that both tables are in pgloader's scope
|
||||
(if (and table ftable)
|
||||
(add-fkey table fk)
|
||||
(log-message :notice "Foreign Key ~a is ignored, one of its table is missing from pgloader table selection"
|
||||
@ -355,3 +424,71 @@
|
||||
(sql "/pgsql/list-table-oids-from-temp-table.sql"))))
|
||||
:do (setf (gethash name oidmap) oid)))
|
||||
oidmap))
|
||||
|
||||
|
||||
|
||||
;;;
|
||||
;;; PostgreSQL specific support for extensions and user defined data types.
|
||||
;;;
|
||||
(defun list-all-sqltypes (catalog &key including excluding)
|
||||
"Set the catalog's schema extension list and sqltype list"
|
||||
(loop :for (schema-name extension-name type-name enum-values)
|
||||
:in (query nil
|
||||
(format nil
|
||||
(sql "/pgsql/list-all-sqltypes.sql")
|
||||
including ; do we print the clause?
|
||||
(filter-list-to-where-clause including
|
||||
nil
|
||||
"n.nspname"
|
||||
"c.relname")
|
||||
excluding ; do we print the clause?
|
||||
(filter-list-to-where-clause excluding
|
||||
nil
|
||||
"n.nspname"
|
||||
"c.relname")))
|
||||
:do
|
||||
(let* ((schema (maybe-add-schema catalog schema-name))
|
||||
(sqltype
|
||||
(make-sqltype :name (ensure-quoted type-name)
|
||||
:schema schema
|
||||
:type (when enum-values :enum)
|
||||
:extra (when (and enum-values
|
||||
(not (eq enum-values :null)))
|
||||
(coerce enum-values 'list)))))
|
||||
|
||||
(if (and extension-name (not (eq :null extension-name)))
|
||||
;; then create extension will create the type
|
||||
(maybe-add-extension schema extension-name)
|
||||
|
||||
;; only create a specific entry for types that we need to create
|
||||
;; ourselves, when extension is not null "create extension" is
|
||||
;; going to take care of creating the type.
|
||||
(add-sqltype schema sqltype)))
|
||||
:finally (return catalog)))
|
||||
|
||||
|
||||
|
||||
;;;
|
||||
;;; Extra utils like parsing a list of column names from an index definition.
|
||||
;;;
|
||||
(defun parse-index-column-names (columns index-definition)
|
||||
"Return a list of column names for the given index."
|
||||
(if (and columns (not (eq :null columns)))
|
||||
;; the normal case, no much parsing to do, the data has been prepared
|
||||
;; for us in the SQL query
|
||||
(split-sequence:split-sequence #\, columns)
|
||||
|
||||
;; the redshift variant case, where there's no way to string_agg or
|
||||
;; even array_to_string(array_agg(...)) and so we need to parse the
|
||||
;; index-definition instead.
|
||||
;;
|
||||
;; CREATE UNIQUE INDEX pg_amproc_opc_proc_index ON pg_amproc USING btree (amopclaid, amprocsubtype, amprocnum)
|
||||
(when index-definition
|
||||
(let ((open-paren-pos (position #\( index-definition))
|
||||
(close-paren-pos (position #\) index-definition)))
|
||||
(when (and open-paren-pos close-paren-pos)
|
||||
(mapcar (lambda (colname) (string-trim " " colname))
|
||||
(split-sequence:split-sequence #\,
|
||||
index-definition
|
||||
:start (+ 1 open-paren-pos)
|
||||
:end close-paren-pos)))))))
|
||||
|
||||
4
src/pgsql/sql/8.0/README.md
Normal file
4
src/pgsql/sql/8.0/README.md
Normal file
@ -0,0 +1,4 @@
|
||||
Redshift is a fork of PostgreSQL 8.0, and our catalog queries must then
|
||||
target this old PostgreSQL version to work on Redshift. Parts of what we
|
||||
would usually implement in SQL is implemented in pgloader code instead, in
|
||||
order to support such an old PostgreSQL version.
|
||||
29
src/pgsql/sql/8.0/list-all-indexes.sql
Normal file
29
src/pgsql/sql/8.0/list-all-indexes.sql
Normal file
@ -0,0 +1,29 @@
|
||||
-- params: including
|
||||
-- filter-list-to-where-clause for including
|
||||
-- excluding
|
||||
-- filter-list-to-where-clause for excluding
|
||||
select n.nspname,
|
||||
i.relname,
|
||||
i.oid,
|
||||
rn.nspname,
|
||||
r.relname,
|
||||
indisprimary,
|
||||
indisunique,
|
||||
null,
|
||||
pg_get_indexdef(indexrelid),
|
||||
c.conname,
|
||||
pg_get_constraintdef(c.oid)
|
||||
from pg_index x
|
||||
join pg_class i ON i.oid = x.indexrelid
|
||||
join pg_class r ON r.oid = x.indrelid
|
||||
join pg_namespace n ON n.oid = i.relnamespace
|
||||
join pg_namespace rn ON rn.oid = r.relnamespace
|
||||
left join pg_depend d on d.classid = 'pg_class'::regclass
|
||||
and d.objid = i.oid
|
||||
and d.refclassid = 'pg_constraint'::regclass
|
||||
and d.deptype = 'i'
|
||||
left join pg_constraint c ON c.oid = d.refobjid
|
||||
where n.nspname !~~ '^pg_' and n.nspname <> 'information_schema'
|
||||
~:[~*~;and (~{~a~^~&~10t or ~})~]
|
||||
~:[~*~;and (~{~a~^~&~10t and ~})~]
|
||||
order by n.nspname, r.relname;
|
||||
@ -3,17 +3,37 @@
|
||||
-- filter-list-to-where-clause for including
|
||||
-- excluding
|
||||
-- filter-list-to-where-clause for excluding
|
||||
with seqattr as
|
||||
(
|
||||
select adrelid,
|
||||
adnum,
|
||||
adsrc,
|
||||
case when adsrc ~~ 'nextval'
|
||||
then substring(pg_get_expr(d.adbin, d.adrelid)
|
||||
from '''([^'']+)'''
|
||||
)
|
||||
else null
|
||||
end as seqname
|
||||
from pg_attrdef d
|
||||
)
|
||||
select nspname, relname, c.oid, attname,
|
||||
t.oid::regtype as type,
|
||||
case when atttypmod > 0 then atttypmod - 4 else null end as typmod,
|
||||
case when atttypmod > 0
|
||||
then substring(format_type(t.oid, atttypmod) from '\d+(?:,\d+)?')
|
||||
else null
|
||||
end as typmod,
|
||||
attnotnull,
|
||||
case when atthasdef then def.adsrc end as default
|
||||
case when atthasdef then def.adsrc end as default,
|
||||
case when s.seqname is not null then 'auto_increment' end as extra
|
||||
from pg_class c
|
||||
join pg_namespace n on n.oid = c.relnamespace
|
||||
left join pg_attribute a on c.oid = a.attrelid
|
||||
join pg_type t on t.oid = a.atttypid and attnum > 0
|
||||
left join pg_attrdef def on a.attrelid = def.adrelid
|
||||
and a.attnum = def.adnum
|
||||
and a.atthasdef
|
||||
left join seqattr s on def.adrelid = s.adrelid
|
||||
and def.adnum = s.adnum
|
||||
|
||||
where nspname !~~ '^pg_' and n.nspname <> 'information_schema'
|
||||
and relkind in (~{'~a'~^, ~})
|
||||
|
||||
4
src/pgsql/sql/list-all-extensions.sql
Normal file
4
src/pgsql/sql/list-all-extensions.sql
Normal file
@ -0,0 +1,4 @@
|
||||
select nspname, extname
|
||||
from pg_extension e
|
||||
join pg_namespace n on n.oid = e.extnamespace
|
||||
where nspname !~ '^pg_';
|
||||
@ -7,7 +7,9 @@
|
||||
-- excluding (ftable)
|
||||
-- filter-list-to-where-clause for excluding
|
||||
select n.nspname, c.relname, nf.nspname, cf.relname as frelname,
|
||||
r.oid, conname,
|
||||
r.oid,
|
||||
d.refobjid as pkeyoid,
|
||||
conname,
|
||||
pg_catalog.pg_get_constraintdef(r.oid, true) as condef,
|
||||
(select string_agg(attname, ',')
|
||||
from pg_attribute
|
||||
@ -26,6 +28,9 @@
|
||||
JOIN pg_namespace n on c.relnamespace = n.oid
|
||||
JOIN pg_class cf on r.confrelid = cf.oid
|
||||
JOIN pg_namespace nf on cf.relnamespace = nf.oid
|
||||
JOIN pg_depend d on d.classid = 'pg_constraint'::regclass
|
||||
and d.objid = r.oid
|
||||
and d.refobjsubid = 0
|
||||
where r.contype = 'f'
|
||||
AND c.relkind in ('r', 'f', 'p')
|
||||
AND cf.relkind in ('r', 'f', 'p')
|
||||
|
||||
@ -9,6 +9,11 @@
|
||||
r.relname,
|
||||
indisprimary,
|
||||
indisunique,
|
||||
(select string_agg(attname, ',')
|
||||
from pg_attribute
|
||||
where attrelid = r.oid
|
||||
and array[attnum::integer] <@ indkey::integer[]
|
||||
) as cols,
|
||||
pg_get_indexdef(indexrelid),
|
||||
c.conname,
|
||||
pg_get_constraintdef(c.oid)
|
||||
@ -17,10 +22,11 @@
|
||||
join pg_class r ON r.oid = x.indrelid
|
||||
join pg_namespace n ON n.oid = i.relnamespace
|
||||
join pg_namespace rn ON rn.oid = r.relnamespace
|
||||
left join pg_constraint c ON c.conindid = i.oid
|
||||
and c.conrelid = r.oid
|
||||
-- filter out self-fkeys
|
||||
and c.confrelid <> r.oid
|
||||
left join pg_depend d on d.classid = 'pg_class'::regclass
|
||||
and d.objid = i.oid
|
||||
and d.refclassid = 'pg_constraint'::regclass
|
||||
and d.deptype = 'i'
|
||||
left join pg_constraint c ON c.oid = d.refobjid
|
||||
where n.nspname !~~ '^pg_' and n.nspname <> 'information_schema'
|
||||
~:[~*~;and (~{~a~^~&~10t or ~})~]
|
||||
~:[~*~;and (~{~a~^~&~10t and ~})~]
|
||||
|
||||
43
src/pgsql/sql/list-all-sqltypes.sql
Normal file
43
src/pgsql/sql/list-all-sqltypes.sql
Normal file
@ -0,0 +1,43 @@
|
||||
--
|
||||
-- get user defined SQL types
|
||||
--
|
||||
select nt.nspname,
|
||||
extname,
|
||||
typname,
|
||||
case when enum.enumtypid is not null
|
||||
then array_agg(enum.enumlabel order by enumsortorder)
|
||||
end as enumvalues
|
||||
|
||||
from pg_class c
|
||||
join pg_namespace n on n.oid = c.relnamespace
|
||||
left join pg_attribute a on c.oid = a.attrelid and a.attnum > 0
|
||||
join pg_type t on t.oid = a.atttypid
|
||||
left join pg_namespace nt on nt.oid = t.typnamespace
|
||||
left join pg_depend d on d.classid = 'pg_type'::regclass
|
||||
and d.refclassid = 'pg_extension'::regclass
|
||||
and d.objid = t.oid
|
||||
left join pg_extension e on refobjid = e.oid
|
||||
left join pg_enum enum on enum.enumtypid = t.oid
|
||||
|
||||
where nt.nspname !~~ '^pg_' and nt.nspname <> 'information_schema'
|
||||
and n.nspname !~~ '^pg_' and n.nspname <> 'information_schema'
|
||||
and c.relkind in ('r', 'f', 'p')
|
||||
~:[~*~;and (~{~a~^~&~10t or ~})~]
|
||||
~:[~*~;and (~{~a~^~&~10t and ~})~]
|
||||
and
|
||||
( t.typrelid = 0
|
||||
or
|
||||
(select c.relkind = 'c'
|
||||
from pg_class c
|
||||
where c.oid = t.typrelid)
|
||||
)
|
||||
and not exists
|
||||
(
|
||||
select 1
|
||||
from pg_type el
|
||||
where el.oid = t.typelem
|
||||
and el.typarray = t.oid
|
||||
)
|
||||
|
||||
group by nt.nspname, extname, typname, enumtypid
|
||||
order by nt.nspname, extname, typname, enumtypid;
|
||||
84
src/save.lisp
Normal file
84
src/save.lisp
Normal file
@ -0,0 +1,84 @@
|
||||
;;;
|
||||
;;; Create a build/bin/pgloader executable from the source code, using
|
||||
;;; Quicklisp to load pgloader and its dependencies.
|
||||
;;;
|
||||
|
||||
(in-package #:cl-user)
|
||||
|
||||
;; ccl provides an implementation of getenv already.
|
||||
#+sbcl
|
||||
(defun getenv (name &optional default)
|
||||
"Return the current value for the environment variable NAME, or default
|
||||
when unset."
|
||||
(or (sb-ext:posix-getenv name) default))
|
||||
|
||||
;; So that we can #+pgloader-image some code away, see main.lisp
|
||||
(push :pgloader-image *features*)
|
||||
|
||||
;;;
|
||||
;;; We need to support *print-circle* for the debug traces of the catalogs,
|
||||
;;; and while at it let's enforce *print-pretty* too.
|
||||
;;;
|
||||
(setf *print-circle* t *print-pretty* t)
|
||||
|
||||
|
||||
(require :asdf) ; should work in SBCL and CCL
|
||||
|
||||
(defvar *quicklisp.lisp* "http://beta.quicklisp.org/quicklisp.lisp")
|
||||
|
||||
(let* ((cwd (uiop:getcwd))
|
||||
(build-dir (uiop:merge-pathnames* "build/" cwd))
|
||||
(ql.lisp (uiop:merge-pathnames* "quicklisp.lisp" build-dir))
|
||||
(qldir (uiop:merge-pathnames* "quicklisp/" build-dir))
|
||||
(qlsetup (uiop:merge-pathnames* "setup.lisp" qldir)))
|
||||
;;
|
||||
;; We might have to install Quicklisp in build/quicklisp
|
||||
;;
|
||||
(unless (probe-file qlsetup)
|
||||
(format t "File ~a is not found, installing Quicklisp from ~a~%"
|
||||
qlsetup *quicklisp.lisp*)
|
||||
(let ((command (format nil "curl -o ~a ~a" ql.lisp *quicklisp.lisp*)))
|
||||
(format t "Running command: ~a~%" command)
|
||||
(uiop:run-program command))
|
||||
(load ql.lisp)
|
||||
(let* ((quickstart (find-package "QUICKLISP-QUICKSTART"))
|
||||
(ql-install (find-symbol "INSTALL" quickstart)))
|
||||
(funcall ql-install :path qldir :proxy (getenv "http_proxy"))))
|
||||
|
||||
;;
|
||||
;; Now that we have Quicklisp, load it and push our copy of pgloader in
|
||||
;; ql:*local-project-directories* where Quicklisp will find it.
|
||||
;;
|
||||
(format t "Loading file ~a~%" qlsetup)
|
||||
(load qlsetup)
|
||||
|
||||
(let* ((ql (find-package "QL"))
|
||||
(lpd (find-symbol "*LOCAL-PROJECT-DIRECTORIES*" ql))
|
||||
(quickload (find-symbol "QUICKLOAD" ql)))
|
||||
(push cwd (symbol-value lpd))
|
||||
|
||||
;;
|
||||
;; And finally load pgloader and its image-based hooks
|
||||
;;
|
||||
(format t "Loading system pgloader~%")
|
||||
(funcall quickload :pgloader)
|
||||
(load (asdf:system-relative-pathname :pgloader "src/hooks.lisp"))))
|
||||
|
||||
(defun pgloader-image-main ()
|
||||
(let ((argv #+sbcl sb-ext:*posix-argv*
|
||||
#+ccl ccl:*command-line-argument-list*))
|
||||
(pgloader::main argv)))
|
||||
|
||||
(let* ((cwd (uiop:getcwd))
|
||||
(build-dir (uiop:merge-pathnames* "build/bin/" cwd))
|
||||
(image-filename (uiop:merge-pathnames* "pgloader" build-dir)))
|
||||
#+ccl
|
||||
(ccl:save-application image-filename
|
||||
:toplevel-function #'cl-user::pgloader-image-main
|
||||
:prepend-kernel t)
|
||||
#+sbcl
|
||||
(sb-ext:save-lisp-and-die image-filename
|
||||
:toplevel #'cl-user::pgloader-image-main
|
||||
:executable t
|
||||
:save-runtime-options t
|
||||
:compression t))
|
||||
@ -95,7 +95,7 @@
|
||||
:initform nil)) ;
|
||||
(:documentation "pgloader Multiple Files Data Source (csv, fixed, copy)."))
|
||||
|
||||
(defgeneric parse-header (md-copy header)
|
||||
(defgeneric parse-header (md-copy)
|
||||
(:documentation "Parse the file header and return a list of fields."))
|
||||
|
||||
(defgeneric process-rows (md-copy stream process-fn)
|
||||
|
||||
@ -58,7 +58,8 @@
|
||||
;; otherwide, we do the full dance
|
||||
(and
|
||||
(or (and t-s-p (string= type rule-source-type)))
|
||||
(or (null tm-s-p) (typemod-expr-matches-p typemod-expr typemod))
|
||||
(or (null tm-s-p) (when typemod
|
||||
(typemod-expr-matches-p typemod-expr typemod)))
|
||||
(or (null d-s-p) (string= default rule-source-default))
|
||||
(or (null u-s-p) (eq unsigned rule-unsigned))
|
||||
(or (null n-s-p) (eq not-null rule-source-not-null))
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
|
||||
(in-package #:pgloader.sources)
|
||||
|
||||
(defmethod parse-header ((copy md-copy) header)
|
||||
(defmethod parse-header ((copy md-copy))
|
||||
"Unsupported by default, to be implemented in each md-copy subclass."
|
||||
(error "Parsing the header of a ~s is not implemented yet." (type-of copy)))
|
||||
|
||||
@ -59,12 +59,8 @@
|
||||
;; about skipping the first line
|
||||
(loop :repeat (skip-lines copy) :do (read-line input nil nil))
|
||||
|
||||
;; we might now have to read the fields from the header line
|
||||
(when (header copy)
|
||||
(setf (fields copy)
|
||||
(parse-header copy (read-line input nil nil)))
|
||||
|
||||
(log-message :debug "Parsed header columns ~s" (fields copy)))
|
||||
;; we might now have to skip the header line
|
||||
(when (header copy) (read-line input nil nil))
|
||||
|
||||
;; read in the text file, split it into columns
|
||||
(process-rows copy input process-row-fn))))
|
||||
|
||||
@ -20,7 +20,6 @@
|
||||
nil
|
||||
col))
|
||||
(lambda (col)
|
||||
(declare (optimize speed))
|
||||
(if (string= null-as col) nil col))))
|
||||
|
||||
(field-name-as-symbol (field-name-or-list)
|
||||
@ -116,7 +115,7 @@
|
||||
sexp))
|
||||
(t sexp)))))
|
||||
`(lambda (row)
|
||||
(declare (optimize speed) (type list row))
|
||||
(declare (type list row))
|
||||
(destructuring-bind (&optional ,@args &rest extra) row
|
||||
(declare (ignorable ,@args) (ignore extra))
|
||||
(let ,values
|
||||
|
||||
@ -57,19 +57,29 @@
|
||||
;;;
|
||||
;;; Read a file format in CSV format, and call given function on each line.
|
||||
;;;
|
||||
(defmethod parse-header ((csv copy-csv) header)
|
||||
(defmethod parse-header ((csv copy-csv))
|
||||
"Parse the header line given csv setup."
|
||||
;; a field entry is a list of field name and options
|
||||
(mapcar #'list
|
||||
(car ; parsing a single line
|
||||
(cl-csv:read-csv header
|
||||
:separator (csv-separator csv)
|
||||
:quote (csv-quote csv)
|
||||
:escape (csv-escape csv)
|
||||
:unquoted-empty-string-is-nil t
|
||||
:quoted-empty-string-is-nil nil
|
||||
:trim-outer-whitespace (csv-trim-blanks csv)
|
||||
:newline (csv-newline csv)))))
|
||||
(with-connection (cnx (source csv)
|
||||
:direction :input
|
||||
:external-format (encoding csv)
|
||||
:if-does-not-exist nil)
|
||||
(let ((input (md-strm cnx)))
|
||||
(loop :repeat (skip-lines csv) :do (read-line input nil nil))
|
||||
(let* ((header-line (read-line input nil nil))
|
||||
(field-name-list
|
||||
(mapcar #'list ; we need each field to be a list
|
||||
(car ; parsing a single line
|
||||
(cl-csv:read-csv header-line
|
||||
:separator (csv-separator csv)
|
||||
:quote (csv-quote csv)
|
||||
:escape (csv-escape csv)
|
||||
:unquoted-empty-string-is-nil t
|
||||
:quoted-empty-string-is-nil nil
|
||||
:trim-outer-whitespace (csv-trim-blanks csv)
|
||||
:newline (csv-newline csv))))))
|
||||
(log-message :notice "Parsed header columns ~s" (fields csv))
|
||||
(setf (fields csv) field-name-list )))))
|
||||
|
||||
(defmethod process-rows ((csv copy-csv) stream process-fn)
|
||||
"Process rows from STREAM according to COPY specifications and PROCESS-FN."
|
||||
|
||||
@ -88,6 +88,9 @@
|
||||
datetime-precision
|
||||
character-set-name collation-name)
|
||||
|
||||
(defmethod field-name ((field mssql-column) &key)
|
||||
(mssql-column-name field))
|
||||
|
||||
(defmethod mssql-column-ctype ((col mssql-column))
|
||||
"Build the ctype definition from the full mssql-column information."
|
||||
(let ((type (mssql-column-type col)))
|
||||
@ -125,7 +128,7 @@
|
||||
field
|
||||
(declare (ignore schema)) ; FIXME
|
||||
(let* ((ctype (mssql-column-ctype field))
|
||||
(extra (when (mssql-column-identity field) "auto_increment"))
|
||||
(extra (when (mssql-column-identity field) :auto-increment))
|
||||
(pgcol
|
||||
(apply-casting-rules table-name name type ctype default nullable extra)))
|
||||
;; the MS SQL driver smartly maps data to the proper CL type, but the
|
||||
|
||||
@ -144,8 +144,14 @@
|
||||
:columns nil
|
||||
:filter filter))
|
||||
(index
|
||||
(maybe-add-index table index-name pg-index :key #'index-name)))
|
||||
(add-column index colname))
|
||||
(when table
|
||||
(maybe-add-index table index-name pg-index :key #'index-name))))
|
||||
(unless table
|
||||
(log-message :warning
|
||||
"Failed to find table ~s in schema ~s for index ~s, skipping the index"
|
||||
table-name schema-name index-name))
|
||||
(when index
|
||||
(add-column index colname)))
|
||||
:finally (return catalog)))
|
||||
|
||||
(defun list-all-fkeys (catalog &key including excluding)
|
||||
@ -195,6 +201,7 @@
|
||||
|
||||
Mostly we just use the name, and make try to avoid parsing dates."
|
||||
(case (intern (string-upcase type) "KEYWORD")
|
||||
(:time (format nil "convert(varchar, [~a], 114)" name))
|
||||
(:datetime (format nil "convert(varchar, [~a], 126)" name))
|
||||
(:smalldatetime (format nil "convert(varchar, [~a], 126)" name))
|
||||
(:date (format nil "convert(varchar, [~a], 126)" name))
|
||||
@ -206,3 +213,43 @@
|
||||
(loop :for col :in columns
|
||||
:collect (with-slots (name type) col
|
||||
(get-column-sql-expression name type))))
|
||||
|
||||
|
||||
|
||||
;;;
|
||||
;;; Materialize Views support
|
||||
;;;
|
||||
(defun create-ms-views (views-alist)
|
||||
"VIEWS-ALIST associates view names with their SQL definition, which might
|
||||
be empty for already existing views. Create only the views for which we
|
||||
have an SQL definition."
|
||||
(unless (eq :all views-alist)
|
||||
(let ((views (remove-if #'null views-alist :key #'cdr)))
|
||||
(when views
|
||||
(loop :for (name . def) :in views
|
||||
:for sql := (destructuring-bind (schema . v-name) name
|
||||
(format nil
|
||||
"CREATE VIEW ~@[~s~].~s AS ~a"
|
||||
schema v-name def))
|
||||
:do (progn
|
||||
(log-message :info "MS SQL: ~a" sql)
|
||||
(mssql-query sql)))))))
|
||||
|
||||
(defun drop-ms-views (views-alist)
|
||||
"See `create-ms-views' for VIEWS-ALIST description. This time we DROP the
|
||||
views to clean out after our work."
|
||||
(unless (eq :all views-alist)
|
||||
(let ((views (remove-if #'null views-alist :key #'cdr)))
|
||||
(when views
|
||||
(let ((sql
|
||||
(with-output-to-string (sql)
|
||||
(format sql "DROP VIEW ")
|
||||
(loop :for view-definition :in views
|
||||
:for i :from 0
|
||||
:do (destructuring-bind (name . def) view-definition
|
||||
(declare (ignore def))
|
||||
(format sql
|
||||
"~@[, ~]~@[~s.~]~s"
|
||||
(not (zerop i)) (car name) (cdr name)))))))
|
||||
(log-message :info "PostgreSQL Source: ~a" sql)
|
||||
(mssql-query sql))))))
|
||||
|
||||
@ -72,30 +72,63 @@
|
||||
including
|
||||
excluding)
|
||||
"MS SQL introspection to prepare the migration."
|
||||
(declare (ignore materialize-views only-tables))
|
||||
(declare (ignore only-tables))
|
||||
(with-stats-collection ("fetch meta data"
|
||||
:use-result-as-rows t
|
||||
:use-result-as-read t
|
||||
:section :pre)
|
||||
(with-connection (*mssql-db* (source-db mssql))
|
||||
(list-all-columns catalog
|
||||
:including including
|
||||
:excluding excluding)
|
||||
(with-connection (*mssql-db* (source-db mssql))
|
||||
;; If asked to MATERIALIZE VIEWS, now is the time to create them in MS
|
||||
;; SQL, when given definitions rather than existing view names.
|
||||
(when (and materialize-views (not (eq :all materialize-views)))
|
||||
(create-ms-views materialize-views))
|
||||
|
||||
(when create-indexes
|
||||
(list-all-indexes catalog
|
||||
:including including
|
||||
:excluding excluding))
|
||||
(list-all-columns catalog
|
||||
:including including
|
||||
:excluding excluding)
|
||||
|
||||
(when foreign-keys
|
||||
(list-all-fkeys catalog
|
||||
;; fetch view (and their columns) metadata, covering comments too
|
||||
(let* ((view-names (unless (eq :all materialize-views)
|
||||
(mapcar #'car materialize-views)))
|
||||
(including
|
||||
(loop :for (schema-name . view-name) :in view-names
|
||||
:do (let* ((schema-name (or schema-name "dbo"))
|
||||
(schema-entry
|
||||
(or (assoc schema-name including :test #'string=)
|
||||
(progn (push (cons schema-name nil) including)
|
||||
(assoc schema-name including
|
||||
:test #'string=)))))
|
||||
(push-to-end view-name (cdr schema-entry))))))
|
||||
(cond (view-names
|
||||
(list-all-columns catalog
|
||||
:including including
|
||||
:table-type :view))
|
||||
|
||||
((eq :all materialize-views)
|
||||
(list-all-columns catalog :table-type :view))))
|
||||
|
||||
(when create-indexes
|
||||
(list-all-indexes catalog
|
||||
:including including
|
||||
:excluding excluding))
|
||||
|
||||
;; return how many objects we're going to deal with in total
|
||||
;; for stats collection
|
||||
(+ (count-tables catalog) (count-indexes catalog))))
|
||||
(when foreign-keys
|
||||
(list-all-fkeys catalog
|
||||
:including including
|
||||
:excluding excluding))
|
||||
|
||||
;; return how many objects we're going to deal with in total
|
||||
;; for stats collection
|
||||
(+ (count-tables catalog) (count-indexes catalog))))
|
||||
|
||||
;; be sure to return the catalog itself
|
||||
catalog)
|
||||
|
||||
|
||||
(defmethod cleanup ((mssql copy-mssql) (catalog catalog) &key materialize-views)
|
||||
"When there is a PostgreSQL error at prepare-pgsql-database step, we might
|
||||
need to clean-up any view created in the MS SQL connection for the
|
||||
migration purpose."
|
||||
(when materialize-views
|
||||
(with-connection (*mssql-db* (source-db mssql))
|
||||
(drop-ms-views materialize-views))))
|
||||
|
||||
@ -119,6 +119,10 @@
|
||||
:target (:type "timestamptz" :drop-default t :drop-not-null t)
|
||||
:using pgloader.transforms::zero-dates-to-null)
|
||||
|
||||
(:source (:type "datetime" :on-update-current-timestamp t :not-null nil)
|
||||
:target (:type "timestamptz" :drop-default t)
|
||||
:using pgloader.transforms::zero-dates-to-null)
|
||||
|
||||
(:source (:type "timestamp" :default "0000-00-00 00:00:00" :not-null t)
|
||||
:target (:type "timestamptz" :drop-default t :drop-not-null t)
|
||||
:using pgloader.transforms::zero-dates-to-null)
|
||||
@ -131,6 +135,10 @@
|
||||
:target (:type "timestamptz" :drop-default t :drop-not-null t)
|
||||
:using pgloader.transforms::zero-dates-to-null)
|
||||
|
||||
(:source (:type "timestamp" :on-update-current-timestamp t :not-null nil)
|
||||
:target (:type "timestamptz" :drop-default t)
|
||||
:using pgloader.transforms::zero-dates-to-null)
|
||||
|
||||
(:source (:type "date" :default "0000-00-00")
|
||||
:target (:type "date" :drop-default t)
|
||||
:using pgloader.transforms::zero-dates-to-null)
|
||||
@ -178,6 +186,9 @@
|
||||
(table-name name comment dtype ctype default nullable extra)))
|
||||
table-name name dtype ctype default nullable extra comment)
|
||||
|
||||
(defmethod field-name ((field mysql-column) &key)
|
||||
(mysql-column-name field))
|
||||
|
||||
(defun explode-mysql-enum (ctype)
|
||||
"Convert MySQL ENUM expression into a list of labels."
|
||||
(cl-ppcre:register-groups-bind (list)
|
||||
|
||||
@ -235,13 +235,7 @@ Illegal ~a character starting at position ~a~@[: ~a~].~%"
|
||||
(defun apply-decoding-as-filters (table-name filters)
|
||||
"Return a generialized boolean which is non-nil only if TABLE-NAME matches
|
||||
one of the FILTERS."
|
||||
(flet ((apply-filter (filter)
|
||||
;; we close over table-name here.
|
||||
(typecase filter
|
||||
(string (string-equal filter table-name))
|
||||
(list (destructuring-bind (type val) filter
|
||||
(ecase type
|
||||
(:regex (cl-ppcre:scan val table-name))))))))
|
||||
(flet ((apply-filter (filter) (matches filter table-name)))
|
||||
(some #'apply-filter filters)))
|
||||
|
||||
(defmethod instanciate-table-copy-object ((copy copy-mysql) (table table))
|
||||
|
||||
80
src/sources/pgsql/pgsql-cast-rules.lisp
Normal file
80
src/sources/pgsql/pgsql-cast-rules.lisp
Normal file
@ -0,0 +1,80 @@
|
||||
;;;
|
||||
;;; Tools to handle PostgreSQL data type casting rules
|
||||
;;;
|
||||
|
||||
(in-package :pgloader.source.pgsql)
|
||||
|
||||
(defparameter *pgsql-default-cast-rules*
|
||||
'((:source (:type "integer" :auto-increment t)
|
||||
:target (:type "serial" :drop-default t))
|
||||
|
||||
(:source (:type "bigint" :auto-increment t)
|
||||
:target (:type "bigserial" :drop-default t))
|
||||
|
||||
(:source (:type "character varying")
|
||||
:target (:type "text" :drop-typemod t)))
|
||||
"Data Type Casting to migrate from PostgtreSQL to PostgreSQL")
|
||||
|
||||
(defmethod pgsql-column-ctype ((column column))
|
||||
"Build the ctype definition from the PostgreSQL column information."
|
||||
(let ((type-name (column-type-name column))
|
||||
(type-mod (unless (or (null (column-type-mod column))
|
||||
(eq :null (column-type-mod column)))
|
||||
(column-type-mod column))))
|
||||
(format nil "~a~@[(~a)~]" type-name type-mod)))
|
||||
|
||||
(defmethod cast ((field column) &key &allow-other-keys)
|
||||
"Return the PostgreSQL type definition from the given PostgreSQL column
|
||||
definition"
|
||||
(with-slots (pgloader.catalog::table
|
||||
pgloader.catalog::name
|
||||
pgloader.catalog::type-name
|
||||
pgloader.catalog::type-mod
|
||||
pgloader.catalog::nullable
|
||||
pgloader.catalog::default
|
||||
pgloader.catalog::comment
|
||||
pgloader.catalog::transform
|
||||
pgloader.catalog::extra)
|
||||
field
|
||||
(let* ((ctype (pgsql-column-ctype field))
|
||||
(extra (or pgloader.catalog::extra
|
||||
(when (and (stringp (column-default field))
|
||||
(search "identity" (column-default field)))
|
||||
:auto-increment)))
|
||||
(pgcol (apply-casting-rules (table-source-name pgloader.catalog::table)
|
||||
pgloader.catalog::name
|
||||
pgloader.catalog::type-name
|
||||
ctype
|
||||
pgloader.catalog::default
|
||||
pgloader.catalog::nullable
|
||||
extra)))
|
||||
;; re-install our instruction not to transform default value: it comes
|
||||
;; from PostgreSQL, and we trust it.
|
||||
(setf (column-transform-default pgcol)
|
||||
(column-transform-default field))
|
||||
|
||||
;; Redshift may be using DEFAULT getdate() instead of now()
|
||||
(let ((default (column-default pgcol)))
|
||||
(setf (column-default pgcol)
|
||||
(cond
|
||||
((and (stringp default) (string= "NULL" default))
|
||||
:null)
|
||||
|
||||
((and (stringp default) (string= "getdate()" default))
|
||||
:current-timestamp)
|
||||
|
||||
;; get rid of the identity default value, we already added
|
||||
;; an hint in the column-extra field.
|
||||
;;
|
||||
;; "identity"(347358, 0, ('1,1'::character varying)::text)
|
||||
((and (stringp default) (search "identity" default))
|
||||
:null)
|
||||
|
||||
(t (column-default pgcol))))
|
||||
|
||||
;; we usually trust defaults that come from PostgreSQL... but we
|
||||
;; also have support for Redshift.
|
||||
(when (member (column-default pgcol) '(:null :current-timestamp))
|
||||
(setf (column-transform-default pgcol) t)))
|
||||
|
||||
pgcol)))
|
||||
50
src/sources/pgsql/pgsql-schema.lisp
Normal file
50
src/sources/pgsql/pgsql-schema.lisp
Normal file
@ -0,0 +1,50 @@
|
||||
(in-package :pgloader.source.pgsql)
|
||||
|
||||
(defun create-pg-views (views-alist)
|
||||
"VIEWS-ALIST associates view names with their SQL definition, which might
|
||||
be empty for already existing views. Create only the views for which we
|
||||
have an SQL definition."
|
||||
(unless (eq :all views-alist)
|
||||
(let ((views (remove-if #'null views-alist :key #'cdr)))
|
||||
(when views
|
||||
(loop :for (name . def) :in views
|
||||
:for sql := (destructuring-bind (schema . v-name) name
|
||||
(format nil
|
||||
"CREATE VIEW ~@[~s.~]~s AS ~a"
|
||||
schema v-name def))
|
||||
:do (progn
|
||||
(log-message :info "PostgreSQL Source: ~a" sql)
|
||||
#+pgloader-image
|
||||
(pgsql-execute sql)
|
||||
#-pgloader-image
|
||||
(restart-case
|
||||
(pgsql-execute sql)
|
||||
(use-existing-view ()
|
||||
:report "Use the already existing view and continue"
|
||||
nil)
|
||||
(replace-view ()
|
||||
:report
|
||||
"Replace the view with the one from pgloader's command"
|
||||
(let ((drop-sql (format nil "DROP VIEW ~a;" (car name))))
|
||||
(log-message :info "PostgreSQL Source: ~a" drop-sql)
|
||||
(pgsql-execute drop-sql)
|
||||
(pgsql-execute sql))))))))))
|
||||
|
||||
(defun drop-pg-views (views-alist)
|
||||
"See `create-pg-views' for VIEWS-ALIST description. This time we DROP the
|
||||
views to clean out after our work."
|
||||
(unless (eq :all views-alist)
|
||||
(let ((views (remove-if #'null views-alist :key #'cdr)))
|
||||
(when views
|
||||
(let ((sql
|
||||
(with-output-to-string (sql)
|
||||
(format sql "DROP VIEW ")
|
||||
(loop :for view-definition :in views
|
||||
:for i :from 0
|
||||
:do (destructuring-bind (name . def) view-definition
|
||||
(declare (ignore def))
|
||||
(format sql
|
||||
"~@[, ~]~@[~s.~]~s"
|
||||
(not (zerop i)) (car name) (cdr name)))))))
|
||||
(log-message :info "PostgreSQL Source: ~a" sql)
|
||||
(pgsql-execute sql))))))
|
||||
142
src/sources/pgsql/pgsql.lisp
Normal file
142
src/sources/pgsql/pgsql.lisp
Normal file
@ -0,0 +1,142 @@
|
||||
;;;
|
||||
;;; Read from a PostgreSQL database.
|
||||
;;;
|
||||
|
||||
(in-package :pgloader.source.pgsql)
|
||||
|
||||
(defclass copy-pgsql (db-copy) ()
|
||||
(:documentation "pgloader PostgreSQL Data Source"))
|
||||
|
||||
(defmethod initialize-instance :after ((source copy-pgsql) &key)
|
||||
"Add a default value for transforms in case it's not been provided."
|
||||
(let* ((transforms (when (slot-boundp source 'transforms)
|
||||
(slot-value source 'transforms))))
|
||||
(when (and (slot-boundp source 'fields) (slot-value source 'fields))
|
||||
;; cast typically happens in copy-database in the schema structure,
|
||||
;; and the result is then copied into the copy-mysql instance.
|
||||
(unless (and (slot-boundp source 'columns) (slot-value source 'columns))
|
||||
(setf (slot-value source 'columns)
|
||||
(mapcar #'cast (slot-value source 'fields))))
|
||||
|
||||
(unless transforms
|
||||
(setf (slot-value source 'transforms)
|
||||
(mapcar #'column-transform (slot-value source 'columns)))))))
|
||||
|
||||
(defmethod map-rows ((pgsql copy-pgsql) &key process-row-fn)
|
||||
"Extract PostgreSQL data and call PROCESS-ROW-FN function with a single
|
||||
argument (a list of column values) for each row"
|
||||
(let ((map-reader
|
||||
;;
|
||||
;; Build a Postmodern row reader that prepares a vector of strings
|
||||
;; and call PROCESS-ROW-FN with the vector as single argument.
|
||||
;;
|
||||
(cl-postgres:row-reader (fields)
|
||||
(let ((nb-cols (length fields)))
|
||||
(loop :while (cl-postgres:next-row)
|
||||
:do (let ((row (make-array nb-cols)))
|
||||
(loop :for i :from 0
|
||||
:for field :across fields
|
||||
:do (setf (aref row i)
|
||||
(cl-postgres:next-field field)))
|
||||
(funcall process-row-fn row)))))))
|
||||
|
||||
(with-pgsql-connection ((source-db pgsql))
|
||||
(if (citus-backfill-table-p (target pgsql))
|
||||
;;
|
||||
;; SELECT dist_key, * FROM source JOIN dist ON ...
|
||||
;;
|
||||
(let ((sql (citus-format-sql-select (source pgsql) (target pgsql))))
|
||||
(log-message :sql "~a" sql)
|
||||
(cl-postgres:exec-query pomo:*database* sql map-reader))
|
||||
|
||||
;;
|
||||
;; No JOIN to add to backfill data in the SQL query here.
|
||||
;;
|
||||
(let* ((cols (mapcar #'column-name (fields pgsql)))
|
||||
(sql
|
||||
(format nil
|
||||
"SELECT ~{~s::text~^, ~} FROM ~s.~s"
|
||||
cols
|
||||
(schema-source-name (table-schema (source pgsql)))
|
||||
(table-source-name (source pgsql)))))
|
||||
(log-message :sql "~a" sql)
|
||||
(cl-postgres:exec-query pomo:*database* sql map-reader))))))
|
||||
|
||||
(defmethod copy-column-list ((pgsql copy-pgsql))
|
||||
"We are sending the data in the MySQL columns ordering here."
|
||||
(mapcar #'column-name (fields pgsql)))
|
||||
|
||||
(defmethod fetch-metadata ((pgsql copy-pgsql)
|
||||
(catalog catalog)
|
||||
&key
|
||||
materialize-views
|
||||
only-tables
|
||||
create-indexes
|
||||
foreign-keys
|
||||
including
|
||||
excluding)
|
||||
"PostgreSQL introspection to prepare the migration."
|
||||
(declare (ignore only-tables))
|
||||
(with-stats-collection ("fetch meta data"
|
||||
:use-result-as-rows t
|
||||
:use-result-as-read t
|
||||
:section :pre)
|
||||
(with-pgsql-transaction (:pgconn (source-db pgsql))
|
||||
(let ((variant (pgconn-variant (source-db pgsql)))
|
||||
(pgversion (pgconn-major-version (source-db pgsql))))
|
||||
;;
|
||||
;; First, create the source views that we're going to materialize in
|
||||
;; the target database.
|
||||
;;
|
||||
(when (and materialize-views (not (eq :all materialize-views)))
|
||||
(create-pg-views materialize-views))
|
||||
|
||||
(when (eq :pgdg variant)
|
||||
(list-all-sqltypes catalog
|
||||
:including including
|
||||
:excluding excluding))
|
||||
|
||||
(list-all-columns catalog
|
||||
:including including
|
||||
:excluding excluding)
|
||||
|
||||
(let* ((view-names (unless (eq :all materialize-views)
|
||||
(mapcar #'car materialize-views)))
|
||||
(including (make-including-expr-from-view-names view-names)))
|
||||
(cond (view-names
|
||||
(list-all-columns catalog
|
||||
:including including
|
||||
:table-type :view))
|
||||
|
||||
((eq :all materialize-views)
|
||||
(list-all-columns catalog :table-type :view))))
|
||||
|
||||
(when create-indexes
|
||||
(list-all-indexes catalog
|
||||
:including including
|
||||
:excluding excluding
|
||||
:pgversion pgversion))
|
||||
|
||||
(when (and (eq :pgdg variant) foreign-keys)
|
||||
(list-all-fkeys catalog
|
||||
:including including
|
||||
:excluding excluding))
|
||||
|
||||
;; return how many objects we're going to deal with in total
|
||||
;; for stats collection
|
||||
(+ (count-tables catalog)
|
||||
(count-views catalog)
|
||||
(count-indexes catalog)
|
||||
(count-fkeys catalog)))))
|
||||
|
||||
;; be sure to return the catalog itself
|
||||
catalog)
|
||||
|
||||
|
||||
(defmethod cleanup ((pgsql copy-pgsql) (catalog catalog) &key materialize-views)
|
||||
"When there is a PostgreSQL error at prepare-pgsql-database step, we might
|
||||
need to clean-up any view created in the source PostgreSQL connection for
|
||||
the migration purpose."
|
||||
(when materialize-views
|
||||
(with-pgsql-transaction (:pgconn (source-db pgsql))
|
||||
(drop-pg-views materialize-views))))
|
||||
1
src/sources/sqlite/sql/get-create-table.sql
Normal file
1
src/sources/sqlite/sql/get-create-table.sql
Normal file
@ -0,0 +1 @@
|
||||
select sql from sqlite_master where name = '~a'
|
||||
@ -67,6 +67,47 @@
|
||||
(loop for (name) in (sqlite:execute-to-list db sql)
|
||||
collect name)))
|
||||
|
||||
(defun find-sequence (db table-name column-name)
|
||||
"Find if table-name.column-name is attached to a sequence in
|
||||
sqlite_sequence catalog."
|
||||
(let* ((sql (format nil (sql "/sqlite/find-sequence.sql") table-name))
|
||||
(seq (sqlite:execute-single db sql)))
|
||||
(when (and seq (not (zerop seq)))
|
||||
;; magic marker for `apply-casting-rules'
|
||||
(log-message :notice "SQLite column ~a.~a uses a sequence"
|
||||
table-name column-name)
|
||||
seq)))
|
||||
|
||||
(defun find-auto-increment-in-create-sql (db table-name column-name)
|
||||
"The sqlite_sequence catalog is only created when some content has been
|
||||
added to the table. So we might fail to FIND-SEQUENCE, and still need to
|
||||
consider the column has an autoincrement. Parse the SQL definition of the
|
||||
table to find out."
|
||||
(let* ((sql (format nil (sql "/sqlite/get-create-table.sql") table-name))
|
||||
(create-table (sqlite:execute-single db sql))
|
||||
(open-paren (+ 1 (position #\( create-table)))
|
||||
(close-paren (position #\) create-table :from-end t))
|
||||
(coldefs
|
||||
(mapcar (lambda (def) (string-trim (list #\Space) def))
|
||||
(split-sequence:split-sequence #\,
|
||||
create-table
|
||||
:start open-paren
|
||||
:end close-paren))))
|
||||
(loop :for coldef :in coldefs
|
||||
:do (let* ((words (mapcar (lambda (w) (string-trim '(#\" #\') w))
|
||||
(split-sequence:split-sequence #\Space coldef)))
|
||||
(colname (first words))
|
||||
(props (rest words)))
|
||||
(when (and (string= colname column-name)
|
||||
(member "autoincrement" props :test #'string-equal))
|
||||
;; we know the target column has no sequence because we
|
||||
;; looked into that first by calling find-sequence, and we
|
||||
;; only call find-auto-increment-in-create-sql when
|
||||
;; find-sequence failed to find anything.
|
||||
(log-message :notice "SQLite column ~a.~a is autoincrement, but has no sequence"
|
||||
table-name column-name)
|
||||
(return t))))))
|
||||
|
||||
(defun list-columns (table &key db-has-sequences (db *sqlite-db*) )
|
||||
"Return the list of columns found in TABLE-NAME."
|
||||
(let* ((table-name (table-source-name table))
|
||||
@ -85,17 +126,14 @@
|
||||
pk-id)))
|
||||
(when (and db-has-sequences
|
||||
(not (zerop pk-id))
|
||||
(string-equal (coldef-ctype field) "integer"))
|
||||
(string-equal (coldef-ctype field) "integer")
|
||||
(or (find-sequence db table-name name)
|
||||
(find-auto-increment-in-create-sql db
|
||||
table-name
|
||||
name)))
|
||||
;; then it might be an auto_increment, which we know by
|
||||
;; looking at the sqlite_sequence catalog
|
||||
(let* ((sql
|
||||
(format nil (sql "/sqlite/find-sequence.sql") table-name))
|
||||
(seq (sqlite:execute-single db sql)))
|
||||
(when (and seq (not (zerop seq)))
|
||||
;; magic marker for `apply-casting-rules'
|
||||
(log-message :notice "SQLite column ~a.~a uses a sequence"
|
||||
table-name name)
|
||||
(setf (coldef-extra field) :auto-increment))))
|
||||
(setf (coldef-extra field) :auto-increment))
|
||||
(add-field table field)))))
|
||||
|
||||
(defun list-all-columns (schema
|
||||
@ -150,7 +188,7 @@
|
||||
"Return the list of columns in INDEX-NAME."
|
||||
(let ((sql (format nil (sql "/sqlite/list-index-cols.sql") index-name)))
|
||||
(loop :for (index-pos table-pos col-name) :in (sqlite:execute-to-list db sql)
|
||||
:collect col-name)))
|
||||
:collect (apply-identifier-case col-name))))
|
||||
|
||||
(defun list-indexes (table &optional (db *sqlite-db*))
|
||||
"Return the list of indexes attached to TABLE."
|
||||
|
||||
@ -96,7 +96,7 @@
|
||||
"Send the data in the SQLite column ordering."
|
||||
(mapcar #'apply-identifier-case (mapcar #'coldef-name (fields sqlite))))
|
||||
|
||||
(defmethod fetch-metadata (sqlite catalog
|
||||
(defmethod fetch-metadata ((sqlite copy-sqlite) (catalog catalog)
|
||||
&key
|
||||
materialize-views
|
||||
only-tables
|
||||
|
||||
@ -75,6 +75,10 @@
|
||||
"Alter the storage parameters of TABLE."
|
||||
(setf (table-storage-parameter-list table) parameters))
|
||||
|
||||
(defun alter-table-set-tablespace (table tablespace)
|
||||
"Alter the tablespace slot of TABLE"
|
||||
(setf (table-tablespace table) tablespace))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Apply the match rules as given by the parser to a table name.
|
||||
|
||||
@ -42,32 +42,44 @@
|
||||
;;; Column structures details depend on the specific source type and are
|
||||
;;; implemented in each source separately.
|
||||
;;;
|
||||
(defstruct catalog name schema-list types-without-btree)
|
||||
(defstruct schema source-name name catalog table-list view-list in-search-path)
|
||||
(defstruct table source-name name schema oid comment storage-parameter-list
|
||||
(defstruct catalog name schema-list types-without-btree distribution-rules)
|
||||
|
||||
(defstruct schema source-name name catalog in-search-path
|
||||
table-list view-list extension-list sqltype-list)
|
||||
|
||||
(defstruct table source-name name schema oid comment
|
||||
storage-parameter-list tablespace
|
||||
;; field is for SOURCE
|
||||
;; column is for TARGET
|
||||
field-list column-list index-list fkey-list trigger-list)
|
||||
;; citus is an extra slot for citus support
|
||||
field-list column-list index-list fkey-list trigger-list citus-rule)
|
||||
|
||||
;;;
|
||||
;;; When migrating from PostgreSQL to PostgreSQL we might have to install
|
||||
;;; extensions to have data type coverage.
|
||||
;;;
|
||||
(defstruct extension name schema)
|
||||
|
||||
;;;
|
||||
;;; When migrating from another database to PostgreSQL some data types might
|
||||
;;; need to be tranformed dynamically into User Defined Types: ENUMs, SET,
|
||||
;;; etc.
|
||||
;;;
|
||||
(defstruct sqltype name schema type source-def extra)
|
||||
(defstruct sqltype name schema type source-def extra extension)
|
||||
|
||||
;;;
|
||||
;;; The generic PostgreSQL column that the CAST generic function is asked to
|
||||
;;; produce, so that we know how to CREATE TABLEs in PostgreSQL whatever the
|
||||
;;; source is.
|
||||
;;;
|
||||
(defstruct column name type-name type-mod nullable default comment transform extra)
|
||||
(defstruct column table name type-name type-mod nullable default comment
|
||||
transform extra (transform-default t))
|
||||
|
||||
;;;
|
||||
;;; Index and Foreign Keys
|
||||
;;;
|
||||
(defstruct fkey
|
||||
name oid table columns foreign-table foreign-columns condef
|
||||
name oid table columns pkey foreign-table foreign-columns condef
|
||||
update-rule delete-rule match-rule deferrable initially-deferred)
|
||||
|
||||
;;;
|
||||
@ -94,13 +106,18 @@
|
||||
;;;
|
||||
;;; Main data collection API
|
||||
;;;
|
||||
(defgeneric add-schema (object schema-name &key))
|
||||
(defgeneric add-table (object table-name &key))
|
||||
(defgeneric add-view (object view-name &key))
|
||||
(defgeneric add-column (object column &key))
|
||||
(defgeneric add-index (object index &key))
|
||||
(defgeneric add-fkey (object fkey &key))
|
||||
(defgeneric add-comment (object comment &key))
|
||||
(defgeneric add-schema (object schema-name &key))
|
||||
(defgeneric add-extension (object extension-name &key))
|
||||
(defgeneric add-table (object table-name &key))
|
||||
(defgeneric add-view (object view-name &key))
|
||||
(defgeneric add-sqltype (object column &key))
|
||||
(defgeneric add-column (object column &key))
|
||||
(defgeneric add-index (object index &key))
|
||||
(defgeneric add-fkey (object fkey &key))
|
||||
(defgeneric add-comment (object comment &key))
|
||||
|
||||
(defgeneric extension-list (object &key)
|
||||
(:documentation "Return the list of extensions found in OBJECT."))
|
||||
|
||||
(defgeneric table-list (object &key)
|
||||
(:documentation "Return the list of tables found in OBJECT."))
|
||||
@ -112,6 +129,10 @@
|
||||
(:documentation
|
||||
"Find a schema by SCHEMA-NAME in a catalog OBJECT and return the schema"))
|
||||
|
||||
(defgeneric find-extension (object extension-name &key)
|
||||
(:documentation
|
||||
"Find an extension by EXTENSION-NAME in a schema OBJECT and return the table"))
|
||||
|
||||
(defgeneric find-table (object table-name &key)
|
||||
(:documentation
|
||||
"Find a table by TABLE-NAME in a schema OBJECT and return the table"))
|
||||
@ -131,6 +152,9 @@
|
||||
(defgeneric maybe-add-schema (object schema-name &key)
|
||||
(:documentation "Add a new schema or return existing one."))
|
||||
|
||||
(defgeneric maybe-add-extension (object extension-name &key)
|
||||
(:documentation "Add a new extension or return existing one."))
|
||||
|
||||
(defgeneric maybe-add-table (object table-name &key)
|
||||
(:documentation "Add a new table or return existing one."))
|
||||
|
||||
@ -163,10 +187,44 @@
|
||||
"Cast a FIELD definition from a source database into a PostgreSQL COLUMN
|
||||
definition."))
|
||||
|
||||
(defgeneric field-name (object &key)
|
||||
(:documentation "Get the source database column name, or field-name."))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Implementation of the methods
|
||||
;;;
|
||||
(defmethod extension-list ((schema schema) &key)
|
||||
"Return the list of extensions for SCHEMA."
|
||||
(schema-extension-list schema))
|
||||
|
||||
(defmethod extension-list ((catalog catalog) &key)
|
||||
"Return the list of extensions for CATALOG."
|
||||
(apply #'append (mapcar #'extension-list (catalog-schema-list catalog))))
|
||||
|
||||
(defmethod sqltype-list ((column column) &key)
|
||||
"Return the list of sqltypes for SCHEMA."
|
||||
(when (typep (column-type-name column) 'sqltype)
|
||||
(column-type-name column)))
|
||||
|
||||
(defmethod sqltype-list ((table table) &key)
|
||||
"Return the list of sqltypes for SCHEMA."
|
||||
(mapcar #'sqltype-list (table-column-list table)))
|
||||
|
||||
(defmethod sqltype-list ((schema schema) &key)
|
||||
"Return the list of sqltypes for SCHEMA."
|
||||
(append (schema-sqltype-list schema)
|
||||
(apply #'append
|
||||
(mapcar #'sqltype-list (schema-table-list schema)))))
|
||||
|
||||
(defmethod sqltype-list ((catalog catalog) &key)
|
||||
"Return the list of sqltypes for CATALOG."
|
||||
(remove-duplicates
|
||||
(remove-if #'null
|
||||
(apply #'append
|
||||
(mapcar #'sqltype-list (catalog-schema-list catalog))))
|
||||
:test #'string-equal :key #'sqltype-name))
|
||||
|
||||
(defmethod table-list ((schema schema) &key)
|
||||
"Return the list of tables for SCHEMA."
|
||||
(schema-table-list schema))
|
||||
@ -212,6 +270,17 @@
|
||||
:in-search-path in-search-path)))
|
||||
(push-to-end schema (catalog-schema-list catalog))))
|
||||
|
||||
(defmethod add-extension ((schema schema) extension-name &key)
|
||||
"Add EXTENSION-NAME to SCHEMA and return the new extension instance."
|
||||
(let ((extension
|
||||
(make-extension :name extension-name
|
||||
:schema schema)))
|
||||
(push-to-end extension (schema-extension-list schema))))
|
||||
|
||||
(defmethod add-sqltype ((schema schema) sqltype &key)
|
||||
"Add SQLTYPE instance to SCHEMA and return SQLTYPE."
|
||||
(push-to-end sqltype (schema-sqltype-list schema)))
|
||||
|
||||
(defmethod add-table ((schema schema) table-name &key comment oid)
|
||||
"Add TABLE-NAME to SCHEMA and return the new table instance."
|
||||
(let ((table
|
||||
@ -238,6 +307,11 @@
|
||||
(find schema-name (catalog-schema-list catalog)
|
||||
:key #'schema-source-name :test 'string=))
|
||||
|
||||
(defmethod find-extension ((schema schema) extension-name &key)
|
||||
"Find EXTENSION-NAME in SCHEMA and return the EXTENSION object of this name."
|
||||
(find extension-name (schema-extension-list schema)
|
||||
:key #'extension-name :test 'string=))
|
||||
|
||||
(defmethod find-table ((schema schema) table-name &key)
|
||||
"Find TABLE-NAME in SCHEMA and return the TABLE object of this name."
|
||||
(find table-name (schema-table-list schema)
|
||||
@ -254,6 +328,12 @@
|
||||
(let ((schema (find-schema catalog schema-name)))
|
||||
(or schema (add-schema catalog schema-name))))
|
||||
|
||||
(defmethod maybe-add-extension ((schema schema) extension-name &key)
|
||||
"Add TABLE-NAME to the table-list for SCHEMA, or return the existing table
|
||||
of the same name if it already exists in the schema table-list."
|
||||
(let ((extension (find-extension schema extension-name)))
|
||||
(or extension (add-extension schema extension-name))))
|
||||
|
||||
(defmethod maybe-add-table ((schema schema) table-name &key comment oid)
|
||||
"Add TABLE-NAME to the table-list for SCHEMA, or return the existing table
|
||||
of the same name if it already exists in the schema table-list."
|
||||
@ -297,6 +377,9 @@
|
||||
(loop :for schema :in (catalog-schema-list catalog)
|
||||
:do (cast schema)))
|
||||
|
||||
(defmethod field-name ((column column) &key)
|
||||
(column-name column))
|
||||
|
||||
;;;
|
||||
;;; There's no simple equivalent to array_agg() in MS SQL, so the index and
|
||||
;;; fkey queries return a row per index|fkey column rather than per
|
||||
|
||||
382
src/utils/citus.lisp
Normal file
382
src/utils/citus.lisp
Normal file
@ -0,0 +1,382 @@
|
||||
;;;
|
||||
;;; Citus support in pgloader allows to declare what needs to change in the
|
||||
;;; source schema in terms of Citus concepts: reference and distributed
|
||||
;;; table.
|
||||
;;;
|
||||
|
||||
#|
|
||||
distribute billers using id
|
||||
distribute bills using biller_id
|
||||
distribute receivable_accounts using biller_id
|
||||
distribute payments using biller_id
|
||||
|
||||
distribute splits using biller_id
|
||||
from receivable_accounts
|
||||
|
||||
distribute ach_accounts as reference table
|
||||
|#
|
||||
|
||||
|
||||
(in-package #:pgloader.citus)
|
||||
|
||||
;;;
|
||||
;;; Main data structures to host our distribution rules.
|
||||
;;;
|
||||
(defstruct citus-reference-rule table)
|
||||
(defstruct citus-distributed-rule table using from)
|
||||
|
||||
(defun citus-distribute-schema (catalog distribution-rules)
|
||||
"Distribute a CATALOG with given user provided DISTRIBUTION-RULES. Return
|
||||
the list of rules applied."
|
||||
(let ((processed-rules '())
|
||||
(derived-rules
|
||||
(loop :for rule :in distribution-rules
|
||||
:append (progn
|
||||
(citus-set-table rule catalog)
|
||||
(compute-foreign-rules rule (citus-rule-table rule))))))
|
||||
|
||||
;;
|
||||
;; Apply rules only once.
|
||||
;;
|
||||
;; ERROR Database error 42P16: table ;; "campaigns" is already distributed
|
||||
;;
|
||||
;; In the PostgreSQL source case, we have the table OIDs already at this
|
||||
;; point, but in the general case we don't. Use the names to match what
|
||||
;; we did up to now.
|
||||
;;
|
||||
(loop :for rule :in (append distribution-rules derived-rules)
|
||||
:unless (member (table-source-name (citus-rule-table rule))
|
||||
processed-rules
|
||||
:key (lambda (rule)
|
||||
(table-source-name (citus-rule-table rule)))
|
||||
:test #'equal)
|
||||
:collect (progn
|
||||
(push rule processed-rules)
|
||||
(apply-citus-rule rule)
|
||||
rule))))
|
||||
|
||||
(define-condition citus-rule-table-not-found (error)
|
||||
((schema-name :initarg :schema-name
|
||||
:accessor citus-rule-table-not-found-schema-name)
|
||||
(table-name :initarg :table-name
|
||||
:accessor citus-rule-table-not-found-table-name))
|
||||
(:report
|
||||
(lambda (err stream)
|
||||
(let ((*print-circle* nil))
|
||||
(with-slots (schema-name table-name)
|
||||
err
|
||||
(format stream
|
||||
"Could not find table ~s in schema ~s for distribution rules."
|
||||
table-name schema-name))))))
|
||||
|
||||
(defun citus-find-table (catalog table)
|
||||
(let* ((table-name (cdr (table-source-name table)))
|
||||
(schema-name (schema-name (table-schema table))))
|
||||
(or (find-table (find-schema catalog schema-name) table-name)
|
||||
(error (make-condition 'citus-rule-table-not-found
|
||||
:table-name table-name
|
||||
:schema-name schema-name)))))
|
||||
|
||||
(defgeneric citus-rule-table (rule)
|
||||
(:documentation "Returns the RULE's table.")
|
||||
(:method ((rule citus-reference-rule)) (citus-reference-rule-table rule))
|
||||
(:method ((rule citus-distributed-rule)) (citus-distributed-rule-table rule)))
|
||||
|
||||
(defgeneric citus-set-table (rule catalog)
|
||||
(:documentation "Find citus RULE table in CATALOG and update the
|
||||
placeholder with the table found there.")
|
||||
(:method ((rule citus-reference-rule) (catalog catalog))
|
||||
(let ((table (citus-reference-rule-table rule)))
|
||||
(setf (citus-reference-rule-table rule)
|
||||
(citus-find-table catalog table))))
|
||||
|
||||
(:method ((rule citus-distributed-rule) (catalog catalog))
|
||||
(let ((table (citus-distributed-rule-table rule)))
|
||||
(map-into (citus-distributed-rule-from rule)
|
||||
(lambda (from) (citus-find-table catalog from))
|
||||
(citus-distributed-rule-from rule))
|
||||
(setf (citus-distributed-rule-table rule)
|
||||
(citus-find-table catalog table)))))
|
||||
|
||||
(defmethod print-object ((rule citus-reference-rule) stream)
|
||||
(print-unreadable-object (rule stream :type t :identity t)
|
||||
(with-slots (table) rule
|
||||
(format stream
|
||||
"distribute ~a as reference"
|
||||
(format-table-name table)))))
|
||||
|
||||
(defmethod print-object ((rule citus-distributed-rule) stream)
|
||||
(print-unreadable-object (rule stream :type t :identity t)
|
||||
(with-slots (table using from) rule
|
||||
(format stream
|
||||
"distribute ~a :using ~a~@[ :from ~{~a~^, ~}~]"
|
||||
(format-table-name table)
|
||||
(column-name using)
|
||||
(mapcar #'format-table-name from)))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; When distributing a table on a given key, we can follow foreign keys
|
||||
;;; pointing to this table. We might find out that when computing the
|
||||
;;; following rule:
|
||||
;;;
|
||||
;;; distribute companies using id
|
||||
;;;
|
||||
;;; We then want to add the set of rules that we find walking the foreign
|
||||
;;; keys:
|
||||
;;;
|
||||
;;; distribute campaigns using company_id
|
||||
;;; distribute ads using company_id from campaigns
|
||||
;;; distribute clicks using company_id from ads, campaigns
|
||||
;;; distribute impressions using company_id from ads, campaigns
|
||||
;;;
|
||||
(defgeneric compute-foreign-rules (rule table &key)
|
||||
(:documentation
|
||||
"Compute rules to apply that derive from the distribution rule RULE when
|
||||
following foreign-keys from TABLE."))
|
||||
|
||||
(defmethod compute-foreign-rules ((rule citus-reference-rule)
|
||||
(table table)
|
||||
&key)
|
||||
"There's nothing to do here, reference table doesn't impact the schema."
|
||||
nil)
|
||||
|
||||
(defmethod compute-foreign-rules ((rule citus-distributed-rule)
|
||||
(table table)
|
||||
&key fkey-list)
|
||||
"Find every foreign key that points to TABLE and add return a list of new
|
||||
rules for the source of those foreign keys."
|
||||
(let ((pkey (find-if #'index-primary (table-index-list table))))
|
||||
|
||||
(when (and pkey (member (column-name (citus-distributed-rule-using rule))
|
||||
(index-columns pkey)
|
||||
:test #'string=))
|
||||
(loop :for fkey :in (index-fk-deps pkey)
|
||||
:for new-fkey-list := (cons fkey fkey-list)
|
||||
:for new-rule := (make-distributed-table-from-fkey rule new-fkey-list)
|
||||
:collect new-rule :into new-rule-list
|
||||
:collect (compute-foreign-rules rule (fkey-table fkey)
|
||||
:fkey-list new-fkey-list)
|
||||
:into dep-rule-list
|
||||
:finally (return (append new-rule-list
|
||||
;; flatten sub-lists as we go
|
||||
(apply #'append dep-rule-list)))))))
|
||||
|
||||
(defun make-distributed-table-from-fkey (rule fkey-list)
|
||||
"Make a new Citus distributed table rule from an existing rule and a fkey
|
||||
definition."
|
||||
;;
|
||||
;; We have a list of foreign keys pointing from a current table,
|
||||
;; (fkey-table fkey), to the root table that is distributed,
|
||||
;; (fkey-foreign-table fkey).
|
||||
;;
|
||||
;; For the distribution key name, we consider the name of the column used
|
||||
;; in the last entry from the fkey-list, the column name that points to
|
||||
;; the root.id distribution key and might be named root_id or something.
|
||||
;;
|
||||
;; Then we only need to specifying USING the intermediate tables, the last
|
||||
;; entry gives us the data we need to backfill our tables.
|
||||
;;
|
||||
(let* ((fkey (car (last fkey-list)))
|
||||
(dist-key (column-name (citus-distributed-rule-using rule)))
|
||||
(dist-key-pos (position dist-key
|
||||
(fkey-foreign-columns fkey)
|
||||
:test #'string=))
|
||||
(fkey-table-dist-key (nth dist-key-pos (fkey-columns fkey)))
|
||||
(from-table-list (butlast (mapcar #'fkey-foreign-table fkey-list))))
|
||||
(make-citus-distributed-rule :table (fkey-table (first fkey-list))
|
||||
:using (make-column :name fkey-table-dist-key)
|
||||
:from from-table-list)))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Apply a citus distribution rule to given table, and store the rule
|
||||
;;; itself to the table-citus-rule slot so that we later know to generate a
|
||||
;;; proper SELECT query that includes the backfilling.
|
||||
;;;
|
||||
(define-condition citus-rule-is-missing-from-list (error)
|
||||
((rule :initarg :rule :accessor citus-rule))
|
||||
(:report
|
||||
(lambda (err stream)
|
||||
(let ((*print-circle* nil))
|
||||
(format stream
|
||||
"Failed to add column ~s to table ~a for lack of a FROM clause in the distribute rule:~% distribute ~a using ~a from ?"
|
||||
(column-name (citus-distributed-rule-using (citus-rule err)))
|
||||
(format-table-name (citus-distributed-rule-table (citus-rule err)))
|
||||
(format-table-name (citus-distributed-rule-table (citus-rule err)))
|
||||
(column-name (citus-distributed-rule-using (citus-rule err))))))))
|
||||
|
||||
(defgeneric apply-citus-rule (rule)
|
||||
(:documentation "Apply a Citus distribution RULE to given TABLE."))
|
||||
|
||||
(defmethod apply-citus-rule ((rule citus-reference-rule))
|
||||
;; for a reference table, we have nothing to do really.
|
||||
(setf (table-citus-rule (citus-reference-rule-table rule)) rule)
|
||||
t)
|
||||
|
||||
(defmethod apply-citus-rule ((rule citus-distributed-rule))
|
||||
;; ok now we need to check if the USING column exists or if we need to add
|
||||
;; it to our model
|
||||
(setf (table-citus-rule (citus-distributed-rule-table rule)) rule)
|
||||
|
||||
(let* ((table (citus-distributed-rule-table rule))
|
||||
(column (find (column-name (citus-distributed-rule-using rule))
|
||||
(table-field-list table)
|
||||
:test #'string=
|
||||
:key #'field-name)))
|
||||
(if column
|
||||
|
||||
;; add it to the PKEY definition, in first position
|
||||
(add-column-to-pkey table
|
||||
(column-name (citus-distributed-rule-using rule)))
|
||||
|
||||
;; The column doesn't exist, we need to find it in the :FROM rule's
|
||||
;; list. The :FROM slot of the rule is a list of tables to
|
||||
;; "traverse" when backfilling the data. The list follows the
|
||||
;; foreign-key relationships from TABLE to the source of the
|
||||
;; distribution key.
|
||||
;;
|
||||
;; To find the column definition to add to the current TABLE, look
|
||||
;; it up in the last entry of the FROM rule's list.
|
||||
(let* ((last-from-rule (car (last (citus-distributed-rule-from rule))))
|
||||
(column-definition
|
||||
(when last-from-rule
|
||||
(find (column-name (citus-distributed-rule-using rule))
|
||||
(table-field-list last-from-rule)
|
||||
:test #'string=
|
||||
:key #'column-name)))
|
||||
(new-column
|
||||
(when column-definition
|
||||
(make-column :name (column-name column-definition)
|
||||
:type-name (column-type-name column-definition)
|
||||
:nullable (column-nullable column-definition)
|
||||
:transform (column-transform column-definition)))))
|
||||
|
||||
(if column-definition
|
||||
(progn
|
||||
;;
|
||||
;; Here also we need to add the new column to the PKEY
|
||||
;; definition, in first position.
|
||||
;;
|
||||
(add-column-to-pkey table (column-name new-column))
|
||||
|
||||
;;
|
||||
;; We need to backfill the distribution key in the data,
|
||||
;; which we're implementing with a JOIN when we SELECT from
|
||||
;; the source table. We add the new field here.
|
||||
;;
|
||||
(push new-column (table-field-list table))
|
||||
(push new-column (table-column-list table)))
|
||||
|
||||
;;
|
||||
;; We don't have any table-field-list in the citus rule,
|
||||
;; meaning that the distribute ... using ... clause is lacking
|
||||
;; the FROM part, and we need it.
|
||||
;;
|
||||
(error
|
||||
(make-condition 'citus-rule-is-missing-from-list :rule rule)))))))
|
||||
|
||||
|
||||
(defun add-column-to-pkey (table column-name)
|
||||
"Add COLUMN in the first position of the TABLE's primary key index."
|
||||
(let* ((index (find-if #'index-primary (table-index-list table)))
|
||||
(idxcol (when index
|
||||
(find column-name (index-columns index) :test #'string=))))
|
||||
(when (and index (null idxcol))
|
||||
;; add a new column
|
||||
(push column-name (index-columns index))
|
||||
;; now remove origin schema sql and condef, we need to redo them
|
||||
(setf (index-sql index) nil)
|
||||
(setf (index-condef index) nil)
|
||||
|
||||
;; now tweak the fkey definitions that are using this index
|
||||
(loop :for fkey :in (index-fk-deps index)
|
||||
:do (push column-name (fkey-columns fkey))
|
||||
:do (push column-name (fkey-foreign-columns fkey))
|
||||
:do (setf (fkey-condef fkey) nil)))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Format a query for backfilling the data right from pgloader:
|
||||
;;;
|
||||
;;; SELECT dist_key, * FROM source JOIN pivot ON ...
|
||||
;;;
|
||||
(defun format-citus-join-clause (table distribution-rule)
|
||||
"Format a JOIN clause to backfill the distribution key data in tables that
|
||||
are referencing (even indirectly) the main distribution table."
|
||||
(with-output-to-string (s)
|
||||
(loop :for current-table := table :then rel
|
||||
:for rel :in (citus-distributed-rule-from distribution-rule)
|
||||
:do (let* ((fkey
|
||||
(find (ensure-unquoted (table-name rel))
|
||||
(table-fkey-list current-table)
|
||||
:test #'string=
|
||||
:key (lambda (fkey)
|
||||
(ensure-unquoted
|
||||
(table-name (fkey-foreign-table fkey))))))
|
||||
(ftable (fkey-foreign-table fkey)))
|
||||
(format s
|
||||
" JOIN ~s.~s"
|
||||
(schema-source-name (table-schema ftable))
|
||||
(table-source-name ftable))
|
||||
;;
|
||||
;; Skip the first column in the fkey definition, that's the
|
||||
;; distribution key that was just added by pgloader: we don't
|
||||
;; have it on the source database, we are going to create it on
|
||||
;; the target database.
|
||||
;;
|
||||
(loop :for first := t :then nil
|
||||
:for c :in (cdr (fkey-columns fkey))
|
||||
:for fc :in (cdr (fkey-foreign-columns fkey))
|
||||
:do (format s
|
||||
" ~:[AND~;ON~] ~a.~a = ~a.~a"
|
||||
first
|
||||
(table-source-name (fkey-table fkey))
|
||||
c
|
||||
(table-source-name (fkey-foreign-table fkey))
|
||||
fc))))))
|
||||
|
||||
(defun citus-format-sql-select (source-table target-table)
|
||||
"Return the SQL statement to use to fetch data from the COPY context,
|
||||
including backfilling the distribution key in related tables."
|
||||
|
||||
;;
|
||||
;; SELECT from.id, id, ... from source join from-table ...
|
||||
;;
|
||||
;; So we must be careful to prefix the column names with the
|
||||
;; proper table name, because of the join(s), and the first column
|
||||
;; in the output is taken from the main FROM table (the last one
|
||||
;; in the rule).
|
||||
;;
|
||||
(let* ((last-from-rule
|
||||
(car (last (citus-distributed-rule-from
|
||||
(table-citus-rule target-table)))))
|
||||
(cols
|
||||
(append (list
|
||||
(format nil "~a.~a"
|
||||
(table-name last-from-rule)
|
||||
(column-name (first (table-field-list source-table)))))
|
||||
(mapcar (lambda (field)
|
||||
(format nil "~a.~a"
|
||||
(table-name source-table)
|
||||
(column-name field)))
|
||||
(rest (table-field-list source-table)))))
|
||||
(joins
|
||||
(format-citus-join-clause source-table
|
||||
(table-citus-rule target-table))))
|
||||
(format nil
|
||||
"SELECT ~{~a::text~^, ~} FROM ~s.~s ~a"
|
||||
cols
|
||||
(schema-source-name (table-schema source-table))
|
||||
(table-source-name source-table)
|
||||
joins)))
|
||||
|
||||
;;;
|
||||
;;; Predicate to see if a table needs backfilling
|
||||
;;;
|
||||
(defun citus-backfill-table-p (table)
|
||||
"Returns non-nil when given TABLE should be backfilled with the
|
||||
distribution key."
|
||||
(and (table-citus-rule table)
|
||||
(typep (table-citus-rule table) 'citus-distributed-rule)
|
||||
(not (null (citus-distributed-rule-from (table-citus-rule table))))))
|
||||
@ -147,7 +147,8 @@
|
||||
(*summary-pathname* . ,*summary-pathname*)
|
||||
(*sections* . ',*sections*)))
|
||||
(kernel (lp:make-kernel 1 :bindings bindings))
|
||||
(lparallel:*kernel* kernel))
|
||||
(lparallel:*kernel* kernel)
|
||||
(lparallel:*task-category* :monitor))
|
||||
|
||||
;; make our kernel and channel visible from the outside
|
||||
(setf *monitoring-kernel* kernel
|
||||
@ -155,7 +156,8 @@
|
||||
*monitoring-queue* (lq:make-queue))
|
||||
|
||||
(lp:task-handler-bind
|
||||
((error
|
||||
(#+pgloader-image
|
||||
(error
|
||||
#'(lambda (c)
|
||||
;; we can't log-message a monitor thread error
|
||||
(lp:invoke-transfer-error
|
||||
@ -212,7 +214,8 @@
|
||||
(start
|
||||
(when (start-start-logger event)
|
||||
(pgloader.logs:start-logger))
|
||||
(cl-log:log-message :info "Starting monitor"))
|
||||
(cl-log:log-message :info "Starting monitor")
|
||||
(cl-log:log-message :log "pgloader version ~s" *version-string*))
|
||||
|
||||
(stop
|
||||
(cl-log:log-message :info "Stopping monitor")
|
||||
|
||||
@ -66,3 +66,22 @@
|
||||
(recompute-fs-and-retry ()
|
||||
(setf *fs* (walk-sources-and-build-fs))
|
||||
(sql url))))
|
||||
|
||||
(defun sql-url-for-variant (base filename &optional variant)
|
||||
"Build a SQL URL for given VARIANT"
|
||||
(flet ((sql-base-url (base filename)
|
||||
(format nil "/~a/~a" base filename)))
|
||||
(if variant
|
||||
(let ((sql-variant-url
|
||||
(format nil "/~a/~a/~a"
|
||||
base
|
||||
(string-downcase (typecase variant
|
||||
(symbol (symbol-name variant))
|
||||
(string variant)
|
||||
(t (princ-to-string variant))))
|
||||
filename)))
|
||||
(if (gethash sql-variant-url *fs*)
|
||||
sql-variant-url
|
||||
(sql-base-url base filename)))
|
||||
|
||||
(sql-base-url base filename))))
|
||||
|
||||
@ -7,7 +7,9 @@
|
||||
|
||||
(defun make-kernel (worker-count
|
||||
&key (bindings
|
||||
`((*monitoring-queue* . ,*monitoring-queue*)
|
||||
`((*print-circle* . ,*print-circle*)
|
||||
(*print-pretty* . ,*print-pretty*)
|
||||
(*monitoring-queue* . ,*monitoring-queue*)
|
||||
(*copy-batch-rows* . ,*copy-batch-rows*)
|
||||
(*copy-batch-size* . ,*copy-batch-size*)
|
||||
(*rows-per-range* . ,*rows-per-range*)
|
||||
@ -28,6 +30,10 @@
|
||||
;; bindings updates for libs
|
||||
;; CFFI is used by the SQLite lib
|
||||
(cffi:*default-foreign-encoding*
|
||||
. ,cffi:*default-foreign-encoding*))))
|
||||
. ,cffi:*default-foreign-encoding*)
|
||||
|
||||
;; CL+SSL can be picky about verifying certs
|
||||
(cl+ssl:*make-ssl-client-stream-verify-default*
|
||||
. ,cl+ssl:*make-ssl-client-stream-verify-default*))))
|
||||
"Wrapper around lparallel:make-kernel that sets our usual bindings."
|
||||
(lp:make-kernel worker-count :bindings bindings))
|
||||
|
||||
@ -53,7 +53,7 @@
|
||||
(string= "set" data-type))
|
||||
(let ((start-1 (position #\( column-type)) ; just before start position
|
||||
(end (position #\) column-type))) ; just before end position
|
||||
(when start-1
|
||||
(when (and start-1 (< (+ 1 start-1) end))
|
||||
(destructuring-bind (a &optional b)
|
||||
(mapcar #'parse-integer
|
||||
(sq:split-sequence #\, column-type
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
LOAD CSV
|
||||
FROM inline (a, b, c)
|
||||
INTO postgresql:///pgloader?allcols (a, b, c)
|
||||
INTO postgresql:///pgloader?allcols (a, b, c text using (subseq c 0))
|
||||
|
||||
WITH fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
|
||||
@ -8,7 +8,8 @@
|
||||
*/
|
||||
|
||||
LOAD ARCHIVE
|
||||
FROM http://pgsql.tapoueh.org/temp/foo.zip
|
||||
-- FROM http://pgsql.tapoueh.org/temp/foo.zip
|
||||
FROM http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip
|
||||
INTO postgresql:///ip4r
|
||||
|
||||
BEFORE LOAD
|
||||
|
||||
1
test/citus/.gitignore
vendored
Normal file
1
test/citus/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
*.csv
|
||||
20
test/citus/Makefile
Normal file
20
test/citus/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
DATASET = companies campaigns ads clicks impressions geo_ips
|
||||
CSV = $(addsuffix .csv,$(DATASET))
|
||||
DROP = DROP TABLE IF EXISTS companies, campaigns, ads, clicks, impressions, geo_ips
|
||||
|
||||
all: schema data ;
|
||||
|
||||
schema:
|
||||
psql --single-transaction -c "$(DROP)" -d hackathon
|
||||
psql --single-transaction -f company.sql -d hackathon
|
||||
|
||||
data: fetch
|
||||
psql -f copy.sql -d hackathon
|
||||
../../build/bin/pgloader ./data.load
|
||||
|
||||
fetch: $(CSV) ;
|
||||
|
||||
%.csv:
|
||||
curl -O https://examples.citusdata.com/mt_ref_arch/$@
|
||||
|
||||
.PHONY: schema data fetch
|
||||
42
test/citus/README.md
Normal file
42
test/citus/README.md
Normal file
@ -0,0 +1,42 @@
|
||||
# Citus Multi-Tenant Automatic Distribution
|
||||
|
||||
In this test case we follow the following documentation:
|
||||
|
||||
https://docs.citusdata.com/en/v7.5/use_cases/multi_tenant.html
|
||||
|
||||
We install the schema before Citus migration, and load the data without the
|
||||
backfilling that is already done. For that we use pgloader to ignore the
|
||||
company_id column in the tables that didn't have this column prior to the
|
||||
Citus migration effort.
|
||||
|
||||
Then the following `company.load` file contains the pgloader command that
|
||||
runs a full migration from PostgreSQL to Citus:
|
||||
|
||||
```
|
||||
load database
|
||||
from pgsql:///hackathon
|
||||
into pgsql://localhost:9700/dim
|
||||
|
||||
with include drop, reset no sequences
|
||||
|
||||
distribute companies using id;
|
||||
```
|
||||
|
||||
Tables are marked distributed, the company_id column is added where it's
|
||||
needed, primary keys and foreign keys definitions are altered to the new
|
||||
model, and finally the data is backfilled automatically in the target table
|
||||
thanks to generating queries like the following:
|
||||
|
||||
~~~
|
||||
SELECT "campaigns".company_id::text,
|
||||
"impressions".id::text,
|
||||
"impressions".ad_id::text,
|
||||
"impressions".seen_at::text,
|
||||
"impressions".site_url::text,
|
||||
"impressions".cost_per_impression_usd::text,
|
||||
"impressions".user_ip::text,
|
||||
"impressions".user_data::text
|
||||
FROM "public"."impressions"
|
||||
JOIN "public"."ads" ON impressions.ad_id = ads.id
|
||||
JOIN "public"."campaigns" ON ads.campaign_id = campaigns.id
|
||||
~~~
|
||||
14
test/citus/company.load
Normal file
14
test/citus/company.load
Normal file
@ -0,0 +1,14 @@
|
||||
load database
|
||||
from pgsql:///hackathon
|
||||
into pgsql://localhost:9700/dim
|
||||
|
||||
with include drop, reset no sequences
|
||||
|
||||
cast column impressions.seen_at to "timestamp with time zone"
|
||||
|
||||
distribute companies using id
|
||||
-- distribute campaigns using company_id
|
||||
-- distribute ads using company_id from campaigns
|
||||
-- distribute clicks using company_id from ads, campaigns
|
||||
-- distribute impressions using company_id from ads, campaigns
|
||||
;
|
||||
51
test/citus/company.sql
vendored
Normal file
51
test/citus/company.sql
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
CREATE TABLE companies (
|
||||
id bigserial PRIMARY KEY,
|
||||
name text NOT NULL,
|
||||
image_url text,
|
||||
created_at timestamp without time zone NOT NULL,
|
||||
updated_at timestamp without time zone NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE campaigns (
|
||||
id bigserial PRIMARY KEY,
|
||||
company_id bigint REFERENCES companies (id),
|
||||
name text NOT NULL,
|
||||
cost_model text NOT NULL,
|
||||
state text NOT NULL,
|
||||
monthly_budget bigint,
|
||||
blacklisted_site_urls text[],
|
||||
created_at timestamp without time zone NOT NULL,
|
||||
updated_at timestamp without time zone NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE ads (
|
||||
id bigserial PRIMARY KEY,
|
||||
campaign_id bigint REFERENCES campaigns (id),
|
||||
name text NOT NULL,
|
||||
image_url text,
|
||||
target_url text,
|
||||
impressions_count bigint DEFAULT 0,
|
||||
clicks_count bigint DEFAULT 0,
|
||||
created_at timestamp without time zone NOT NULL,
|
||||
updated_at timestamp without time zone NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE clicks (
|
||||
id bigserial PRIMARY KEY,
|
||||
ad_id bigint REFERENCES ads (id),
|
||||
clicked_at timestamp without time zone NOT NULL,
|
||||
site_url text NOT NULL,
|
||||
cost_per_click_usd numeric(20,10),
|
||||
user_ip inet NOT NULL,
|
||||
user_data jsonb NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE impressions (
|
||||
id bigserial PRIMARY KEY,
|
||||
ad_id bigint REFERENCES ads (id),
|
||||
seen_at timestamp without time zone NOT NULL,
|
||||
site_url text NOT NULL,
|
||||
cost_per_impression_usd numeric(20,10),
|
||||
user_ip inet NOT NULL,
|
||||
user_data jsonb NOT NULL
|
||||
);
|
||||
5
test/citus/copy.sql
vendored
Normal file
5
test/citus/copy.sql
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
\copy companies from 'companies.csv' with csv
|
||||
\copy campaigns from 'campaigns.csv' with csv
|
||||
-- \copy ads from 'ads.csv' with csv
|
||||
-- \copy clicks from 'clicks.csv' with csv
|
||||
-- \copy impressions from 'impressions.csv' with csv
|
||||
68
test/citus/data.load
Normal file
68
test/citus/data.load
Normal file
@ -0,0 +1,68 @@
|
||||
--
|
||||
-- Ads
|
||||
--
|
||||
load csv
|
||||
from ads.csv
|
||||
(
|
||||
id, company_id, campaign_id, name, image_url, target_url,
|
||||
impressions_count, clicks_count, created_at, updated_at
|
||||
)
|
||||
|
||||
into postgresql:///hackathon
|
||||
|
||||
target table ads
|
||||
target columns
|
||||
(
|
||||
id, campaign_id, name, image_url, target_url,
|
||||
impressions_count, clicks_count, created_at, updated_at
|
||||
)
|
||||
|
||||
with fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by ',';
|
||||
|
||||
--
|
||||
-- Clicks
|
||||
--
|
||||
load csv
|
||||
from clicks.csv
|
||||
(
|
||||
id, company_id, ad_id, clicked_at, site_url, cost_per_click_usd,
|
||||
user_ip, user_data
|
||||
)
|
||||
|
||||
into postgresql:///hackathon
|
||||
|
||||
target table clicks
|
||||
target columns
|
||||
(
|
||||
id, ad_id, clicked_at, site_url, cost_per_click_usd, user_ip, user_data
|
||||
)
|
||||
|
||||
with fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by ',';
|
||||
|
||||
|
||||
--
|
||||
-- Impressions
|
||||
--
|
||||
load csv
|
||||
from impressions.csv
|
||||
(
|
||||
id, company_id, ad_id, seen_at, site_url,
|
||||
cost_per_impression_usd, user_ip, user_data
|
||||
)
|
||||
|
||||
into postgresql:///hackathon
|
||||
|
||||
target table impressions
|
||||
target columns
|
||||
(
|
||||
id, ad_id, seen_at, site_url, cost_per_impression_usd, user_ip, user_data
|
||||
)
|
||||
|
||||
with drop indexes,
|
||||
fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by ',';
|
||||
@ -15,11 +15,11 @@ LOAD CSV
|
||||
"repl$grpid" text,
|
||||
"repl$id" text,
|
||||
another text,
|
||||
fields text
|
||||
fields integer
|
||||
)
|
||||
$$;
|
||||
|
||||
|
||||
somefields,rekplcode,repl$grpid,repl$id,another,fields
|
||||
a,b,c,d,e,f
|
||||
foo,bar,baz,quux,foobar,fizzbuzz
|
||||
somefields,rekplcode,repl$grpid,repl$id,fields,another
|
||||
a,b,c,d,1,e
|
||||
foo,bar,baz,quux,2,foobar
|
||||
|
||||
22
test/csv-null-if.load
Normal file
22
test/csv-null-if.load
Normal file
@ -0,0 +1,22 @@
|
||||
LOAD CSV
|
||||
FROM INLINE (id, number, data)
|
||||
INTO postgresql:///pgloader?nullif
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ drop table if exists nullif; $$,
|
||||
$$ CREATE TABLE nullif
|
||||
(
|
||||
id serial primary key,
|
||||
number integer,
|
||||
data text
|
||||
);
|
||||
$$
|
||||
|
||||
WITH null if '\N',
|
||||
fields terminated by ',',
|
||||
fields enclosed by '"',
|
||||
fields escaped by backslash-quote;
|
||||
|
||||
|
||||
"1",\N,"testing nulls"
|
||||
"2","2","another test"
|
||||
@ -28,3 +28,4 @@ LOAD CSV
|
||||
1,10-02-1999 00-33-12.123456,"00:05.02"
|
||||
2,10-02-2014 00-33-13.123,"18:25.52"
|
||||
3,10-02-2014 00-33-14.1234,13:14.15
|
||||
4,10-09-2018 19-24-59,19:24.59
|
||||
|
||||
@ -4,7 +4,7 @@ LOAD DATABASE
|
||||
|
||||
WITH data only, truncate, create no tables
|
||||
|
||||
MATERIALIZE VIEWS proceed
|
||||
MATERIALIZE VIEWS proceed, foo as $$ select 1 as a; $$
|
||||
|
||||
INCLUDING ONLY TABLE NAMES MATCHING 'proceed'
|
||||
|
||||
@ -13,5 +13,6 @@ LOAD DATABASE
|
||||
$$ drop schema if exists db789 cascade; $$,
|
||||
$$ create schema db789; $$,
|
||||
$$ create table db789.refrain (id char(1) primary key); $$,
|
||||
$$ create table db789.proceed (id char(1) primary key); $$;
|
||||
$$ create table db789.proceed (id char(1) primary key); $$,
|
||||
$$ create table db789.foo (a integer primary key); $$;
|
||||
|
||||
|
||||
19
test/mysql/f1db-citus.load
Normal file
19
test/mysql/f1db-citus.load
Normal file
@ -0,0 +1,19 @@
|
||||
load database
|
||||
from mysql://root@localhost/f1db?useSSL=false
|
||||
into pgsql://localhost:9700/dim
|
||||
|
||||
with reset no sequences
|
||||
|
||||
distribute f1db.circuits as reference table
|
||||
distribute f1db.constructorResults using raceId
|
||||
distribute f1db.constructors as reference table
|
||||
distribute f1db.constructorStandings using raceId
|
||||
distribute f1db.drivers as reference table
|
||||
distribute f1db.driverStandings using raceId
|
||||
distribute f1db.lapTimes using raceId
|
||||
distribute f1db.pitStops using raceId
|
||||
distribute f1db.qualifying using raceId
|
||||
distribute f1db.races as reference table
|
||||
distribute f1db.results using raceId
|
||||
distribute f1db.seasons as reference table
|
||||
distribute f1db.status as reference table;
|
||||
@ -9,6 +9,7 @@ load database
|
||||
quote identifiers
|
||||
|
||||
ALTER SCHEMA 'pgloader' RENAME TO 'mysql'
|
||||
ALTER TABLE NAMES MATCHING ~/./ SET TABLESPACE 'pg_default'
|
||||
|
||||
CAST column utilisateurs__Yvelines2013-06-28.sexe
|
||||
to text drop not null using empty-string-to-null,
|
||||
|
||||
18
test/mysql/my.sql
vendored
18
test/mysql/my.sql
vendored
@ -100,6 +100,24 @@ create table bits
|
||||
|
||||
insert into bits(bool) values(0b00), (0b01);
|
||||
|
||||
/*
|
||||
* https://github.com/dimitri/pgloader/issues/811
|
||||
*/
|
||||
CREATE TABLE `domain_filter` (
|
||||
`id` binary(16) NOT NULL ,
|
||||
`type` varchar(50) NOT NULL ,
|
||||
`value` json DEFAULT NULL ,
|
||||
`negated` tinyint(1) NOT NULL DEFAULT '0' ,
|
||||
`report_id` varbinary(255) NOT NULL ,
|
||||
`query_id` varchar(255) NOT NULL ,
|
||||
`created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ,
|
||||
`updated_at` datetime DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP ,
|
||||
`updated_by` varbinary(255) DEFAULT NULL ,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `domain_filter_unq` (`report_id`,`query_id`,`type`),
|
||||
KEY `domain_filter` (`type`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=ascii;
|
||||
|
||||
/*
|
||||
* https://github.com/dimitri/pgloader/issues/703
|
||||
*/
|
||||
|
||||
14
test/pgsql-source.load
Normal file
14
test/pgsql-source.load
Normal file
@ -0,0 +1,14 @@
|
||||
load database
|
||||
from pgsql://localhost/pgloader
|
||||
into pgsql://localhost/copy
|
||||
|
||||
-- including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
|
||||
including only table names matching ~/geolocations/ in schema 'public'
|
||||
|
||||
materialize views some_usps
|
||||
as $$
|
||||
select usps, geoid, aland, awater, aland_sqmi, awater_sqmi, location
|
||||
from districts
|
||||
where usps in ('MT', 'DE', 'AK', 'WY', 'PR', 'VT', 'SD', 'DC', 'ND');
|
||||
$$
|
||||
;
|
||||
@ -1,2 +1,2 @@
|
||||
a b c d e f
|
||||
foo bar baz quux foobar fizzbuzz
|
||||
a b c d e 1
|
||||
foo bar baz quux foobar 2
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
1 1999-10-02 00:33:12.123456+02 00:05:02
|
||||
2 2014-10-02 00:33:13.123+02 18:25:52
|
||||
3 2014-10-02 00:33:14.1234+02 13:14:15
|
||||
4 2018-10-09 19:24:59+02 19:24:59
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user