Merge branch 'master' into debian

This commit is contained in:
Christoph Berg 2019-01-21 16:09:05 +01:00
commit de38a4473a
99 changed files with 3637 additions and 392 deletions

View File

@ -1,3 +1,5 @@
.git
.vagrant
build
Dockerfile
Dockerfile.ccl

View File

@ -1,20 +1,47 @@
FROM debian:stretch
MAINTAINER Dimitri Fontaine <dim@tapoueh.org>
FROM debian:stable-slim as builder
RUN apt-get update && \
apt-get install -y --no-install-recommends \
wget curl make git bzip2 time \
ca-certificates \
libzip-dev libssl1.1 openssl \
patch unzip libsqlite3-dev gawk \
freetds-dev sbcl && \
rm -rf /var/lib/apt/lists/*
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bzip2 \
ca-certificates \
curl \
freetds-dev \
gawk \
git \
libsqlite3-dev \
libssl1.1 \
libzip-dev \
make \
openssl \
patch \
sbcl \
time \
unzip \
wget \
cl-ironclad \
cl-babel \
&& rm -rf /var/lib/apt/lists/*
ADD ./ /opt/src/pgloader
WORKDIR /opt/src/pgloader
COPY ./ /opt/src/pgloader
# build/ is in the .dockerignore file, but we actually need it now
RUN mkdir -p build/bin
RUN make
RUN mkdir -p /opt/src/pgloader/build/bin \
&& cd /opt/src/pgloader \
&& make
RUN cp /opt/src/pgloader/build/bin/pgloader /usr/local/bin
FROM debian:stable-slim
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
curl \
freetds-dev \
gawk \
libsqlite3-dev \
libzip-dev \
make \
sbcl \
unzip \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"

View File

@ -1,25 +1,51 @@
FROM debian:stretch
MAINTAINER Dimitri Fontaine <dim@tapoueh.org>
FROM debian:stable-slim as builder
RUN apt-get update && \
apt-get install -y --no-install-recommends \
wget curl make git bzip2 time \
ca-certificates \
libzip-dev libssl1.1 openssl \
patch unzip libsqlite3-dev gawk \
freetds-dev sbcl && \
rm -rf /var/lib/apt/lists/*
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bzip2 \
ca-certificates \
curl \
freetds-dev \
gawk \
git \
libsqlite3-dev \
libssl1.1 \
libzip-dev \
make \
openssl \
patch \
sbcl \
time \
unzip \
wget \
cl-ironclad \
cl-babel \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /usr/local/src
RUN curl --location -O https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz
RUN tar xf ccl-1.11.5-linuxx86.tar.gz
RUN cp /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl
RUN curl -SL https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz \
| tar xz -C /usr/local/src/ \
&& mv /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl
ADD ./ /opt/src/pgloader
WORKDIR /opt/src/pgloader
COPY ./ /opt/src/pgloader
# build/ is in the .dockerignore file, but we actually need it now
RUN mkdir -p build/bin
RUN make CL=ccl DYNSIZE=256
RUN mkdir -p /opt/src/pgloader/build/bin \
&& cd /opt/src/pgloader \
&& make CL=ccl DYNSIZE=256
RUN cp /opt/src/pgloader/build/bin/pgloader /usr/local/bin
FROM debian:stable-slim
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
curl \
freetds-dev \
gawk \
libsqlite3-dev \
libzip-dev \
make \
sbcl \
unzip \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"

View File

@ -1,6 +1,6 @@
# pgloader build tool
APP_NAME = pgloader
VERSION = 3.5.2
VERSION = 3.6.1
# use either sbcl or ccl
CL = sbcl
@ -24,7 +24,7 @@ QLDIR = $(BUILDDIR)/quicklisp
MANIFEST = $(BUILDDIR)/manifest.ql
LATEST = $(BUILDDIR)/pgloader-latest.tgz
BUNDLEDIST = 2018-04-30
BUNDLEDIST = 2019-01-07
BUNDLENAME = pgloader-bundle-$(VERSION)
BUNDLEDIR = $(BUILDDIR)/bundle/$(BUNDLENAME)
BUNDLE = $(BUILDDIR)/$(BUNDLENAME).tgz
@ -99,8 +99,11 @@ clones: $(QLDIR)/local-projects/cl-ixf \
$(QLDIR)/local-projects/cl-csv \
$(QLDIR)/local-projects/qmynd ;
$(LIBS): $(QLDIR)/setup.lisp clones
$(LIBS): $(QLDIR)/setup.lisp
$(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp \
--eval '(push :pgloader-image *features*)' \
--eval '(setf *print-circle* t *print-pretty* t)' \
--eval '(ql:quickload "pgloader")' \
--eval '(push "$(PWD)/" ql:*local-project-directories*)' \
--eval '(ql:quickload "pgloader")' \
--eval '(quit)'
@ -141,8 +144,11 @@ $(PGLOADER): $(MANIFEST) $(BUILDAPP) $(LISP_SRC)
--manifest-file $(MANIFEST) \
--asdf-tree $(QLDIR)/dists \
--asdf-path . \
--load-system $(APP_NAME) \
--load-system cffi \
--load-system cl+ssl \
--load-system mssql \
--load src/hooks.lisp \
--load-system $(APP_NAME) \
--entry pgloader:main \
--dynamic-space-size $(DYNSIZE) \
$(COMPRESS_CORE_OPT) \
@ -164,6 +170,12 @@ pgloader-standalone:
test: $(PGLOADER)
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
save: ./src/save.lisp $(LISP_SRC)
$(CL) $(CL_OPTS) --load ./src/save.lisp
check-saved: save
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
clean-bundle:
rm -rf $(BUNDLEDIR)
rm -rf $(BUNDLETESTD)/$(BUNDLENAME)/*
@ -179,8 +191,12 @@ $(BUNDLEDIR):
--eval '(defvar *ql-dist* "$(BUNDLEDIST)")' \
--load bundle/ql.lisp
$(BUNDLE): $(BUNDLEDIR)
$(BUNDLEDIR)/version.sexp: $(BUNDLEDIR)
echo "\"$(VERSION)\"" > $@
$(BUNDLE): $(BUNDLEDIR) $(BUNDLEDIR)/version.sexp
cp bundle/README.md $(BUNDLEDIR)
cp bundle/save.lisp $(BUNDLEDIR)
sed -e s/%VERSION%/$(VERSION)/ < bundle/Makefile > $(BUNDLEDIR)/Makefile
git archive --format=tar --prefix=pgloader-$(VERSION)/ master \
| tar -C $(BUNDLEDIR)/local-projects/ -xf -

View File

@ -117,6 +117,16 @@ pgloader:
<https://github.com/dimitri/pgloader/issues?utf8=✓&q=label%3A%22Windows%20support%22%20>
### Building Docker image from sources
You can build a Docker image from source using SBCL by default:
$ docker build .
Or Clozure CL (CCL):
$ docker build -f Dockerfile.ccl .
## More options when building from source
The `Makefile` target `pgloader` knows how to produce a Self Contained

View File

@ -48,9 +48,12 @@ $(PGLOADER): $(BUILDAPP)
$(BUILDAPP_OPTS) \
--sbcl $(CL) \
--asdf-tree . \
--load-system cffi \
--load-system cl+ssl \
--load-system mssql \
--load $(SRCDIR)/src/hooks.lisp \
--load-system $(APP_NAME) \
--eval '(setf pgloader.params::*version-string* "$(VERSION)")' \
--load $(SRCDIR)/src/hooks.lisp \
--entry pgloader:main \
--dynamic-space-size $(DYNSIZE) \
$(COMPRESS_CORE_OPT) \
@ -61,4 +64,7 @@ $(PGLOADER): $(BUILDAPP)
test: $(PGLOADER)
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) -C $(SRCDIR)/test regress
save:
sbcl --no-userinit --load ./save.lisp
check: test ;

47
bundle/save.lisp Normal file
View File

@ -0,0 +1,47 @@
;;;
;;; Create a build/bin/pgloader executable from the source code, using
;;; Quicklisp to load pgloader and its dependencies.
;;;
(in-package #:cl-user)
(require :asdf) ; should work in SBCL and CCL
(let* ((cwd (uiop:getcwd))
(bundle.lisp (uiop:merge-pathnames* "bundle.lisp" cwd))
(version-file (uiop:merge-pathnames* "version.sexp" cwd))
(version-string (uiop:read-file-form version-file))
(asdf:*central-registry* (list cwd)))
(format t "Loading bundle.lisp~%")
(load bundle.lisp)
(format t "Loading system pgloader ~a~%" version-string)
(asdf:load-system :pgloader :verbose nil)
(load (asdf:system-relative-pathname :pgloader "src/hooks.lisp"))
(let* ((pgl (find-package "PGLOADER"))
(version-symbol (find-symbol "*VERSION-STRING*" pgl)))
(setf (symbol-value version-symbol) version-string)))
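;;; The toplevel function for the saved image: it forwards the operating
;;; system arguments to pgloader's main function.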
(defun pgloader-image-main ()
(let ((argv #+sbcl sb-ext:*posix-argv*
#+ccl ccl:*command-line-argument-list*))
(pgloader::main argv)))
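;;; Finally, save an executable image as bin/pgloader under the current
;;; directory, using the implementation specific image dumping facility.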
(let* ((cwd (uiop:getcwd))
(bin-dir (uiop:merge-pathnames* "bin/" cwd))
(bin-filename (uiop:merge-pathnames* "pgloader" bin-dir)))
(ensure-directories-exist bin-dir)
#+ccl
(ccl:save-application bin-filename
:toplevel-function #'cl-user::pgloader-image-main
:prepend-kernel t)
#+sbcl
(sb-ext:save-lisp-and-die bin-filename
:toplevel #'cl-user::pgloader-image-main
:executable t
:save-runtime-options t
:compression t))
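;;;
;;; Usage sketch (this mirrors the bundle Makefile's `save' target and is run
;;; from the unpacked bundle directory; the paths are illustrative):
;;;
;;;   $ sbcl --no-userinit --load ./save.lisp
;;;   $ ./bin/pgloader --version
;;;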

View File

@ -83,7 +83,8 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
#html_theme = 'alabaster'
html_theme = 'sphinx_rtd_theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the

View File

@ -6,11 +6,242 @@
Welcome to pgloader's documentation!
====================================
pgloader loads data from various sources into PostgreSQL. It can transform
the data it reads on the fly and submit raw SQL before and after the
loading. It uses the `COPY` PostgreSQL protocol to stream the data into the
server, and manages errors by filling a pair of *reject.dat* and
*reject.log* files.
Thanks to being able to load data directly from a database source, pgloader
also supports migrations from other products to PostgreSQL. In this mode of
operation, pgloader handles both the schema and data parts of the migration,
in a single unattended command, allowing you to implement **Continuous
Migration**.
Features Overview
=================
pgloader has two modes of operation: loading from files and migrating
databases. In both cases, pgloader uses the PostgreSQL COPY protocol, which
streams the data to the server in a very efficient way.
Loading file content in PostgreSQL
----------------------------------
When loading from files, pgloader implements the following features:
Many source formats supported
Support for a wide variety of file based formats is included in
pgloader: the CSV family, fixed column formats, dBase files (``db3``),
and IBM IXF files.
The SQLite database engine is accounted for in the next section:
pgloader considers SQLite as a database source and implements schema
discovery from SQLite catalogs.
On the fly data transformation
Often enough the data as read from a CSV file (or another format) needs
some tweaking and clean-up before being sent to PostgreSQL.
For instance in the `geolite
<https://github.com/dimitri/pgloader/blob/master/test/archive.load>`_
example we can see that integer values are being rewritten as IP address
ranges, allowing pgloader to target an ``ip4r`` column directly.
Full Field projections
pgloader supports loading data into fewer fields than found in the file,
or more, doing some computation on the data read before sending it to
PostgreSQL.
Reading files from an archive
Archive formats *zip*, *tar*, and *gzip* are supported by pgloader: the
archive is extracted into a temporary directory and the expanded files
are then loaded.
HTTP(S) support
pgloader knows how to download a source file or a source archive using
HTTP directly. It might be better, though, to use ``curl -O- http://... |
pgloader`` and read the data from *standard input*, thus allowing the
data to be streamed from its source down to PostgreSQL.
Target schema discovery
When loading into an existing table, pgloader takes into account the
existing columns and may automatically guess the CSV format for you.
On error stop / On error resume next
In some cases the source data is so damaged as to be impossible to
migrate in full; when loading from a file, the default for pgloader is
the ``on error resume next`` option, where the rows rejected by
PostgreSQL are saved away and the migration continues with the other
rows.
In other cases loading only a part of the input data might not be a
great idea, and in such cases it's possible to use the ``on error stop``
option.
Pre/Post SQL commands
This feature allows pgloader commands to include SQL commands to run
before and after loading a file. It might be about creating a table
first, then loading the data into it, and then doing more processing on
top of the data (thus implementing an *ELT* pipeline), or creating
specific indexes as soon as the data has been made ready. A sketch of
such a command file follows this list.
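Here is a minimal sketch of such a command file, combining a pre/post SQL
step with a plain CSV load; the file path, table and column names are
illustrative only:
::
LOAD CSV
     FROM '/tmp/countries.csv' (code, name)
     INTO postgresql:///geodata?countries (code, name)
     WITH truncate,
          skip header = 1,
          fields terminated by ','
  BEFORE LOAD DO
    $$ create table if not exists countries (code text, name text); $$
   AFTER LOAD DO
    $$ create index on countries (code); $$;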
One-command migration to PostgreSQL
-----------------------------------
When migrating a full database in a single command, pgloader implements the
following features:
One-command migration
The whole migration is started with a single command line and then runs
unattended. pgloader is meant to be integrated into fully automated
tooling that you can run again as many times as needed.
Schema discovery
The source database is introspected using its SQL catalogs to get the
list of tables, attributes (with data types, default values, not null
constraints, etc), primary key constraints, foreign key constraints,
indexes, comments, etc. This feeds an internal database catalog of all
the objects to migrate from the source database to the target database.
User defined casting rules
Some source databases have ideas about their data types that might not
be compatible with PostgreSQL's implementation of equivalent data types.
For instance, SQLite since version 3 has a `Dynamic Type System
<https://www.sqlite.org/datatype3.html>`_ which of course isn't
compatible with the idea of a `Relation
<https://en.wikipedia.org/wiki/Relation_(database)>`_. Or MySQL accepts
datetime for year zero, which doesn't exist in our calendar, and
doesn't have a boolean data type.
When migrating from another source database technology to PostgreSQL,
data type casting choices must be made. pgloader implements solid
defaults that you can rely upon, and a facility for **user defined data
type casting rules** for specific cases. The idea is to allow users to
specify how the migration should be done, in order for it to be
repeatable and included in a *Continuous Migration* process.
On the fly data transformations
The user defined casting rules come with on-the-fly rewriting of the data.
For instance zero dates (it's not just the year, MySQL accepts
``0000-00-00`` as a valid datetime) are rewritten to NULL values by
default.
Partial Migrations
It is possible to include only a partial list of the source database
tables in the migration, or to exclude some of the tables on the source
database.
Schema only, Data only
This is the **ORM compatibility** feature of pgloader, where it is
possible to create the schema using your ORM and then have pgloader
migrate the data targeting this already created schema.
When doing this, it is possible for pgloader to *reindex* the target
schema: before loading the data from the source database into PostgreSQL
using COPY, pgloader DROPs the indexes and constraints, and reinstalls
the exact same definitions of them once the data has been loaded.
The reason for operating that way is of course data load performance.
Repeatable (DROP+CREATE)
By default, pgloader issues DROP statements in the target PostgreSQL
database before issuing any CREATE statement, so that you can repeat the
migration as many times as necessary until migration specifications and
rules are bug free.
Then schedule the data migration to run every night (or even more often!)
for the whole duration of the code migration project. See the
`Continuous Migration <https://pgloader.io/blog/continuous-migration/>`_
methodology for more details about the approach.
On error stop / On error resume next
The default behavior of pgloader when migrating from a database is
``on error stop``. The idea is to let the user fix either the migration
specifications or the source data, and run the process again, until it
works.
In some cases the source data is so damaged as to be impossible to
migrate in full, and it might be necessary to then resort to the ``on
error resume next`` option, where the rows rejected by PostgreSQL are
saved away and the migration continues with the other rows.
Pre/Post SQL commands, Post-Schema SQL commands
While pgloader takes care of rewriting the schema to PostgreSQL
expectations, and even provides *user-defined data type casting rules*
support to that end, sometimes it is necessary to add some specific SQL
commands around the migration. It's of course supported right from
pgloader itself, without having to script around it.
Online ALTER schema
At times migrating to PostgreSQL is also a good opportunity to review
and fix bad decisions that were made in the past, or simply that are not
relevant to PostgreSQL.
The pgloader command syntax allows you to ALTER pgloader's internal
representation of the target catalogs so that the target schema can be
created a little differently from the source one. Supported changes
include targeting a different *schema* or *table* name, as shown in the
sketch after this list.
Materialized Views, or schema rewrite on-the-fly
In some cases the schema rewriting goes deeper than just renaming the
SQL objects, becoming a full normalization exercise. That's fine,
because PostgreSQL is great at running a normalized schema in production
under most workloads.
pgloader implements full flexibility in on-the-fly schema rewriting, by
making it possible to migrate from a view definition. The view attribute
list becomes a table definition in PostgreSQL, and the data is fetched
by querying the view on the source system.
A SQL view allows implementing both content filtering at the column
level, using the SELECT projection clause, and at the row level, using
the WHERE restriction clause, as well as backfilling from reference
tables thanks to JOINs.
Distribute to Citus
When migrating from PostgreSQL to Citus, an important part of the process
consists of adjusting the schema to the distribution key. Read
`Preparing Tables and Ingesting Data
<https://docs.citusdata.com/en/v8.0/use_cases/multi_tenant.html>`_ in
the Citus documentation for a complete example showing how to do that.
When using pgloader it's possible to specify the distribution keys and
reference tables and let pgloader take care of adjusting the table,
indexes, primary keys and foreign key definitions all by itself.
Encoding Overrides
MySQL doesn't actually enforce the encoding of the data in the database
to match the encoding known in the metadata, defined at the database,
table, or attribute level. Sometimes, it's necessary to override the
metadata in order to make sense of the text, and pgloader makes it easy
to do so.
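Here is a minimal sketch of a one-command migration that also renames a
schema on the fly; the connection strings and the matching rule are
illustrative only:
::
load database
     from mysql://user@localhost/sakila
     into postgresql:///pagila
 with include drop, create tables, create indexes, reset sequences
 alter table names matching ~/_list$/, 'staff_list'
       set schema 'mv';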
Continuous Migration
--------------------
pgloader is meant to migrate a whole database in a single command line and
without any manual intervention. The goal is to be able to setup a
*Continuous Integration* environment as described in the `Project
Methodology <http://mysqltopgsql.com/project/>`_ document of the `MySQL to
PostgreSQL <http://mysqltopgsql.com/project/>`_ webpage.
1. Setup your target PostgreSQL Architecture
2. Fork a Continuous Integration environment that uses PostgreSQL
3. Migrate the data over and over again every night, from production
4. As soon as the CI is all green using PostgreSQL, schedule the D-Day
5. Migrate without surprise and enjoy!
In order to be able to follow this great methodology, you need tooling to
implement the third step in a fully automated way. That's pgloader.
.. toctree::
:maxdepth: 2
:caption: Table Of Contents:
intro
quickstart
tutorial/tutorial
pgloader
ref/csv
@ -22,6 +253,9 @@ Welcome to pgloader's documentation!
ref/mysql
ref/sqlite
ref/mssql
ref/pgsql
ref/pgsql-citus-target
ref/pgsql-redshift
ref/transforms
bugreport

View File

@ -10,13 +10,24 @@ the data into the server, and manages errors by filling a pair of
pgloader knows how to read data from different kind of sources:
* Files
* CSV
* Fixed Format
* DBF
* Databases
* SQLite
* MySQL
* MS SQL Server
* PostgreSQL
* Redshift
pgloader knows how to target different products using the PostgreSQL protocol:
* PostgreSQL
* `Citus <https://www.citusdata.com>`_
* Redshift
The level of automation provided by pgloader depends on the data source
type. In the case of CSV and Fixed Format files, a full description of the
@ -24,23 +35,47 @@ expected input properties must be given to pgloader. In the case of a
database, pgloader connects to the live service and knows how to fetch the
metadata it needs directly from it.
Continuous Migration
--------------------
Features Matrix
---------------
pgloader is meant to migrate a whole database in a single command line and
without any manual intervention. The goal is to be able to setup a
*Continuous Integration* environment as described in the `Project
Methodology <http://mysqltopgsql.com/project/>`_ document of the `MySQL to
PostgreSQL <http://mysqltopgsql.com/project/>`_ webpage.
Here's a comparison of the features supported depending on the source
database engine. Some features that are not supported could be added to
pgloader; it's just that nobody has needed them yet. Those features
are marked with ✗. Empty cells are used when the feature doesn't make sense
for the selected source database.
1. Setup your target PostgreSQL Architecture
2. Fork a Continuous Integration environment that uses PostgreSQL
3. Migrate the data over and over again every night, from production
4. As soon as the CI is all green using PostgreSQL, schedule the D-Day
5. Migrate without surprise and enjoy!
========================== ======= ====== ====== =========== =========
Feature                    SQLite  MySQL  MS SQL PostgreSQL  Redshift
========================== ======= ====== ====== =========== =========
One-command migration      ✓       ✓      ✓      ✓           ✓
Continuous Migration       ✓       ✓      ✓      ✓           ✓
Schema discovery           ✓       ✓      ✓      ✓           ✓
Partial Migrations         ✓       ✓      ✓      ✓           ✓
Schema only                ✓       ✓      ✓      ✓           ✓
Data only                  ✓       ✓      ✓      ✓           ✓
Repeatable (DROP+CREATE)   ✓       ✓      ✓      ✓           ✓
User defined casting rules ✓       ✓      ✓      ✓           ✓
Encoding Overrides                 ✓
On error stop              ✓       ✓      ✓      ✓           ✓
On error resume next       ✓       ✓      ✓      ✓           ✓
Pre/Post SQL commands      ✓       ✓      ✓      ✓           ✓
Post-Schema SQL commands   ✗       ✓      ✓      ✓           ✓
Primary key support        ✓       ✓      ✓      ✓           ✓
Foreign key support        ✓       ✓      ✓      ✓
Online ALTER schema        ✓       ✓      ✓      ✓           ✓
Materialized views         ✗       ✓      ✓      ✓           ✓
Distribute to Citus        ✗       ✓      ✓      ✓           ✓
========================== ======= ====== ====== =========== =========
For more details about what the features are about, see the specific
reference pages for your database source.
For some of the features, missing support only means that the feature is not
needed for the other sources, such as the capability to override MySQL
encoding metadata about a table or a column: only MySQL in this list is left
completely unable to guarantee text encoding. Similarly, Redshift simply does
not have foreign keys.
In order to be able to follow this great methodology, you need tooling to
implement the third step in a fully automated way. That's pgloader.
Commands
--------

View File

@ -154,6 +154,18 @@ Those options are meant to tweak `pgloader` behavior when loading data.
machine code) another version of itself, usually a newer one like a very
recent git checkout.
* `--no-ssl-cert-verification`
Uses the OpenSSL option to accept a locally issued server-side
certificate, avoiding the following error message::
SSL verify error: 20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY
The right way to fix the SSL issue is to use a trusted certificate, of
course. Sometimes though it's useful to make progress with the pgloader
setup while the certificate chain of trust is being fixed, maybe by
another team. That's when this option is useful.
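For instance, a sketch of using the option on the command line; the
connection strings are placeholders only:
::
$ pgloader --no-ssl-cert-verification \
       mysql://appuser@db.internal/app \
       postgresql://localhost/app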
Command Line Only Operations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -552,6 +564,22 @@ queries from a SQL file. Implements support for PostgreSQL dollar-quoting
and the `\i` and `\ir` include facilities as in `psql` batch mode (where
they are the same thing).
AFTER CREATE SCHEMA DO
^^^^^^^^^^^^^^^^^^^^^^
Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that
section are executed once the schema has been created by pgloader, and
before the data is loaded. It's the right time to ALTER TABLE or do some
custom implementation on top of what pgloader does, like maybe partitioning.
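For instance, here is a sketch of a load command using this clause; the
connection strings, schema and table names, and the SQL are illustrative
only:
::
load database
     from mysql://user@localhost/sakila
     into postgresql:///pagila
 after create schema do
   $$ alter table sakila.rental set (fillfactor = 90); $$;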
AFTER CREATE SCHEMA EXECUTE
^^^^^^^^^^^^^^^^^^^^^^^^^^^
Same behaviour as in the *AFTER CREATE SCHEMA DO* clause. Allows you to read
the SQL queries from a SQL file. Implements support for PostgreSQL
dollar-quoting and the `\i` and `\ir` include facilities as in `psql` batch
mode (where they are the same thing).
Connection String
^^^^^^^^^^^^^^^^^

View File

@ -1,10 +1,10 @@
PgLoader Quick Start
--------------------
Pgloader Quick Start
====================
In simple cases, pgloader is very easy to use.
CSV
^^^
---
Load data from a CSV file into a pre-existing table in your database::
@ -26,7 +26,7 @@ For documentation about the available syntaxes for the `--field` and
Note also that the PostgreSQL URI includes the target *tablename*.
Reading from STDIN
^^^^^^^^^^^^^^^^^^
------------------
File based pgloader sources can be loaded from the standard input, as in the
following example::
@ -46,7 +46,7 @@ pgloader with this technique, using the Unix pipe::
gunzip -c source.gz | pgloader --type csv ... - pgsql:///target?foo
Loading from CSV available through HTTP
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
---------------------------------------
The same command as just above can also be run if the CSV file happens to be
found on a remote HTTP location::
@ -84,7 +84,7 @@ Also notice that the same command will work against an archived version of
the same data.
Streaming CSV data from an HTTP compressed file
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-----------------------------------------------
Finally, it's important to note that pgloader first fetches the content from
the HTTP URL to a local file, then expands the archive when it's
@ -110,7 +110,7 @@ and the commands and pgloader will take care of streaming the data down to
PostgreSQL.
Migrating from SQLite
^^^^^^^^^^^^^^^^^^^^^
---------------------
The following command will open the SQLite database, discover its tables
definitions including indexes and foreign keys, migrate those definitions
@ -121,7 +121,7 @@ and then migrate the data over::
pgloader ./test/sqlite/sqlite.db postgresql:///newdb
Migrating from MySQL
^^^^^^^^^^^^^^^^^^^^
--------------------
Just create a database where to host the MySQL data and definitions and have
pgloader do the migration for you in a single command line::
@ -130,7 +130,7 @@ pgloader do the migration for you in a single command line::
pgloader mysql://user@localhost/sakila postgresql:///pagila
Fetching an archived DBF file from a HTTP remote location
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
---------------------------------------------------------
It's possible for pgloader to download a file from HTTP, unarchive it, and
only then open it to discover the schema then load the data::

View File

@ -31,7 +31,7 @@ MS SQL Database Migration Options: WITH
---------------------------------------
When loading from a `MS SQL` database, the same options as when loading a
`MySQL` database are supported. Please refer to the MySQL section. The
`MS SQL` database are supported. Please refer to the MS SQL section. The
following options are added:
- *create schemas*
@ -53,7 +53,39 @@ CAST
The cast clause allows to specify custom casting rules, either to overload
the default casting rules or to amend them with special cases.
Please refer to the MySQL CAST clause for details.
Please refer to the MS SQL CAST clause for details.
MS SQL Views Support
--------------------
MS SQL views support allows pgloader to migrate views as if they were base
tables. This feature then allows for on-the-fly transformation from MS SQL
to PostgreSQL, as the view definition is used rather than the base data.
MATERIALIZE VIEWS
^^^^^^^^^^^^^^^^^
This clause allows you to implement custom data processing at the data
source by providing a *view definition* against which pgloader will query
the data. It's not possible to just allow for plain `SQL` because we want to
know a lot about the exact data types of each column involved in the query
output.
This clause expects a comma separated list of view definitions, each one
being either the name of an existing view in your database or the following
expression::
*name* `AS` `$$` *sql query* `$$`
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
the beginning of the data loading, and the resulting view will then be
dropped at the end of the data loading.
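For example, here is a sketch mixing the name of an existing view with an
inline view definition; the connection strings, view names, and the query
are illustrative only:
::
load database
     from mssql://user@host/sourcedb
     into postgresql:///targetdb
 materialize views v_orders,
                   mv_customers as
   $$ select c.id, c.name, count(o.id) as order_count
        from dbo.customers c
        left join dbo.orders o on o.customer_id = c.id
       group by c.id, c.name $$;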
MATERIALIZE ALL VIEWS
^^^^^^^^^^^^^^^^^^^^^
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
returned by MS SQL rather than asking the user to specify the list.
MS SQL Partial Migration
------------------------
@ -96,9 +128,35 @@ schema 'public' in the target database with this command::
ALTER TABLE NAMES MATCHING ... IN SCHEMA '...'
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
See the MySQL explanation for this clause above. It works the same in the
context of migrating from MS SQL, only with the added option to specify the
name of the schema where to find the definition of the target tables.
Introduce a comma separated list of table names or *regular expressions*
that you want to target in the pgloader *ALTER TABLE* command. Available
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
IN SCHEMA 'dbo'
SET SCHEMA 'mv'
ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'dbo' RENAME TO 'films'
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET (fillfactor='40')
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET TABLESPACE 'tlbspc'
You can use as many such rules as you need. The list of tables to be
migrated is searched in pgloader memory against the *ALTER TABLE* matching
rules, and for each command pgloader stops at the first matching criteria
(regexp or string).
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
the level of the pgloader in-memory representation of your source database
schema. In case of a name change, the mapping is kept and reused in the
*foreign key* and *index* support.
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
command that pgloader will run when it has to create a table.
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
`CREATE TABLE` command that pgloader will run when it has to create a table.
The matching is done in pgloader itself, with a Common Lisp regular
expression lib, so it doesn't depend on the *LIKE* implementation of MS SQL,

View File

@ -1,10 +1,9 @@
Migrating a MySQL Database to PostgreSQL
========================================
This command instructs pgloader to load data from a database connection. The
only supported database source is currently *MySQL*, and pgloader supports
dynamically converting the schema of the source database and the indexes
building.
This command instructs pgloader to load data from a database connection.
pgloader supports dynamically converting the schema of the source database
and building the indexes.
A default set of casting rules are provided and might be overloaded and
appended to by the command.
@ -500,9 +499,8 @@ ALTER TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expressions*
that you want to target in the pgloader *ALTER TABLE* command. The only two
available actions are *SET SCHEMA* and *RENAME TO*, both take a quoted
string as parameter::
that you want to target in the pgloader *ALTER TABLE* command. Available
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
SET SCHEMA 'mv'
@ -511,6 +509,8 @@ string as parameter::
ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40')
ALTER TABLE NAMES MATCHING ~/./ SET TABLESPACE 'pg_default'
You can use as many such rules as you need. The list of tables to be
migrated is searched in pgloader memory against the *ALTER TABLE* matching
rules, and for each command pgloader stops at the first matching criteria
@ -524,6 +524,9 @@ schema. In case of a name change, the mapping is kept and reused in the
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
command that pgloader will run when it has to create a table.
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
`CREATE TABLE` command that pgloader will run when it has to create a table.
MySQL Migration: limitations
----------------------------
@ -556,7 +559,7 @@ Numbers::
type int with extra auto_increment to serial when (< precision 10)
type int with extra auto_increment to bigserial when (<= 10 precision)
type int to int when (< precision 10)
type int to bigint when (<= 10 precision)
type int to bigint when (>= 10 precision)
type tinyint with extra auto_increment to serial
type smallint with extra auto_increment to serial
type mediumint with extra auto_increment to serial
@ -609,6 +612,14 @@ Date::
to timestamptz drop default
using zero-dates-to-null
type datetime with extra on update current timestamp when not null
to timestamptz drop not null drop default
using zero-dates-to-null
type datetime with extra on update current timestamp
to timestamptz drop default
using zero-dates-to-null
type timestamp when default "0000-00-00 00:00:00" and not null
to timestamptz drop not null drop default
using zero-dates-to-null

View File

@ -0,0 +1,196 @@
Migrating a PostgreSQL Database to Citus
========================================
This command instructs pgloader to load data from a database connection.
Automatic discovery of the schema is supported, including building the
indexes and the primary and foreign key constraints. A default set of
casting rules is provided and might be overloaded and appended to by the
command.
Automatic distribution column backfilling is supported, either from commands
that specify the distribution column in every table, or only in the
main table, then relying on foreign key constraints to discover the other
distribution keys.
Here's a short example of migrating a database from one PostgreSQL server to
another:
::
load database
from pgsql:///hackathon
into pgsql://localhost:9700/dim
with include drop, reset no sequences
cast column impressions.seen_at to "timestamp with time zone"
distribute companies using id
-- distribute campaigns using company_id
-- distribute ads using company_id from campaigns
-- distribute clicks using company_id from ads, campaigns
-- distribute impressions using company_id from ads, campaigns
;
Everything works exactly the same way as when doing a PostgreSQL to
PostgreSQL migration, with the added functionality of this new `distribute`
command.
Distribute Command
^^^^^^^^^^^^^^^^^^
The distribute command syntax is as follows::
distribute <table name> using <column name>
distribute <table name> using <column name> from <table> [, <table>, ...]
distribute <table name> as reference table
When using the distribute command, the following steps are added to pgloader
operations when migrating the schema:
- if the distribution column does not exist in the table, it is added as
the first column of the table
- if the distribution column does not exist in the primary key of the
table, it is added as the first column of the primary key of the table
- all the foreign keys that point to the table get the distribution key
added automatically too, including the source tables of the foreign key
constraints
- once the schema has been created on the target database, pgloader then
issues the Citus specific commands `create_reference_table()
<http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-reference-table>`_
and `create_distributed_table()
<http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-distributed-table>`_
to make the tables distributed
Those operations are done in the schema section of pgloader, before the data
is loaded. When the data is loaded, the newly added columns need to be
backfilled from referenced data. pgloader knows how to do that by generating
a backfill query (examples are shown below) and importing the result set of
such a query rather than the raw data from the source table.
Citus Migration Example
^^^^^^^^^^^^^^^^^^^^^^^
With the migration command as above, pgloader adds the column ``company_id``
to the tables that have a direct or indirect foreign key reference to the
``companies`` table.
We run pgloader using the following command, where the file
`./test/citus/company.load
<https://github.com/dimitri/pgloader/blob/master/test/citus/company.load>`_
contains the pgloader command as shown above.
::
$ pgloader --client-min-messages sql ./test/citus/company.load
The following SQL statements are all extracted from the log messages that
the pgloader command outputs. We are going to have a look at the
`impressions` table. It gets created with a new column `company_id` in the
first position, as follows:
::
CREATE TABLE "public"."impressions"
(
company_id bigint,
"id" bigserial,
"ad_id" bigint default NULL,
"seen_at" timestamp with time zone default NULL,
"site_url" text default NULL,
"cost_per_impression_usd" numeric(20,10) default NULL,
"user_ip" inet default NULL,
"user_data" jsonb default NULL
);
The original schema for this table does not have the `company_id` column,
which means pgloader now needs to change the primary key definition, the
foreign key constraint definitions from and to this table, and also to
*backfill* the `company_id` data to this table when doing the COPY phase of
the migration.
Then once the tables have been created, pgloader executes the following SQL
statements::
SELECT create_distributed_table('"public"."companies"', 'id');
SELECT create_distributed_table('"public"."campaigns"', 'company_id');
SELECT create_distributed_table('"public"."ads"', 'company_id');
SELECT create_distributed_table('"public"."clicks"', 'company_id');
SELECT create_distributed_table('"public"."impressions"', 'company_id');
Then when copying the data from the source PostgreSQL database to the new
Citus tables, the new column (here ``company_id``) needs to be backfilled
from the source tables. Here's the SQL query that pgloader uses as a data
source for the ``ads`` table in our example:
::
SELECT "campaigns".company_id::text, "ads".id::text, "ads".campaign_id::text,
"ads".name::text, "ads".image_url::text, "ads".target_url::text,
"ads".impressions_count::text, "ads".clicks_count::text,
"ads".created_at::text, "ads".updated_at::text
FROM "public"."ads"
JOIN "public"."campaigns"
ON ads.campaign_id = campaigns.id
The ``impressions`` table has an indirect foreign key reference to the
``companies`` table, which is the table where the distribution key is
specified. pgloader will discover that itself from walking the PostgreSQL
catalogs, and you may also use the following specification in the pgloader
command to explicitly add the indirect dependency:
::
distribute impressions using company_id from ads, campaigns
Given this schema, the SQL query used by pgloader to fetch the data for the
`impressions` table is the following, implementing online backfilling of the
data:
::
SELECT "campaigns".company_id::text, "impressions".id::text,
"impressions".ad_id::text, "impressions".seen_at::text,
"impressions".site_url::text,
"impressions".cost_per_impression_usd::text,
"impressions".user_ip::text,
"impressions".user_data::text
FROM "public"."impressions"
JOIN "public"."ads"
ON impressions.ad_id = ads.id
JOIN "public"."campaigns"
ON ads.campaign_id = campaigns.id
When the data copying is done, then pgloader also has to install the indexes
supporting the primary keys, and add the foreign key definitions to the
schema. Those definitions are not the same as in the source schema, because
of the addition of the distribution column to the table: we need to also add
the column to the primary key and the foreign key constraints.
Here are the commands issued by pgloader for the ``impressions`` table:
::
CREATE UNIQUE INDEX "impressions_pkey"
ON "public"."impressions" (company_id, id);
ALTER TABLE "public"."impressions"
ADD CONSTRAINT "impressions_ad_id_fkey"
FOREIGN KEY(company_id,ad_id)
REFERENCES "public"."ads"(company_id,id)
Given a single line of specification ``distribute companies using id`` then
pgloader implements all the necessary schema changes on the fly when
migrating to Citus, and also dynamically backfills the data.
Citus Migration: Limitations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The way pgloader implements *reset sequence* does not work with Citus at
this point, so sequences need to be taken care of separately.

View File

@ -0,0 +1,70 @@
Support for Redshift in pgloader
================================
The command and behavior are the same as when migrating from a PostgreSQL
database source. pgloader automatically discovers that it's talking to a
Redshift database by parsing the output of the `SELECT version()` SQL query.
Redshift as a data source
^^^^^^^^^^^^^^^^^^^^^^^^^
Redshift is a variant of PostgreSQL version 8.0.2, which allows pgloader to
work with only a very small amount of adaptation in the catalog queries
used. In other words, migrating from Redshift to PostgreSQL works just the
same as when migrating from a PostgreSQL data source, including the
connection string specification.
Redshift as a data destination
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The Redshift variant of PostgreSQL 8.0.2 does not have support for the
``COPY FROM STDIN`` feature that pgloader normally relies upon. To use COPY
with Redshift, the data must first be made available in an S3 bucket.
First, pgloader must authenticate to Amazon S3. pgloader uses the following
setup for that:
- ``~/.aws/config``
This INI formatted file contains sections with your default region and
other global values relevant to using the S3 API. pgloader parses it to
get the region when it is set up in the ``default`` INI section.
The environment variable ``AWS_DEFAULT_REGION`` can be used to override
the configuration file value.
- ``~/.aws/credentials``
This INI formatted file contains your authentication setup to Amazon,
with the properties ``aws_access_key_id`` and ``aws_secret_access_key``
in the section ``default``. pgloader parses this file for those keys,
and uses their values when communicating with Amazon S3 (see the sketch
after this list).
The environment variables ``AWS_ACCESS_KEY_ID`` and
``AWS_SECRET_ACCESS_KEY`` can be used to override the configuration file
values.
- ``AWS_S3_BUCKET_NAME``
Finally, the value of the environment variable ``AWS_S3_BUCKET_NAME`` is
used by pgloader as the name of the S3 bucket to which the files to COPY
into the Redshift database are uploaded. The bucket name defaults to
``pgloader``.
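As a sketch, the two files described above then look something like the
following; the region and the keys shown are placeholder values only:
::
# ~/.aws/config
[default]
region = us-east-1
# ~/.aws/credentials
[default]
aws_access_key_id = AKIAIOSFODNN7EXAMPLE
aws_secret_access_key = wJalrXVnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY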
Then pgloader works as usual, see the other sections of the documentation
for the details, depending on the data source (files, other databases, etc).
When preparing the data for PostgreSQL, pgloader now uploads each batch into
a single CSV file, and then issues a command such as the following for each
batch:
::
COPY <target_table_name>
FROM 's3://<s3 bucket>/<s3-filename-just-uploaded>'
FORMAT CSV
TIMEFORMAT 'auto'
REGION '<aws-region>'
ACCESS_KEY_ID '<aws-access-key-id>'
SECRET_ACCESS_KEY '<aws-secret-access-key>'
This is the only difference with a PostgreSQL core version, where pgloader
can rely on the classic ``COPY FROM STDIN`` command, which allows sending
data through the already established connection to PostgreSQL.

408
docs/ref/pgsql.rst Normal file
View File

@ -0,0 +1,408 @@
Migrating a PostgreSQL Database to PostgreSQL
=============================================
This command instructs pgloader to load data from a database connection.
Automatic discovery of the schema is supported, including building the
indexes and the primary and foreign key constraints. A default set of
casting rules is provided and might be overloaded and appended to by the
command.
Here's a short example of migrating a database from one PostgreSQL server to
another:
::
load database
from pgsql://localhost/pgloader
into pgsql://localhost/copy
including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
including only table names matching ~/geolocations/ in schema 'public'
;
PostgreSQL Database Source Specification: FROM
----------------------------------------------
Must be a connection URL pointing to a PostgreSQL database.
See the `SOURCE CONNECTION STRING` section above for details on how to write
the connection string.
::
pgsql://[user[:password]@][netloc][:port][/dbname][?option=value&...]
PostgreSQL Database Migration Options: WITH
-------------------------------------------
When loading from a `PostgreSQL` database, the following options are
supported, and the default *WITH* clause is: *no truncate*, *create schema*,
*create tables*, *include drop*, *create indexes*, *reset sequences*,
*foreign keys*, *downcase identifiers*, *uniquify index names*, *reindex*.
- *include drop*
When this option is listed, pgloader drops all the tables in the target
PostgreSQL database whose names appear in the source database. This
option allows for using the same command several times in a row until
you figure out all the options, starting automatically from a clean
environment. Please note that `CASCADE` is used to ensure that tables
are dropped even if there are foreign keys pointing to them. This is
precisely what `include drop` is intended to do: drop all target tables
and recreate them.
Great care needs to be taken when using `include drop`, as it will
cascade to *all* objects referencing the target tables, possibly
including other tables that are not being loaded from the source DB.
- *include no drop*
When this option is listed, pgloader will not include any `DROP`
statement when loading the data.
- *truncate*
When this option is listed, pgloader issues the `TRUNCATE` command
against each PostgreSQL table just before loading data into it.
- *no truncate*
When this option is listed, pgloader issues no `TRUNCATE` command.
- *disable triggers*
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
TRIGGER ALL` command against the PostgreSQL target table before copying
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
`COPY` is done.
This option allows loading data into a pre-existing table ignoring the
*foreign key constraints* and user defined triggers and may result in
invalid *foreign key constraints* once the data is loaded. Use with
care.
- *create tables*
When this option is listed, pgloader creates the tables using the meta
data found in the source database: the list of columns with their data
types. A standard data type conversion from the source to PostgreSQL is
done.
- *create no tables*
When this option is listed, pgloader skips the creation of table before
loading data, target tables must then already exist.
Also, when using *create no tables* pgloader fetches the metadata from
the current target database and checks type casting, then will remove
constraints and indexes prior to loading the data and install them back
again once the loading is done.
- *create indexes*
When this option is listed, pgloader gets the definitions of all the
indexes found in the source database and creates the same set of index
definitions against the PostgreSQL database.
- *create no indexes*
When this option is listed, pgloader skips creating indexes.
- *drop indexes*
When this option is listed, pgloader drops the indexes in the target
database before loading the data, and creates them again at the end
of the data copy.
- *reindex*
When this option is used, pgloader does both *drop indexes* before
loading the data and *create indexes* once data is loaded.
- *drop schema*
When this option is listed, pgloader drops the target schema in the
target PostgreSQL database before creating it again and all the objects
it contains. The default behavior doesn't drop the target schemas.
- *foreign keys*
When this option is listed, pgloader gets the definitions of all the
foreign keys found in the source database and creates the same set of
foreign key definitions against the PostgreSQL database.
- *no foreign keys*
When this option is listed, pgloader skips creating foreign keys.
- *reset sequences*
When this option is listed, at the end of the data loading and after the
indexes have all been created, pgloader resets all the PostgreSQL
sequences created to the current maximum value of the column they are
attached to.
The options *schema only* and *data only* have no effects on this
option.
- *reset no sequences*
When this option is listed, pgloader skips resetting sequences after the
load.
The options *schema only* and *data only* have no effects on this
option.
- *downcase identifiers*
When this option is listed, pgloader converts all source identifiers
(table names, index names, column names) to *downcase*, except for
PostgreSQL *reserved* keywords.
The PostgreSQL *reserved* keywords are determined dynamically by using
the system function `pg_get_keywords()`.
- *quote identifiers*
When this option is listed, pgloader quotes all source identifiers so
that their case is respected. Note that you will then have to do the
same thing in your application code queries.
- *schema only*
When this option is listed pgloader refrains from migrating the data
over. Note that the schema in this context includes the indexes when the
option *create indexes* has been listed.
- *data only*
When this option is listed pgloader only issues the `COPY` statements,
without doing any other processing.
- *rows per range*
How many rows are fetched per `SELECT` query when using *multiple
readers per thread*, see above for details.
PostgreSQL Database Casting Rules
---------------------------------
The command *CAST* introduces user-defined casting rules.
The cast clause allows specifying custom casting rules, either to overload
the default casting rules or to amend them with special cases.
A casting rule is expected to follow one of the forms::
type <type-name> [ <guard> ... ] to <pgsql-type-name> [ <option> ... ]
column <table-name>.<column-name> [ <guards> ] to ...
It's possible for a *casting rule* to either match against a PostgreSQL data
type or against a given *column name* in a given *table name*. So it's
possible to migrate a table from a PostgreSQL database while changing an
`int` column to a `bigint` one, automatically.
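For instance, here is a sketch of such rules as they would appear in a load
command's *CAST* clause; the table and column names are illustrative only:
::
CAST type int to bigint drop typemod,
     column orders.total_cents to bigint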
The *casting rules* are applied in order, the first match prevents the
following rules from being applied, and user defined rules are evaluated
first.
The supported guards are:
- *when default 'value'*
The casting rule is only applied against source columns of the named
type that have the given *value*, which must be a single-quoted or a
double-quoted string.
- *when typemod expression*
The casting rule is only applied against source columns of the named
type that have a *typemod* value matching the given *typemod
expression*. The *typemod* is separated into its *precision* and *scale*
components.
Example of a cast rule using a *typemod* guard::
type char when (= precision 1) to char keep typemod
This expression casts source `char(1)` columns to PostgreSQL columns of
type `char(1)`, while in the general case `char(N)` will be converted by
the default cast rule into the PostgreSQL type `varchar(N)`.
- *with extra auto_increment*
The casting rule is only applied against PostgreSQL columns attached to
a sequence. This can be the result of doing that manually, using a
`serial` or a `bigserial` data type, or an `identity` column.
The supported casting options are:
- *drop default*, *keep default*
When the option *drop default* is listed, pgloader drops any
existing default expression in the source database for columns of the
named type from the `CREATE TABLE` statement it generates.
The spelling *keep default* explicitly prevents that behaviour and
can be used to overload the default casting rules.
- *drop not null*, *keep not null*, *set not null*
When the option *drop not null* is listed, pgloader drops any
existing `NOT NULL` constraint associated with the given source
data type when it creates the tables in the PostgreSQL
database.
The spelling *keep not null* explicitly prevents that behaviour and
can be used to overload the default casting rules.
When the option *set not null* is listed, pgloader sets a `NOT NULL`
constraint on the target column regardless of whether it has been set
in the source column.
- *drop typemod*, *keep typemod*
When the option *drop typemod* is listed, pgloader drops any
existing *typemod* definition (e.g. *precision* and *scale*) from
the data type definition found in the source columns of the named
type when it creates the tables in the PostgreSQL database.
The spelling *keep typemod* explicitly prevents that behaviour and
can be used to overload the default casting rules.
- *using*
This option takes as its single argument the name of a function to
be found in the `pgloader.transforms` Common Lisp package. See above
for details.
It's possible to augment a default cast rule (such as one that
applies against `ENUM` data type for example) with a *transformation
function* by omitting entirely the `type` parts of the casting rule,
as in the following example::
column enumerate.foo using empty-string-to-null
PostgreSQL Views Support
------------------------
PostgreSQL views support allows pgloader to migrate views as if they were
base tables. This feature then allows for on-the-fly transformation of the
source schema, as the view definition is used rather than the base data.
MATERIALIZE VIEWS
^^^^^^^^^^^^^^^^^
This clause allows you to implement custom data processing at the data
source by providing a *view definition* against which pgloader will query
the data. It's not possible to just allow for plain `SQL` because we want to
know a lot about the exact data types of each column involved in the query
output.
This clause expects a comma separated list of view definitions, each one
being either the name of an existing view in your database or the following
expression::
*name* `AS` `$$` *sql query* `$$`
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
the beginning of the data loading, and the resulting view will then be
dropped at the end of the data loading.
MATERIALIZE ALL VIEWS
^^^^^^^^^^^^^^^^^^^^^
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
returned by PostgreSQL rather than asking the user to specify the list.
PostgreSQL Partial Migration
----------------------------
INCLUDING ONLY TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expression* used
to limit the tables to migrate to a sublist.
Example::
including only table names matching ~/film/, 'actor' in schema 'public'
EXCLUDING TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expression* used
to exclude table names from the migration. This filter only applies to the
result of the *INCLUDING* filter.
::
excluding table names matching ~<ory> in schema 'public'
PostgreSQL Schema Transformations
---------------------------------
ALTER TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expressions*
that you want to target in the pgloader *ALTER TABLE* command. Available
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
IN SCHEMA 'public'
SET SCHEMA 'mv'
ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'public' RENAME TO 'films'
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET (fillfactor='40')
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET TABLESPACE 'pg_default'
You can use as many such rules as you need. The list of tables to be
migrated is searched in pgloader memory against the *ALTER TABLE* matching
rules, and for each command pgloader stops at the first matching criteria
(regexp or string).
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
the level of the pgloader in-memory representation of your source database
schema. In case of a name change, the mapping is kept and reused in the
*foreign key* and *index* support.
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
command that pgloader will run when it has to create a table.
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
`CREATE TABLE` command that pgloader will run when it has to create a table.
PostgreSQL Migration: limitations
---------------------------------
The only PostgreSQL objects supported at this time in pgloader are
extensions, schemas, tables, indexes and constraints. Anything else is ignored.
- Views are not migrated,
Supporting views might require implementing a full SQL parser for the
MySQL dialect with a porting engine to rewrite the SQL against
PostgreSQL, including renaming functions and changing some constructs.
While it's not theoretically impossible, don't hold your breath.
- Triggers are not migrated
The difficulty of doing so is not yet assessed.
- Stored Procedures and Functions are not migrated.
Default PostgreSQL Casting Rules
--------------------------------
When migrating from PostgreSQL the following Casting Rules are provided::
type int with extra auto_increment to serial
type bigint with extra auto_increment to bigserial
type "character varying" to text drop typemod

View File

@ -127,7 +127,8 @@ It's possible to use the *MATERIALIZE VIEWS* clause and give both the name
and the SQL (in MySQL dialect) definition of view, then pgloader creates the
view before loading the data, then drops it again at the end.
## Loading the data
Loading the data
^^^^^^^^^^^^^^^^
Let's start the `pgloader` command with our `sakila.load` command file::

View File

@ -1,7 +1,6 @@
PgLoader Tutorial
Pgloader Tutorial
=================
.. include:: quickstart.rst
.. include:: csv.rst
.. include:: fixed.rst
.. include:: geolite.rst

View File

@ -69,6 +69,7 @@
(:file "quoting" :depends-on ("utils"))
(:file "catalog" :depends-on ("quoting"))
(:file "alter-table" :depends-on ("catalog"))
(:file "citus" :depends-on ("catalog"))
;; State, monitoring, reporting
(:file "reject" :depends-on ("state"))
@ -95,6 +96,7 @@
:components
((:file "connection")
(:file "pgsql-ddl")
(:file "pgsql-ddl-citus")
(:file "pgsql-schema")
(:file "merge-catalogs" :depends-on ("pgsql-schema"))
(:file "pgsql-trigger")
@ -149,40 +151,37 @@
;(:file "syslog") ; experimental...
(:module "sqlite"
:serial t
:depends-on ("common")
:components
((:file "sqlite-cast-rules")
(:file "sqlite-schema"
:depends-on ("sqlite-cast-rules"))
(:file "sqlite"
:depends-on ("sqlite-cast-rules"
"sqlite-schema"))))
(:file "sqlite-schema")
(:file "sqlite")))
(:module "mssql"
:serial t
:depends-on ("common")
:components
((:file "mssql-cast-rules")
(:file "mssql-schema"
:depends-on ("mssql-cast-rules"))
(:file "mssql"
:depends-on ("mssql-cast-rules"
"mssql-schema"))
(:file "mssql-index-filters"
:depends-on ("mssql"))))
(:file "mssql-schema")
(:file "mssql")
(:file "mssql-index-filters")))
(:module "mysql"
:serial t
:depends-on ("common")
:components
((:file "mysql-cast-rules")
(:file "mysql-connection")
(:file "mysql-schema"
:depends-on ("mysql-connection"
"mysql-cast-rules"))
;; (:file "mysql-csv"
;; :depends-on ("mysql-schema"))
(:file "mysql"
:depends-on ("mysql-cast-rules"
"mysql-schema"))))))
(:file "mysql-schema")
(:file "mysql")))
(:module "pgsql"
:serial t
:depends-on ("common")
:components ((:file "pgsql-cast-rules")
(:file "pgsql-schema")
(:file "pgsql")))))
;; package pgloader.copy
(:module "pg-copy"
@ -243,10 +242,12 @@
(:file "command-cast-rules")
(:file "command-materialize-views")
(:file "command-alter-table")
(:file "command-distribute")
(:file "command-mysql")
(:file "command-including-like")
(:file "command-mssql")
(:file "command-sqlite")
(:file "command-pgsql")
(:file "command-archive")
(:file "command-parser")
(:file "parse-sqlite-type-name")

View File

@ -9,10 +9,16 @@
;;; :cl+ssl in its system definition.
;;;
(in-package #:cl-user)
;; So that we can #+pgloader-image some code away, see main.lisp
(push :pgloader-image *features*)
(in-package #:cl-user)
;;;
;;; We need to support *print-circle* for the debug traces of the catalogs,
;;; and while at it let's enforce *print-pretty* too.
;;;
(setf *print-circle* t *print-pretty* t)
(defun close-foreign-libs ()
"Close Foreign libs in use by pgloader at application save time."
@ -30,10 +36,8 @@
;; handles some context and things around loading with CFFI.
(cl+ssl:reload)))
#|
#+ccl (push #'open-foreign-libs *lisp-startup-functions*)
#+sbcl (push #'open-foreign-libs sb-ext:*init-hooks*)
|#
#+ccl (push #'close-foreign-libs *save-exit-functions*)
#+sbcl (push #'close-foreign-libs sb-ext:*save-hooks*)
@ -42,6 +46,10 @@
;;; Register all loaded systems in the image, so that ASDF don't search for
;;; them again when doing --self-upgrade
;;;
;;; FIXME: this idea kept failing.
#|
(defun register-preloaded-system (system)
(unless (string= "pgloader" (asdf::coerce-name system))
(let ((version (slot-value system 'asdf::version)))
@ -59,3 +67,4 @@
(asdf:find-system system-name)))
when (typep o 'asdf:load-source-op)
append (asdf:input-files o c)))
|#

View File

@ -74,13 +74,16 @@
(incf task-count)))
(lp:task-handler-bind
((copy-init-error
(#+pgloader-image
(copy-init-error
#'(lambda (condition)
;; everything has been handled already
;; stop the other tasks and then transfer the control
(log-message :log "COPY INIT ERROR")
(lp:invoke-transfer-error condition)))
(on-error-stop
#'(lambda (condition)
;; everything has been handled already
(log-message :log "ON ERROR STOP")
(lp:kill-tasks :default)
(lp:invoke-transfer-error condition)))
#+pgloader-image
(error

View File

@ -42,7 +42,10 @@
(handler-case
(with-pgsql-connection (pgconn)
(setf pgsql-catalog
(fetch-pgsql-catalog (db-name pgconn) :table (target copy)))
(fetch-pgsql-catalog (db-name pgconn)
:table (target copy)
:variant (pgconn-variant pgconn)
:pgversion (pgconn-major-version pgconn)))
;; if the user didn't tell us the column list of the table, now is
;; a proper time to set it in the copy object
@ -95,6 +98,8 @@
(loop :for path-spec :in path-list
:count t
:do (let ((table-source (clone-copy-for copy path-spec)))
(when (and (header table-source) (null (fields table-source)))
(parse-header table-source))
(incf task-count
(copy-from table-source
:concurrency concurrency

View File

@ -46,6 +46,12 @@
(with-stats-collection ("Create SQL Types" :section :pre
:use-result-as-read t
:use-result-as-rows t)
;; some SQL types come from extensions (ip4r, hstore, etc)
(create-extensions catalog
:include-drop include-drop
:if-not-exists t
:client-min-messages :error)
(create-sqltypes catalog
:include-drop include-drop
:client-min-messages :error))
@ -64,9 +70,11 @@
;;
;; to be able to do that properly, get the constraints from
;; the pre-existing target database catalog
(let ((pgsql-catalog
(fetch-pgsql-catalog (db-name (target-db copy))
:source-catalog catalog)))
(let* ((pgversion (pgconn-major-version (target-db copy)))
(pgsql-catalog
(fetch-pgsql-catalog (db-name (target-db copy))
:source-catalog catalog
:pgversion pgversion)))
(merge-catalogs catalog pgsql-catalog))
;; now the foreign keys and only then the indexes, because a
@ -110,6 +118,20 @@
:include-drop include-drop
:client-min-messages :error))))
;; Citus Support
;;
;; We need a separate transaction here in some cases, because of the
;; distributed DDL support from Citus, to avoid the following error:
;;
;; ERROR Database error 25001: cannot establish a new connection for
;; placement 2299, since DDL has been executed on a connection that is in
;; use
;;
(when (catalog-distribution-rules catalog)
(with-pgsql-transaction (:pgconn (target-db copy))
(with-stats-collection ("Citus Distribute Tables" :section :pre)
(create-distributed-table (catalog-distribution-rules catalog)))))
;; log the catalog we just fetched and (maybe) merged
(log-message :data "CATALOG: ~s" catalog))
@ -207,9 +229,11 @@
:reset-sequences reset-sequences))))
(defun process-catalog (copy catalog &key alter-table alter-schema)
(defun process-catalog (copy catalog &key alter-table alter-schema distribute)
"Do all the PostgreSQL catalog tweaking here: casts, index WHERE clause
rewriting, pgloader level alter schema and alter table commands."
(log-message :info "Processing source catalogs")
;; cast the catalog into something PostgreSQL can work on
(cast catalog)
@ -223,7 +247,13 @@
;; if asked, now alter the catalog with given rules: the alter-table
;; keyword parameter actually contains a set of alter table rules.
(when alter-table
(alter-table catalog alter-table)))
(alter-table catalog alter-table))
;; we also support schema changes necessary for Citus distribution
(when distribute
(log-message :info "Applying distribution rules")
(setf (catalog-distribution-rules catalog)
(citus-distribute-schema catalog distribute))))
;;;
@ -249,6 +279,8 @@
(reset-sequences t)
(foreign-keys t)
(reindex nil)
(after-schema nil)
distribute
only-tables
including
excluding
@ -289,19 +321,33 @@
(copy-kernel (make-kernel worker-count))
(copy-channel (let ((lp:*kernel* copy-kernel)) (lp:make-channel)))
(catalog (fetch-metadata
copy
(make-catalog
:name (typecase (source-db copy)
(db-connection (db-name (source-db copy)))
(fd-connection (pathname-name
(fd-path (source-db copy))))))
:materialize-views materialize-views
:create-indexes create-indexes
:foreign-keys foreign-keys
:only-tables only-tables
:including including
:excluding excluding))
(catalog (handler-case
(fetch-metadata
copy
(make-catalog
:name (typecase (source-db copy)
(db-connection
(db-name (source-db copy)))
(fd-connection
(pathname-name
(fd-path (source-db copy))))))
:materialize-views materialize-views
:create-indexes create-indexes
:foreign-keys foreign-keys
:only-tables only-tables
:including including
:excluding excluding)
(mssql::mssql-error (e)
(log-message :error "MSSQL ERROR: ~a" e)
(log-message :log "You might need to review the FreeTDS protocol version in your freetds.conf file, see http://www.freetds.org/userguide/choosingtdsprotocol.htm")
(return-from copy-database))
#+pgloader-image
(condition (e)
(log-message :error
"~a: ~a"
(conn-type (source-db copy))
e)
(return-from copy-database))))
pkeys
(writers-count (make-hash-table :size (count-tables catalog)))
(max-indexes (when create-indexes
@ -317,23 +363,44 @@
;; apply catalog level transformations to support the database migration
;; that's CAST rules, index WHERE clause rewriting and ALTER commands
(process-catalog copy catalog
:alter-table alter-table
:alter-schema alter-schema)
(handler-case
(process-catalog copy catalog
:alter-table alter-table
:alter-schema alter-schema
:distribute distribute)
#+pgloader-image
((or citus-rule-table-not-found citus-rule-is-missing-from-list) (e)
(log-message :fatal "~a" e)
(return-from copy-database))
#+pgloader-image
(condition (e)
(log-message :fatal "Failed to process catalogs: ~a" e)
(return-from copy-database)))
;; if asked, first drop/create the tables on the PostgreSQL side
(handler-case
(prepare-pgsql-database copy
catalog
:truncate truncate
:create-tables create-tables
:create-schemas create-schemas
:drop-indexes drop-indexes
:drop-schema drop-schema
:include-drop include-drop
:foreign-keys foreign-keys
:set-table-oids set-table-oids
:materialize-views materialize-views)
(progn
(prepare-pgsql-database copy
catalog
:truncate truncate
:create-tables create-tables
:create-schemas create-schemas
:drop-indexes drop-indexes
:drop-schema drop-schema
:include-drop include-drop
:foreign-keys foreign-keys
:set-table-oids set-table-oids
:materialize-views materialize-views)
;; if there's an AFTER SCHEMA DO/EXECUTE command, now is the time
;; to run it.
(when after-schema
(pgloader.parser::execute-sql-code-block (target-db copy)
:pre
after-schema
"after schema")))
;;
;; In case some error happens in the preparatory transaction, we
;; need to stop now and refrain from trying to load the data into

View File

@ -51,6 +51,10 @@
("on-error-stop" :type boolean
:documentation "Refrain from handling errors properly.")
("no-ssl-cert-verification"
:type boolean
:documentation "Instruct OpenSSL to bypass verifying certificates.")
(("context" #\C) :type string :documentation "Command Context Variables")
(("with") :type string :list t :optional t
@ -197,6 +201,7 @@
client-min-messages log-min-messages summary
root-dir self-upgrade
with set field cast type encoding before after
no-ssl-cert-verification
regress)
options
@ -238,6 +243,11 @@
;; Then process options
(when debug
(format t "pgloader version ~a~%" *version-string*)
#+pgloader-image
(format t "compiled with ~a ~a~%"
(lisp-implementation-type)
(lisp-implementation-version))
#+sbcl
(format t "sb-impl::*default-external-format* ~s~%"
sb-impl::*default-external-format*)
@ -249,11 +259,15 @@
(lisp-implementation-type)
(lisp-implementation-version)))
(when help
(when (or help)
(usage argv))
(when (or help version) (uiop:quit +os-code-success+))
(when (null arguments)
(usage argv)
(uiop:quit +os-code-error-usage+))
(when list-encodings
(show-encodings)
(uiop:quit +os-code-success+))
@ -316,6 +330,9 @@
(uiop:native-namestring *log-filename*))
(log-message :log "Data errors in '~a'~%" *root-dir*)
(when no-ssl-cert-verification
(setf cl+ssl:*make-ssl-client-stream-verify-default* nil))
(cond
((and regress (= 1 (length arguments)))
(process-regression-test (first arguments)))

View File

@ -93,8 +93,9 @@
(:syb-int2 (unsigned-to-signed (mem-ref data :unsigned-int) 2))
(:syb-int4 (unsigned-to-signed (mem-ref data :unsigned-int) 4))
(:syb-int8 (mem-ref data :int8))
(:syb-real (mem-ref data :float))
(:syb-flt8 (mem-ref data :double))
((:syb-datetime :syb-datetime4 :syb-msdate)
((:syb-datetime :syb-datetime4 :syb-msdate :syb-mstime)
(with-foreign-pointer (%buf +numeric-buf-sz+)
(let ((count
(%dbconvert %dbproc

View File

@ -49,8 +49,9 @@
#:catalog
#:schema
#:table
#:extension
#:sqltype
#:table
#:column
#:index
#:fkey
@ -76,12 +77,15 @@
#:catalog-name
#:catalog-schema-list
#:catalog-types-without-btree
#:catalog-distribution-rules
#:schema-name
#:schema-catalog
#:schema-source-name
#:schema-table-list
#:schema-view-list
#:schema-extension-list
#:schema-sqltype-list
#:schema-in-search-path
#:table-name
@ -90,17 +94,23 @@
#:table-oid
#:table-comment
#:table-storage-parameter-list
#:table-tablespace
#:table-field-list
#:table-column-list
#:table-index-list
#:table-fkey-list
#:table-trigger-list
#:table-citus-rule
#:extension-name
#:extension-schema
#:sqltype-name
#:sqltype-schema
#:sqltype-type
#:sqltype-source-def
#:sqltype-extra
#:sqltype-extension
#:column-name
#:column-type-name
@ -110,6 +120,7 @@
#:column-comment
#:column-transform
#:column-extra
#:column-transform-default
#:index-name
#:index-type
@ -152,9 +163,15 @@
#:table-list
#:view-list
#:extension-list
#:sqltype-list
#:add-schema
#:find-schema
#:maybe-add-schema
#:add-extension
#:find-extension
#:maybe-add-extension
#:add-sqltype
#:add-table
#:find-table
#:maybe-add-table
@ -174,6 +191,7 @@
#:count-indexes
#:count-fkeys
#:max-indexes-per-table
#:field-name
#:push-to-end
#:with-schema
@ -194,6 +212,17 @@
#:match-rule-action
#:match-rule-args
#:citus-reference-rule
#:citus-distributed-rule
#:make-citus-reference-rule
#:make-citus-distributed-rule
#:citus-reference-rule-rule
#:citus-distributed-rule-table
#:citus-distributed-rule-using
#:citus-distributed-rule-from
#:citus-format-sql-select
#:citus-backfill-table-p
#:format-table-name))
(defpackage #:pgloader.state
@ -260,7 +289,30 @@
(defpackage #:pgloader.queries
(:use #:cl #:pgloader.params)
(:export #:*queries*
#:sql))
#:sql
#:sql-url-for-variant))
(defpackage #:pgloader.citus
(:use #:cl
#:pgloader.params
#:pgloader.catalog
#:pgloader.quoting
#:pgloader.monitor)
(:export #:citus-distribute-schema
#:citus-format-sql-select
#:citus-backfill-table-p
#:citus-rule-table-not-found
#:citus-rule-is-missing-from-list
#:citus-reference-rule
#:citus-reference-rule-p
#:citus-reference-rule-table
#:citus-distributed-rule
#:citus-distributed-rule-p
#:citus-distributed-rule-table
#:citus-distributed-rule-using
#:citus-distributed-rule-from))
(defpackage #:pgloader.utils
(:use #:cl
@ -269,7 +321,8 @@
#:pgloader.quoting
#:pgloader.catalog
#:pgloader.monitor
#:pgloader.state)
#:pgloader.state
#:pgloader.citus)
(:import-from #:alexandria
#:appendf
#:read-file-into-string)
@ -300,7 +353,8 @@
(cl-user::export-inherited-symbols "pgloader.quoting" "pgloader.utils")
(cl-user::export-inherited-symbols "pgloader.catalog" "pgloader.utils")
(cl-user::export-inherited-symbols "pgloader.monitor" "pgloader.utils")
(cl-user::export-inherited-symbols "pgloader.state" "pgloader.utils"))
(cl-user::export-inherited-symbols "pgloader.state" "pgloader.utils")
(cl-user::export-inherited-symbols "pgloader.citus" "pgloader.utils"))
;;
@ -389,6 +443,7 @@
#:truncate-tables
#:set-table-oids
#:create-extensions
#:create-sqltypes
#:create-schemas
#:add-to-search-path
@ -408,6 +463,11 @@
#:reset-sequences
#:comment-on-tables-and-columns
#:create-distributed-table
#:make-including-expr-from-catalog
#:make-including-expr-from-view-names
;; finalizing catalogs support (redshift and other variants)
#:finalize-catalogs
#:adjust-data-types
@ -417,6 +477,7 @@
#:process-index-definitions
;; postgresql introspection queries
#:list-all-sqltypes
#:list-all-columns
#:list-all-indexes
#:list-all-fkeys
@ -674,6 +735,14 @@
#:*mysql-default-cast-rules*
#:with-mysql-connection))
(defpackage #:pgloader.source.pgsql
(:use #:cl
#:pgloader.params #:pgloader.utils #:pgloader.connection
#:pgloader.sources #:pgloader.pgsql #:pgloader.catalog)
(:import-from #:pgloader.transforms #:precision #:scale)
(:export #:copy-pgsql
#:*pgsql-default-cast-rules*))
(defpackage #:pgloader.source.sqlite
(:use #:cl
#:pgloader.params #:pgloader.utils #:pgloader.connection
@ -763,6 +832,9 @@
(:import-from #:pgloader.source.copy
#:copy-copy
#:copy-connection)
(:import-from #:pgloader.source.pgsql
#:copy-pgsql
#:*pgsql-default-cast-rules*)
(:import-from #:pgloader.source.mysql
#:copy-mysql
#:mysql-connection
@ -785,6 +857,7 @@
(:export #:parse-commands
#:parse-commands-from-file
#:initialize-context
#:execute-sql-code-block
;; tools to enable complete cli parsing in main.lisp
#:process-relative-pathnames

View File

@ -40,11 +40,11 @@
(in-package :pgloader.params)
(defparameter *release* nil
(defparameter *release* t
"non-nil when this build is a release build.")
(defparameter *major-version* "3.5")
(defparameter *minor-version* "2")
(defparameter *major-version* "3.6")
(defparameter *minor-version* "1")
(defun git-hash ()
"Return the current abbreviated git hash of the development tree."

View File

@ -47,9 +47,14 @@
(bind (((_ _ parameters _) stmt))
(list #'pgloader.catalog::alter-table-set-storage-parameters parameters))))
(defrule set-tablespace (and kw-set kw-tablespace quoted-namestring)
(:lambda (stmt)
(list #'pgloader.catalog::alter-table-set-tablespace (third stmt))))
(defrule alter-table-action (or rename-to
set-schema
set-storage-parameters))
set-storage-parameters
set-tablespace))
(defrule alter-table-command (and alter-table-names-matching
(? in-schema)

View File

@ -134,7 +134,8 @@
option-fields-terminated-by
option-trim-unquoted-blanks
option-keep-unquoted-blanks
option-csv-escape-mode))
option-csv-escape-mode
option-null-if))
(defrule csv-options (and kw-with
(and csv-option (* (and comma csv-option))))
@ -231,11 +232,6 @@
(destructuring-bind (field1 fields) source
(list* field1 fields))))
(defrule open-paren (and ignore-whitespace #\( ignore-whitespace)
(:constant :open-paren))
(defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
(:constant :close-paren))
(defrule having-fields (and kw-having kw-fields) (:constant nil))
(defrule csv-source-field-list (and (? having-fields)
@ -434,26 +430,35 @@
(progn
,(sql-code-block pg-db-conn :pre before "before load")
(let ((on-error-stop (getf ',options :on-error-stop))
(truncate (getf ',options :truncate))
(disable-triggers (getf ',options :disable-triggers))
(drop-indexes (getf ',options :drop-indexes))
(max-parallel-create-index (getf ',options :max-parallel-create-index))
(source
(make-instance 'copy-csv
:target-db ,pg-db-conn
:source source-db
:target (create-table ',target-table-name)
:encoding ,encoding
:fields ',fields
:columns ',columns
,@(remove-batch-control-option
options :extras '(:worker-count
:concurrency
:truncate
:drop-indexes
:disable-triggers
:max-parallel-create-index)))))
(let* ((on-error-stop (getf ',options :on-error-stop))
(truncate (getf ',options :truncate))
(disable-triggers (getf ',options :disable-triggers))
(drop-indexes (getf ',options :drop-indexes))
(max-parallel-create-index (getf ',options :max-parallel-create-index))
(fields
',(let ((null-as (getf options :null-as)))
(if null-as
(mapcar (lambda (field)
(if (member :null-as field) field
(append field (list :null-as null-as))))
fields)
fields)))
(source
(make-instance 'copy-csv
:target-db ,pg-db-conn
:source source-db
:target (create-table ',target-table-name)
:encoding ,encoding
:fields fields
:columns ',columns
,@(remove-batch-control-option
options :extras '(:null-as
:worker-count
:concurrency
:truncate
:drop-indexes
:disable-triggers
:max-parallel-create-index)))))
(copy-database source
,@ (when worker-count
(list :worker-count worker-count))

View File

@ -25,7 +25,7 @@
(defrule doubled-at-sign (and "@@") (:constant "@"))
(defrule doubled-colon (and "::") (:constant ":"))
(defrule password (+ (or (not "@") doubled-at-sign)) (:text t))
(defrule username (and (or #\_ (alpha-char-p character))
(defrule username (and (or #\_ (alpha-char-p character) (digit-char-p character))
(* (or (alpha-char-p character)
(digit-char-p character)
#\.
@ -87,10 +87,11 @@
(append (list :host (when host (process-hostname host)))
port))))
(defrule dsn-dbname (and "/" (? maybe-quoted-namestring))
(:destructure (slash dbname)
(declare (ignore slash))
(list :dbname dbname)))
(defrule dsn-dbname (and "/" (? (* (or (alpha-char-p character)
(digit-char-p character)
punct))))
(:lambda (dbn)
(list :dbname (text (second dbn)))))
(defrule dsn-option-ssl-disable "disable" (:constant :no))
(defrule dsn-option-ssl-allow "allow" (:constant :try))

View File

@ -0,0 +1,73 @@
#|
distribute billers using id
distribute bills using biller_id
distribute receivable_accounts using biller_id
distribute payments using biller_id
distribute splits using biller_id
from receivable_accounts
distribute ach_accounts as reference table
|#
(in-package :pgloader.parser)
(defun create-table-from-dsn-table-name (dsn-table-name
&optional (schema-name "public"))
(let ((table (create-table (cdr (second dsn-table-name)))))
(unless (table-schema table)
(setf (table-schema table)
(make-schema :catalog nil
:source-name schema-name
:name (apply-identifier-case schema-name))))
table))
(defrule distribute-reference (and kw-distribute dsn-table-name
kw-as kw-reference kw-table)
(:lambda (d-r)
(make-citus-reference-rule :table (create-table-from-dsn-table-name d-r))))
(defrule distribute-using (and kw-distribute dsn-table-name
kw-using maybe-quoted-namestring)
(:lambda (d-u)
(make-citus-distributed-rule :table (create-table-from-dsn-table-name d-u)
:using (make-column :name (fourth d-u)))))
;;;
;;; The namestring rule allows for commas and we use them as a separator
;;; here, so we need to have our own table name parsing. That's a bummer,
;;; maybe we should revisit the whole table names parsing code?
;;;
(defrule distribute-from-tablename
(or double-quoted-namestring
quoted-namestring
(and (or #\_ (alpha-char-p character))
(* (or (alpha-char-p character)
(digit-char-p character)))))
(:text t))
(defrule maybe-qualified-dist-from-table-name
(and distribute-from-tablename (? (and "." distribute-from-tablename)))
(:lambda (name)
(if (second name)
(cons (first name) (second (second name)))
(cons "public" (first name)))))
(defrule distribute-from-list (+ (and maybe-qualified-dist-from-table-name
(? (and "," ignore-whitespace))))
(:lambda (from-list)
(mapcar #'first from-list)))
(defrule distribute-using-from (and kw-distribute dsn-table-name
kw-using maybe-quoted-namestring
kw-from distribute-from-list)
(:lambda (d-u-f)
(make-citus-distributed-rule :table (create-table-from-dsn-table-name d-u-f)
:using (make-column :name (fourth d-u-f))
:from (mapcar #'create-table (sixth d-u-f)))))
(defrule distribute-commands (+ (or distribute-using-from
distribute-using
distribute-reference))
(:lambda (commands)
(cons :distribute commands)))

View File

@ -26,6 +26,7 @@
(def-keyword-rule "with")
(def-keyword-rule "when")
(def-keyword-rule "set")
(def-keyword-rule "tablespace")
(def-keyword-rule "database")
(def-keyword-rule "messages")
(def-keyword-rule "matches")
@ -103,6 +104,9 @@
(def-keyword-rule "trim")
(def-keyword-rule "unquoted")
(def-keyword-rule "delimiter")
;; option for Citus support
(def-keyword-rule "distribute")
(def-keyword-rule "reference")
;; option for MySQL imports
(def-keyword-rule "schema")
(def-keyword-rule "schemas")

View File

@ -6,11 +6,11 @@
;;;
(in-package #:pgloader.parser)
(defrule view-name (and (alpha-char-p character)
(* (or (alpha-char-p character)
(digit-char-p character)
#\_)))
(:text t))
(defrule view-name (or qualified-table-name maybe-quoted-namestring)
(:lambda (vn)
(etypecase vn
(cons vn)
(string (cons nil vn)))))
(defrule view-sql (and kw-as dollar-quoted)
(:destructure (as sql) (declare (ignore as)) sql))
@ -18,7 +18,7 @@
(defrule view-definition (and view-name (? view-sql))
(:destructure (name sql) (cons name sql)))
(defrule another-view-definition (and comma view-definition)
(defrule another-view-definition (and comma-separator view-definition)
(:lambda (source)
(bind (((_ view) source)) view)))

View File

@ -83,6 +83,8 @@
casts
alter-schema
alter-table
materialize-views
distribute-commands
before-load
after-load
including-like-in-schema
@ -139,7 +141,8 @@
(defun lisp-code-for-loading-from-mssql (ms-db-conn pg-db-conn
&key
gucs mssql-gucs
casts before after options
casts before after
options distribute views
alter-schema alter-table
including excluding
&allow-other-keys)
@ -167,6 +170,8 @@
:excluding ',excluding
:alter-schema ',alter-schema
:alter-table ',alter-table
:materialize-views ',views
:distribute ',distribute
:set-table-oids t
:on-error-stop on-error-stop
,@(remove-batch-control-option options))
@ -177,8 +182,8 @@
(:lambda (source)
(bind (((ms-db-uri pg-db-uri
&key
gucs mssql-gucs casts before after
alter-schema alter-table
gucs mssql-gucs casts views before after
alter-schema alter-table distribute
including excluding options)
source))
(cond (*dry-run*
@ -188,10 +193,12 @@
:gucs gucs
:mssql-gucs mssql-gucs
:casts casts
:views views
:before before
:after after
:alter-schema alter-schema
:alter-table alter-table
:distribute distribute
:options options
:including including
:excluding excluding))))))

View File

@ -89,15 +89,13 @@
excluding-matching
decoding-tables-as
before-load
after-load))
after-load
distribute-commands))
(:lambda (clauses-list)
(alexandria:alist-plist clauses-list)))
(defrule mysql-prefix "mysql://" (:constant (list :type :mysql)))
(defrule mysql-dsn-dbname (and "/" maybe-quoted-namestring)
(:lambda (m-d-d) (list :dbname (text (second m-d-d)))))
(defrule mysql-dsn-option-usessl-true "true" (:constant :yes))
(defrule mysql-dsn-option-usessl-false "false" (:constant :no))
@ -123,7 +121,7 @@
(defrule mysql-uri (and mysql-prefix
(? dsn-user-password)
(? dsn-hostname)
mysql-dsn-dbname
dsn-dbname
(? mysql-dsn-options))
(:lambda (uri)
(destructuring-bind (&key type
@ -167,7 +165,7 @@
&key
gucs mysql-gucs
casts views before after options
alter-table alter-schema
alter-table alter-schema distribute
((:including incl))
((:excluding excl))
((:decoding decoding-as))
@ -194,6 +192,7 @@
:materialize-views ',views
:alter-table ',alter-table
:alter-schema ',alter-schema
:distribute ',distribute
:set-table-oids t
:on-error-stop on-error-stop
,@(remove-batch-control-option options))
@ -206,7 +205,7 @@
pg-db-uri
&key
gucs mysql-gucs casts views before after options
alter-table alter-schema
alter-table alter-schema distribute
including excluding decoding)
source
(cond (*dry-run*
@ -222,6 +221,7 @@
:options options
:alter-table alter-table
:alter-schema alter-schema
:distribute distribute
:including including
:excluding excluding
:decoding decoding))))))

View File

@ -17,6 +17,7 @@
load-copy-file
load-dbf-file
load-ixf-file
load-pgsql-database
load-mysql-database
load-mssql-database
load-sqlite-database
@ -160,12 +161,12 @@
(declare (ignore abs paths no-path-p))
(let ((dotted-parts (reverse (sq:split-sequence #\. filename))))
(when (<= 2 (length dotted-parts))
(destructuring-bind (extension name-or-ext &rest parts)
(destructuring-bind (ext name-or-ext &rest parts)
dotted-parts
(declare (ignore parts))
(if (string-equal "tar" name-or-ext) :archive
(loop :for (type . extensions) :in *data-source-filename-extensions*
:when (member extension extensions :test #'string-equal)
:when (member ext extensions :test #'string-equal)
:return type)))))))
(defvar *parse-rule-for-source-types*
@ -266,6 +267,7 @@
(:dbf 'dbf-option)
(:ixf 'ixf-option)
(:sqlite 'sqlite-option)
(:pgsql 'pgsql-option)
(:mysql 'mysql-option)
(:mssql 'mysql-option))
option))))

View File

@ -0,0 +1,171 @@
;;;
;;; Parse the pgloader commands grammar
;;;
(in-package :pgloader.parser)
;;;
;;; PostgreSQL options
;;;
(defrule pgsql-option (or option-on-error-stop
option-on-error-resume-next
option-workers
option-concurrency
option-batch-rows
option-batch-size
option-prefetch-rows
option-max-parallel-create-index
option-reindex
option-truncate
option-disable-triggers
option-data-only
option-schema-only
option-include-drop
option-drop-schema
option-create-tables
option-create-indexes
option-index-names
option-reset-sequences
option-foreign-keys
option-identifiers-case))
(defrule pgsql-options (and kw-with
(and pgsql-option (* (and comma pgsql-option))))
(:function flatten-option-list))
;;;
;;; Including only some tables or excluding some others
;;;
(defrule including-matching-in-schema-filter
(and kw-including kw-only kw-table kw-names kw-matching filter-list-matching
kw-in kw-schema quoted-namestring)
(:lambda (source)
(bind (((_ _ _ _ _ filter-list _ _ schema) source))
(cons schema filter-list))))
(defrule including-matching-in-schema
(and including-matching-in-schema-filter
(* including-matching-in-schema-filter))
(:lambda (source)
(destructuring-bind (inc1 incs) source
(cons :including (list* inc1 incs)))))
(defrule excluding-matching-in-schema-filter
(and kw-excluding kw-table kw-names kw-matching filter-list-matching
kw-in kw-schema quoted-namestring)
(:lambda (source)
(bind (((_ _ _ _ filter-list _ _ schema) source))
(cons schema filter-list))))
(defrule excluding-matching-in-schema
(and excluding-matching-in-schema-filter
(* excluding-matching-in-schema-filter))
(:lambda (source)
(destructuring-bind (excl1 excls) source
(cons :excluding (list* excl1 excls)))))
;;;
;;; Allow clauses to appear in any order
;;;
(defrule load-pgsql-optional-clauses (* (or pgsql-options
gucs
casts
alter-table
alter-schema
materialize-views
including-matching-in-schema
excluding-matching-in-schema
decoding-tables-as
before-load
after-schema
after-load
distribute-commands))
(:lambda (clauses-list)
(alexandria:alist-plist clauses-list)))
(defrule pgsql-source (and kw-load kw-database kw-from pgsql-uri)
(:lambda (source) (bind (((_ _ _ uri) source)) uri)))
(defrule load-pgsql-command (and pgsql-source target
load-pgsql-optional-clauses)
(:lambda (command)
(destructuring-bind (source target clauses) command
`(,source ,target ,@clauses))))
;;; LOAD DATABASE FROM pgsql://
(defun lisp-code-for-pgsql-dry-run (pg-src-db-conn pg-dst-db-conn)
`(lambda ()
(log-message :log "DRY RUN, only checking connections.")
(check-connection ,pg-src-db-conn)
(check-connection ,pg-dst-db-conn)))
(defun lisp-code-for-loading-from-pgsql (pg-src-db-conn pg-dst-db-conn
&key
gucs
casts options
before after after-schema
alter-table alter-schema
((:including incl))
((:excluding excl))
views
distribute
&allow-other-keys)
`(lambda ()
(let* ((*default-cast-rules* ',*pgsql-default-cast-rules*)
(*cast-rules* ',casts)
(*identifier-case* :quote)
(on-error-stop (getf ',options :on-error-stop t))
,@(pgsql-connection-bindings pg-dst-db-conn gucs)
,@(batch-control-bindings options)
(source
(make-instance 'copy-pgsql
:target-db ,pg-dst-db-conn
:source-db ,pg-src-db-conn)))
,(sql-code-block pg-dst-db-conn :pre before "before load")
(copy-database source
:including ',incl
:excluding ',excl
:materialize-views ',views
:alter-table ',alter-table
:alter-schema ',alter-schema
:index-names :preserve
:set-table-oids t
:on-error-stop on-error-stop
:after-schema ',after-schema
:distribute ',distribute
,@(remove-batch-control-option options))
,(sql-code-block pg-dst-db-conn :post after "after load"))))
(defrule load-pgsql-database load-pgsql-command
(:lambda (source)
(destructuring-bind (pg-src-db-uri
pg-dst-db-uri
&key
gucs casts before after after-schema options
alter-table alter-schema views distribute
including excluding decoding)
source
(cond (*dry-run*
(lisp-code-for-pgsql-dry-run pg-src-db-uri pg-dst-db-uri))
(t
(lisp-code-for-loading-from-pgsql pg-src-db-uri pg-dst-db-uri
:gucs gucs
:casts casts
:views views
:before before
:after after
:after-schema after-schema
:options options
:alter-table alter-table
:alter-schema alter-schema
:distribute distribute
:including including
:excluding excluding
:decoding decoding))))))

View File

@ -58,17 +58,26 @@
(bind (((_ _ sql-list-of-list) after))
(cons :after (apply #'append sql-list-of-list)))))
(defrule after-schema (and kw-after kw-create kw-schema
(+ (or load-do load-execute)))
(:lambda (after)
(bind (((_ _ _ sql-list-of-list) after))
(cons :after-schema (apply #'append sql-list-of-list)))))
(defun sql-code-block (pgconn section commands label)
"Return lisp code to run COMMANDS against DBNAME, updating STATE."
(when commands
`(with-stats-collection (,label
:dbname ,(db-name pgconn)
:section ,section
:use-result-as-read t
:use-result-as-rows t)
(log-message :notice "Executing SQL block for ~a" ,label)
(with-pgsql-transaction (:pgconn ,pgconn)
(loop for command in ',commands
do
(pgsql-execute command :client-min-messages :error)
counting command)))))
`(execute-sql-code-block ,pgconn ,section ',commands ,label)))
(defun execute-sql-code-block (pgconn section commands label)
"Exceute given SQL commands."
(with-stats-collection (label
:dbname (db-name pgconn)
:section section
:use-result-as-read t
:use-result-as-rows t)
(log-message :notice "Executing SQL block for ~a" label)
(with-pgsql-transaction (:pgconn pgconn)
(loop :for command :in commands
:do (pgsql-execute command :client-min-messages :error)
:counting command))))

View File

@ -30,7 +30,7 @@
(defrule ignore-whitespace (* whitespace)
(:constant nil))
(defrule punct (or #\, #\- #\_ #\$ #\%)
(defrule punct (or #\- #\_ #\$ #\%)
(:text t))
(defrule namestring (and (or #\_ (alpha-char-p character))
@ -57,3 +57,11 @@
quoted-namestring
namestring))
(defrule open-paren (and ignore-whitespace #\( ignore-whitespace)
(:constant :open-paren))
(defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
(:constant :close-paren))
(defrule comma-separator (and ignore-whitespace #\, ignore-whitespace)
(:constant ","))

View File

@ -38,11 +38,12 @@
:for ragged-end := (when end
(cond ((member name '(:msecs :usecs))
;; take any number of digits up to
;; the specified field lenght
;; the specified field length
;; (less digits are allowed)
(min end (length date-string)))
(when (<= start (length date-string))
(min end (length date-string))))
(t end)))
:when (and start end)
:when (and start ragged-end)
:append (list name (subseq date-string start ragged-end)))
(if (or (string= year "0000")
(string= month "00")

View File

@ -14,16 +14,22 @@
(defrule pgpass-escaped-char (and #\\ (or #\\ #\:))
(:lambda (c) (second c)))
(defrule pgpass-ipv6-hostname (and #\[
(+ (or (digit-char-p character) ":"))
#\])
(:lambda (ipv6) (text (second ipv6))))
(defrule pgpass-entry (or "*"
(+ (or pgpass-escaped-char
(+ (or pgpass-ipv6-hostname
pgpass-escaped-char
(pgpass-char-p character))))
(:lambda (e) (text e)))
(defrule pgpass-line (and pgpass-entry #\: pgpass-entry #\:
(defrule pgpass-line (and (? pgpass-entry) #\: pgpass-entry #\:
pgpass-entry #\: pgpass-entry #\:
(? pgpass-entry))
(:lambda (pl)
(make-pgpass :hostname (first pl)
(make-pgpass :hostname (or (first pl) "localhost")
:port (third pl)
:database (fifth pl)
:username (seventh pl)

View File

@ -15,14 +15,16 @@
(? " "))
(:lambda (noise) (second noise)))
(defrule sqlite-single-typemod (and #\( (+ (digit-char-p character)) #\))
(defrule sqlite-single-typemod (and open-paren
(+ (digit-char-p character))
close-paren)
(:lambda (st) (cons (parse-integer (text (second st))) nil)))
(defrule sqlite-double-typemod (and #\(
(defrule sqlite-double-typemod (and open-paren
(+ (digit-char-p character))
(* (or #\, #\Space))
comma-separator
(+ (digit-char-p character))
#\))
close-paren)
(:lambda (dt) (cons (parse-integer (text (second dt)))
(parse-integer (text (fourth dt))))))
@ -31,9 +33,9 @@
(defrule sqlite-type-name (and (* extra-qualifiers)
(+ (alpha-char-p character))
(* extra-qualifiers)
(* #\Space)
ignore-whitespace
(? sqlite-typemod)
(* #\Space)
ignore-whitespace
(* extra-qualifiers))
(:lambda (tn) (list (text (second tn))
(fifth tn)

View File

@ -118,7 +118,19 @@
(uiop:native-namestring crt-file)))
(pomo::*ssl-key-file* (when (and (ssl-enable-p pgconn)
(probe-file key-file))
(uiop:native-namestring key-file))))
(uiop:native-namestring key-file)))
;;
;; It's ok to set :verify-mode to NONE here because
;; cl+ssl:*make-ssl-client-stream-verify-default* defaults to
;; :require and takes precedence.
;;
;; Only when --no-ssl-cert-verification is passed as a command line
;; option do we set cl+ssl:*make-ssl-client-stream-verify-default*
;; to NIL, then allowing the NONE behaviour set here.
;;
(ssl-context
(CL+SSL:MAKE-CONTEXT :disabled-protocols nil
:verify-mode CL+SSL:+SSL-VERIFY-NONE+)))
(flet ((connect (pgconn username)
(handler-case
;; in some cases (client_min_messages set to debug5
@ -128,20 +140,29 @@
#'(lambda (w)
(log-message :warning "~a" w)
(muffle-warning))))
(pomo:connect (db-name pgconn)
(or username (db-user pgconn))
(db-pass pgconn)
(let ((host (db-host pgconn)))
(if (and (consp host) (eq :unix (car host)))
:unix
host))
:port (db-port pgconn)
:use-ssl (or (pgconn-use-ssl pgconn) :no)))
(CL+SSL:WITH-GLOBAL-CONTEXT (ssl-context :auto-free-p t)
(pomo:connect (db-name pgconn)
(or username (db-user pgconn))
(db-pass pgconn)
(let ((host (db-host pgconn)))
(if (and (consp host) (eq :unix (car host)))
:unix
host))
:port (db-port pgconn)
:use-ssl (or (pgconn-use-ssl pgconn) :no))))
((or too-many-connections configuration-limit-exceeded) (e)
(log-message :error
"Failed to connect to ~a: ~a; will try again in ~fs"
pgconn e *retry-connect-delay*)
(sleep *retry-connect-delay*)))))
(sleep *retry-connect-delay*))
(CL+SSL:SSL-ERROR-VERIFY (e)
(log-message :error
"Connecting to PostgreSQL ~a: ~a"
(db-host pgconn) e)
(log-message :log "You may try --no-ssl-cert-verification")
(error e)))))
(loop :while (null (conn-handle pgconn))
:repeat *retry-connect-times*
:do (setf (conn-handle pgconn) (connect pgconn username))))
@ -389,10 +410,11 @@
;;;
;;; PostgreSQL 8.0.2 on i686-pc-linux-gnu, compiled by GCC gcc (GCC) 3.4.2 20041017 (Red Hat 3.4.2-6.fc3), Redshift 1.0.2058
;;; PostgreSQL 10.1 on x86_64-apple-darwin14.5.0, compiled by Apple LLVM version 7.0.0 (clang-700.1.76), 64-bit
;;; PostgreSQL 10.6 (Ubuntu 10.6-1.pgdg14.04+1) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 4.8.4-2ubuntu1~14.04.4) 4.8.4, 64-bit
(defun parse-postgresql-version-string (version-string)
"Parse PostgreSQL select version() output."
(cl-ppcre:register-groups-bind (full-version maybe-variant)
("PostgreSQL ([0-9.]+) on .*, [^,]+, (.*)" version-string)
("PostgreSQL ([0-9.]+) [^,]+, [^,]+, (.*)" version-string)
(let* ((version-dots (split-sequence:split-sequence #\. full-version))
(major-version (if (= 3 (length version-dots))
(format nil "~a.~a"

View File

@ -13,17 +13,7 @@
include-drop
(client-min-messages :notice))
"Create the needed data types for given CATALOG."
(let ((sqltype-list))
;; build the sqltype list
(loop :for table :in (append (table-list catalog)
(view-list catalog))
:do (loop :for column :in (table-column-list table)
:do (when (typep (column-type-name column) 'sqltype)
(pushnew (column-type-name column) sqltype-list
:test #'string-equal
:key #'sqltype-name))))
;; now create the types
(let ((sqltype-list (sqltype-list catalog)))
(loop :for sqltype :in sqltype-list
:when include-drop
:count t
@ -114,6 +104,19 @@
:log-level log-level
:client-min-messages client-min-messages)))))
(defun create-extensions (catalog
&key
if-not-exists
include-drop
(client-min-messages :notice))
"Create all extensions from the given database CATALOG."
(let ((sql
(loop :for extension :in (extension-list catalog)
:when include-drop
:collect (format-drop-sql extension :if-exists t :cascade t)
:collect (format-create-sql extension :if-not-exists if-not-exists))))
(pgsql-execute sql :client-min-messages client-min-messages)))
(defun create-tables (catalog
&key
if-not-exists
@ -150,7 +153,7 @@
:collect (format-create-sql (trigger-procedure trigger))
:collect (format-create-sql trigger)))))
(pgsql-execute-with-timing section label sql-list
:log-level :log
:log-level :sql
:client-min-messages client-min-messages)))
@ -462,3 +465,14 @@ $$; " tables)))
(column-name column)
quote (column-comment column) quote)))))
(pgsql-execute-with-timing section label sql-list)))
;;;
;;; Citus Distribution support
;;;
(defun create-distributed-table (distribute-rules)
(let ((citus-sql
(loop :for rule :in distribute-rules
:collect (format-create-sql rule))))
(pgsql-execute citus-sql)))

View File

@ -0,0 +1,20 @@
;;;
;;; PostgreSQL Citus support for calling functions.
;;;
(in-package :pgloader.pgsql)
(defmethod format-create-sql ((rule citus-reference-rule)
&key (stream nil) if-not-exists)
(declare (ignore if-not-exists))
(format stream "SELECT create_reference_table('~a');"
(format-table-name (citus-reference-rule-table rule))))
(defmethod format-create-sql ((rule citus-distributed-rule)
&key (stream nil) if-not-exists)
(declare (ignore if-not-exists))
(let* ((rule-table (citus-distributed-rule-table rule))
(rule-col-name (column-name (citus-distributed-rule-using rule))))
(format stream "SELECT create_distributed_table('~a', '~a');"
(format-table-name rule-table)
(apply-identifier-case rule-col-name))))

View File

@ -38,6 +38,25 @@
(sqltype-name sqltype)
cascade))
;;;
;;; Extensions
;;;
(defmethod format-create-sql ((extension extension)
&key (stream nil) if-not-exists)
(format stream "CREATE EXTENSION~:[~; IF NOT EXISTS~] ~a WITH SCHEMA ~a;"
if-not-exists
(extension-name extension)
(schema-name (extension-schema extension))))
(defmethod format-drop-sql ((extension extension)
&key (stream nil) cascade if-exists)
(format stream "DROP EXTENSION~:[~; IF EXISTS~] ~a~@[ CASCADE~];"
if-exists
(extension-name extension)
cascade))
;;;
;;; Tables
@ -73,6 +92,9 @@
(alexandria:alist-plist
(table-storage-parameter-list table))))
(when (table-tablespace table)
(format s "~%TABLESPACE ~a" (table-tablespace table)))
(format s ";~%"))))
(defmethod format-drop-sql ((table table) &key (stream nil) cascade (if-exists t))
@ -126,26 +148,30 @@
"Common normalized default values and their PostgreSQL spelling.")
(defmethod format-default-value ((column column) &key (stream nil))
(let* ((default (column-default column))
(clean-default (cdr (assoc default *pgsql-default-values*)))
(transform (column-transform column)))
(or clean-default
(if transform
(let* ((transformed-default
(handler-case
(funcall transform default)
(condition (c)
(log-message :warning
"Failed to transform default value ~s: ~a"
default c)
;; can't transform: return nil
nil)))
(transformed-column
(make-column :default transformed-default)))
(format-default-value transformed-column))
(if default
(ensure-quoted default #\')
(format stream "NULL"))))))
(if (column-transform-default column)
(let* ((default (column-default column))
(clean-default (cdr (assoc default *pgsql-default-values*)))
(transform (column-transform column)))
(or clean-default
(if transform
(let* ((transformed-default
(handler-case
(funcall transform default)
(condition (c)
(log-message :warning
"Failed to transform default value ~s: ~a"
default c)
;; can't transform: return nil
nil)))
(transformed-column
(make-column :default transformed-default)))
(format-default-value transformed-column))
(if default
(ensure-quoted default #\')
(format stream "NULL")))))
;; else, when column-transform-default is nil:
(column-default column)))
;;;
@ -181,8 +207,9 @@
;; don't use the index schema name here, PostgreSQL doesn't
;; like it, might be implicit from the table's schema
;; itself...
"ALTER TABLE ~a ADD ~a USING INDEX ~a;"
"ALTER TABLE ~a ADD~@[ CONSTRAINT ~a~] ~a USING INDEX ~a;"
(format-table-name table)
(index-conname index)
(cond ((index-primary index) "PRIMARY KEY")
((index-unique index) "UNIQUE"))
index-name)))

View File

@ -15,6 +15,8 @@
(in-package #:pgloader.pgsql)
(defun finalize-catalogs (catalog variant)
"Finalize the target PostgreSQL catalogs, dumbing down datatypes when the
target actually is Redshift rather than core PostgreSQL."
;;
;; For Core PostgreSQL, we also want to find data types names that have
;; no Btree support and fetch alternatives. This allows for supporting
@ -30,7 +32,9 @@
;;
(adjust-data-types catalog variant))
(defgeneric adjust-data-types (catalog variant))
(defgeneric adjust-data-types (catalog variant)
(:documentation
"Adjust PostgreSQL data types depending on the variant we target."))
;;;
;;; Nothing needs to be done for PostgreSQL variant :pgdg, of course.

View File

@ -5,7 +5,13 @@
(in-package :pgloader.pgsql)
(defun fetch-pgsql-catalog (dbname
&key table source-catalog including excluding)
&key
table
source-catalog
including
excluding
(variant :pgdg)
pgversion)
"Fetch PostgreSQL catalogs for the target database. A PostgreSQL
connection must be opened."
(let* ((*identifier-case* :quote)
@ -18,6 +24,10 @@
(t
including))))
(when (eq :pgdg variant)
(list-all-sqltypes catalog
:including including
:excluding excluding))
(list-all-columns catalog
:table-type :table
@ -25,17 +35,19 @@
:excluding excluding)
(list-all-indexes catalog
:including including
:excluding excluding
:pgversion pgversion)
(when (eq :pgdg variant)
(list-all-fkeys catalog
:including including
:excluding excluding)
(list-all-fkeys catalog
:including including
:excluding excluding)
;; fetch fkey we depend on with UNIQUE indexes but that have been
;; excluded from the target list, we still need to take care of them to
;; be able to DROP then CREATE those indexes again
(list-missing-fk-deps catalog)
;; fetch fkey we depend on with UNIQUE indexes but that have been
;; excluded from the target list, we still need to take care of them to
;; be able to DROP then CREATE those indexes again
(list-missing-fk-deps catalog))
(log-message :debug "fetch-pgsql-catalog: ~d tables, ~d indexes, ~d+~d fkeys"
(count-tables catalog)
@ -96,7 +108,7 @@
(defun format-table-name-as-including-exp (table)
"Return a table name suitable for a catalog lookup using ~ operator."
(let ((table-name (table-name table)))
(format nil "^~a$" (ensure-unquoted table-name))))
(make-string-match-rule :target (ensure-unquoted table-name))))
(defun query-table-schema (table)
"Get PostgreSQL schema name where to locate TABLE-NAME by following the
@ -107,6 +119,27 @@
(table-name table))
:single)))
(defun make-including-expr-from-view-names (view-names)
"Turn MATERIALIZING VIEWs list of view names into an INCLUDING parameter."
(let (including current-schema)
(loop :for (schema-name . view-name) :in view-names
:do (let* ((schema-name
(if schema-name
(ensure-unquoted schema-name)
(or
current-schema
(setf current-schema
(pomo:query "select current_schema()" :single)))))
(table-expr
(make-string-match-rule :target (ensure-unquoted view-name)))
(schema-entry
(or (assoc schema-name including :test #'string=)
(progn (push (cons schema-name nil) including)
(assoc schema-name including :test #'string=)))))
(push-to-end table-expr (cdr schema-entry))))
;; return the including alist
including))
(defvar *table-type*
'((:table . ("r" "f" "p")) ; ordinary, foreign and partitioned
@ -116,18 +149,34 @@
"Associate internal table type symbol with what's found in PostgreSQL
pg_class.relkind column.")
(defun filter-list-to-where-clause (filter-list
(defun filter-list-to-where-clause (schema-filter-list
&optional
not
(schema-col "table_schema")
(table-col "table_name"))
"Given an INCLUDING or EXCLUDING clause, turn it into a PostgreSQL WHERE
clause."
(loop :for (schema . table-name-list) :in filter-list
:append (mapcar (lambda (table-name)
(format nil "(~a = '~a' and ~a ~:[~;NOT ~]~~ '~a')"
schema-col schema table-col not table-name))
table-name-list)))
(loop :for (schema . filter-list) :in schema-filter-list
:append (mapcar (lambda (filter)
(typecase filter
(string-match-rule
(format nil "(~a = '~a' and ~a ~:[~;!~]= '~a')"
schema-col
schema
table-col
not
(string-match-rule-target filter)))
(regex-match-rule
(format nil "(~a = '~a' and ~a ~:[~;NOT ~]~~ '~a')"
schema-col
schema
table-col
not
(regex-match-rule-target filter)))))
filter-list)))
(defun normalize-extra (extra)
(cond ((string= "auto_increment" extra) :auto-increment)))
(defun list-all-columns (catalog
&key
@ -137,7 +186,8 @@
&aux
(table-type-name (cdr (assoc table-type *table-type*))))
"Get the list of PostgreSQL column names per table."
(loop :for (schema-name table-name table-oid name type typmod notnull default)
(loop :for (schema-name table-name table-oid
name type typmod notnull default extra)
:in
(query nil
(format nil
@ -156,23 +206,28 @@
:do
(let* ((schema (maybe-add-schema catalog schema-name))
(table (maybe-add-table schema table-name :oid table-oid))
(field (make-column :name name
(field (make-column :table table
:name name
:type-name type
:type-mod typmod
:nullable (not notnull)
:default default)))
:default default
:transform-default nil
:extra (normalize-extra extra))))
(add-field table field))
:finally (return catalog)))
(defun list-all-indexes (catalog &key including excluding)
(defun list-all-indexes (catalog &key including excluding pgversion)
"Get the list of PostgreSQL index definitions per table."
(loop
:for (schema-name name oid
table-schema table-name
primary unique sql conname condef)
primary unique cols sql conname condef)
:in (query nil
(format nil
(sql "/pgsql/list-all-indexes.sql")
(sql (sql-url-for-variant "pgsql"
"list-all-indexes.sql"
pgversion))
including ; do we print the clause?
(filter-list-to-where-clause including
nil
@ -186,17 +241,20 @@
:do (let* ((schema (find-schema catalog schema-name))
(tschema (find-schema catalog table-schema))
(table (find-table tschema table-name))
(columns (parse-index-column-names cols sql))
(pg-index
(make-index :name name
(make-index :name (ensure-quoted name)
:oid oid
:schema schema
:table table
:primary primary
:unique unique
:columns nil
:columns columns
:sql sql
:conname (unless (eq :null conname) conname)
:condef (unless (eq :null condef) condef))))
:conname (unless (eq :null conname)
(ensure-quoted conname))
:condef (unless (eq :null condef)
condef))))
(maybe-add-index table name pg-index :key #'index-name))
:finally (return catalog)))
@ -204,7 +262,7 @@
"Get the list of PostgreSQL index definitions per table."
(loop
:for (schema-name table-name fschema-name ftable-name
conoid conname condef
conoid pkeyoid conname condef
cols fcols
updrule delrule mrule deferrable deferred)
:in (query nil
@ -246,9 +304,13 @@
(table (find-table schema table-name))
(fschema (find-schema catalog fschema-name))
(ftable (find-table fschema ftable-name))
(pkey (find pkeyoid (table-index-list ftable)
:test #'=
:key #'index-oid))
(fk
(make-fkey :name conname
(make-fkey :name (ensure-quoted conname)
:oid conoid
:pkey pkey
:condef condef
:table table
:columns (split-sequence:split-sequence #\, cols)
@ -259,6 +321,13 @@
:match-rule (pg-fk-match-rule-to-match-clause mrule)
:deferrable deferrable
:initially-deferred deferred)))
;; add the fkey reference to the pkey index too
(unless (find conoid
(index-fk-deps pkey)
:test #'=
:key #'fkey-oid)
(push-to-end fk (index-fk-deps pkey)))
;; check that both tables are in pgloader's scope
(if (and table ftable)
(add-fkey table fk)
(log-message :notice "Foreign Key ~a is ignored, one of its table is missing from pgloader table selection"
@ -355,3 +424,71 @@
(sql "/pgsql/list-table-oids-from-temp-table.sql"))))
:do (setf (gethash name oidmap) oid)))
oidmap))
;;;
;;; PostgreSQL specific support for extensions and user defined data types.
;;;
(defun list-all-sqltypes (catalog &key including excluding)
"Set the catalog's schema extension list and sqltype list"
(loop :for (schema-name extension-name type-name enum-values)
:in (query nil
(format nil
(sql "/pgsql/list-all-sqltypes.sql")
including ; do we print the clause?
(filter-list-to-where-clause including
nil
"n.nspname"
"c.relname")
excluding ; do we print the clause?
(filter-list-to-where-clause excluding
nil
"n.nspname"
"c.relname")))
:do
(let* ((schema (maybe-add-schema catalog schema-name))
(sqltype
(make-sqltype :name (ensure-quoted type-name)
:schema schema
:type (when enum-values :enum)
:extra (when (and enum-values
(not (eq enum-values :null)))
(coerce enum-values 'list)))))
(if (and extension-name (not (eq :null extension-name)))
;; then create extension will create the type
(maybe-add-extension schema extension-name)
;; only create a specific entry for types that we need to create
;; ourselves, when extension is not null "create extension" is
;; going to take care of creating the type.
(add-sqltype schema sqltype)))
:finally (return catalog)))
;;;
;;; Extra utils like parsing a list of column names from an index definition.
;;;
(defun parse-index-column-names (columns index-definition)
"Return a list of column names for the given index."
(if (and columns (not (eq :null columns)))
;; the normal case, not much parsing to do, the data has been prepared
;; for us in the SQL query
(split-sequence:split-sequence #\, columns)
;; the redshift variant case, where there's no way to string_agg or
;; even array_to_string(array_agg(...)) and so we need to parse the
;; index-definition instead.
;;
;; CREATE UNIQUE INDEX pg_amproc_opc_proc_index ON pg_amproc USING btree (amopclaid, amprocsubtype, amprocnum)
(when index-definition
(let ((open-paren-pos (position #\( index-definition))
(close-paren-pos (position #\) index-definition)))
(when (and open-paren-pos close-paren-pos)
(mapcar (lambda (colname) (string-trim " " colname))
(split-sequence:split-sequence #\,
index-definition
:start (+ 1 open-paren-pos)
:end close-paren-pos)))))))

View File

@ -0,0 +1,4 @@
Redshift is a fork of PostgreSQL 8.0, and our catalog queries must then
target this old PostgreSQL version to work on Redshift. Parts of what we
would usually implement in SQL are implemented in pgloader code instead, in
order to support such an old PostgreSQL version.

View File

@ -0,0 +1,29 @@
-- params: including
-- filter-list-to-where-clause for including
-- excluding
-- filter-list-to-where-clause for excluding
select n.nspname,
i.relname,
i.oid,
rn.nspname,
r.relname,
indisprimary,
indisunique,
null,
pg_get_indexdef(indexrelid),
c.conname,
pg_get_constraintdef(c.oid)
from pg_index x
join pg_class i ON i.oid = x.indexrelid
join pg_class r ON r.oid = x.indrelid
join pg_namespace n ON n.oid = i.relnamespace
join pg_namespace rn ON rn.oid = r.relnamespace
left join pg_depend d on d.classid = 'pg_class'::regclass
and d.objid = i.oid
and d.refclassid = 'pg_constraint'::regclass
and d.deptype = 'i'
left join pg_constraint c ON c.oid = d.refobjid
where n.nspname !~~ '^pg_' and n.nspname <> 'information_schema'
~:[~*~;and (~{~a~^~&~10t or ~})~]
~:[~*~;and (~{~a~^~&~10t and ~})~]
order by n.nspname, r.relname;

View File

@ -3,17 +3,37 @@
-- filter-list-to-where-clause for including
-- excluding
-- filter-list-to-where-clause for excluding
with seqattr as
(
select adrelid,
adnum,
adsrc,
case when adsrc ~~ 'nextval'
then substring(pg_get_expr(d.adbin, d.adrelid)
from '''([^'']+)'''
)
else null
end as seqname
from pg_attrdef d
)
select nspname, relname, c.oid, attname,
t.oid::regtype as type,
case when atttypmod > 0 then atttypmod - 4 else null end as typmod,
case when atttypmod > 0
then substring(format_type(t.oid, atttypmod) from '\d+(?:,\d+)?')
else null
end as typmod,
attnotnull,
case when atthasdef then def.adsrc end as default
case when atthasdef then def.adsrc end as default,
case when s.seqname is not null then 'auto_increment' end as extra
from pg_class c
join pg_namespace n on n.oid = c.relnamespace
left join pg_attribute a on c.oid = a.attrelid
join pg_type t on t.oid = a.atttypid and attnum > 0
left join pg_attrdef def on a.attrelid = def.adrelid
and a.attnum = def.adnum
and a.atthasdef
left join seqattr s on def.adrelid = s.adrelid
and def.adnum = s.adnum
where nspname !~~ '^pg_' and n.nspname <> 'information_schema'
and relkind in (~{'~a'~^, ~})

View File

@ -0,0 +1,4 @@
select nspname, extname
from pg_extension e
join pg_namespace n on n.oid = e.extnamespace
where nspname !~ '^pg_';

View File

@ -7,7 +7,9 @@
-- excluding (ftable)
-- filter-list-to-where-clause for excluding
select n.nspname, c.relname, nf.nspname, cf.relname as frelname,
r.oid, conname,
r.oid,
d.refobjid as pkeyoid,
conname,
pg_catalog.pg_get_constraintdef(r.oid, true) as condef,
(select string_agg(attname, ',')
from pg_attribute
@ -26,6 +28,9 @@
JOIN pg_namespace n on c.relnamespace = n.oid
JOIN pg_class cf on r.confrelid = cf.oid
JOIN pg_namespace nf on cf.relnamespace = nf.oid
JOIN pg_depend d on d.classid = 'pg_constraint'::regclass
and d.objid = r.oid
and d.refobjsubid = 0
where r.contype = 'f'
AND c.relkind in ('r', 'f', 'p')
AND cf.relkind in ('r', 'f', 'p')

View File

@ -9,6 +9,11 @@
r.relname,
indisprimary,
indisunique,
(select string_agg(attname, ',')
from pg_attribute
where attrelid = r.oid
and array[attnum::integer] <@ indkey::integer[]
) as cols,
pg_get_indexdef(indexrelid),
c.conname,
pg_get_constraintdef(c.oid)
@ -17,10 +22,11 @@
join pg_class r ON r.oid = x.indrelid
join pg_namespace n ON n.oid = i.relnamespace
join pg_namespace rn ON rn.oid = r.relnamespace
left join pg_constraint c ON c.conindid = i.oid
and c.conrelid = r.oid
-- filter out self-fkeys
and c.confrelid <> r.oid
left join pg_depend d on d.classid = 'pg_class'::regclass
and d.objid = i.oid
and d.refclassid = 'pg_constraint'::regclass
and d.deptype = 'i'
left join pg_constraint c ON c.oid = d.refobjid
where n.nspname !~~ '^pg_' and n.nspname <> 'information_schema'
~:[~*~;and (~{~a~^~&~10t or ~})~]
~:[~*~;and (~{~a~^~&~10t and ~})~]

View File

@ -0,0 +1,43 @@
--
-- get user defined SQL types
--
select nt.nspname,
extname,
typname,
case when enum.enumtypid is not null
then array_agg(enum.enumlabel order by enumsortorder)
end as enumvalues
from pg_class c
join pg_namespace n on n.oid = c.relnamespace
left join pg_attribute a on c.oid = a.attrelid and a.attnum > 0
join pg_type t on t.oid = a.atttypid
left join pg_namespace nt on nt.oid = t.typnamespace
left join pg_depend d on d.classid = 'pg_type'::regclass
and d.refclassid = 'pg_extension'::regclass
and d.objid = t.oid
left join pg_extension e on refobjid = e.oid
left join pg_enum enum on enum.enumtypid = t.oid
where nt.nspname !~~ '^pg_' and nt.nspname <> 'information_schema'
and n.nspname !~~ '^pg_' and n.nspname <> 'information_schema'
and c.relkind in ('r', 'f', 'p')
~:[~*~;and (~{~a~^~&~10t or ~})~]
~:[~*~;and (~{~a~^~&~10t and ~})~]
and
( t.typrelid = 0
or
(select c.relkind = 'c'
from pg_class c
where c.oid = t.typrelid)
)
and not exists
(
select 1
from pg_type el
where el.oid = t.typelem
and el.typarray = t.oid
)
group by nt.nspname, extname, typname, enumtypid
order by nt.nspname, extname, typname, enumtypid;

84
src/save.lisp Normal file
View File

@ -0,0 +1,84 @@
;;;
;;; Create a build/bin/pgloader executable from the source code, using
;;; Quicklisp to load pgloader and its dependencies.
;;;
(in-package #:cl-user)
;; ccl provides an implementation of getenv already.
#+sbcl
(defun getenv (name &optional default)
"Return the current value for the environment variable NAME, or default
when unset."
(or (sb-ext:posix-getenv name) default))
;; So that we can #+pgloader-image some code away, see main.lisp
(push :pgloader-image *features*)
;;;
;;; We need to support *print-circle* for the debug traces of the catalogs,
;;; and while at it let's enforce *print-pretty* too.
;;;
(setf *print-circle* t *print-pretty* t)
(require :asdf) ; should work in SBCL and CCL
(defvar *quicklisp.lisp* "http://beta.quicklisp.org/quicklisp.lisp")
(let* ((cwd (uiop:getcwd))
(build-dir (uiop:merge-pathnames* "build/" cwd))
(ql.lisp (uiop:merge-pathnames* "quicklisp.lisp" build-dir))
(qldir (uiop:merge-pathnames* "quicklisp/" build-dir))
(qlsetup (uiop:merge-pathnames* "setup.lisp" qldir)))
;;
;; We might have to install Quicklisp in build/quicklisp
;;
(unless (probe-file qlsetup)
(format t "File ~a is not found, installing Quicklisp from ~a~%"
qlsetup *quicklisp.lisp*)
(let ((command (format nil "curl -o ~a ~a" ql.lisp *quicklisp.lisp*)))
(format t "Running command: ~a~%" command)
(uiop:run-program command))
(load ql.lisp)
(let* ((quickstart (find-package "QUICKLISP-QUICKSTART"))
(ql-install (find-symbol "INSTALL" quickstart)))
(funcall ql-install :path qldir :proxy (getenv "http_proxy"))))
;;
;; Now that we have Quicklisp, load it and push our copy of pgloader in
;; ql:*local-project-directories* where Quicklisp will find it.
;;
(format t "Loading file ~a~%" qlsetup)
(load qlsetup)
(let* ((ql (find-package "QL"))
(lpd (find-symbol "*LOCAL-PROJECT-DIRECTORIES*" ql))
(quickload (find-symbol "QUICKLOAD" ql)))
(push cwd (symbol-value lpd))
;;
;; And finally load pgloader and its image-based hooks
;;
(format t "Loading system pgloader~%")
(funcall quickload :pgloader)
(load (asdf:system-relative-pathname :pgloader "src/hooks.lisp"))))
(defun pgloader-image-main ()
(let ((argv #+sbcl sb-ext:*posix-argv*
#+ccl ccl:*command-line-argument-list*))
(pgloader::main argv)))
(let* ((cwd (uiop:getcwd))
(build-dir (uiop:merge-pathnames* "build/bin/" cwd))
(image-filename (uiop:merge-pathnames* "pgloader" build-dir)))
#+ccl
(ccl:save-application image-filename
:toplevel-function #'cl-user::pgloader-image-main
:prepend-kernel t)
#+sbcl
(sb-ext:save-lisp-and-die image-filename
:toplevel #'cl-user::pgloader-image-main
:executable t
:save-runtime-options t
:compression t))

View File

@ -95,7 +95,7 @@
:initform nil)) ;
(:documentation "pgloader Multiple Files Data Source (csv, fixed, copy)."))
(defgeneric parse-header (md-copy header)
(defgeneric parse-header (md-copy)
(:documentation "Parse the file header and return a list of fields."))
(defgeneric process-rows (md-copy stream process-fn)

View File

@ -58,7 +58,8 @@
;; otherwise, we do the full dance
(and
(or (and t-s-p (string= type rule-source-type)))
(or (null tm-s-p) (typemod-expr-matches-p typemod-expr typemod))
(or (null tm-s-p) (when typemod
(typemod-expr-matches-p typemod-expr typemod)))
(or (null d-s-p) (string= default rule-source-default))
(or (null u-s-p) (eq unsigned rule-unsigned))
(or (null n-s-p) (eq not-null rule-source-not-null))

View File

@ -4,7 +4,7 @@
(in-package #:pgloader.sources)
(defmethod parse-header ((copy md-copy) header)
(defmethod parse-header ((copy md-copy))
"Unsupported by default, to be implemented in each md-copy subclass."
(error "Parsing the header of a ~s is not implemented yet." (type-of copy)))
@ -59,12 +59,8 @@
;; about skipping the first line
(loop :repeat (skip-lines copy) :do (read-line input nil nil))
;; we might now have to read the fields from the header line
(when (header copy)
(setf (fields copy)
(parse-header copy (read-line input nil nil)))
(log-message :debug "Parsed header columns ~s" (fields copy)))
;; we might now have to skip the header line
(when (header copy) (read-line input nil nil))
;; read in the text file, split it into columns
(process-rows copy input process-row-fn))))

View File

@ -20,7 +20,6 @@
nil
col))
(lambda (col)
(declare (optimize speed))
(if (string= null-as col) nil col))))
(field-name-as-symbol (field-name-or-list)
@ -116,7 +115,7 @@
sexp))
(t sexp)))))
`(lambda (row)
(declare (optimize speed) (type list row))
(declare (type list row))
(destructuring-bind (&optional ,@args &rest extra) row
(declare (ignorable ,@args) (ignore extra))
(let ,values

View File

@ -57,19 +57,29 @@
;;;
;;; Read a file format in CSV format, and call given function on each line.
;;;
(defmethod parse-header ((csv copy-csv) header)
(defmethod parse-header ((csv copy-csv))
"Parse the header line given csv setup."
;; a field entry is a list of field name and options
(mapcar #'list
(car ; parsing a single line
(cl-csv:read-csv header
:separator (csv-separator csv)
:quote (csv-quote csv)
:escape (csv-escape csv)
:unquoted-empty-string-is-nil t
:quoted-empty-string-is-nil nil
:trim-outer-whitespace (csv-trim-blanks csv)
:newline (csv-newline csv)))))
(with-connection (cnx (source csv)
:direction :input
:external-format (encoding csv)
:if-does-not-exist nil)
(let ((input (md-strm cnx)))
(loop :repeat (skip-lines csv) :do (read-line input nil nil))
(let* ((header-line (read-line input nil nil))
(field-name-list
(mapcar #'list ; we need each field to be a list
(car ; parsing a single line
(cl-csv:read-csv header-line
:separator (csv-separator csv)
:quote (csv-quote csv)
:escape (csv-escape csv)
:unquoted-empty-string-is-nil t
:quoted-empty-string-is-nil nil
:trim-outer-whitespace (csv-trim-blanks csv)
:newline (csv-newline csv))))))
(log-message :notice "Parsed header columns ~s" field-name-list)
(setf (fields csv) field-name-list)))))
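;;; A minimal sketch, assuming cl-csv is loaded, of the field list shape that
;;; parse-header builds above for a hypothetical "a,b,c" header line: each
;;; parsed column name ends up wrapped in a one-element list.
(mapcar #'list (car (cl-csv:read-csv "a,b,c" :separator #\,)))
;; => (("a") ("b") ("c"))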
(defmethod process-rows ((csv copy-csv) stream process-fn)
"Process rows from STREAM according to COPY specifications and PROCESS-FN."

View File

@ -88,6 +88,9 @@
datetime-precision
character-set-name collation-name)
(defmethod field-name ((field mssql-column) &key)
(mssql-column-name field))
(defmethod mssql-column-ctype ((col mssql-column))
"Build the ctype definition from the full mssql-column information."
(let ((type (mssql-column-type col)))
@ -125,7 +128,7 @@
field
(declare (ignore schema)) ; FIXME
(let* ((ctype (mssql-column-ctype field))
(extra (when (mssql-column-identity field) "auto_increment"))
(extra (when (mssql-column-identity field) :auto-increment))
(pgcol
(apply-casting-rules table-name name type ctype default nullable extra)))
;; the MS SQL driver smartly maps data to the proper CL type, but the

View File

@ -144,8 +144,14 @@
:columns nil
:filter filter))
(index
(maybe-add-index table index-name pg-index :key #'index-name)))
(add-column index colname))
(when table
(maybe-add-index table index-name pg-index :key #'index-name))))
(unless table
(log-message :warning
"Failed to find table ~s in schema ~s for index ~s, skipping the index"
table-name schema-name index-name))
(when index
(add-column index colname)))
:finally (return catalog)))
(defun list-all-fkeys (catalog &key including excluding)
@ -195,6 +201,7 @@
Mostly we just use the name, and try to avoid parsing dates."
(case (intern (string-upcase type) "KEYWORD")
(:time (format nil "convert(varchar, [~a], 114)" name))
(:datetime (format nil "convert(varchar, [~a], 126)" name))
(:smalldatetime (format nil "convert(varchar, [~a], 126)" name))
(:date (format nil "convert(varchar, [~a], 126)" name))
@ -206,3 +213,43 @@
(loop :for col :in columns
:collect (with-slots (name type) col
(get-column-sql-expression name type))))
;;;
;;; Materialize Views support
;;;
(defun create-ms-views (views-alist)
"VIEWS-ALIST associates view names with their SQL definition, which might
be empty for already existing views. Create only the views for which we
have an SQL definition."
(unless (eq :all views-alist)
(let ((views (remove-if #'null views-alist :key #'cdr)))
(when views
(loop :for (name . def) :in views
:for sql := (destructuring-bind (schema . v-name) name
(format nil
"CREATE VIEW ~@[~s~].~s AS ~a"
schema v-name def))
:do (progn
(log-message :info "MS SQL: ~a" sql)
(mssql-query sql)))))))
(defun drop-ms-views (views-alist)
"See `create-ms-views' for VIEWS-ALIST description. This time we DROP the
views to clean out after our work."
(unless (eq :all views-alist)
(let ((views (remove-if #'null views-alist :key #'cdr)))
(when views
(let ((sql
(with-output-to-string (sql)
(format sql "DROP VIEW ")
(loop :for view-definition :in views
:for i :from 0
:do (destructuring-bind (name . def) view-definition
(declare (ignore def))
(format sql
"~@[, ~]~@[~s.~]~s"
(not (zerop i)) (car name) (cdr name)))))))
(log-message :info "PostgreSQL Source: ~a" sql)
(mssql-query sql))))))
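;;; An illustration, with hypothetical schema, view names and SQL text, of the
;;; VIEWS-ALIST shape consumed by create-ms-views and drop-ms-views above:
;;; each entry pairs a (schema . view-name) cons with its SQL definition, or
;;; with NIL when the view already exists and needs no CREATE statement.
(defparameter *example-views-alist*
  '((("dbo" . "mv_orders")     . "select id, total from orders")
    (("dbo" . "existing_view") . nil)))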

View File

@ -72,30 +72,63 @@
including
excluding)
"MS SQL introspection to prepare the migration."
(declare (ignore materialize-views only-tables))
(declare (ignore only-tables))
(with-stats-collection ("fetch meta data"
:use-result-as-rows t
:use-result-as-read t
:section :pre)
(with-connection (*mssql-db* (source-db mssql))
(list-all-columns catalog
:including including
:excluding excluding)
(with-connection (*mssql-db* (source-db mssql))
;; If asked to MATERIALIZE VIEWS, now is the time to create them in MS
;; SQL, when given definitions rather than existing view names.
(when (and materialize-views (not (eq :all materialize-views)))
(create-ms-views materialize-views))
(when create-indexes
(list-all-indexes catalog
:including including
:excluding excluding))
(list-all-columns catalog
:including including
:excluding excluding)
(when foreign-keys
(list-all-fkeys catalog
;; fetch view (and their columns) metadata, covering comments too
(let* ((view-names (unless (eq :all materialize-views)
(mapcar #'car materialize-views)))
(including
(loop :for (schema-name . view-name) :in view-names
:do (let* ((schema-name (or schema-name "dbo"))
(schema-entry
(or (assoc schema-name including :test #'string=)
(progn (push (cons schema-name nil) including)
(assoc schema-name including
:test #'string=)))))
(push-to-end view-name (cdr schema-entry))))))
(cond (view-names
(list-all-columns catalog
:including including
:table-type :view))
((eq :all materialize-views)
(list-all-columns catalog :table-type :view))))
(when create-indexes
(list-all-indexes catalog
:including including
:excluding excluding))
;; return how many objects we're going to deal with in total
;; for stats collection
(+ (count-tables catalog) (count-indexes catalog))))
(when foreign-keys
(list-all-fkeys catalog
:including including
:excluding excluding))
;; return how many objects we're going to deal with in total
;; for stats collection
(+ (count-tables catalog) (count-indexes catalog))))
;; be sure to return the catalog itself
catalog)
(defmethod cleanup ((mssql copy-mssql) (catalog catalog) &key materialize-views)
"When there is a PostgreSQL error at prepare-pgsql-database step, we might
need to clean-up any view created in the MS SQL connection for the
migration purpose."
(when materialize-views
(with-connection (*mssql-db* (source-db mssql))
(drop-ms-views materialize-views))))

View File

@ -119,6 +119,10 @@
:target (:type "timestamptz" :drop-default t :drop-not-null t)
:using pgloader.transforms::zero-dates-to-null)
(:source (:type "datetime" :on-update-current-timestamp t :not-null nil)
:target (:type "timestamptz" :drop-default t)
:using pgloader.transforms::zero-dates-to-null)
(:source (:type "timestamp" :default "0000-00-00 00:00:00" :not-null t)
:target (:type "timestamptz" :drop-default t :drop-not-null t)
:using pgloader.transforms::zero-dates-to-null)
@ -131,6 +135,10 @@
:target (:type "timestamptz" :drop-default t :drop-not-null t)
:using pgloader.transforms::zero-dates-to-null)
(:source (:type "timestamp" :on-update-current-timestamp t :not-null nil)
:target (:type "timestamptz" :drop-default t)
:using pgloader.transforms::zero-dates-to-null)
(:source (:type "date" :default "0000-00-00")
:target (:type "date" :drop-default t)
:using pgloader.transforms::zero-dates-to-null)
@ -178,6 +186,9 @@
(table-name name comment dtype ctype default nullable extra)))
table-name name dtype ctype default nullable extra comment)
(defmethod field-name ((field mysql-column) &key)
(mysql-column-name field))
(defun explode-mysql-enum (ctype)
"Convert MySQL ENUM expression into a list of labels."
(cl-ppcre:register-groups-bind (list)

View File

@ -235,13 +235,7 @@ Illegal ~a character starting at position ~a~@[: ~a~].~%"
(defun apply-decoding-as-filters (table-name filters)
"Return a generialized boolean which is non-nil only if TABLE-NAME matches
one of the FILTERS."
(flet ((apply-filter (filter)
;; we close over table-name here.
(typecase filter
(string (string-equal filter table-name))
(list (destructuring-bind (type val) filter
(ecase type
(:regex (cl-ppcre:scan val table-name))))))))
(flet ((apply-filter (filter) (matches filter table-name)))
(some #'apply-filter filters)))
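;;; An illustration, with hypothetical names, of the FILTERS argument handled
;;; above: a plain string is compared against the table name, and a
;;; (:regex ...) entry is matched as a regular expression by MATCHES.
(defparameter *example-decoding-filters*
  '("legacy_blobs"
    (:regex "^archive_")))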
(defmethod instanciate-table-copy-object ((copy copy-mysql) (table table))

View File

@ -0,0 +1,80 @@
;;;
;;; Tools to handle PostgreSQL data type casting rules
;;;
(in-package :pgloader.source.pgsql)
(defparameter *pgsql-default-cast-rules*
'((:source (:type "integer" :auto-increment t)
:target (:type "serial" :drop-default t))
(:source (:type "bigint" :auto-increment t)
:target (:type "bigserial" :drop-default t))
(:source (:type "character varying")
:target (:type "text" :drop-typemod t)))
"Data Type Casting to migrate from PostgtreSQL to PostgreSQL")
(defmethod pgsql-column-ctype ((column column))
"Build the ctype definition from the PostgreSQL column information."
(let ((type-name (column-type-name column))
(type-mod (unless (or (null (column-type-mod column))
(eq :null (column-type-mod column)))
(column-type-mod column))))
(format nil "~a~@[(~a)~]" type-name type-mod)))
(defmethod cast ((field column) &key &allow-other-keys)
"Return the PostgreSQL type definition from the given PostgreSQL column
definition"
(with-slots (pgloader.catalog::table
pgloader.catalog::name
pgloader.catalog::type-name
pgloader.catalog::type-mod
pgloader.catalog::nullable
pgloader.catalog::default
pgloader.catalog::comment
pgloader.catalog::transform
pgloader.catalog::extra)
field
(let* ((ctype (pgsql-column-ctype field))
(extra (or pgloader.catalog::extra
(when (and (stringp (column-default field))
(search "identity" (column-default field)))
:auto-increment)))
(pgcol (apply-casting-rules (table-source-name pgloader.catalog::table)
pgloader.catalog::name
pgloader.catalog::type-name
ctype
pgloader.catalog::default
pgloader.catalog::nullable
extra)))
;; re-install our instruction not to transform default value: it comes
;; from PostgreSQL, and we trust it.
(setf (column-transform-default pgcol)
(column-transform-default field))
;; Redshift may be using DEFAULT getdate() instead of now()
(let ((default (column-default pgcol)))
(setf (column-default pgcol)
(cond
((and (stringp default) (string= "NULL" default))
:null)
((and (stringp default) (string= "getdate()" default))
:current-timestamp)
;; get rid of the identity default value, we already added
;; a hint in the column-extra field.
;;
;; "identity"(347358, 0, ('1,1'::character varying)::text)
((and (stringp default) (search "identity" default))
:null)
(t (column-default pgcol))))
;; we usually trust defaults that come from PostgreSQL... but we
;; also have support for Redshift.
(when (member (column-default pgcol) '(:null :current-timestamp))
(setf (column-transform-default pgcol) t)))
pgcol)))

View File

@ -0,0 +1,50 @@
(in-package :pgloader.source.pgsql)
(defun create-pg-views (views-alist)
"VIEWS-ALIST associates view names with their SQL definition, which might
be empty for already existing views. Create only the views for which we
have an SQL definition."
(unless (eq :all views-alist)
(let ((views (remove-if #'null views-alist :key #'cdr)))
(when views
(loop :for (name . def) :in views
:for sql := (destructuring-bind (schema . v-name) name
(format nil
"CREATE VIEW ~@[~s.~]~s AS ~a"
schema v-name def))
:do (progn
(log-message :info "PostgreSQL Source: ~a" sql)
#+pgloader-image
(pgsql-execute sql)
#-pgloader-image
(restart-case
(pgsql-execute sql)
(use-existing-view ()
:report "Use the already existing view and continue"
nil)
(replace-view ()
:report
"Replace the view with the one from pgloader's command"
(let ((drop-sql (format nil "DROP VIEW ~a;" (car name))))
(log-message :info "PostgreSQL Source: ~a" drop-sql)
(pgsql-execute drop-sql)
(pgsql-execute sql))))))))))
(defun drop-pg-views (views-alist)
"See `create-pg-views' for VIEWS-ALIST description. This time we DROP the
views to clean out after our work."
(unless (eq :all views-alist)
(let ((views (remove-if #'null views-alist :key #'cdr)))
(when views
(let ((sql
(with-output-to-string (sql)
(format sql "DROP VIEW ")
(loop :for view-definition :in views
:for i :from 0
:do (destructuring-bind (name . def) view-definition
(declare (ignore def))
(format sql
"~@[, ~]~@[~s.~]~s"
(not (zerop i)) (car name) (cdr name)))))))
(log-message :info "PostgreSQL Source: ~a" sql)
(pgsql-execute sql))))))

View File

@ -0,0 +1,142 @@
;;;
;;; Read from a PostgreSQL database.
;;;
(in-package :pgloader.source.pgsql)
(defclass copy-pgsql (db-copy) ()
(:documentation "pgloader PostgreSQL Data Source"))
(defmethod initialize-instance :after ((source copy-pgsql) &key)
"Add a default value for transforms in case it's not been provided."
(let* ((transforms (when (slot-boundp source 'transforms)
(slot-value source 'transforms))))
(when (and (slot-boundp source 'fields) (slot-value source 'fields))
;; cast typically happens in copy-database in the schema structure,
;; and the result is then copied into the copy-pgsql instance.
(unless (and (slot-boundp source 'columns) (slot-value source 'columns))
(setf (slot-value source 'columns)
(mapcar #'cast (slot-value source 'fields))))
(unless transforms
(setf (slot-value source 'transforms)
(mapcar #'column-transform (slot-value source 'columns)))))))
(defmethod map-rows ((pgsql copy-pgsql) &key process-row-fn)
"Extract PostgreSQL data and call PROCESS-ROW-FN function with a single
argument (a list of column values) for each row"
(let ((map-reader
;;
;; Build a Postmodern row reader that prepares a vector of strings
;; and call PROCESS-ROW-FN with the vector as single argument.
;;
(cl-postgres:row-reader (fields)
(let ((nb-cols (length fields)))
(loop :while (cl-postgres:next-row)
:do (let ((row (make-array nb-cols)))
(loop :for i :from 0
:for field :across fields
:do (setf (aref row i)
(cl-postgres:next-field field)))
(funcall process-row-fn row)))))))
(with-pgsql-connection ((source-db pgsql))
(if (citus-backfill-table-p (target pgsql))
;;
;; SELECT dist_key, * FROM source JOIN dist ON ...
;;
(let ((sql (citus-format-sql-select (source pgsql) (target pgsql))))
(log-message :sql "~a" sql)
(cl-postgres:exec-query pomo:*database* sql map-reader))
;;
;; No JOIN to add to backfill data in the SQL query here.
;;
(let* ((cols (mapcar #'column-name (fields pgsql)))
(sql
(format nil
"SELECT ~{~s::text~^, ~} FROM ~s.~s"
cols
(schema-source-name (table-schema (source pgsql)))
(table-source-name (source pgsql)))))
(log-message :sql "~a" sql)
(cl-postgres:exec-query pomo:*database* sql map-reader))))))
(defmethod copy-column-list ((pgsql copy-pgsql))
"We are sending the data in the MySQL columns ordering here."
(mapcar #'column-name (fields pgsql)))
(defmethod fetch-metadata ((pgsql copy-pgsql)
(catalog catalog)
&key
materialize-views
only-tables
create-indexes
foreign-keys
including
excluding)
"PostgreSQL introspection to prepare the migration."
(declare (ignore only-tables))
(with-stats-collection ("fetch meta data"
:use-result-as-rows t
:use-result-as-read t
:section :pre)
(with-pgsql-transaction (:pgconn (source-db pgsql))
(let ((variant (pgconn-variant (source-db pgsql)))
(pgversion (pgconn-major-version (source-db pgsql))))
;;
;; First, create the source views that we're going to materialize in
;; the target database.
;;
(when (and materialize-views (not (eq :all materialize-views)))
(create-pg-views materialize-views))
(when (eq :pgdg variant)
(list-all-sqltypes catalog
:including including
:excluding excluding))
(list-all-columns catalog
:including including
:excluding excluding)
(let* ((view-names (unless (eq :all materialize-views)
(mapcar #'car materialize-views)))
(including (make-including-expr-from-view-names view-names)))
(cond (view-names
(list-all-columns catalog
:including including
:table-type :view))
((eq :all materialize-views)
(list-all-columns catalog :table-type :view))))
(when create-indexes
(list-all-indexes catalog
:including including
:excluding excluding
:pgversion pgversion))
(when (and (eq :pgdg variant) foreign-keys)
(list-all-fkeys catalog
:including including
:excluding excluding))
;; return how many objects we're going to deal with in total
;; for stats collection
(+ (count-tables catalog)
(count-views catalog)
(count-indexes catalog)
(count-fkeys catalog)))))
;; be sure to return the catalog itself
catalog)
(defmethod cleanup ((pgsql copy-pgsql) (catalog catalog) &key materialize-views)
"When there is a PostgreSQL error at prepare-pgsql-database step, we might
need to clean-up any view created in the source PostgreSQL connection for
the migration purpose."
(when materialize-views
(with-pgsql-transaction (:pgconn (source-db pgsql))
(drop-pg-views materialize-views))))

View File

@ -0,0 +1 @@
select sql from sqlite_master where name = '~a'

View File

@ -67,6 +67,47 @@
(loop for (name) in (sqlite:execute-to-list db sql)
collect name)))
(defun find-sequence (db table-name column-name)
"Find if table-name.column-name is attached to a sequence in
sqlite_sequence catalog."
(let* ((sql (format nil (sql "/sqlite/find-sequence.sql") table-name))
(seq (sqlite:execute-single db sql)))
(when (and seq (not (zerop seq)))
;; magic marker for `apply-casting-rules'
(log-message :notice "SQLite column ~a.~a uses a sequence"
table-name column-name)
seq)))
(defun find-auto-increment-in-create-sql (db table-name column-name)
"The sqlite_sequence catalog is only created when some content has been
added to the table. So we might fail to FIND-SEQUENCE, and still need to
consider the column as an autoincrement. Parse the SQL definition of the
table to find out."
(let* ((sql (format nil (sql "/sqlite/get-create-table.sql") table-name))
(create-table (sqlite:execute-single db sql))
(open-paren (+ 1 (position #\( create-table)))
(close-paren (position #\) create-table :from-end t))
(coldefs
(mapcar (lambda (def) (string-trim (list #\Space) def))
(split-sequence:split-sequence #\,
create-table
:start open-paren
:end close-paren))))
(loop :for coldef :in coldefs
:do (let* ((words (mapcar (lambda (w) (string-trim '(#\" #\') w))
(split-sequence:split-sequence #\Space coldef)))
(colname (first words))
(props (rest words)))
(when (and (string= colname column-name)
(member "autoincrement" props :test #'string-equal))
;; we know the target column has no sequence because we
;; looked into that first by calling find-sequence, and we
;; only call find-auto-increment-in-create-sql when
;; find-sequence failed to find anything.
(log-message :notice "SQLite column ~a.~a is autoincrement, but has no sequence"
table-name column-name)
(return t))))))
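;;; A worked sketch of the parsing above, assuming the split-sequence library
;;; is loaded, on a hypothetical table definition: only the text between the
;;; outermost parentheses is split on commas, and each resulting column
;;; definition is then scanned word by word for the "autoincrement" keyword.
(let* ((create-table
         "CREATE TABLE t (id integer primary key autoincrement, name text)")
       (open-paren  (+ 1 (position #\( create-table)))
       (close-paren (position #\) create-table :from-end t)))
  (mapcar (lambda (def) (string-trim (list #\Space) def))
          (split-sequence:split-sequence #\, create-table
                                         :start open-paren
                                         :end close-paren)))
;; => ("id integer primary key autoincrement" "name text")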
(defun list-columns (table &key db-has-sequences (db *sqlite-db*) )
"Return the list of columns found in TABLE-NAME."
(let* ((table-name (table-source-name table))
@ -85,17 +126,14 @@
pk-id)))
(when (and db-has-sequences
(not (zerop pk-id))
(string-equal (coldef-ctype field) "integer"))
(string-equal (coldef-ctype field) "integer")
(or (find-sequence db table-name name)
(find-auto-increment-in-create-sql db
table-name
name)))
;; then it might be an auto_increment, which we know by
;; looking at the sqlite_sequence catalog
(let* ((sql
(format nil (sql "/sqlite/find-sequence.sql") table-name))
(seq (sqlite:execute-single db sql)))
(when (and seq (not (zerop seq)))
;; magic marker for `apply-casting-rules'
(log-message :notice "SQLite column ~a.~a uses a sequence"
table-name name)
(setf (coldef-extra field) :auto-increment))))
(setf (coldef-extra field) :auto-increment))
(add-field table field)))))
(defun list-all-columns (schema
@ -150,7 +188,7 @@
"Return the list of columns in INDEX-NAME."
(let ((sql (format nil (sql "/sqlite/list-index-cols.sql") index-name)))
(loop :for (index-pos table-pos col-name) :in (sqlite:execute-to-list db sql)
:collect col-name)))
:collect (apply-identifier-case col-name))))
(defun list-indexes (table &optional (db *sqlite-db*))
"Return the list of indexes attached to TABLE."

View File

@ -96,7 +96,7 @@
"Send the data in the SQLite column ordering."
(mapcar #'apply-identifier-case (mapcar #'coldef-name (fields sqlite))))
(defmethod fetch-metadata (sqlite catalog
(defmethod fetch-metadata ((sqlite copy-sqlite) (catalog catalog)
&key
materialize-views
only-tables

View File

@ -75,6 +75,10 @@
"Alter the storage parameters of TABLE."
(setf (table-storage-parameter-list table) parameters))
(defun alter-table-set-tablespace (table tablespace)
"Alter the tablespace slot of TABLE"
(setf (table-tablespace table) tablespace))
;;;
;;; Apply the match rules as given by the parser to a table name.

View File

@ -42,32 +42,44 @@
;;; Column structures details depend on the specific source type and are
;;; implemented in each source separately.
;;;
(defstruct catalog name schema-list types-without-btree)
(defstruct schema source-name name catalog table-list view-list in-search-path)
(defstruct table source-name name schema oid comment storage-parameter-list
(defstruct catalog name schema-list types-without-btree distribution-rules)
(defstruct schema source-name name catalog in-search-path
table-list view-list extension-list sqltype-list)
(defstruct table source-name name schema oid comment
storage-parameter-list tablespace
;; field is for SOURCE
;; column is for TARGET
field-list column-list index-list fkey-list trigger-list)
;; citus is an extra slot for citus support
field-list column-list index-list fkey-list trigger-list citus-rule)
;;;
;;; When migrating from PostgreSQL to PostgreSQL we might have to install
;;; extensions to have data type coverage.
;;;
(defstruct extension name schema)
;;;
;;; When migrating from another database to PostgreSQL some data types might
;;; need to be transformed dynamically into User Defined Types: ENUMs, SET,
;;; etc.
;;;
(defstruct sqltype name schema type source-def extra)
(defstruct sqltype name schema type source-def extra extension)
;;;
;;; The generic PostgreSQL column that the CAST generic function is asked to
;;; produce, so that we know how to CREATE TABLEs in PostgreSQL whatever the
;;; source is.
;;;
(defstruct column name type-name type-mod nullable default comment transform extra)
(defstruct column table name type-name type-mod nullable default comment
transform extra (transform-default t))
;;;
;;; Index and Foreign Keys
;;;
(defstruct fkey
name oid table columns foreign-table foreign-columns condef
name oid table columns pkey foreign-table foreign-columns condef
update-rule delete-rule match-rule deferrable initially-deferred)
;;;
@ -94,13 +106,18 @@
;;;
;;; Main data collection API
;;;
(defgeneric add-schema (object schema-name &key))
(defgeneric add-table (object table-name &key))
(defgeneric add-view (object view-name &key))
(defgeneric add-column (object column &key))
(defgeneric add-index (object index &key))
(defgeneric add-fkey (object fkey &key))
(defgeneric add-comment (object comment &key))
(defgeneric add-schema (object schema-name &key))
(defgeneric add-extension (object extension-name &key))
(defgeneric add-table (object table-name &key))
(defgeneric add-view (object view-name &key))
(defgeneric add-sqltype (object column &key))
(defgeneric add-column (object column &key))
(defgeneric add-index (object index &key))
(defgeneric add-fkey (object fkey &key))
(defgeneric add-comment (object comment &key))
(defgeneric extension-list (object &key)
(:documentation "Return the list of extensions found in OBJECT."))
(defgeneric table-list (object &key)
(:documentation "Return the list of tables found in OBJECT."))
@ -112,6 +129,10 @@
(:documentation
"Find a schema by SCHEMA-NAME in a catalog OBJECT and return the schema"))
(defgeneric find-extension (object extension-name &key)
(:documentation
"Find an extension by EXTENSION-NAME in a schema OBJECT and return the table"))
(defgeneric find-table (object table-name &key)
(:documentation
"Find a table by TABLE-NAME in a schema OBJECT and return the table"))
@ -131,6 +152,9 @@
(defgeneric maybe-add-schema (object schema-name &key)
(:documentation "Add a new schema or return existing one."))
(defgeneric maybe-add-extension (object extension-name &key)
(:documentation "Add a new extension or return existing one."))
(defgeneric maybe-add-table (object table-name &key)
(:documentation "Add a new table or return existing one."))
@ -163,10 +187,44 @@
"Cast a FIELD definition from a source database into a PostgreSQL COLUMN
definition."))
(defgeneric field-name (object &key)
(:documentation "Get the source database column name, or field-name."))
;;;
;;; Implementation of the methods
;;;
(defmethod extension-list ((schema schema) &key)
"Return the list of extensions for SCHEMA."
(schema-extension-list schema))
(defmethod extension-list ((catalog catalog) &key)
"Return the list of extensions for CATALOG."
(apply #'append (mapcar #'extension-list (catalog-schema-list catalog))))
(defmethod sqltype-list ((column column) &key)
"Return the list of sqltypes for SCHEMA."
(when (typep (column-type-name column) 'sqltype)
(column-type-name column)))
(defmethod sqltype-list ((table table) &key)
"Return the list of sqltypes for SCHEMA."
(mapcar #'sqltype-list (table-column-list table)))
(defmethod sqltype-list ((schema schema) &key)
"Return the list of sqltypes for SCHEMA."
(append (schema-sqltype-list schema)
(apply #'append
(mapcar #'sqltype-list (schema-table-list schema)))))
(defmethod sqltype-list ((catalog catalog) &key)
"Return the list of sqltypes for CATALOG."
(remove-duplicates
(remove-if #'null
(apply #'append
(mapcar #'sqltype-list (catalog-schema-list catalog))))
:test #'string-equal :key #'sqltype-name))
(defmethod table-list ((schema schema) &key)
"Return the list of tables for SCHEMA."
(schema-table-list schema))
@ -212,6 +270,17 @@
:in-search-path in-search-path)))
(push-to-end schema (catalog-schema-list catalog))))
(defmethod add-extension ((schema schema) extension-name &key)
"Add EXTENSION-NAME to SCHEMA and return the new extension instance."
(let ((extension
(make-extension :name extension-name
:schema schema)))
(push-to-end extension (schema-extension-list schema))))
(defmethod add-sqltype ((schema schema) sqltype &key)
"Add SQLTYPE instance to SCHEMA and return SQLTYPE."
(push-to-end sqltype (schema-sqltype-list schema)))
(defmethod add-table ((schema schema) table-name &key comment oid)
"Add TABLE-NAME to SCHEMA and return the new table instance."
(let ((table
@ -238,6 +307,11 @@
(find schema-name (catalog-schema-list catalog)
:key #'schema-source-name :test 'string=))
(defmethod find-extension ((schema schema) extension-name &key)
"Find EXTENSION-NAME in SCHEMA and return the EXTENSION object of this name."
(find extension-name (schema-extension-list schema)
:key #'extension-name :test 'string=))
(defmethod find-table ((schema schema) table-name &key)
"Find TABLE-NAME in SCHEMA and return the TABLE object of this name."
(find table-name (schema-table-list schema)
@ -254,6 +328,12 @@
(let ((schema (find-schema catalog schema-name)))
(or schema (add-schema catalog schema-name))))
(defmethod maybe-add-extension ((schema schema) extension-name &key)
"Add TABLE-NAME to the table-list for SCHEMA, or return the existing table
of the same name if it already exists in the schema table-list."
(let ((extension (find-extension schema extension-name)))
(or extension (add-extension schema extension-name))))
(defmethod maybe-add-table ((schema schema) table-name &key comment oid)
"Add TABLE-NAME to the table-list for SCHEMA, or return the existing table
of the same name if it already exists in the schema table-list."
@ -297,6 +377,9 @@
(loop :for schema :in (catalog-schema-list catalog)
:do (cast schema)))
(defmethod field-name ((column column) &key)
(column-name column))
;;;
;;; There's no simple equivalent to array_agg() in MS SQL, so the index and
;;; fkey queries return a row per index|fkey column rather than per

382
src/utils/citus.lisp Normal file
View File

@ -0,0 +1,382 @@
;;;
;;; Citus support in pgloader allows declaring what needs to change in the
;;; source schema in terms of Citus concepts: reference and distributed
;;; tables.
;;;
#|
distribute billers using id
distribute bills using biller_id
distribute receivable_accounts using biller_id
distribute payments using biller_id
distribute splits using biller_id
from receivable_accounts
distribute ach_accounts as reference table
|#
(in-package #:pgloader.citus)
;;;
;;; Main data structures to host our distribution rules.
;;;
(defstruct citus-reference-rule table)
(defstruct citus-distributed-rule table using from)
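;;; A small sketch, using hypothetical table and column objects built with the
;;; catalog constructors, of how the clauses from the header comment map onto
;;; these rule structures before citus-set-table resolves the placeholders
;;; against the catalog:
(list (make-citus-distributed-rule
       :table (make-table :source-name (cons "public" "companies"))
       :using (make-column :name "id")
       :from nil)
      (make-citus-reference-rule
       :table (make-table :source-name (cons "public" "ach_accounts"))))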
(defun citus-distribute-schema (catalog distribution-rules)
"Distribute a CATALOG with given user provided DISTRIBUTION-RULES. Return
the list of rules applied."
(let ((processed-rules '())
(derived-rules
(loop :for rule :in distribution-rules
:append (progn
(citus-set-table rule catalog)
(compute-foreign-rules rule (citus-rule-table rule))))))
;;
;; Apply rules only once.
;;
;; ERROR Database error 42P16: table "campaigns" is already distributed
;;
;; In the PostgreSQL source case, we have the table OIDs already at this
;; point, but in the general case we don't. Use the names to match what
;; we did up to now.
;;
(loop :for rule :in (append distribution-rules derived-rules)
:unless (member (table-source-name (citus-rule-table rule))
processed-rules
:key (lambda (rule)
(table-source-name (citus-rule-table rule)))
:test #'equal)
:collect (progn
(push rule processed-rules)
(apply-citus-rule rule)
rule))))
(define-condition citus-rule-table-not-found (error)
((schema-name :initarg :schema-name
:accessor citus-rule-table-not-found-schema-name)
(table-name :initarg :table-name
:accessor citus-rule-table-not-found-table-name))
(:report
(lambda (err stream)
(let ((*print-circle* nil))
(with-slots (schema-name table-name)
err
(format stream
"Could not find table ~s in schema ~s for distribution rules."
table-name schema-name))))))
(defun citus-find-table (catalog table)
(let* ((table-name (cdr (table-source-name table)))
(schema-name (schema-name (table-schema table))))
(or (find-table (find-schema catalog schema-name) table-name)
(error (make-condition 'citus-rule-table-not-found
:table-name table-name
:schema-name schema-name)))))
(defgeneric citus-rule-table (rule)
(:documentation "Returns the RULE's table.")
(:method ((rule citus-reference-rule)) (citus-reference-rule-table rule))
(:method ((rule citus-distributed-rule)) (citus-distributed-rule-table rule)))
(defgeneric citus-set-table (rule catalog)
(:documentation "Find citus RULE table in CATALOG and update the
placeholder with the table found there.")
(:method ((rule citus-reference-rule) (catalog catalog))
(let ((table (citus-reference-rule-table rule)))
(setf (citus-reference-rule-table rule)
(citus-find-table catalog table))))
(:method ((rule citus-distributed-rule) (catalog catalog))
(let ((table (citus-distributed-rule-table rule)))
(map-into (citus-distributed-rule-from rule)
(lambda (from) (citus-find-table catalog from))
(citus-distributed-rule-from rule))
(setf (citus-distributed-rule-table rule)
(citus-find-table catalog table)))))
(defmethod print-object ((rule citus-reference-rule) stream)
(print-unreadable-object (rule stream :type t :identity t)
(with-slots (table) rule
(format stream
"distribute ~a as reference"
(format-table-name table)))))
(defmethod print-object ((rule citus-distributed-rule) stream)
(print-unreadable-object (rule stream :type t :identity t)
(with-slots (table using from) rule
(format stream
"distribute ~a :using ~a~@[ :from ~{~a~^, ~}~]"
(format-table-name table)
(column-name using)
(mapcar #'format-table-name from)))))
;;;
;;; When distributing a table on a given key, we can follow foreign keys
;;; pointing to this table. We might find out that when computing the
;;; following rule:
;;;
;;; distribute companies using id
;;;
;;; We then want to add the set of rules that we find walking the foreign
;;; keys:
;;;
;;; distribute campaigns using company_id
;;; distribute ads using company_id from campaigns
;;; distribute clicks using company_id from ads, campaigns
;;; distribute impressions using company_id from ads, campaigns
;;;
(defgeneric compute-foreign-rules (rule table &key)
(:documentation
"Compute rules to apply that derive from the distribution rule RULE when
following foreign-keys from TABLE."))
(defmethod compute-foreign-rules ((rule citus-reference-rule)
(table table)
&key)
"There's nothing to do here, reference table doesn't impact the schema."
nil)
(defmethod compute-foreign-rules ((rule citus-distributed-rule)
(table table)
&key fkey-list)
"Find every foreign key that points to TABLE and add return a list of new
rules for the source of those foreign keys."
(let ((pkey (find-if #'index-primary (table-index-list table))))
(when (and pkey (member (column-name (citus-distributed-rule-using rule))
(index-columns pkey)
:test #'string=))
(loop :for fkey :in (index-fk-deps pkey)
:for new-fkey-list := (cons fkey fkey-list)
:for new-rule := (make-distributed-table-from-fkey rule new-fkey-list)
:collect new-rule :into new-rule-list
:collect (compute-foreign-rules rule (fkey-table fkey)
:fkey-list new-fkey-list)
:into dep-rule-list
:finally (return (append new-rule-list
;; flatten sub-lists as we go
(apply #'append dep-rule-list)))))))
(defun make-distributed-table-from-fkey (rule fkey-list)
"Make a new Citus distributed table rule from an existing rule and a fkey
definition."
;;
;; We have a list of foreign keys pointing from a current table,
;; (fkey-table fkey), to the root table that is distributed,
;; (fkey-foreign-table fkey).
;;
;; For the distribution key name, we consider the name of the column used
;; in the last entry from the fkey-list, the column name that points to
;; the root.id distribution key and might be named root_id or something.
;;
;; Then we only need to specify the intermediate tables in FROM; the last
;; entry gives us the data we need to backfill our tables.
;;
(let* ((fkey (car (last fkey-list)))
(dist-key (column-name (citus-distributed-rule-using rule)))
(dist-key-pos (position dist-key
(fkey-foreign-columns fkey)
:test #'string=))
(fkey-table-dist-key (nth dist-key-pos (fkey-columns fkey)))
(from-table-list (butlast (mapcar #'fkey-foreign-table fkey-list))))
(make-citus-distributed-rule :table (fkey-table (first fkey-list))
:using (make-column :name fkey-table-dist-key)
:from from-table-list)))
;;;
;;; Apply a citus distribution rule to given table, and store the rule
;;; itself to the table-citus-rule slot so that we later know to generate a
;;; proper SELECT query that includes the backfilling.
;;;
(define-condition citus-rule-is-missing-from-list (error)
((rule :initarg :rule :accessor citus-rule))
(:report
(lambda (err stream)
(let ((*print-circle* nil))
(format stream
"Failed to add column ~s to table ~a for lack of a FROM clause in the distribute rule:~% distribute ~a using ~a from ?"
(column-name (citus-distributed-rule-using (citus-rule err)))
(format-table-name (citus-distributed-rule-table (citus-rule err)))
(format-table-name (citus-distributed-rule-table (citus-rule err)))
(column-name (citus-distributed-rule-using (citus-rule err))))))))
(defgeneric apply-citus-rule (rule)
(:documentation "Apply a Citus distribution RULE to given TABLE."))
(defmethod apply-citus-rule ((rule citus-reference-rule))
;; for a reference table, we have nothing to do really.
(setf (table-citus-rule (citus-reference-rule-table rule)) rule)
t)
(defmethod apply-citus-rule ((rule citus-distributed-rule))
;; ok now we need to check if the USING column exists or if we need to add
;; it to our model
(setf (table-citus-rule (citus-distributed-rule-table rule)) rule)
(let* ((table (citus-distributed-rule-table rule))
(column (find (column-name (citus-distributed-rule-using rule))
(table-field-list table)
:test #'string=
:key #'field-name)))
(if column
;; add it to the PKEY definition, in first position
(add-column-to-pkey table
(column-name (citus-distributed-rule-using rule)))
;; The column doesn't exist, we need to find it in the :FROM rule's
;; list. The :FROM slot of the rule is a list of tables to
;; "traverse" when backfilling the data. The list follows the
;; foreign-key relationships from TABLE to the source of the
;; distribution key.
;;
;; To find the column definition to add to the current TABLE, look
;; it up in the last entry of the FROM rule's list.
(let* ((last-from-rule (car (last (citus-distributed-rule-from rule))))
(column-definition
(when last-from-rule
(find (column-name (citus-distributed-rule-using rule))
(table-field-list last-from-rule)
:test #'string=
:key #'column-name)))
(new-column
(when column-definition
(make-column :name (column-name column-definition)
:type-name (column-type-name column-definition)
:nullable (column-nullable column-definition)
:transform (column-transform column-definition)))))
(if column-definition
(progn
;;
;; Here also we need to add the new column to the PKEY
;; definition, in first position.
;;
(add-column-to-pkey table (column-name new-column))
;;
;; We need to backfill the distribution key in the data,
;; which we're implementing with a JOIN when we SELECT from
;; the source table. We add the new field here.
;;
(push new-column (table-field-list table))
(push new-column (table-column-list table)))
;;
;; We don't have any table-field-list in the citus rule,
;; meaning that the distribute ... using ... clause is lacking
;; the FROM part, and we need it.
;;
(error
(make-condition 'citus-rule-is-missing-from-list :rule rule)))))))
(defun add-column-to-pkey (table column-name)
"Add COLUMN in the first position of the TABLE's primary key index."
(let* ((index (find-if #'index-primary (table-index-list table)))
(idxcol (when index
(find column-name (index-columns index) :test #'string=))))
(when (and index (null idxcol))
;; add a new column
(push column-name (index-columns index))
;; now remove origin schema sql and condef, we need to redo them
(setf (index-sql index) nil)
(setf (index-condef index) nil)
;; now tweak the fkey definitions that are using this index
(loop :for fkey :in (index-fk-deps index)
:do (push column-name (fkey-columns fkey))
:do (push column-name (fkey-foreign-columns fkey))
:do (setf (fkey-condef fkey) nil)))))
;;;
;;; Format a query for backfilling the data right from pgloader:
;;;
;;; SELECT dist_key, * FROM source JOIN pivot ON ...
;;;
(defun format-citus-join-clause (table distribution-rule)
"Format a JOIN clause to backfill the distribution key data in tables that
are referencing (even indirectly) the main distribution table."
(with-output-to-string (s)
(loop :for current-table := table :then rel
:for rel :in (citus-distributed-rule-from distribution-rule)
:do (let* ((fkey
(find (ensure-unquoted (table-name rel))
(table-fkey-list current-table)
:test #'string=
:key (lambda (fkey)
(ensure-unquoted
(table-name (fkey-foreign-table fkey))))))
(ftable (fkey-foreign-table fkey)))
(format s
" JOIN ~s.~s"
(schema-source-name (table-schema ftable))
(table-source-name ftable))
;;
;; Skip the first column in the fkey definition, that's the
;; distribution key that was just added by pgloader: we don't
;; have it on the source database, we are going to create it on
;; the target database.
;;
(loop :for first := t :then nil
:for c :in (cdr (fkey-columns fkey))
:for fc :in (cdr (fkey-foreign-columns fkey))
:do (format s
" ~:[AND~;ON~] ~a.~a = ~a.~a"
first
(table-source-name (fkey-table fkey))
c
(table-source-name (fkey-foreign-table fkey))
fc))))))
(defun citus-format-sql-select (source-table target-table)
"Return the SQL statement to use to fetch data from the COPY context,
including backfilling the distribution key in related tables."
;;
;; SELECT from.id, id, ... from source join from-table ...
;;
;; So we must be careful to prefix the column names with the
;; proper table name, because of the join(s), and the first column
;; in the output is taken from the main FROM table (the last one
;; in the rule).
;;
(let* ((last-from-rule
(car (last (citus-distributed-rule-from
(table-citus-rule target-table)))))
(cols
(append (list
(format nil "~a.~a"
(table-name last-from-rule)
(column-name (first (table-field-list source-table)))))
(mapcar (lambda (field)
(format nil "~a.~a"
(table-name source-table)
(column-name field)))
(rest (table-field-list source-table)))))
(joins
(format-citus-join-clause source-table
(table-citus-rule target-table))))
(format nil
"SELECT ~{~a::text~^, ~} FROM ~s.~s ~a"
cols
(schema-source-name (table-schema source-table))
(table-source-name source-table)
joins)))
;;;
;;; Predicate to see if a table needs backfilling
;;;
(defun citus-backfill-table-p (table)
"Returns non-nil when given TABLE should be backfilled with the
distribution key."
(and (table-citus-rule table)
(typep (table-citus-rule table) 'citus-distributed-rule)
(not (null (citus-distributed-rule-from (table-citus-rule table))))))

View File

@ -147,7 +147,8 @@
(*summary-pathname* . ,*summary-pathname*)
(*sections* . ',*sections*)))
(kernel (lp:make-kernel 1 :bindings bindings))
(lparallel:*kernel* kernel))
(lparallel:*kernel* kernel)
(lparallel:*task-category* :monitor))
;; make our kernel and channel visible from the outside
(setf *monitoring-kernel* kernel
@ -155,7 +156,8 @@
*monitoring-queue* (lq:make-queue))
(lp:task-handler-bind
((error
(#+pgloader-image
(error
#'(lambda (c)
;; we can't log-message a monitor thread error
(lp:invoke-transfer-error
@ -212,7 +214,8 @@
(start
(when (start-start-logger event)
(pgloader.logs:start-logger))
(cl-log:log-message :info "Starting monitor"))
(cl-log:log-message :info "Starting monitor")
(cl-log:log-message :log "pgloader version ~s" *version-string*))
(stop
(cl-log:log-message :info "Stopping monitor")

View File

@ -66,3 +66,22 @@
(recompute-fs-and-retry ()
(setf *fs* (walk-sources-and-build-fs))
(sql url))))
(defun sql-url-for-variant (base filename &optional variant)
"Build a SQL URL for given VARIANT"
(flet ((sql-base-url (base filename)
(format nil "/~a/~a" base filename)))
(if variant
(let ((sql-variant-url
(format nil "/~a/~a/~a"
base
(string-downcase (typecase variant
(symbol (symbol-name variant))
(string variant)
(t (princ-to-string variant))))
filename)))
(if (gethash sql-variant-url *fs*)
sql-variant-url
(sql-base-url base filename)))
(sql-base-url base filename))))
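;;; A minimal sketch of the resolution above, with a hypothetical in-memory
;;; registry standing in for *fs* and made-up file names: the variant-specific
;;; URL is only preferred when it has actually been registered.
(let ((fs (make-hash-table :test 'equal)))
  (setf (gethash "/pgsql/list-all-columns.sql" fs) t
        (gethash "/pgsql/redshift/list-all-columns.sql" fs) t)
  (flet ((resolve (base filename &optional variant)
           (let ((variant-url (when variant
                                (format nil "/~a/~(~a~)/~a" base variant filename))))
             (if (and variant-url (gethash variant-url fs))
                 variant-url
                 (format nil "/~a/~a" base filename)))))
    (list (resolve "pgsql" "list-all-columns.sql" :redshift)
          (resolve "pgsql" "list-all-fkeys.sql" :redshift))))
;; => ("/pgsql/redshift/list-all-columns.sql" "/pgsql/list-all-fkeys.sql")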

View File

@ -7,7 +7,9 @@
(defun make-kernel (worker-count
&key (bindings
`((*monitoring-queue* . ,*monitoring-queue*)
`((*print-circle* . ,*print-circle*)
(*print-pretty* . ,*print-pretty*)
(*monitoring-queue* . ,*monitoring-queue*)
(*copy-batch-rows* . ,*copy-batch-rows*)
(*copy-batch-size* . ,*copy-batch-size*)
(*rows-per-range* . ,*rows-per-range*)
@ -28,6 +30,10 @@
;; bindings updates for libs
;; CFFI is used by the SQLite lib
(cffi:*default-foreign-encoding*
. ,cffi:*default-foreign-encoding*))))
. ,cffi:*default-foreign-encoding*)
;; CL+SSL can be picky about verifying certs
(cl+ssl:*make-ssl-client-stream-verify-default*
. ,cl+ssl:*make-ssl-client-stream-verify-default*))))
"Wrapper around lparallel:make-kernel that sets our usual bindings."
(lp:make-kernel worker-count :bindings bindings))

View File

@ -53,7 +53,7 @@
(string= "set" data-type))
(let ((start-1 (position #\( column-type)) ; just before start position
(end (position #\) column-type))) ; just before end position
(when start-1
(when (and start-1 (< (+ 1 start-1) end))
(destructuring-bind (a &optional b)
(mapcar #'parse-integer
(sq:split-sequence #\, column-type

View File

@ -13,7 +13,7 @@
LOAD CSV
FROM inline (a, b, c)
INTO postgresql:///pgloader?allcols (a, b, c)
INTO postgresql:///pgloader?allcols (a, b, c text using (subseq c 0))
WITH fields optionally enclosed by '"',
fields escaped by double-quote,

View File

@ -8,7 +8,8 @@
*/
LOAD ARCHIVE
FROM http://pgsql.tapoueh.org/temp/foo.zip
-- FROM http://pgsql.tapoueh.org/temp/foo.zip
FROM http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip
INTO postgresql:///ip4r
BEFORE LOAD

1
test/citus/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.csv

20
test/citus/Makefile Normal file
View File

@ -0,0 +1,20 @@
DATASET = companies campaigns ads clicks impressions geo_ips
CSV = $(addsuffix .csv,$(DATASET))
DROP = DROP TABLE IF EXISTS companies, campaigns, ads, clicks, impressions, geo_ips
all: schema data ;
schema:
psql --single-transaction -c "$(DROP)" -d hackathon
psql --single-transaction -f company.sql -d hackathon
data: fetch
psql -f copy.sql -d hackathon
../../build/bin/pgloader ./data.load
fetch: $(CSV) ;
%.csv:
curl -O https://examples.citusdata.com/mt_ref_arch/$@
.PHONY: schema data fetch

42
test/citus/README.md Normal file
View File

@ -0,0 +1,42 @@
# Citus Multi-Tenant Automatic Distribution
In this test case we follow the documentation at:
https://docs.citusdata.com/en/v7.5/use_cases/multi_tenant.html
We install the schema as it was before the Citus migration, and load the data
without the backfilling that has already been done upstream. For that we use
pgloader to ignore the company_id column in the tables that didn't have this
column prior to the Citus migration effort.
Then the following `company.load` file contains the pgloader command that
runs a full migration from PostgreSQL to Citus:
```
load database
from pgsql:///hackathon
into pgsql://localhost:9700/dim
with include drop, reset no sequences
distribute companies using id;
```
Tables are marked distributed, the company_id column is added where it's
needed, primary key and foreign key definitions are altered to the new
model, and finally the data is backfilled automatically in the target tables
thanks to generated queries like the following:
~~~
SELECT "campaigns".company_id::text,
"impressions".id::text,
"impressions".ad_id::text,
"impressions".seen_at::text,
"impressions".site_url::text,
"impressions".cost_per_impression_usd::text,
"impressions".user_ip::text,
"impressions".user_data::text
FROM "public"."impressions"
JOIN "public"."ads" ON impressions.ad_id = ads.id
JOIN "public"."campaigns" ON ads.campaign_id = campaigns.id
~~~

14
test/citus/company.load Normal file
View File

@ -0,0 +1,14 @@
load database
from pgsql:///hackathon
into pgsql://localhost:9700/dim
with include drop, reset no sequences
cast column impressions.seen_at to "timestamp with time zone"
distribute companies using id
-- distribute campaigns using company_id
-- distribute ads using company_id from campaigns
-- distribute clicks using company_id from ads, campaigns
-- distribute impressions using company_id from ads, campaigns
;

51
test/citus/company.sql vendored Normal file
View File

@ -0,0 +1,51 @@
CREATE TABLE companies (
id bigserial PRIMARY KEY,
name text NOT NULL,
image_url text,
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL
);
CREATE TABLE campaigns (
id bigserial PRIMARY KEY,
company_id bigint REFERENCES companies (id),
name text NOT NULL,
cost_model text NOT NULL,
state text NOT NULL,
monthly_budget bigint,
blacklisted_site_urls text[],
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL
);
CREATE TABLE ads (
id bigserial PRIMARY KEY,
campaign_id bigint REFERENCES campaigns (id),
name text NOT NULL,
image_url text,
target_url text,
impressions_count bigint DEFAULT 0,
clicks_count bigint DEFAULT 0,
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL
);
CREATE TABLE clicks (
id bigserial PRIMARY KEY,
ad_id bigint REFERENCES ads (id),
clicked_at timestamp without time zone NOT NULL,
site_url text NOT NULL,
cost_per_click_usd numeric(20,10),
user_ip inet NOT NULL,
user_data jsonb NOT NULL
);
CREATE TABLE impressions (
id bigserial PRIMARY KEY,
ad_id bigint REFERENCES ads (id),
seen_at timestamp without time zone NOT NULL,
site_url text NOT NULL,
cost_per_impression_usd numeric(20,10),
user_ip inet NOT NULL,
user_data jsonb NOT NULL
);

5
test/citus/copy.sql vendored Normal file
View File

@ -0,0 +1,5 @@
\copy companies from 'companies.csv' with csv
\copy campaigns from 'campaigns.csv' with csv
-- \copy ads from 'ads.csv' with csv
-- \copy clicks from 'clicks.csv' with csv
-- \copy impressions from 'impressions.csv' with csv

68
test/citus/data.load Normal file
View File

@ -0,0 +1,68 @@
--
-- Ads
--
load csv
from ads.csv
(
id, company_id, campaign_id, name, image_url, target_url,
impressions_count, clicks_count, created_at, updated_at
)
into postgresql:///hackathon
target table ads
target columns
(
id, campaign_id, name, image_url, target_url,
impressions_count, clicks_count, created_at, updated_at
)
with fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ',';
--
-- Clicks
--
load csv
from clicks.csv
(
id, company_id, ad_id, clicked_at, site_url, cost_per_click_usd,
user_ip, user_data
)
into postgresql:///hackathon
target table clicks
target columns
(
id, ad_id, clicked_at, site_url, cost_per_click_usd, user_ip, user_data
)
with fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ',';
--
-- Impressions
--
load csv
from impressions.csv
(
id, company_id, ad_id, seen_at, site_url,
cost_per_impression_usd, user_ip, user_data
)
into postgresql:///hackathon
target table impressions
target columns
(
id, ad_id, seen_at, site_url, cost_per_impression_usd, user_ip, user_data
)
with drop indexes,
fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ',';

View File

@ -15,11 +15,11 @@ LOAD CSV
"repl$grpid" text,
"repl$id" text,
another text,
fields text
fields integer
)
$$;
somefields,rekplcode,repl$grpid,repl$id,another,fields
a,b,c,d,e,f
foo,bar,baz,quux,foobar,fizzbuzz
somefields,rekplcode,repl$grpid,repl$id,fields,another
a,b,c,d,1,e
foo,bar,baz,quux,2,foobar

22
test/csv-null-if.load Normal file
View File

@ -0,0 +1,22 @@
LOAD CSV
FROM INLINE (id, number, data)
INTO postgresql:///pgloader?nullif
BEFORE LOAD DO
$$ drop table if exists nullif; $$,
$$ CREATE TABLE nullif
(
id serial primary key,
number integer,
data text
);
$$
WITH null if '\N',
fields terminated by ',',
fields enclosed by '"',
fields escaped by backslash-quote;
"1",\N,"testing nulls"
"2","2","another test"

View File

@ -28,3 +28,4 @@ LOAD CSV
1,10-02-1999 00-33-12.123456,"00:05.02"
2,10-02-2014 00-33-13.123,"18:25.52"
3,10-02-2014 00-33-14.1234,13:14.15
4,10-09-2018 19-24-59,19:24.59

View File

@ -4,7 +4,7 @@ LOAD DATABASE
WITH data only, truncate, create no tables
MATERIALIZE VIEWS proceed
MATERIALIZE VIEWS proceed, foo as $$ select 1 as a; $$
INCLUDING ONLY TABLE NAMES MATCHING 'proceed'
@ -13,5 +13,6 @@ LOAD DATABASE
$$ drop schema if exists db789 cascade; $$,
$$ create schema db789; $$,
$$ create table db789.refrain (id char(1) primary key); $$,
$$ create table db789.proceed (id char(1) primary key); $$;
$$ create table db789.proceed (id char(1) primary key); $$,
$$ create table db789.foo (a integer primary key); $$;

View File

@ -0,0 +1,19 @@
load database
from mysql://root@localhost/f1db?useSSL=false
into pgsql://localhost:9700/dim
with reset no sequences
distribute f1db.circuits as reference table
distribute f1db.constructorResults using raceId
distribute f1db.constructors as reference table
distribute f1db.constructorStandings using raceId
distribute f1db.drivers as reference table
distribute f1db.driverStandings using raceId
distribute f1db.lapTimes using raceId
distribute f1db.pitStops using raceId
distribute f1db.qualifying using raceId
distribute f1db.races as reference table
distribute f1db.results using raceId
distribute f1db.seasons as reference table
distribute f1db.status as reference table;

View File

@ -9,6 +9,7 @@ load database
quote identifiers
ALTER SCHEMA 'pgloader' RENAME TO 'mysql'
ALTER TABLE NAMES MATCHING ~/./ SET TABLESPACE 'pg_default'
CAST column utilisateurs__Yvelines2013-06-28.sexe
to text drop not null using empty-string-to-null,

18
test/mysql/my.sql vendored
View File

@ -100,6 +100,24 @@ create table bits
insert into bits(bool) values(0b00), (0b01);
/*
* https://github.com/dimitri/pgloader/issues/811
*/
CREATE TABLE `domain_filter` (
`id` binary(16) NOT NULL ,
`type` varchar(50) NOT NULL ,
`value` json DEFAULT NULL ,
`negated` tinyint(1) NOT NULL DEFAULT '0' ,
`report_id` varbinary(255) NOT NULL ,
`query_id` varchar(255) NOT NULL ,
`created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ,
`updated_at` datetime DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP ,
`updated_by` varbinary(255) DEFAULT NULL ,
PRIMARY KEY (`id`),
UNIQUE KEY `domain_filter_unq` (`report_id`,`query_id`,`type`),
KEY `domain_filter` (`type`)
) ENGINE=InnoDB DEFAULT CHARSET=ascii;
/*
* https://github.com/dimitri/pgloader/issues/703
*/

14
test/pgsql-source.load Normal file
View File

@ -0,0 +1,14 @@
load database
from pgsql://localhost/pgloader
into pgsql://localhost/copy
-- including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
including only table names matching ~/geolocations/ in schema 'public'
materialize views some_usps
as $$
select usps, geoid, aland, awater, aland_sqmi, awater_sqmi, location
from districts
where usps in ('MT', 'DE', 'AK', 'WY', 'PR', 'VT', 'SD', 'DC', 'ND');
$$
;

View File

@ -1,2 +1,2 @@
a b c d e f
foo bar baz quux foobar fizzbuzz
a b c d e 1
foo bar baz quux foobar 2

View File

@ -1,3 +1,4 @@
1 1999-10-02 00:33:12.123456+02 00:05:02
2 2014-10-02 00:33:13.123+02 18:25:52
3 2014-10-02 00:33:14.1234+02 13:14:15
4 2018-10-09 19:24:59+02 19:24:59