diff --git a/.dockerignore b/.dockerignore index d075b3e..6be6907 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,5 @@ .git .vagrant build +Dockerfile +Dockerfile.ccl \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 6fc43dc..ea6a08c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,47 @@ -FROM debian:stretch -MAINTAINER Dimitri Fontaine +FROM debian:stable-slim as builder -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - wget curl make git bzip2 time \ - ca-certificates \ - libzip-dev libssl1.1 openssl \ - patch unzip libsqlite3-dev gawk \ - freetds-dev sbcl && \ - rm -rf /var/lib/apt/lists/* + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bzip2 \ + ca-certificates \ + curl \ + freetds-dev \ + gawk \ + git \ + libsqlite3-dev \ + libssl1.1 \ + libzip-dev \ + make \ + openssl \ + patch \ + sbcl \ + time \ + unzip \ + wget \ + cl-ironclad \ + cl-babel \ + && rm -rf /var/lib/apt/lists/* -ADD ./ /opt/src/pgloader -WORKDIR /opt/src/pgloader + COPY ./ /opt/src/pgloader -# build/ is in the .dockerignore file, but we actually need it now -RUN mkdir -p build/bin -RUN make + RUN mkdir -p /opt/src/pgloader/build/bin \ + && cd /opt/src/pgloader \ + && make -RUN cp /opt/src/pgloader/build/bin/pgloader /usr/local/bin +FROM debian:stable-slim + + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + curl \ + freetds-dev \ + gawk \ + libsqlite3-dev \ + libzip-dev \ + make \ + sbcl \ + unzip \ + && rm -rf /var/lib/apt/lists/* + + COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin + + LABEL maintainer="Dimitri Fontaine " diff --git a/Dockerfile.ccl b/Dockerfile.ccl index a33f8c9..9377fe0 100644 --- a/Dockerfile.ccl +++ b/Dockerfile.ccl @@ -1,25 +1,51 @@ -FROM debian:stretch -MAINTAINER Dimitri Fontaine +FROM debian:stable-slim as builder -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - wget curl make git bzip2 time \ - ca-certificates \ - libzip-dev libssl1.1 openssl \ - patch unzip libsqlite3-dev gawk \ - freetds-dev sbcl && \ - rm -rf /var/lib/apt/lists/* + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bzip2 \ + ca-certificates \ + curl \ + freetds-dev \ + gawk \ + git \ + libsqlite3-dev \ + libssl1.1 \ + libzip-dev \ + make \ + openssl \ + patch \ + sbcl \ + time \ + unzip \ + wget \ + cl-ironclad \ + cl-babel \ + && rm -rf /var/lib/apt/lists/* -WORKDIR /usr/local/src -RUN curl --location -O https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz -RUN tar xf ccl-1.11.5-linuxx86.tar.gz -RUN cp /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl + RUN curl -SL https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz \ + | tar xz -C /usr/local/src/ \ + && mv /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl -ADD ./ /opt/src/pgloader -WORKDIR /opt/src/pgloader + COPY ./ /opt/src/pgloader -# build/ is in the .dockerignore file, but we actually need it now -RUN mkdir -p build/bin -RUN make CL=ccl DYNSIZE=256 + RUN mkdir -p /opt/src/pgloader/build/bin \ + && cd /opt/src/pgloader \ + && make CL=ccl DYNSIZE=256 -RUN cp /opt/src/pgloader/build/bin/pgloader /usr/local/bin +FROM debian:stable-slim + + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + curl \ + freetds-dev \ + gawk \ + libsqlite3-dev \ + libzip-dev \ + make \ + sbcl \ + unzip \ + && rm -rf /var/lib/apt/lists/* + + COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin + + LABEL 
maintainer="Dimitri Fontaine " diff --git a/Makefile b/Makefile index 06c381a..42511f5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # pgloader build tool APP_NAME = pgloader -VERSION = 3.5.2 +VERSION = 3.6.1 # use either sbcl or ccl CL = sbcl @@ -24,7 +24,7 @@ QLDIR = $(BUILDDIR)/quicklisp MANIFEST = $(BUILDDIR)/manifest.ql LATEST = $(BUILDDIR)/pgloader-latest.tgz -BUNDLEDIST = 2018-04-30 +BUNDLEDIST = 2019-01-07 BUNDLENAME = pgloader-bundle-$(VERSION) BUNDLEDIR = $(BUILDDIR)/bundle/$(BUNDLENAME) BUNDLE = $(BUILDDIR)/$(BUNDLENAME).tgz @@ -99,8 +99,11 @@ clones: $(QLDIR)/local-projects/cl-ixf \ $(QLDIR)/local-projects/cl-csv \ $(QLDIR)/local-projects/qmynd ; -$(LIBS): $(QLDIR)/setup.lisp clones +$(LIBS): $(QLDIR)/setup.lisp $(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp \ + --eval '(push :pgloader-image *features*)' \ + --eval '(setf *print-circle* t *print-pretty* t)' \ + --eval '(ql:quickload "pgloader")' \ --eval '(push "$(PWD)/" ql:*local-project-directories*)' \ --eval '(ql:quickload "pgloader")' \ --eval '(quit)' @@ -141,8 +144,11 @@ $(PGLOADER): $(MANIFEST) $(BUILDAPP) $(LISP_SRC) --manifest-file $(MANIFEST) \ --asdf-tree $(QLDIR)/dists \ --asdf-path . \ - --load-system $(APP_NAME) \ + --load-system cffi \ + --load-system cl+ssl \ + --load-system mssql \ --load src/hooks.lisp \ + --load-system $(APP_NAME) \ --entry pgloader:main \ --dynamic-space-size $(DYNSIZE) \ $(COMPRESS_CORE_OPT) \ @@ -164,6 +170,12 @@ pgloader-standalone: test: $(PGLOADER) $(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress +save: ./src/save.lisp $(LISP_SRC) + $(CL) $(CL_OPTS) --load ./src/save.lisp + +check-saved: save + $(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress + clean-bundle: rm -rf $(BUNDLEDIR) rm -rf $(BUNDLETESTD)/$(BUNDLENAME)/* @@ -179,8 +191,12 @@ $(BUNDLEDIR): --eval '(defvar *ql-dist* "$(BUNDLEDIST)")' \ --load bundle/ql.lisp -$(BUNDLE): $(BUNDLEDIR) +$(BUNDLEDIR)/version.sexp: $(BUNDLEDIR) + echo "\"$(VERSION)\"" > $@ + +$(BUNDLE): $(BUNDLEDIR) $(BUNDLEDIR)/version.sexp cp bundle/README.md $(BUNDLEDIR) + cp bundle/save.lisp $(BUNDLEDIR) sed -e s/%VERSION%/$(VERSION)/ < bundle/Makefile > $(BUNDLEDIR)/Makefile git archive --format=tar --prefix=pgloader-$(VERSION)/ master \ | tar -C $(BUNDLEDIR)/local-projects/ -xf - diff --git a/README.md b/README.md index d99cc2d..a3b3049 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,16 @@ pgloader: +### Building Docker image from sources + +You can build a Docker image from source using SBCL by default: + + $ docker build . + +Or Clozure CL (CCL): + + $ docker build -f Dockerfile.ccl . + ## More options when building from source The `Makefile` target `pgloader` knows how to produce a Self Contained diff --git a/bundle/Makefile b/bundle/Makefile index 246438f..fbeae71 100644 --- a/bundle/Makefile +++ b/bundle/Makefile @@ -48,9 +48,12 @@ $(PGLOADER): $(BUILDAPP) $(BUILDAPP_OPTS) \ --sbcl $(CL) \ --asdf-tree . 
\ + --load-system cffi \ + --load-system cl+ssl \ + --load-system mssql \ + --load $(SRCDIR)/src/hooks.lisp \ --load-system $(APP_NAME) \ --eval '(setf pgloader.params::*version-string* "$(VERSION)")' \ - --load $(SRCDIR)/src/hooks.lisp \ --entry pgloader:main \ --dynamic-space-size $(DYNSIZE) \ $(COMPRESS_CORE_OPT) \ @@ -61,4 +64,7 @@ $(PGLOADER): $(BUILDAPP) test: $(PGLOADER) $(MAKE) PGLOADER=$(realpath $(PGLOADER)) -C $(SRCDIR)/test regress +save: + sbcl --no-userinit --load ./save.lisp + check: test ; diff --git a/bundle/save.lisp b/bundle/save.lisp new file mode 100644 index 0000000..d955b6c --- /dev/null +++ b/bundle/save.lisp @@ -0,0 +1,47 @@ +;;; +;;; Create a build/bin/pgloader executable from the source code, using +;;; Quicklisp to load pgloader and its dependencies. +;;; + +(in-package #:cl-user) + +(require :asdf) ; should work in SBCL and CCL + +(let* ((cwd (uiop:getcwd)) + (bundle.lisp (uiop:merge-pathnames* "bundle.lisp" cwd)) + (version-file (uiop:merge-pathnames* "version.sexp" cwd)) + (version-string (uiop:read-file-form version-file)) + (asdf:*central-registry* (list cwd))) + + (format t "Loading bundle.lisp~%") + (load bundle.lisp) + + (format t "Loading system pgloader ~a~%" version-string) + (asdf:load-system :pgloader :verbose nil) + (load (asdf:system-relative-pathname :pgloader "src/hooks.lisp")) + + (let* ((pgl (find-package "PGLOADER")) + (version-symbol (find-symbol "*VERSION-STRING*" pgl))) + (setf (symbol-value version-symbol) version-string))) + +(defun pgloader-image-main () + (let ((argv #+sbcl sb-ext:*posix-argv* + #+ccl ccl:*command-line-argument-list*)) + (pgloader::main argv))) + +(let* ((cwd (uiop:getcwd)) + (bin-dir (uiop:merge-pathnames* "bin/" cwd)) + (bin-filename (uiop:merge-pathnames* "pgloader" bin-dir))) + + (ensure-directories-exist bin-dir) + + #+ccl + (ccl:save-application bin-filename + :toplevel-function #'cl-user::pgloader-image-main + :prepend-kernel t) + #+sbcl + (sb-ext:save-lisp-and-die bin-filename + :toplevel #'cl-user::pgloader-image-main + :executable t + :save-runtime-options t + :compression t)) diff --git a/docs/conf.py b/docs/conf.py index e5a9e4f..0e689e9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -83,7 +83,8 @@ todo_include_todos = False # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +#html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/index.rst b/docs/index.rst index d69915e..2a79f34 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,11 +6,242 @@ Welcome to pgloader's documentation! ==================================== +pgloader loads data from various sources into PostgreSQL. It can transform +the data it reads on the fly and submit raw SQL before and after the +loading. It uses the `COPY` PostgreSQL protocol to stream the data into the +server, and manages errors by filling a pair of *reject.dat* and +*reject.log* files. + +Thanks to being able to load data directly from a database source, pgloader +also supports from migrations from other productions to PostgreSQL. In this +mode of operations, pgloader handles both the schema and data parts of the +migration, in a single unmanned command, allowing to implement **Continuous +Migration**. + +Features Overview +================= + +pgloader has two modes of operation: loading from files, migrating +databases. 
In both cases, pgloader uses the PostgreSQL COPY protocol which
+implements **streaming** to send data in a very efficient way.
+
+Loading file content in PostgreSQL
+----------------------------------
+
+When loading from files, pgloader implements the following features:
+
+Many source formats supported
+ Support for a wide variety of file based formats is included in
+ pgloader: the CSV family, fixed columns formats, dBase files (``db3``),
+ and IBM IXF files.
+
+ The SQLite database engine is accounted for in the next section:
+ pgloader considers SQLite as a database source and implements schema
+ discovery from SQLite catalogs.
+
+On the fly data transformation
+ Often enough the data as read from a CSV file (or another format) needs
+ some tweaking and clean-up before being sent to PostgreSQL.
+
+ For instance in the `geolite
+ `_
+ example we can see that integer values are being rewritten as IP address
+ ranges, making it possible to target an ``ip4r`` column directly.
+
+Full Field projections
+ pgloader supports loading data into fewer fields than found in the file, or
+ more, doing some computation on the data read before sending it to
+ PostgreSQL.
+
+Reading files from an archive
+ Archive formats *zip*, *tar*, and *gzip* are supported by pgloader: the
+ archive is extracted in a temporary directory and expanded files are then
+ loaded.
+
+HTTP(S) support
+ pgloader knows how to download a source file or a source archive using
+ HTTP directly. It might be better to use ``curl -O- http://... |
+ pgloader`` and read the data from *standard input*, thus allowing
+ streaming of the data from its source down to PostgreSQL.
+
+Target schema discovery
+ When loading into an existing table, pgloader takes into account the
+ existing columns and may automatically guess the CSV format for you.
+
+On error stop / On error resume next
+ In some cases the source data is so damaged as to be impossible to
+ migrate in full, and when loading from a file the default for
+ pgloader is to use the ``on error resume next`` option, where the rows
+ rejected by PostgreSQL are saved away and the migration continues with
+ the other rows.
+
+ In other cases loading only a part of the input data might not be a
+ great idea, and in such cases it's possible to use the ``on error stop``
+ option.
+
+Pre/Post SQL commands
+ This feature allows pgloader commands to include SQL commands to run
+ before and after loading a file. It might be about creating a table
+ first, then loading the data into it, and then doing more processing
+ on top of the data (implementing an *ELT* pipeline), or creating
+ specific indexes as soon as the data has been made ready.
+
+One-command migration to PostgreSQL
+-----------------------------------
+
+When migrating a full database in a single command, pgloader implements the
+following features:
+
+One-command migration
+ The whole migration is started with a single command line and then runs
+ unattended. pgloader is meant to be integrated into fully automated
+ tooling that you can repeat as many times as needed.
+
+Schema discovery
+ The source database is introspected using its SQL catalogs to get the
+ list of tables, attributes (with data types, default values, not null
+ constraints, etc), primary key constraints, foreign key constraints,
+ indexes, comments, etc. This feeds an internal database catalog of all
+ the objects to migrate from the source database to the target database.
+
+User defined casting rules
+ Some source databases have ideas about their data types that might not be
+ compatible with the PostgreSQL implementation of equivalent data types.
+
+ For instance, SQLite since version 3 has a `Dynamic Type System
+ `_ which of course isn't
+ compatible with the idea of a `Relation
+ `_. Or MySQL accepts
+ datetime for year zero, which doesn't exist in our calendar, and
+ doesn't have a boolean data type.
+
+ When migrating from another source database technology to PostgreSQL,
+ data type casting choices must be made. pgloader implements solid
+ defaults that you can rely upon, and a facility for **user defined data
+ type casting rules** for specific cases. The idea is to allow users to
+ specify how the migration should be done, in order for it to be
+ repeatable and included in a *Continuous Migration* process.
+
+On the fly data transformations
+ The user defined casting rules come with on the fly rewrite of the data.
+ For instance zero dates (it's not just the year, MySQL accepts
+ ``0000-00-00`` as a valid datetime) are rewritten to NULL values by
+ default.
+
+Partial Migrations
+ It is possible to include only a partial list of the source database
+ tables in the migration, or to exclude some of the tables on the source
+ database.
+
+Schema only, Data only
+ This is the **ORM compatibility** feature of pgloader, where it is
+ possible to create the schema using your ORM and then have pgloader
+ migrate the data targeting this already created schema.
+
+ When doing this, it is possible for pgloader to *reindex* the target
+ schema: before loading the data from the source database into PostgreSQL
+ using COPY, pgloader DROPs the indexes and constraints, and reinstalls
+ the exact same definitions of them once the data has been loaded.
+
+ The reason for operating that way is of course data load performance.
+
+Repeatable (DROP+CREATE)
+ By default, pgloader issues DROP statements in the target PostgreSQL
+ database before issuing any CREATE statement, so that you can repeat the
+ migration as many times as necessary until migration specifications and
+ rules are bug free.
+
+ Then schedule the data migration to run every night (or even more often!)
+ for the whole duration of the code migration project. See the
+ `Continuous Migration `_
+ methodology for more details about the approach.
+
+On error stop / On error resume next
+ The default behavior of pgloader when
+ migrating from a database is ``on error stop``. The idea is to let the
+ user fix either the migration specifications or the source data, and run
+ the process again, until it works.
+
+ In some cases the source data is so damaged as to be impossible to
+ migrate in full, and it might be necessary to then resort to the ``on
+ error resume next`` option, where the rows rejected by PostgreSQL are
+ saved away and the migration continues with the other rows.
+
+Pre/Post SQL commands, Post-Schema SQL commands
+ While pgloader takes care of rewriting the schema to PostgreSQL
+ expectations, and even provides *user-defined data type casting rules*
+ support to that end, sometimes it is necessary to add some specific SQL
+ commands around the migration. It's of course supported right from
+ pgloader itself, without having to script around it.
+
+Online ALTER schema
+ At times migrating to PostgreSQL is also a good opportunity to review
+ and fix bad decisions that were made in the past, or simply that are not
+ relevant to PostgreSQL.
+
+ The pgloader command syntax allows you to ALTER pgloader's internal
+ representation of the target catalogs so that the target schema can be
+ created a little differently from the source one. Changes supported
+ include targeting a different *schema* or *table* name.
+
+Materialized Views, or schema rewrite on-the-fly
+ In some cases the schema rewriting goes deeper than just renaming the
+ SQL objects, becoming a full normalization exercise, because PostgreSQL
+ is great at running a normalized schema in production under most
+ workloads.
+
+ pgloader implements full flexibility in on-the-fly schema rewriting, by
+ making it possible to migrate from a view definition. The view attribute
+ list becomes a table definition in PostgreSQL, and the data is fetched
+ by querying the view on the source system.
+
+ A SQL view makes it possible to implement both content filtering at the
+ column level using the SELECT projection clause, and at the row level
+ using the WHERE restriction clause, as well as backfilling from reference
+ tables thanks to JOINs.
+
+Distribute to Citus
+ When migrating from PostgreSQL to Citus, an important part of the process
+ consists of adjusting the schema to the distribution key. Read
+ `Preparing Tables and Ingesting Data
+ `_ in
+ the Citus documentation for a complete example showing how to do that.
+
+ When using pgloader it's possible to specify the distribution keys and
+ reference tables and let pgloader take care of adjusting the table,
+ indexes, primary keys and foreign key definitions all by itself.
+
+Encoding Overrides
+ MySQL doesn't actually enforce the encoding of the data in the database
+ to match the encoding known in the metadata, defined at the database,
+ table, or attribute level. Sometimes, it's necessary to override the
+ metadata in order to make sense of the text, and pgloader makes it easy
+ to do so.
+
+
+Continuous Migration
+--------------------
+
+pgloader is meant to migrate a whole database in a single command line and
+without any manual intervention. The goal is to be able to set up a
+*Continuous Integration* environment as described in the `Project
+Methodology `_ document of the `MySQL to
+PostgreSQL `_ webpage.
+
+ 1. Setup your target PostgreSQL Architecture
+ 2. Fork a Continuous Integration environment that uses PostgreSQL
+ 3. Migrate the data over and over again every night, from production
+ 4. As soon as the CI is all green using PostgreSQL, schedule the D-Day
+ 5. Migrate without surprise and enjoy!
+
+In order to be able to follow this great methodology, you need tooling to
+implement the third step in a fully automated way. That's pgloader.
+
 .. toctree:: :maxdepth: 2 :caption: Table Of Contents: intro
+ quickstart
 tutorial/tutorial pgloader ref/csv
@@ -22,6 +253,9 @@ Welcome to pgloader's documentation! ref/mysql ref/sqlite ref/mssql
+ ref/pgsql
+ ref/pgsql-citus-target
+ ref/pgsql-redshift
 ref/transforms bugreport
diff --git a/docs/intro.rst b/docs/intro.rst
index 0dc75e2..f733b72 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -10,13 +10,24 @@ the data into the server, and manages errors by filling a pair of
 pgloader knows how to read data from different kind of sources:
 * Files
+ * CSV
 * Fixed Format
 * DBF
+ * Databases
+ * SQLite
 * MySQL
 * MS SQL Server
+ * PostgreSQL
+ * Redshift
+
+pgloader knows how to target different products using the PostgreSQL Protocol:
+
+ * PostgreSQL
+ * `Citus `_
+ * Redshift
 The level of automation provided by pgloader depends on the data source type.
In the case of CSV and Fixed Format files, a full description of the @@ -24,23 +35,47 @@ expected input properties must be given to pgloader. In the case of a database, pgloader connects to the live service and knows how to fetch the metadata it needs directly from it. -Continuous Migration --------------------- +Features Matrix +--------------- -pgloader is meant to migrate a whole database in a single command line and -without any manual intervention. The goal is to be able to setup a -*Continuous Integration* environment as described in the `Project -Methodology `_ document of the `MySQL to -PostgreSQL `_ webpage. +Here's a comparison of the features supported depending on the source +database engine. Some features that are not supported can be added to +pgloader, it's just that nobody had the need to do so yet. Those features +are marked with ✗. Empty cells are used when the feature doesn't make sense +for the selected source database. - 1. Setup your target PostgreSQL Architecture - 2. Fork a Continuous Integration environment that uses PostgreSQL - 3. Migrate the data over and over again every night, from production - 4. As soon as the CI is all green using PostgreSQL, schedule the D-Day - 5. Migrate without suprise and enjoy! +========================== ======= ====== ====== =========== ========= +Feature SQLite MySQL MS SQL PostgreSQL Redshift +========================== ======= ====== ====== =========== ========= +One-command migration ✓ ✓ ✓ ✓ ✓ +Continuous Migration ✓ ✓ ✓ ✓ ✓ +Schema discovery ✓ ✓ ✓ ✓ ✓ +Partial Migrations ✓ ✓ ✓ ✓ ✓ +Schema only ✓ ✓ ✓ ✓ ✓ +Data only ✓ ✓ ✓ ✓ ✓ +Repeatable (DROP+CREATE) ✓ ✓ ✓ ✓ ✓ +User defined casting rules ✓ ✓ ✓ ✓ ✓ +Encoding Overrides ✓ +On error stop ✓ ✓ ✓ ✓ ✓ +On error resume next ✓ ✓ ✓ ✓ ✓ +Pre/Post SQL commands ✓ ✓ ✓ ✓ ✓ +Post-Schema SQL commands ✗ ✓ ✓ ✓ ✓ +Primary key support ✓ ✓ ✓ ✓ ✓ +Foreign key support ✓ ✓ ✓ ✓ +Online ALTER schema ✓ ✓ ✓ ✓ ✓ +Materialized views ✗ ✓ ✓ ✓ ✓ +Distribute to Citus ✗ ✓ ✓ ✓ ✓ +========================== ======= ====== ====== =========== ========= + +For more details about what the features are about, see the specific +reference pages for your database source. + +For some of the features, missing support only means that the feature is not +needed for the other sources, such as the capability to override MySQL +encoding metadata about a table or a column. Only MySQL in this list is left +completely unable to guarantee text encoding. Or Redshift not having foreign +keys. -In order to be able to follow this great methodology, you need tooling to -implement the third step in a fully automated way. That's pgloader. Commands -------- diff --git a/docs/pgloader.rst b/docs/pgloader.rst index 4a8cade..00fa186 100644 --- a/docs/pgloader.rst +++ b/docs/pgloader.rst @@ -154,6 +154,18 @@ Those options are meant to tweak `pgloader` behavior when loading data. machine code) another version of itself, usually a newer one like a very recent git checkout. + * `--no-ssl-cert-verification` + + Uses the OpenSSL option to accept a locally issued server-side + certificate, avoiding the following error message:: + + SSL verify error: 20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY + + The right way to fix the SSL issue is to use a trusted certificate, of + course. Sometimes though it's useful to make progress with the pgloader + setup while the certificate chain of trust is being fixed, maybe by + another team. That's when this option is useful. 
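As a quick sketch of how that option is used on the command line (the connection strings here are purely illustrative, not from the pgloader test suite)::

    pgloader --no-ssl-cert-verification              \
             mysql://appuser@source-host/appdb       \
             pgsql://postgres@target-host/appdb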
+
 Command Line Only Operations
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -552,6 +564,22 @@ queries from a SQL file. Implements support for PostgreSQL dollar-quoting and the `\i` and `\ir` include facilities as in `psql` batch mode (where they are the same thing).
+AFTER CREATE SCHEMA DO
+^^^^^^^^^^^^^^^^^^^^^^
+
+Same format as *BEFORE LOAD DO*: the dollar-quoted queries found in that
+section are executed once the schema has been created by pgloader, and
+before the data is loaded. It's the right time to ALTER TABLE or do some
+custom implementation on top of what pgloader does, like maybe partitioning.
+
+AFTER CREATE SCHEMA EXECUTE
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Same behaviour as in the *AFTER CREATE SCHEMA DO* clause. Allows you to read
+the SQL queries from a SQL file. Implements support for PostgreSQL
+dollar-quoting and the `\i` and `\ir` include facilities as in `psql` batch
+mode (where they are the same thing).
+
 Connection String ^^^^^^^^^^^^^^^^^
diff --git a/docs/tutorial/quickstart.rst b/docs/quickstart.rst
similarity index 96%
rename from docs/tutorial/quickstart.rst
rename to docs/quickstart.rst
index abd303c..912a095 100644
--- a/docs/tutorial/quickstart.rst
+++ b/docs/quickstart.rst
@@ -1,10 +1,10 @@
-PgLoader Quick Start
---------------------
+Pgloader Quick Start
+====================
 In simple cases, pgloader is very easy to use.
 CSV
-^^^
+---
 Load data from a CSV file into a pre-existing table in your database::
@@ -26,7 +26,7 @@ For documentation about the available syntaxes for the `--field` and
 Note also that the PostgreSQL URI includes the target *tablename*.
 Reading from STDIN
-^^^^^^^^^^^^^^^^^^
+------------------
 File based pgloader sources can be loaded from the standard input, as in the following example::
@@ -46,7 +46,7 @@ pgloader with this technique, using the Unix pipe::
 gunzip -c source.gz | pgloader --type csv ... - pgsql:///target?foo
 Loading from CSV available through HTTP
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+---------------------------------------
 The same command as just above can also be run if the CSV file happens to be found on a remote HTTP location::
@@ -84,7 +84,7 @@ Also notice that the same command will work against an archived version of the same data.
 Streaming CSV data from an HTTP compressed file
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+-----------------------------------------------
 Finally, it's important to note that pgloader first fetches the content from the HTTP URL it to a local file, then expand the archive when it's
@@ -110,7 +110,7 @@ and the commands and pgloader will take care of streaming the data down to PostgreSQL.
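Putting those pieces together, a streaming load from a compressed HTTP source could look like the following sketch; the URL, field list, and target table name are hypothetical::

    curl -L http://example.com/exports/data.csv.gz      \
      | gunzip -c                                        \
      | pgloader --type csv                              \
                 --field "id, name"                      \
                 --with "fields terminated by ','"       \
                 -                                       \
                 pgsql:///mydb?tablename=data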
Migrating from SQLite -^^^^^^^^^^^^^^^^^^^^^ +--------------------- The following command will open the SQLite database, discover its tables definitions including indexes and foreign keys, migrate those definitions @@ -121,7 +121,7 @@ and then migrate the data over:: pgloader ./test/sqlite/sqlite.db postgresql:///newdb Migrating from MySQL -^^^^^^^^^^^^^^^^^^^^ +-------------------- Just create a database where to host the MySQL data and definitions and have pgloader do the migration for you in a single command line:: @@ -130,7 +130,7 @@ pgloader do the migration for you in a single command line:: pgloader mysql://user@localhost/sakila postgresql:///pagila Fetching an archived DBF file from a HTTP remote location -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +--------------------------------------------------------- It's possible for pgloader to download a file from HTTP, unarchive it, and only then open it to discover the schema then load the data:: diff --git a/docs/ref/mssql.rst b/docs/ref/mssql.rst index 47cc834..ed024c2 100644 --- a/docs/ref/mssql.rst +++ b/docs/ref/mssql.rst @@ -31,7 +31,7 @@ MS SQL Database Migration Options: WITH --------------------------------------- When loading from a `MS SQL` database, the same options as when loading a -`MySQL` database are supported. Please refer to the MySQL section. The +`MS SQL` database are supported. Please refer to the MS SQL section. The following options are added: - *create schemas* @@ -53,7 +53,39 @@ CAST The cast clause allows to specify custom casting rules, either to overload the default casting rules or to amend them with special cases. -Please refer to the MySQL CAST clause for details. +Please refer to the MS SQL CAST clause for details. + +MS SQL Views Support +-------------------- + +MS SQL views support allows pgloader to migrate view as if they were base +tables. This feature then allows for on-the-fly transformation from MS SQL +to PostgreSQL, as the view definition is used rather than the base data. + +MATERIALIZE VIEWS +^^^^^^^^^^^^^^^^^ + +This clause allows you to implement custom data processing at the data +source by providing a *view definition* against which pgloader will query +the data. It's not possible to just allow for plain `SQL` because we want to +know a lot about the exact data types of each column involved in the query +output. + +This clause expect a comma separated list of view definitions, each one +being either the name of an existing view in your database or the following +expression:: + + *name* `AS` `$$` *sql query* `$$` + +The *name* and the *sql query* will be used in a `CREATE VIEW` statement at +the beginning of the data loading, and the resulting view will then be +dropped at the end of the data loading. + +MATERIALIZE ALL VIEWS +^^^^^^^^^^^^^^^^^^^^^ + +Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as +returned by MS SQL rather than asking the user to specify the list. MS SQL Partial Migration ------------------------ @@ -96,9 +128,35 @@ schema 'public' in the target database with this command:: ALTER TABLE NAMES MATCHING ... IN SCHEMA '...' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -See the MySQL explanation for this clause above. It works the same in the -context of migrating from MS SQL, only with the added option to specify the -name of the schema where to find the definition of the target tables. +Introduce a comma separated list of table names or *regular expressions* +that you want to target in the pgloader *ALTER TABLE* command. 
Available +actions are *SET SCHEMA*, *RENAME TO*, and *SET*:: + + ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/ + IN SCHEMA 'dbo' + SET SCHEMA 'mv' + + ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'dbo' RENAME TO 'films' + + ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET (fillfactor='40') + + ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET TABLESPACE 'tlbspc' + +You can use as many such rules as you need. The list of tables to be +migrated is searched in pgloader memory against the *ALTER TABLE* matching +rules, and for each command pgloader stops at the first matching criteria +(regexp or string). + +No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at +the level of the pgloader in-memory representation of your source database +schema. In case of a name change, the mapping is kept and reused in the +*foreign key* and *index* support. + +The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE` +command that pgloader will run when it has to create a table. + +The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the +`CREATE TABLE` command that pgloader will run when it has to create a table. The matching is done in pgloader itself, with a Common Lisp regular expression lib, so doesn't depend on the *LIKE* implementation of MS SQL, diff --git a/docs/ref/mysql.rst b/docs/ref/mysql.rst index 1957944..e54b42b 100644 --- a/docs/ref/mysql.rst +++ b/docs/ref/mysql.rst @@ -1,10 +1,9 @@ Migrating a MySQL Database to PostgreSQL ======================================== -This command instructs pgloader to load data from a database connection. The -only supported database source is currently *MySQL*, and pgloader supports -dynamically converting the schema of the source database and the indexes -building. +This command instructs pgloader to load data from a database connection. +pgloader supports dynamically converting the schema of the source database +and the indexes building. A default set of casting rules are provided and might be overloaded and appended to by the command. @@ -500,9 +499,8 @@ ALTER TABLE NAMES MATCHING ^^^^^^^^^^^^^^^^^^^^^^^^^^ Introduce a comma separated list of table names or *regular expressions* -that you want to target in the pgloader *ALTER TABLE* command. The only two -available actions are *SET SCHEMA* and *RENAME TO*, both take a quoted -string as parameter:: +that you want to target in the pgloader *ALTER TABLE* command. Available +actions are *SET SCHEMA*, *RENAME TO*, and *SET*:: ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/ SET SCHEMA 'mv' @@ -511,6 +509,8 @@ string as parameter:: ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40') + ALTER TABLE NAMES MATCHING ~/./ SET TABLESPACE 'pg_default' + You can use as many such rules as you need. The list of tables to be migrated is searched in pgloader memory against the *ALTER TABLE* matching rules, and for each command pgloader stops at the first matching criteria @@ -524,6 +524,9 @@ schema. In case of a name change, the mapping is kept and reused in the The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE` command that pgloader will run when it has to create a table. +The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the +`CREATE TABLE` command that pgloader will run when it has to create a table. 
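To make the effect of those two actions concrete: with rules such as the examples above, the table definition that pgloader sends to PostgreSQL could end up looking roughly like this sketch, where the schema, table, and column names are only illustrative::

    CREATE TABLE "mv"."films"
    (
      "id"    bigserial,
      "title" text
    )
    WITH (fillfactor='40')
    TABLESPACE "pg_default";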
+
 MySQL Migration: limitations
 ----------------------------
@@ -556,7 +559,7 @@ Numbers::
 type int with extra auto_increment to serial when (< precision 10)
 type int with extra auto_increment to bigserial when (<= 10 precision)
 type int to int when (< precision 10)
- type int to bigint when (<= 10 precision)
+ type int to bigint when (>= 10 precision)
 type tinyint with extra auto_increment to serial
 type smallint with extra auto_increment to serial
 type mediumint with extra auto_increment to serial
@@ -609,6 +612,14 @@ Date::
 to timestamptz drop default using zero-dates-to-null
+ type datetime with extra on update current timestamp when not null
+ to timestamptz drop not null drop default
+ using zero-dates-to-null
+
+ type datetime with extra on update current timestamp
+ to timestamptz drop default
+ using zero-dates-to-null
+
 type timestamp when default "0000-00-00 00:00:00" and not null to timestamptz drop not null drop default using zero-dates-to-null
diff --git a/docs/ref/pgsql-citus-target.rst b/docs/ref/pgsql-citus-target.rst
new file mode 100644
index 0000000..f6397d1
--- /dev/null
+++ b/docs/ref/pgsql-citus-target.rst
@@ -0,0 +1,196 @@
+Migrating a PostgreSQL Database to Citus
+========================================
+
+This command instructs pgloader to load data from a database connection.
+Automatic discovery of the schema is supported, including build of the
+indexes, primary and foreign key constraints. A default set of casting
+rules is provided and might be overloaded and appended to by the command.
+
+Automatic distribution column backfilling is supported, either from commands
+that specify the distribution column in every table, or only in the
+main table, then relying on foreign key constraints to discover the other
+distribution keys.
+
+Here's a short example of migrating a database from a PostgreSQL server to
+another:
+
+::
+
+ load database
+ from pgsql:///hackathon
+ into pgsql://localhost:9700/dim
+
+ with include drop, reset no sequences
+
+ cast column impressions.seen_at to "timestamp with time zone"
+
+ distribute companies using id
+ -- distribute campaigns using company_id
+ -- distribute ads using company_id from campaigns
+ -- distribute clicks using company_id from ads, campaigns
+ -- distribute impressions using company_id from ads, campaigns
+ ;
+
+Everything works exactly the same way as when doing a PostgreSQL to
+PostgreSQL migration, with the added functionality of this new `distribute`
+command.
+
+Distribute Command
+^^^^^^^^^^^^^^^^^^
+
+The distribute command syntax is as follows::
+
+ distribute <table name> using <column name>
+ distribute <table name> using <column name> from <table name> [, <table name>, ...]
+ distribute <table name>
as reference table + +When using the distribute command, the following steps are added to pgloader +operations when migrating the schema: + + - if the distribution column does not exist in the table, it is added as + the first column of the table + + - if the distribution column does not exists in the primary key of the + table, it is added as the first column of the primary of the table + + - all the foreign keys that point to the table are added the distribution + key automatically too, including the source tables of the foreign key + constraints + + - once the schema has been created on the target database, pgloader then + issues Citus specific command `create_reference_table() + `_ + and `create_distributed_table() + `_ + to make the tables distributed + +Those operations are done in the schema section of pgloader, before the data +is loaded. When the data is loaded, the newly added columns need to be +backfilled from referenced data. pgloader knows how to do that by generating +a query like the following and importing the result set of such a query +rather than the raw data from the source table. + +Citus Migration Example +^^^^^^^^^^^^^^^^^^^^^^^ + +With the migration command as above, pgloader adds the column ``company_id`` +to the tables that have a direct or indirect foreign key reference to the +``companies`` table. + +We run pgloader using the following command, where the file +`./test/citus/company.load +`_ +contains the pgloader command as shown above. + +:: + + $ pgloader --client-min-messages sql ./test/citus/company.load + +The following SQL statements are all extracted from the log messages that +the pgloader command outputs. We are going to have a look at the +`impressions` table. It gets created with a new column `company_id` in the +first position, as follows: + +:: + + CREATE TABLE "public"."impressions" + ( + company_id bigint, + "id" bigserial, + "ad_id" bigint default NULL, + "seen_at" timestamp with time zone default NULL, + "site_url" text default NULL, + "cost_per_impression_usd" numeric(20,10) default NULL, + "user_ip" inet default NULL, + "user_data" jsonb default NULL + ); + +The original schema for this table does not have the `company_id` column, +which means pgloader now needs to change the primary key definition, the +foreign keys constraints definitions from and to this table, and also to +*backfill* the `company_id` data to this table when doing the COPY phase of +the migration. + +Then once the tables have been created, pgloader executes the following SQL +statements:: + + SELECT create_distributed_table('"public"."companies"', 'id'); + SELECT create_distributed_table('"public"."campaigns"', 'company_id'); + SELECT create_distributed_table('"public"."ads"', 'company_id'); + SELECT create_distributed_table('"public"."clicks"', 'company_id'); + SELECT create_distributed_table('"public"."impressions"', 'company_id'); + +Then when copying the data from the source PostgreSQL database to the new +Citus tables, the new column (here ``company_id``) needs to be backfilled +from the source tables. 
Here's the SQL query that pgloader uses as a data
+source for the ``ads`` table in our example:
+
+::
+
+ SELECT "campaigns".company_id::text, "ads".id::text, "ads".campaign_id::text,
+ "ads".name::text, "ads".image_url::text, "ads".target_url::text,
+ "ads".impressions_count::text, "ads".clicks_count::text,
+ "ads".created_at::text, "ads".updated_at::text
+
+ FROM "public"."ads"
+ JOIN "public"."campaigns"
+ ON ads.campaign_id = campaigns.id
+
+The ``impressions`` table has an indirect foreign key reference to the
+``company`` table, which is the table where the distribution key is
+specified. pgloader will discover that itself from walking the PostgreSQL
+catalogs, and you may also use the following specification in the pgloader
+command to explicitly add the indirect dependency:
+
+::
+
+ distribute impressions using company_id from ads, campaigns
+
+Given this schema, the SQL query used by pgloader to fetch the data for the
+`impressions` table is the following, implementing online backfilling of the
+data:
+
+::
+
+ SELECT "campaigns".company_id::text, "impressions".id::text,
+ "impressions".ad_id::text, "impressions".seen_at::text,
+ "impressions".site_url::text,
+ "impressions".cost_per_impression_usd::text,
+ "impressions".user_ip::text,
+ "impressions".user_data::text
+
+ FROM "public"."impressions"
+
+ JOIN "public"."ads"
+ ON impressions.ad_id = ads.id
+
+ JOIN "public"."campaigns"
+ ON ads.campaign_id = campaigns.id
+
+When the data copying is done, pgloader also has to install the indexes
+supporting the primary keys, and add the foreign key definitions to the
+schema. Those definitions are not the same as in the source schema, because
+of the addition of the distribution column to the table: we need to also add
+the column to the primary key and the foreign key constraints.
+
+Here are the commands issued by pgloader for the ``impressions`` table:
+
+::
+
+ CREATE UNIQUE INDEX "impressions_pkey"
+ ON "public"."impressions" (company_id, id);
+
+ ALTER TABLE "public"."impressions"
+ ADD CONSTRAINT "impressions_ad_id_fkey"
+ FOREIGN KEY(company_id,ad_id)
+ REFERENCES "public"."ads"(company_id,id)
+
+Given a single line of specification ``distribute companies using id``,
+pgloader implements all the necessary schema changes on the fly when
+migrating to Citus, and also dynamically backfills the data.
+
+Citus Migration: Limitations
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The way pgloader implements *reset sequence* does not work with Citus at
+this point, so sequences need to be taken care of separately.
diff --git a/docs/ref/pgsql-redshift.rst b/docs/ref/pgsql-redshift.rst
new file mode 100644
index 0000000..09d73e1
--- /dev/null
+++ b/docs/ref/pgsql-redshift.rst
@@ -0,0 +1,70 @@
+Support for Redshift in pgloader
+================================
+
+The command and behavior are the same as when migrating from a PostgreSQL
+database source. pgloader automatically discovers that it's talking to a
+Redshift database by parsing the output of the `SELECT version()` SQL query.
+
+Redshift as a data source
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Redshift is a variant of PostgreSQL version 8.0.2, which allows pgloader to
+work with only a very small amount of adaptation in the catalog queries
+used. In other words, migrating from Redshift to PostgreSQL works just the
+same as when migrating from a PostgreSQL data source, including the
+connection string specification.
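As an illustration, a Redshift source is driven with the same `load database` command syntax as any PostgreSQL source; in the following sketch the cluster endpoint, database names, and credentials are hypothetical::

    load database
         from pgsql://admin@example-cluster.redshift.amazonaws.com:5439/dev
         into pgsql://postgres@localhost/dev

         with include drop, create tables, create indexes, reset sequences;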
+ +Redshift as a data destination +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Redshift variant of PostgreSQL 8.0.2 does not have support for the +``COPY FROM STDIN`` feature that pgloader normally relies upon. To use COPY +with Redshift, the data must first be made available in an S3 bucket. + +First, pgloader must authenticate to Amazon S3. pgloader uses the following +setup for that: + + - ``~/.aws/config`` + + This INI formatted file contains sections with your default region and + other global values relevant to using the S3 API. pgloader parses it to + get the region when it's setup in the ``default`` INI section. + + The environment variable ``AWS_DEFAULT_REGION`` can be used to override + the configuration file value. + + - ``~/.aws/credentials`` + + The INI formatted file contains your authentication setup to Amazon, + with the properties ``aws_access_key_id`` and ``aws_secret_access_key`` + in the section ``default``. pgloader parses this file for those keys, + and uses their values when communicating with Amazon S3. + + The environment variables ``AWS_ACCESS_KEY_ID`` and + ``AWS_SECRET_ACCESS_KEY`` can be used to override the configuration file + + - ``AWS_S3_BUCKET_NAME`` + + Finally, the value of the environment variable ``AWS_S3_BUCKET_NAME`` is + used by pgloader as the name of the S3 bucket where to upload the files + to COPY to the Redshift database. The bucket name defaults to + ``pgloader``. + +Then pgloader works as usual, see the other sections of the documentation +for the details, depending on the data source (files, other databases, etc). +When preparing the data for PostgreSQL, pgloader now uploads each batch into +a single CSV file, and then issue such as the following, for each batch: + +:: + + COPY + FROM 's3:///' + FORMAT CSV + TIMEFORMAT 'auto' + REGION '' + ACCESS_KEY_ID '' + SECRET_ACCESS_KEY '; + +This is the only difference with a PostgreSQL core version, where pgloader +can rely on the classic ``COPY FROM STDIN`` command, which allows to send +data through the already established connection to PostgreSQL. diff --git a/docs/ref/pgsql.rst b/docs/ref/pgsql.rst new file mode 100644 index 0000000..06dd406 --- /dev/null +++ b/docs/ref/pgsql.rst @@ -0,0 +1,408 @@ +Migrating a PostgreSQL Database to PostgreSQL +============================================= + +This command instructs pgloader to load data from a database connection. +Automatic discovery of the schema is supported, including build of the +indexes, primary and foreign keys constraints. A default set of casting +rules are provided and might be overloaded and appended to by the command. + +Here's a short example of migrating a database from a PostgreSQL server to +another: + +:: + + load database + from pgsql://localhost/pgloader + into pgsql://localhost/copy + + including only table names matching 'bits', ~/utilisateur/ in schema 'mysql' + including only table names matching ~/geolocations/ in schema 'public' + ; + +PostgreSQL Database Source Specification: FROM +---------------------------------------------- + +Must be a connection URL pointing to a PostgreSQL database. + +See the `SOURCE CONNECTION STRING` section above for details on how to write +the connection string. + +:: + + pgsql://[user[:password]@][netloc][:port][/dbname][?option=value&...] 
+ + +PostgreSQL Database Migration Options: WITH +------------------------------------------- + +When loading from a `PostgreSQL` database, the following options are +supported, and the default *WITH* clause is: *no truncate*, *create schema*, +*create tables*, *include drop*, *create indexes*, *reset sequences*, +*foreign keys*, *downcase identifiers*, *uniquify index names*, *reindex*. + + - *include drop* + + When this option is listed, pgloader drops all the tables in the target + PostgreSQL database whose names appear in the MySQL database. This + option allows for using the same command several times in a row until + you figure out all the options, starting automatically from a clean + environment. Please note that `CASCADE` is used to ensure that tables + are dropped even if there are foreign keys pointing to them. This is + precisely what `include drop` is intended to do: drop all target tables + and recreate them. + + Great care needs to be taken when using `include drop`, as it will + cascade to *all* objects referencing the target tables, possibly + including other tables that are not being loaded from the source DB. + + - *include no drop* + + When this option is listed, pgloader will not include any `DROP` + statement when loading the data. + + - *truncate* + + When this option is listed, pgloader issue the `TRUNCATE` command + against each PostgreSQL table just before loading data into it. + + - *no truncate* + + When this option is listed, pgloader issues no `TRUNCATE` command. + + - *disable triggers* + + When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE + TRIGGER ALL` command against the PostgreSQL target table before copying + the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the + `COPY` is done. + + This option allows loading data into a pre-existing table ignoring the + *foreign key constraints* and user defined triggers and may result in + invalid *foreign key constraints* once the data is loaded. Use with + care. + + - *create tables* + + When this option is listed, pgloader creates the table using the meta + data found in the `MySQL` file, which must contain a list of fields with + their data type. A standard data type conversion from DBF to PostgreSQL + is done. + + - *create no tables* + + When this option is listed, pgloader skips the creation of table before + loading data, target tables must then already exist. + + Also, when using *create no tables* pgloader fetches the metadata from + the current target database and checks type casting, then will remove + constraints and indexes prior to loading the data and install them back + again once the loading is done. + + - *create indexes* + + When this option is listed, pgloader gets the definitions of all the + indexes found in the MySQL database and create the same set of index + definitions against the PostgreSQL database. + + - *create no indexes* + + When this option is listed, pgloader skips the creating indexes. + + - *drop indexes* + + When this option is listed, pgloader drops the indexes in the target + database before loading the data, and creates them again at the end + of the data copy. + + - *reindex* + + When this option is used, pgloader does both *drop indexes* before + loading the data and *create indexes* once data is loaded. + + - *drop schema* + + When this option is listed, pgloader drops the target schema in the + target PostgreSQL database before creating it again and all the objects + it contains. The default behavior doesn't drop the target schemas. 
+ + - *foreign keys* + + When this option is listed, pgloader gets the definitions of all the + foreign keys found in the MySQL database and create the same set of + foreign key definitions against the PostgreSQL database. + + - *no foreign keys* + + When this option is listed, pgloader skips creating foreign keys. + + - *reset sequences* + + When this option is listed, at the end of the data loading and after the + indexes have all been created, pgloader resets all the PostgreSQL + sequences created to the current maximum value of the column they are + attached to. + + The options *schema only* and *data only* have no effects on this + option. + + - *reset no sequences* + + When this option is listed, pgloader skips resetting sequences after the + load. + + The options *schema only* and *data only* have no effects on this + option. + + - *downcase identifiers* + + When this option is listed, pgloader converts all MySQL identifiers + (table names, index names, column names) to *downcase*, except for + PostgreSQL *reserved* keywords. + + The PostgreSQL *reserved* keywords are determined dynamically by using + the system function `pg_get_keywords()`. + + - *quote identifiers* + + When this option is listed, pgloader quotes all MySQL identifiers so + that their case is respected. Note that you will then have to do the + same thing in your application code queries. + + - *schema only* + + When this option is listed pgloader refrains from migrating the data + over. Note that the schema in this context includes the indexes when the + option *create indexes* has been listed. + + - *data only* + + When this option is listed pgloader only issues the `COPY` statements, + without doing any other processing. + + - *rows per range* + + How many rows are fetched per `SELECT` query when using *multiple + readers per thread*, see above for details. + +PostgreSQL Database Casting Rules +--------------------------------- + +The command *CAST* introduces user-defined casting rules. + +The cast clause allows to specify custom casting rules, either to overload +the default casting rules or to amend them with special cases. + +A casting rule is expected to follow one of the forms:: + + type [ ... ] to [