From 46d14af0d38b26b066876ecf45a2569da31f1d7e Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sun, 8 Jul 2018 20:34:55 +0200
Subject: [PATCH 01/69] Add more default rules to MySQL datetime handling.

Given the variety of ways to set up default behavior for datetime and
timestamp data types in MySQL, we need yet more default casting rules. It
might be time to think about a more principled way to solve the problem, but
on the other hand, this ad-hoc one also comes with full overriding
flexibility for the end user.

Fixes #811.
---
 src/sources/mysql/mysql-cast-rules.lisp |  8 ++++++++
 test/mysql/my.sql                       | 18 ++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/sources/mysql/mysql-cast-rules.lisp b/src/sources/mysql/mysql-cast-rules.lisp
index e53daa3..f05ecde 100644
--- a/src/sources/mysql/mysql-cast-rules.lisp
+++ b/src/sources/mysql/mysql-cast-rules.lisp
@@ -119,6 +119,10 @@
      :target (:type "timestamptz" :drop-default t :drop-not-null t)
      :using pgloader.transforms::zero-dates-to-null)
 
+    (:source (:type "datetime" :on-update-current-timestamp t :not-null nil)
+     :target (:type "timestamptz" :drop-default t)
+     :using pgloader.transforms::zero-dates-to-null)
+
     (:source (:type "timestamp" :default "0000-00-00 00:00:00" :not-null t)
      :target (:type "timestamptz" :drop-default t :drop-not-null t)
      :using pgloader.transforms::zero-dates-to-null)
@@ -131,6 +135,10 @@
      :target (:type "timestamptz" :drop-default t :drop-not-null t)
      :using pgloader.transforms::zero-dates-to-null)
 
+    (:source (:type "timestamp" :on-update-current-timestamp t :not-null nil)
+     :target (:type "timestamptz" :drop-default t)
+     :using pgloader.transforms::zero-dates-to-null)
+
     (:source (:type "date" :default "0000-00-00")
      :target (:type "date" :drop-default t)
      :using pgloader.transforms::zero-dates-to-null)
diff --git a/test/mysql/my.sql b/test/mysql/my.sql
index 17cd3b6..a9daefd 100644
--- a/test/mysql/my.sql
+++ b/test/mysql/my.sql
@@ -100,6 +100,24 @@ create table bits
 
 insert into bits(bool) values(0b00), (0b01);
 
+/*
+ * https://github.com/dimitri/pgloader/issues/811
+ */
+CREATE TABLE `domain_filter` (
+  `id` binary(16) NOT NULL ,
+  `type` varchar(50) NOT NULL ,
+  `value` json DEFAULT NULL ,
+  `negated` tinyint(1) NOT NULL DEFAULT '0' ,
+  `report_id` varbinary(255) NOT NULL ,
+  `query_id` varchar(255) NOT NULL ,
+  `created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ,
+  `updated_at` datetime DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP ,
+  `updated_by` varbinary(255) DEFAULT NULL ,
+  PRIMARY KEY (`id`),
+  UNIQUE KEY `domain_filter_unq` (`report_id`,`query_id`,`type`),
+  KEY `domain_filter` (`type`)
+) ENGINE=InnoDB DEFAULT CHARSET=ascii;
+
 /*
  * https://github.com/dimitri/pgloader/issues/703
  */

From 5ca3ee8aad1d70ce4b3cc5b05bc7bd2f88f32010 Mon Sep 17 00:00:00 2001
From: alexknips <alex@kariusdx.com>
Date: Fri, 20 Jul 2018 14:38:06 +0200
Subject: [PATCH 02/69] Fix documentation of default MySQL cast rules (#815)

The default rule is `type int to bigint    when  (>= 10 precision)`.
---
 docs/ref/mysql.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ref/mysql.rst b/docs/ref/mysql.rst
index 1957944..dec33a9 100644
--- a/docs/ref/mysql.rst
+++ b/docs/ref/mysql.rst
@@ -556,7 +556,7 @@ Numbers::
   type int with extra auto_increment to serial when (< precision 10)
   type int with extra auto_increment to bigserial when (<= 10 precision)
   type int to int       when  (< precision 10)
-  type int to bigint    when  (<= 10 precision)
+  type int to bigint    when  (>= 10 precision)
   type tinyint   with extra auto_increment to serial
   type smallint  with extra auto_increment to serial
   type mediumint with extra auto_increment to serial

From 34cc25383ac3729c9c7ebcd06ab068ee24ae55f4 Mon Sep 17 00:00:00 2001
From: uniquestring <36343026+uniquestring@users.noreply.github.com>
Date: Sat, 11 Aug 2018 01:08:00 +0200
Subject: [PATCH 03/69] Improved Dockerfiles/docker image size (#821)

* Add dockerfiles to .dockerignore

Otherwise changes in the dockerfiles would invalidate the cache

* Rewrite Dockerfile

- Fix deprecated MAINTAINER instruction
- Move maintainer label to the bottom (improving cache)
- Tidy up apt-get
- Use COPY instead of ADD
  see https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#add-or-copy
- Remove WORKDIR instruction (we don't really need this)
- Combine remaining RUN layers to reduce layer count
- Move final binary instead of copying (reduce image size)

* Use -slim image and multistage build

Reduce size by using multistage builds and the -slim image.
Use debian:stable instead of a specific code name (future proof).

* [cosmetic] indent Dockerfile instructions

Make it easier to see where a new build stage begins

* Rewrite Dockerfile.ccl

Apply the same changes to Dockerfile.ccl as we did for Dockerfile
---
 .dockerignore  |  2 ++
 Dockerfile     | 57 +++++++++++++++++++++++++++++++-------------
 Dockerfile.ccl | 64 ++++++++++++++++++++++++++++++++++----------------
 3 files changed, 87 insertions(+), 36 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index d075b3e..6be6907 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,3 +1,5 @@
 .git
 .vagrant
 build
+Dockerfile
+Dockerfile.ccl
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 6fc43dc..0500aa2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,45 @@
-FROM debian:stretch
-MAINTAINER Dimitri Fontaine <dim@tapoueh.org>
+FROM debian:stable-slim as builder
 
-RUN apt-get update                                   && \
-    apt-get install -y --no-install-recommends          \
-                    wget curl make git bzip2 time       \
-                    ca-certificates                     \
-                    libzip-dev libssl1.1 openssl        \
-                    patch unzip libsqlite3-dev gawk     \
-                    freetds-dev sbcl                 && \
-    rm -rf /var/lib/apt/lists/*
+  RUN apt-get update \
+      && apt-get install -y --no-install-recommends \
+        bzip2 \
+        ca-certificates \
+        curl \
+        freetds-dev \
+        gawk \
+        git \
+        libsqlite3-dev \
+        libssl1.1 \
+        libzip-dev \
+        make \
+        openssl \
+        patch \
+        sbcl \
+        time \
+        unzip \
+        wget \
+      && rm -rf /var/lib/apt/lists/*
 
-ADD ./ /opt/src/pgloader
-WORKDIR /opt/src/pgloader
+  COPY ./ /opt/src/pgloader
 
-# build/ is in the .dockerignore file, but we actually need it now
-RUN mkdir -p build/bin
-RUN make
+  RUN mkdir -p /opt/src/pgloader/build/bin \
+      && cd /opt/src/pgloader \
+      && make
 
-RUN cp /opt/src/pgloader/build/bin/pgloader /usr/local/bin
+FROM debian:stable-slim
+
+  RUN apt-get update \
+      && apt-get install -y --no-install-recommends \
+        curl \
+        freetds-dev \
+        gawk \
+        libsqlite3-dev \
+        libzip-dev \
+        make \
+        sbcl \
+        unzip \
+      && rm -rf /var/lib/apt/lists/*
+
+  COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
+
+  LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"
\ No newline at end of file
diff --git a/Dockerfile.ccl b/Dockerfile.ccl
index a33f8c9..f88468a 100644
--- a/Dockerfile.ccl
+++ b/Dockerfile.ccl
@@ -1,25 +1,49 @@
-FROM debian:stretch
-MAINTAINER Dimitri Fontaine <dim@tapoueh.org>
+FROM debian:stable-slim as builder
 
-RUN apt-get update                                   && \
-    apt-get install -y --no-install-recommends          \
-                    wget curl make git bzip2 time       \
-                    ca-certificates                     \
-                    libzip-dev libssl1.1 openssl        \
-                    patch unzip libsqlite3-dev gawk     \
-                    freetds-dev sbcl                 && \
-    rm -rf /var/lib/apt/lists/*
+  RUN apt-get update \
+      && apt-get install -y --no-install-recommends \
+        bzip2 \
+        ca-certificates \
+        curl \
+        freetds-dev \
+        gawk \
+        git \
+        libsqlite3-dev \
+        libssl1.1 \
+        libzip-dev \
+        make \
+        openssl \
+        patch \
+        sbcl \
+        time \
+        unzip \
+        wget \
+      && rm -rf /var/lib/apt/lists/*
 
-WORKDIR /usr/local/src
-RUN curl --location -O https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz
-RUN tar xf ccl-1.11.5-linuxx86.tar.gz
-RUN cp /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl
+  RUN curl -SL https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz \
+      | tar xz -C /usr/local/src/ \
+      && mv /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl
 
-ADD ./ /opt/src/pgloader
-WORKDIR /opt/src/pgloader
+  COPY ./ /opt/src/pgloader
 
-# build/ is in the .dockerignore file, but we actually need it now
-RUN mkdir -p build/bin
-RUN make CL=ccl DYNSIZE=256
+  RUN mkdir -p /opt/src/pgloader/build/bin \
+      && cd /opt/src/pgloader \
+      && make CL=ccl DYNSIZE=256
 
-RUN cp /opt/src/pgloader/build/bin/pgloader /usr/local/bin
+FROM debian:stable-slim
+
+  RUN apt-get update \
+      && apt-get install -y --no-install-recommends \
+        curl \
+        freetds-dev \
+        gawk \
+        libsqlite3-dev \
+        libzip-dev \
+        make \
+        sbcl \
+        unzip \
+      && rm -rf /var/lib/apt/lists/*
+
+  COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
+
+  LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"
\ No newline at end of file

From 1ee389d1210f207dcc66ab704db5ccb6de897246 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 14 Aug 2018 10:06:45 +0300
Subject: [PATCH 04/69] Fix parsing empty hostname fields in pgpass.

Fixes #823.
---
 src/parsers/parse-pgpass.lisp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/parsers/parse-pgpass.lisp b/src/parsers/parse-pgpass.lisp
index 3eaaa7e..82efbee 100644
--- a/src/parsers/parse-pgpass.lisp
+++ b/src/parsers/parse-pgpass.lisp
@@ -19,11 +19,11 @@
                                  (pgpass-char-p character))))
   (:lambda (e) (text e)))
 
-(defrule pgpass-line (and pgpass-entry #\: pgpass-entry #\:
+(defrule pgpass-line (and (? pgpass-entry) #\: pgpass-entry #\:
                           pgpass-entry #\: pgpass-entry #\:
                           (? pgpass-entry))
   (:lambda (pl)
-    (make-pgpass :hostname (first pl)
+    (make-pgpass :hostname (or (first pl) "localhost")
                  :port (third pl)
                  :database (fifth pl)
                  :username (seventh pl)

From fc3a1949f74bbfbebdbc023d6e55dc15e5d6df33 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 20 Aug 2018 11:09:52 +0200
Subject: [PATCH 05/69] Add support for PostgreSQL as a source database.

It's now possible to use pgloader to migrate from PostgreSQL to PostgreSQL.
That might be useful for several reasons, including applying user defined
cast rules at COPY time, or just moving from a hosted solution to another.
---
 pgloader.asd                            |   9 +-
 src/load/migrate-database.lisp          |   6 +
 src/package.lisp                        |  29 ++++-
 src/parsers/command-parser.lisp         |   6 +-
 src/parsers/command-pgsql.lisp          | 159 ++++++++++++++++++++++++
 src/pgsql/pgsql-create-schema.lisp      |  25 ++--
 src/pgsql/pgsql-ddl.lisp                |  63 +++++++---
 src/pgsql/pgsql-schema.lisp             |  90 ++++++++++++--
 src/pgsql/sql/list-all-columns.sql      |  24 +++-
 src/pgsql/sql/list-all-extensions.sql   |   4 +
 src/pgsql/sql/list-all-sqltypes.sql     |  43 +++++++
 src/sources/pgsql/pgsql-cast-rules.lisp |  48 +++++++
 src/sources/pgsql/pgsql.lisp            |  90 ++++++++++++++
 src/sources/sqlite/sqlite.lisp          |   2 +-
 src/utils/catalog.lisp                  |  93 ++++++++++++--
 test/archive.load                       |   3 +-
 test/pgsql-source.load                  |   6 +
 17 files changed, 639 insertions(+), 61 deletions(-)
 create mode 100644 src/parsers/command-pgsql.lisp
 create mode 100644 src/pgsql/sql/list-all-extensions.sql
 create mode 100644 src/pgsql/sql/list-all-sqltypes.sql
 create mode 100644 src/sources/pgsql/pgsql-cast-rules.lisp
 create mode 100644 src/sources/pgsql/pgsql.lisp
 create mode 100644 test/pgsql-source.load

diff --git a/pgloader.asd b/pgloader.asd
index 55468e4..89db8c8 100644
--- a/pgloader.asd
+++ b/pgloader.asd
@@ -182,7 +182,13 @@
                                  ;;        :depends-on ("mysql-schema"))
                                  (:file "mysql"
                                         :depends-on ("mysql-cast-rules"
-                                                     "mysql-schema"))))))
+                                                     "mysql-schema"))))
+
+                       (:module "pgsql"
+                                :serial t
+                                :depends-on ("common")
+                                :components ((:file "pgsql-cast-rules")
+                                             (:file "pgsql")))))
 
              ;; package pgloader.copy
              (:module "pg-copy"
@@ -247,6 +253,7 @@
                        (:file "command-including-like")
                        (:file "command-mssql")
                        (:file "command-sqlite")
+                       (:file "command-pgsql")
                        (:file "command-archive")
                        (:file "command-parser")
                        (:file "parse-sqlite-type-name")
diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index e4c39bf..044d931 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -46,6 +46,12 @@
           (with-stats-collection ("Create SQL Types" :section :pre
                                                      :use-result-as-read t
                                                      :use-result-as-rows t)
+            ;; some SQL types come from extensions (ip4r, hstore, etc)
+            (create-extensions catalog
+                               :include-drop include-drop
+                               :if-not-exists t
+                               :client-min-messages :error)
+
             (create-sqltypes catalog
                              :include-drop include-drop
                              :client-min-messages :error))
diff --git a/src/package.lisp b/src/package.lisp
index 1cde4df..8e81cdf 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -49,8 +49,9 @@
 
            #:catalog
            #:schema
-           #:table
+           #:extension
            #:sqltype
+           #:table
            #:column
            #:index
            #:fkey
@@ -82,6 +83,8 @@
            #:schema-source-name
            #:schema-table-list
            #:schema-view-list
+           #:schema-extension-list
+           #:schema-sqltype-list
            #:schema-in-search-path
 
            #:table-name
@@ -96,11 +99,15 @@
            #:table-fkey-list
            #:table-trigger-list
 
+           #:extension-name
+           #:extension-schema
+
            #:sqltype-name
            #:sqltype-schema
            #:sqltype-type
            #:sqltype-source-def
            #:sqltype-extra
+           #:sqltype-extension
 
            #:column-name
            #:column-type-name
@@ -110,6 +117,7 @@
            #:column-comment
            #:column-transform
            #:column-extra
+           #:column-transform-default
 
            #:index-name
            #:index-type
@@ -152,9 +160,15 @@
 
            #:table-list
            #:view-list
+           #:extension-list
+           #:sqltype-list
            #:add-schema
            #:find-schema
            #:maybe-add-schema
+           #:add-extension
+           #:find-extension
+           #:maybe-add-extension
+           #:add-sqltype
            #:add-table
            #:find-table
            #:maybe-add-table
@@ -389,6 +403,7 @@
 	   #:truncate-tables
            #:set-table-oids
 
+           #:create-extensions
            #:create-sqltypes
 	   #:create-schemas
            #:add-to-search-path
@@ -417,6 +432,7 @@
            #:process-index-definitions
 
            ;; postgresql introspection queries
+           #:list-all-sqltypes
 	   #:list-all-columns
 	   #:list-all-indexes
 	   #:list-all-fkeys
@@ -674,6 +690,14 @@
 	   #:*mysql-default-cast-rules*
            #:with-mysql-connection))
 
+(defpackage #:pgloader.source.pgsql
+  (:use #:cl
+        #:pgloader.params #:pgloader.utils #:pgloader.connection
+        #:pgloader.sources #:pgloader.pgsql #:pgloader.catalog)
+  (:import-from #:pgloader.transforms #:precision #:scale)
+  (:export #:copy-pgsql
+           #:*pgsql-default-cast-rules*))
+
 (defpackage #:pgloader.source.sqlite
   (:use #:cl
         #:pgloader.params #:pgloader.utils #:pgloader.connection
@@ -763,6 +787,9 @@
   (:import-from #:pgloader.source.copy
                 #:copy-copy
                 #:copy-connection)
+  (:import-from #:pgloader.source.pgsql
+                #:copy-pgsql
+                #:*pgsql-default-cast-rules*)
   (:import-from #:pgloader.source.mysql
                 #:copy-mysql
                 #:mysql-connection
diff --git a/src/parsers/command-parser.lisp b/src/parsers/command-parser.lisp
index 57e244d..1484b0a 100644
--- a/src/parsers/command-parser.lisp
+++ b/src/parsers/command-parser.lisp
@@ -17,6 +17,7 @@
                           load-copy-file
 			  load-dbf-file
                           load-ixf-file
+                          load-pgsql-database
 			  load-mysql-database
                           load-mssql-database
 			  load-sqlite-database
@@ -160,12 +161,12 @@
     (declare (ignore abs paths no-path-p))
     (let ((dotted-parts (reverse (sq:split-sequence #\. filename))))
       (when (<= 2 (length dotted-parts))
-        (destructuring-bind (extension name-or-ext &rest parts)
+        (destructuring-bind (ext name-or-ext &rest parts)
             dotted-parts
           (declare (ignore parts))
           (if (string-equal "tar" name-or-ext) :archive
               (loop :for (type . extensions) :in *data-source-filename-extensions*
-                 :when (member extension extensions :test #'string-equal)
+                 :when (member ext extensions :test #'string-equal)
                  :return type)))))))
 
 (defvar *parse-rule-for-source-types*
@@ -266,6 +267,7 @@
                         (:dbf    'dbf-option)
                         (:ixf    'ixf-option)
                         (:sqlite 'sqlite-option)
+                        (:pgsql  'pgsql-option)
                         (:mysql  'mysql-option)
                         (:mssql  'mysql-option))
                       option))))
diff --git a/src/parsers/command-pgsql.lisp b/src/parsers/command-pgsql.lisp
new file mode 100644
index 0000000..2a09fd7
--- /dev/null
+++ b/src/parsers/command-pgsql.lisp
@@ -0,0 +1,159 @@
+;;;
+;;; Parse the pgloader commands grammar
+;;;
+
+(in-package :pgloader.parser)
+
+;;;
+;;; PostgreSQL options
+;;;
+(defrule pgsql-option (or option-on-error-stop
+                          option-on-error-resume-next
+                          option-workers
+                          option-concurrency
+                          option-batch-rows
+                          option-batch-size
+                          option-prefetch-rows
+                          option-max-parallel-create-index
+			  option-reindex
+                          option-truncate
+                          option-disable-triggers
+			  option-data-only
+			  option-schema-only
+			  option-include-drop
+                          option-drop-schema
+			  option-create-tables
+			  option-create-indexes
+			  option-index-names
+			  option-reset-sequences
+			  option-foreign-keys
+			  option-identifiers-case))
+
+(defrule pgsql-options (and kw-with
+                            (and pgsql-option (* (and comma pgsql-option))))
+  (:function flatten-option-list))
+
+
+;;;
+;;; Including only some tables or excluding some others
+;;;
+(defrule including-matching-in-schema-filter
+    (and kw-including kw-only kw-table kw-names kw-matching filter-list-matching
+         kw-in kw-schema quoted-namestring)
+  (:lambda (source)
+    (bind (((_ _ _ _ _ filter-list _ _ schema) source))
+      (cons schema filter-list))))
+
+(defrule including-matching-in-schema  ; one or more INCLUDING ... MATCHING ... IN SCHEMA clauses
+    (and including-matching-in-schema-filter (* including-matching-in-schema-filter))
+  (:lambda (source)
+    (destructuring-bind (inc1 incs) source  ; first clause, then the list of following ones
+      (cons :including (list* inc1 incs)))))  ; => (:including (schema . filter-list) ...)
+
+(defrule excluding-matching-in-schema-filter
+    (and kw-excluding kw-table kw-names kw-matching filter-list-matching
+         kw-in kw-schema quoted-namestring)
+  (:lambda (source)
+    (bind (((_ _ _ _ filter-list _ _ schema) source))
+      (cons schema filter-list))))
+
+(defrule excluding-matching-in-schema  ; one or more EXCLUDING ... MATCHING ... IN SCHEMA clauses
+    (and excluding-matching-in-schema-filter (* excluding-matching-in-schema-filter))
+  (:lambda (source)
+    (destructuring-bind (excl1 excls) source  ; first clause, then the list of following ones
+      (cons :excluding (list* excl1 excls)))))  ; => (:excluding (schema . filter-list) ...)
+
+
+;;;
+;;; Allow clauses to appear in any order
+;;;
+(defrule load-pgsql-optional-clauses (* (or pgsql-options  ; zero or more clauses, in any order
+                                            gucs
+                                            casts
+                                            alter-table
+                                            alter-schema
+                                            materialize-views
+                                            including-matching-in-schema
+                                            excluding-matching-in-schema
+                                            decoding-tables-as
+                                            before-load
+                                            after-load))
+  (:lambda (clauses-list)
+    (alexandria:alist-plist clauses-list)))  ; clauses are treated as (key . value) pairs and flattened to a plist
+
+(defrule pgsql-source (and kw-load kw-database kw-from pgsql-uri)
+  (:lambda (source) (bind (((_ _ _ uri) source)) uri)))
+
+(defrule load-pgsql-command (and pgsql-source target
+                                 load-pgsql-optional-clauses)
+  (:lambda (command)
+    (destructuring-bind (source target clauses) command
+      `(,source ,target ,@clauses))))
+
+
+;;; LOAD DATABASE FROM pgsql://
+(defun lisp-code-for-pgsql-dry-run (pg-src-db-conn pg-dst-db-conn)  ; returns a lambda FORM (code), not a closure
+  `(lambda ()
+     (log-message :log "DRY RUN, only checking connections.")
+     (check-connection ,pg-src-db-conn)  ; source connection value is spliced into the generated code
+     (check-connection ,pg-dst-db-conn)))  ; same for the target connection
+
+(defun lisp-code-for-loading-from-pgsql (pg-src-db-conn pg-dst-db-conn  ; returns a lambda FORM that runs the migration
+                                         &key
+                                           gucs
+                                           casts before after options
+                                           alter-table alter-schema
+                                           ((:including incl))
+                                           ((:excluding excl))
+                                           ((:decoding decoding-as))  ; NOTE(review): bound but never referenced below -- confirm intent
+                                           &allow-other-keys)
+  `(lambda ()
+     (let* ((*default-cast-rules* ',*pgsql-default-cast-rules*)
+            (*cast-rules*         ',casts)
+            (*identifier-case*    :quote)
+            (on-error-stop        (getf ',options :on-error-stop t))  ; T when the option is absent
+            ,@(pgsql-connection-bindings pg-dst-db-conn gucs)
+            ,@(batch-control-bindings options)
+            (source
+             (make-instance 'copy-pgsql
+                            :target-db ,pg-dst-db-conn
+                            :source-db ,pg-src-db-conn)))
+
+       ,(sql-code-block pg-dst-db-conn :pre before "before load")  ; code block built from the BEFORE commands
+
+       (copy-database source
+                      :including ',incl
+                      :excluding ',excl
+                      :alter-table ',alter-table
+                      :alter-schema ',alter-schema
+                      :index-names :preserve
+                      :set-table-oids t
+                      :on-error-stop on-error-stop
+                      ,@(remove-batch-control-option options))  ; remaining options are spliced in as keyword args
+
+       ,(sql-code-block pg-dst-db-conn :post after "after load"))))  ; code block built from the AFTER commands
+
+(defrule load-pgsql-database load-pgsql-command  ; turns a parsed LOAD DATABASE command into Lisp code
+  (:lambda (source)
+    (destructuring-bind (pg-src-db-uri
+                         pg-dst-db-uri
+                         &key
+                         gucs casts before after options
+                         alter-table alter-schema
+                         including excluding decoding)  ; NOTE(review): no &allow-other-keys; a clause key outside this list (materialize-views is accepted by the grammar) would signal an error -- confirm
+        source
+      (cond (*dry-run*  ; dry run: generate code that only checks both connections
+             (lisp-code-for-pgsql-dry-run pg-src-db-uri pg-dst-db-uri))
+            (t
+             (lisp-code-for-loading-from-pgsql pg-src-db-uri pg-dst-db-uri
+                                               :gucs gucs
+                                               :casts casts
+                                               :before before
+                                               :after after
+                                               :options options
+                                               :alter-table alter-table
+                                               :alter-schema alter-schema
+                                               :including including
+                                               :excluding excluding
+                                               :decoding decoding))))))
+
diff --git a/src/pgsql/pgsql-create-schema.lisp b/src/pgsql/pgsql-create-schema.lisp
index dd490ac..e6154e5 100644
--- a/src/pgsql/pgsql-create-schema.lisp
+++ b/src/pgsql/pgsql-create-schema.lisp
@@ -13,17 +13,7 @@
                           include-drop
                           (client-min-messages :notice))
   "Create the needed data types for given CATALOG."
-  (let ((sqltype-list))
-    ;; build the sqltype list
-    (loop :for table :in (append (table-list catalog)
-                                 (view-list catalog))
-       :do (loop :for column :in (table-column-list table)
-              :do (when (typep (column-type-name column) 'sqltype)
-                    (pushnew (column-type-name column) sqltype-list
-                             :test #'string-equal
-                             :key #'sqltype-name))))
-
-    ;; now create the types
+  (let ((sqltype-list (sqltype-list catalog)))
     (loop :for sqltype :in sqltype-list
        :when include-drop
        :count t
@@ -114,6 +104,19 @@
                                    :log-level log-level
                                    :client-min-messages client-min-messages)))))
 
+(defun create-extensions (catalog
+                          &key
+                            if-not-exists
+                            include-drop
+                            (client-min-messages :notice))
+  "Create all extensions from the given database CATALOG."
+  (let ((sql
+         (loop :for extension :in (extension-list catalog)
+            :when include-drop  ; this condition guards only the DROP statement on the next line
+            :collect (format-drop-sql extension :if-exists t :cascade t)
+            :collect (format-create-sql extension :if-not-exists if-not-exists))))  ; CREATE is always collected
+    (pgsql-execute sql :client-min-messages client-min-messages)))  ; sql is the list of collected statements, DROP before CREATE per extension
+
 (defun create-tables (catalog
                       &key
 			if-not-exists
diff --git a/src/pgsql/pgsql-ddl.lisp b/src/pgsql/pgsql-ddl.lisp
index fa29e27..580618e 100644
--- a/src/pgsql/pgsql-ddl.lisp
+++ b/src/pgsql/pgsql-ddl.lisp
@@ -38,6 +38,25 @@
           (sqltype-name sqltype)
           cascade))
 
+
+;;;
+;;; Extensions
+;;;
+(defmethod format-create-sql ((extension extension)  ; builds the CREATE EXTENSION statement text
+                              &key (stream nil) if-not-exists)
+  (format stream "CREATE EXTENSION~:[~; IF NOT EXISTS~] ~a WITH SCHEMA ~a;"  ; ~:[~;...~] emits IF NOT EXISTS only when the flag is true
+          if-not-exists
+          (extension-name extension)
+          (schema-name (extension-schema extension))))  ; the extension's schema slot must hold a schema object here
+
+(defmethod format-drop-sql ((extension extension)  ; builds the DROP EXTENSION statement text
+                            &key (stream nil) cascade if-exists)
+  (format stream "DROP EXTENSION~:[~; IF EXISTS~] ~a~@[ CASCADE~];"  ; ~@[...~] emits CASCADE only when cascade is non-nil
+          if-exists
+          (extension-name extension)
+          cascade))
+
+
 
 ;;;
 ;;; Tables
@@ -126,26 +145,30 @@
   "Common normalized default values and their PostgreSQL spelling.")
 
 (defmethod format-default-value ((column column) &key (stream nil))
-  (let* ((default       (column-default column))
-         (clean-default (cdr (assoc default *pgsql-default-values*)))
-         (transform     (column-transform column)))
-    (or clean-default
-        (if transform
-            (let* ((transformed-default
-                    (handler-case
-                        (funcall transform default)
-                      (condition (c)
-                        (log-message :warning
-                                     "Failed to transform default value ~s: ~a"
-                                     default c)
-                        ;; can't transform: return nil
-                        nil)))
-                   (transformed-column
-                    (make-column :default transformed-default)))
-              (format-default-value transformed-column))
-            (if default
-                (ensure-quoted default #\')
-                (format stream "NULL"))))))
+  (if (column-transform-default column)
+      (let* ((default       (column-default column))
+             (clean-default (cdr (assoc default *pgsql-default-values*)))
+             (transform     (column-transform column)))
+        (or clean-default
+            (if transform
+                (let* ((transformed-default
+                        (handler-case
+                            (funcall transform default)
+                          (condition (c)
+                            (log-message :warning
+                                         "Failed to transform default value ~s: ~a"
+                                         default c)
+                            ;; can't transform: return nil
+                            nil)))
+                       (transformed-column
+                        (make-column :default transformed-default)))
+                  (format-default-value transformed-column))
+                (if default
+                    (ensure-quoted default #\')
+                    (format stream "NULL")))))
+
+      ;; else, when column-transform-default is nil:
+      (column-default column)))
 
 
 ;;;
diff --git a/src/pgsql/pgsql-schema.lisp b/src/pgsql/pgsql-schema.lisp
index 8bdf158..e5ce1af 100644
--- a/src/pgsql/pgsql-schema.lisp
+++ b/src/pgsql/pgsql-schema.lisp
@@ -19,6 +19,10 @@
                           (t
                            including))))
 
+    (list-all-sqltypes catalog
+                       :including including
+                       :excluding excluding)
+
     (list-all-columns catalog
                       :table-type :table
                       :including including
@@ -116,18 +120,34 @@
   "Associate internal table type symbol with what's found in PostgreSQL
   pg_class.relkind column.")
 
-(defun filter-list-to-where-clause (filter-list
+(defun filter-list-to-where-clause (schema-filter-list
                                     &optional
                                       not
                                       (schema-col "table_schema")
                                       (table-col  "table_name"))
   "Given an INCLUDING or EXCLUDING clause, turn it into a PostgreSQL WHERE
    clause."
-  (loop :for (schema . table-name-list) :in filter-list
-     :append (mapcar (lambda (table-name)
-                       (format nil "(~a = '~a' and ~a ~:[~;NOT ~]~~ '~a')"
-                               schema-col schema table-col not table-name))
-                     table-name-list)))
+  (loop :for (schema . filter-list) :in schema-filter-list
+     :append (mapcar (lambda (filter)
+                       (typecase filter
+                         (string-match-rule
+                          (format nil "(~a = '~a' and ~a ~:[~;!~]= '~a')"
+                                  schema-col
+                                  schema
+                                  table-col
+                                  not
+                                  (string-match-rule-target filter)))
+                         (regex-match-rule
+                          (format nil "(~a = '~a' and ~a ~:[~;NOT ~]~~ '~a')"
+                                  schema-col
+                                  schema
+                                  table-col
+                                  not
+                                  (regex-match-rule-target filter)))))
+                     filter-list)))
+
+(defun normalize-extra (extra)
+  (cond ((string= "auto_increment" extra) :auto-increment)))
 
 (defun list-all-columns (catalog
                          &key
@@ -137,7 +157,8 @@
                          &aux
                            (table-type-name (cdr (assoc table-type *table-type*))))
   "Get the list of PostgreSQL column names per table."
-  (loop :for (schema-name table-name table-oid name type typmod notnull default)
+  (loop :for (schema-name table-name table-oid
+                          name type typmod notnull default extra)
      :in
      (query nil
             (format nil
@@ -160,7 +181,9 @@
                                     :type-name type
                                     :type-mod typmod
                                     :nullable (not notnull)
-                                    :default default)))
+                                    :default default
+                                    :transform-default nil
+                                    :extra (normalize-extra extra))))
        (add-field table field))
      :finally (return catalog)))
 
@@ -187,7 +210,7 @@
                 (tschema  (find-schema catalog table-schema))
                 (table    (find-table tschema table-name))
                 (pg-index
-                 (make-index :name name
+                 (make-index :name (ensure-quoted name)
                              :oid oid
                              :schema schema
                              :table table
@@ -195,8 +218,10 @@
                              :unique unique
                              :columns nil
                              :sql sql
-                             :conname (unless (eq :null conname) conname)
-                             :condef  (unless (eq :null condef)  condef))))
+                             :conname (unless (eq :null conname)
+                                        (ensure-quoted conname))
+                             :condef  (unless (eq :null condef)
+                                        condef))))
            (maybe-add-index table name pg-index :key #'index-name))
      :finally (return catalog)))
 
@@ -247,7 +272,7 @@
                   (fschema  (find-schema catalog fschema-name))
                   (ftable   (find-table fschema ftable-name))
                   (fk
-                   (make-fkey :name conname
+                   (make-fkey :name (ensure-quoted conname)
                               :oid conoid
                               :condef condef
                               :table table
@@ -355,3 +380,44 @@
                        (sql "/pgsql/list-table-oids-from-temp-table.sql"))))
          :do (setf (gethash name oidmap) oid)))
     oidmap))
+
+
+
+;;;
+;;; PostgreSQL specific support for extensions and user defined data types.
+;;;
+(defun list-all-sqltypes (catalog &key including excluding)
+  "Fill CATALOG's per-schema extension and sqltype lists; return CATALOG."
+  (loop :for (schema-name extension-name type-name enum-values)
+     :in (query nil
+                (format nil
+                        (sql "/pgsql/list-all-sqltypes.sql")
+                        including       ; do we print the clause?
+                        (filter-list-to-where-clause including
+                                                     nil
+                                                     "n.nspname"
+                                                     "c.relname")
+                        excluding       ; do we print the clause?
+                        (filter-list-to-where-clause excluding
+                                                     t ; negate: EXCLUDING
+                                                     "n.nspname"
+                                                     "c.relname")))
+     :do
+     (let* ((schema    (maybe-add-schema catalog schema-name))
+            (sqltype
+             (make-sqltype :name (ensure-quoted type-name)
+                           :schema schema
+                           :type (when enum-values :enum) ; NOTE(review): :null is truthy too - confirm
+                           :extra (when (and enum-values
+                                             (not (eq enum-values :null)))
+                                    (coerce enum-values 'list)))))
+
+       (if (and extension-name (not (eq :null extension-name)))
+           ;; then create extension will create the type
+           (maybe-add-extension schema extension-name)
+
+           ;; only create a specific entry for types that we need to create
+           ;; ourselves, when extension is not null "create extension" is
+           ;; going to take care of creating the type.
+           (add-sqltype schema sqltype)))
+     :finally (return catalog)))
diff --git a/src/pgsql/sql/list-all-columns.sql b/src/pgsql/sql/list-all-columns.sql
index d3223e1..8875c4d 100644
--- a/src/pgsql/sql/list-all-columns.sql
+++ b/src/pgsql/sql/list-all-columns.sql
@@ -3,17 +3,37 @@
 --         filter-list-to-where-clause for including
 --         excluding
 --         filter-list-to-where-clause for excluding
+with seqattr as
+ (
+   select adrelid, 
+          adnum,
+          adsrc,
+          case when adsrc ~~ 'nextval'
+               then (regexp_match(pg_get_expr(d.adbin, d.adrelid),
+                                  '''([^'']+)''')
+                    )[1]::regclass::oid
+               else null::oid
+           end as seqoid
+     from pg_attrdef d
+ )
     select nspname, relname, c.oid, attname,
            t.oid::regtype as type,
-           case when atttypmod > 0 then atttypmod - 4 else null end as typmod,
+           case when atttypmod > 0
+                then substring(format_type(t.oid, atttypmod) from '\d+(?:,\d+)?')
+                else null
+            end as typmod,
            attnotnull,
-           case when atthasdef then def.adsrc end as default
+           case when atthasdef then def.adsrc end as default,
+           case when s.seqoid is not null then 'auto_increment' end as extra
       from pg_class c
            join pg_namespace n on n.oid = c.relnamespace
            left join pg_attribute a on c.oid = a.attrelid
            join pg_type t on t.oid = a.atttypid and attnum > 0
            left join pg_attrdef def on a.attrelid = def.adrelid
                                    and a.attnum = def.adnum
+                                   and a.atthasdef
+           left join seqattr s on def.adrelid = s.adrelid
+                              and def.adnum = s.adnum
 
      where nspname !~~ '^pg_' and n.nspname <> 'information_schema'
            and relkind in (~{'~a'~^, ~})
diff --git a/src/pgsql/sql/list-all-extensions.sql b/src/pgsql/sql/list-all-extensions.sql
new file mode 100644
index 0000000..00a9aff
--- /dev/null
+++ b/src/pgsql/sql/list-all-extensions.sql
@@ -0,0 +1,4 @@
+select nspname, extname
+  from pg_extension e
+       join pg_namespace n on n.oid = e.extnamespace
+ where nspname !~ '^pg_';
diff --git a/src/pgsql/sql/list-all-sqltypes.sql b/src/pgsql/sql/list-all-sqltypes.sql
new file mode 100644
index 0000000..cfaf791
--- /dev/null
+++ b/src/pgsql/sql/list-all-sqltypes.sql
@@ -0,0 +1,43 @@
+--
+-- get user defined SQL types
+--
+  select nt.nspname,
+         extname,
+         typname,
+         case when enum.enumtypid is not null
+              then array_agg(enum.enumlabel order by enumsortorder)
+          end as enumvalues
+
+    from pg_class c
+         join pg_namespace n on n.oid = c.relnamespace
+         left join pg_attribute a on c.oid = a.attrelid and a.attnum > 0
+         join pg_type t on t.oid = a.atttypid
+         left join pg_namespace nt on nt.oid = t.typnamespace
+         left join pg_depend d on d.classid = 'pg_type'::regclass
+                              and d.refclassid = 'pg_extension'::regclass
+                              and d.objid = t.oid
+         left join pg_extension e on refobjid = e.oid
+         left join pg_enum enum on enum.enumtypid = t.oid
+
+   where nt.nspname !~~ '^pg_' and nt.nspname <> 'information_schema'
+         and n.nspname !~~ '^pg_' and n.nspname <> 'information_schema'
+         and c.relkind in ('r', 'f', 'p')
+           ~:[~*~;and (~{~a~^~&~10t or ~})~]
+           ~:[~*~;and (~{~a~^~&~10t and ~})~]
+         and
+           (   t.typrelid = 0
+            or
+               (select c.relkind = 'c'
+                 from pg_class c
+                where c.oid = t.typrelid)
+           )
+           and not exists
+             (
+                select 1
+                  from pg_type el
+                 where el.oid = t.typelem
+                   and el.typarray = t.oid
+              )
+
+group by nt.nspname, extname, typname, enumtypid
+order by nt.nspname, extname, typname, enumtypid;
diff --git a/src/sources/pgsql/pgsql-cast-rules.lisp b/src/sources/pgsql/pgsql-cast-rules.lisp
new file mode 100644
index 0000000..2ef0373
--- /dev/null
+++ b/src/sources/pgsql/pgsql-cast-rules.lisp
@@ -0,0 +1,48 @@
+;;;
+;;; Tools to handle PostgreSQL data type casting rules
+;;;
+
+(in-package :pgloader.source.pgsql)
+
+(defparameter *pgsql-default-cast-rules*
+  '((:source (:type "integer" :auto-increment t)
+     :target (:type "serial" :drop-default t))
+
+    (:source (:type "bigint" :auto-increment t)
+     :target (:type "bigserial" :drop-default t)))
+  "Data Type Casting to migrate from PostgreSQL to PostgreSQL")
+
+(defmethod pgsql-column-ctype ((column column))
+  "Build the ctype definition from the PostgreSQL column information."
+  (let ((type-name (column-type-name column))
+        (type-mod  (unless (or (null (column-type-mod column))
+                               (eq :null (column-type-mod column)))
+                     (column-type-mod column))))
+    (format nil "~a~@[(~a)~]" type-name type-mod)))
+
+(defmethod cast ((field column) &key &allow-other-keys)
+  "Return the PostgreSQL type definition from the given PostgreSQL column
+   definition"
+  (with-slots (pgloader.catalog::name
+               pgloader.catalog::type-name
+               pgloader.catalog::type-mod
+               pgloader.catalog::nullable
+               pgloader.catalog::default
+               pgloader.catalog::comment
+               pgloader.catalog::transform
+               pgloader.catalog::extra)
+      field
+    (let* ((ctype (pgsql-column-ctype field))
+           (pgcol (apply-casting-rules nil
+                                       pgloader.catalog::name
+                                       pgloader.catalog::type-name
+                                       ctype
+                                       pgloader.catalog::default
+                                       pgloader.catalog::nullable
+                                       pgloader.catalog::extra)))
+      ;; re-install our instruction not to transform default value: it comes
+      ;; from PostgreSQL, and we trust it.
+      (setf (column-transform-default pgcol)
+            (column-transform-default field))
+
+      pgcol)))
diff --git a/src/sources/pgsql/pgsql.lisp b/src/sources/pgsql/pgsql.lisp
new file mode 100644
index 0000000..e8cab7b
--- /dev/null
+++ b/src/sources/pgsql/pgsql.lisp
@@ -0,0 +1,90 @@
+;;;
+;;; Read from a PostgreSQL database.
+;;;
+
+(in-package :pgloader.source.pgsql)
+
+(defclass copy-pgsql (db-copy) ()
+  (:documentation "pgloader PostgreSQL Data Source"))
+
+(defmethod initialize-instance :after ((source copy-pgsql) &key)
+  "Add a default value for transforms in case it's not been provided."
+  (let* ((transforms (when (slot-boundp source 'transforms)
+		       (slot-value source 'transforms))))
+    (when (and (slot-boundp source 'fields) (slot-value source 'fields))
+      ;; cast typically happens in copy-database in the schema structure,
+      ;; and the result is then copied into the copy-pgsql instance.
+      (unless (and (slot-boundp source 'columns) (slot-value source 'columns))
+        (setf (slot-value source 'columns)
+              (mapcar #'cast (slot-value source 'fields))))
+
+      (unless transforms
+        (setf (slot-value source 'transforms)
+              (mapcar #'column-transform (slot-value source 'columns)))))))
+
+(defmethod map-rows ((pgsql copy-pgsql) &key process-row-fn)
+  "Extract PostgreSQL data and call PROCESS-ROW-FN function with a single
+   argument (a vector of column values) for each row"
+  (let ((map-reader
+         ;;
+         ;; Build a Postmodern row reader that prepares a vector of strings
+         ;; and call PROCESS-ROW-FN with the vector as single argument.
+         ;;
+         (cl-postgres:row-reader (fields)
+           (let ((nb-cols (length fields)))
+             (loop :while (cl-postgres:next-row)
+                :do (let ((row (make-array nb-cols)))
+                      (loop :for i :from 0
+                         :for field :across fields
+                         :do (setf (aref row i)
+                                   (cl-postgres:next-field field)))
+                      (funcall process-row-fn row)))))))
+
+    (with-pgsql-connection ((source-db pgsql))
+      (let* ((cols   (mapcar #'column-name (fields pgsql)))
+             (sql
+              (format nil "SELECT ~{~s::text~^, ~} FROM ~s.~s" cols
+                      (schema-source-name (table-schema (source pgsql)))
+                      (table-source-name (source pgsql)))))
+        (cl-postgres:exec-query pomo:*database* sql map-reader)))))
+
+(defmethod fetch-metadata ((pgsql copy-pgsql)
+                           (catalog catalog)
+                           &key
+                             materialize-views
+                             only-tables
+                             create-indexes
+                             foreign-keys
+                             including
+                             excluding)
+  "PostgreSQL introspection to prepare the migration."
+  (declare (ignore materialize-views only-tables))
+  (with-stats-collection ("fetch meta data"
+                          :use-result-as-rows t
+                          :use-result-as-read t
+                          :section :pre)
+    (with-pgsql-transaction (:pgconn (source-db pgsql))
+      (list-all-sqltypes catalog
+                         :including including
+                         :excluding excluding)
+
+      (list-all-columns catalog
+                        :including including
+                        :excluding excluding)
+
+      (when create-indexes
+        (list-all-indexes catalog
+                          :including including
+                          :excluding excluding))
+
+      (when foreign-keys
+        (list-all-fkeys catalog
+                        :including including
+                        :excluding excluding))
+
+      ;; return how many objects we're going to deal with in total
+      ;; for stats collection
+      (+ (count-tables catalog) (count-indexes catalog))))
+
+  ;; be sure to return the catalog itself
+  catalog)
diff --git a/src/sources/sqlite/sqlite.lisp b/src/sources/sqlite/sqlite.lisp
index f6f97de..99e1ab7 100644
--- a/src/sources/sqlite/sqlite.lisp
+++ b/src/sources/sqlite/sqlite.lisp
@@ -96,7 +96,7 @@
   "Send the data in the SQLite column ordering."
   (mapcar #'apply-identifier-case (mapcar #'coldef-name (fields sqlite))))
 
-(defmethod fetch-metadata (sqlite catalog
+(defmethod fetch-metadata ((sqlite copy-sqlite) (catalog catalog)
                            &key
                              materialize-views
                              only-tables
diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp
index c81758f..76a4857 100644
--- a/src/utils/catalog.lisp
+++ b/src/utils/catalog.lisp
@@ -43,25 +43,35 @@
 ;;; implemented in each source separately.
 ;;;
 (defstruct catalog name schema-list types-without-btree)
-(defstruct schema source-name name catalog table-list view-list in-search-path)
+
+(defstruct schema source-name name catalog in-search-path
+           table-list view-list extension-list sqltype-list)
+
 (defstruct table source-name name schema oid comment storage-parameter-list
            ;; field is for SOURCE
            ;; column is for TARGET
            field-list column-list index-list fkey-list trigger-list)
 
+;;;
+;;; When migrating from PostgreSQL to PostgreSQL we might have to install
+;;; extensions to have data type coverage.
+;;;
+(defstruct extension name schema)
+
 ;;;
 ;;; When migrating from another database to PostgreSQL some data types might
 ;;; need to be tranformed dynamically into User Defined Types: ENUMs, SET,
 ;;; etc.
 ;;;
-(defstruct sqltype name schema type source-def extra)
+(defstruct sqltype name schema type source-def extra extension)
 
 ;;;
 ;;; The generic PostgreSQL column that the CAST generic function is asked to
 ;;; produce, so that we know how to CREATE TABLEs in PostgreSQL whatever the
 ;;; source is.
 ;;;
-(defstruct column name type-name type-mod nullable default comment transform extra)
+(defstruct column name type-name type-mod nullable default comment
+           transform extra (transform-default t))
 
 ;;;
 ;;; Index and Foreign Keys
@@ -94,13 +104,18 @@
 ;;;
 ;;; Main data collection API
 ;;;
-(defgeneric add-schema  (object schema-name &key))
-(defgeneric add-table   (object table-name &key))
-(defgeneric add-view    (object view-name &key))
-(defgeneric add-column  (object column &key))
-(defgeneric add-index   (object index &key))
-(defgeneric add-fkey    (object fkey &key))
-(defgeneric add-comment (object comment &key))
+(defgeneric add-schema    (object schema-name &key))
+(defgeneric add-extension (object extension-name &key))
+(defgeneric add-table     (object table-name &key))
+(defgeneric add-view      (object view-name &key))
+(defgeneric add-sqltype   (object column &key))
+(defgeneric add-column    (object column &key))
+(defgeneric add-index     (object index &key))
+(defgeneric add-fkey      (object fkey &key))
+(defgeneric add-comment   (object comment &key))
+
+(defgeneric extension-list (object &key)
+  (:documentation "Return the list of extensions found in OBJECT."))
 
 (defgeneric table-list (object &key)
   (:documentation "Return the list of tables found in OBJECT."))
@@ -112,6 +127,10 @@
   (:documentation
    "Find a schema by SCHEMA-NAME in a catalog OBJECT and return the schema"))
 
+(defgeneric find-extension (object extension-name &key)
+  (:documentation
+   "Find an extension by EXTENSION-NAME in a schema OBJECT and return it"))
+
 (defgeneric find-table (object table-name &key)
   (:documentation
    "Find a table by TABLE-NAME in a schema OBJECT and return the table"))
@@ -131,6 +150,9 @@
 (defgeneric maybe-add-schema (object schema-name &key)
   (:documentation "Add a new schema or return existing one."))
 
+(defgeneric maybe-add-extension (object extension-name &key)
+  (:documentation "Add a new extension or return existing one."))
+
 (defgeneric maybe-add-table (object table-name &key)
   (:documentation "Add a new table or return existing one."))
 
@@ -167,6 +189,35 @@
 ;;;
 ;;; Implementation of the methods
 ;;;
+(defmethod extension-list ((schema schema) &key)
+  "Return the list of extensions for SCHEMA."
+  (schema-extension-list schema))
+
+(defmethod extension-list ((catalog catalog) &key)
+  "Return the list of extensions for CATALOG."
+  (apply #'append (mapcar #'extension-list (catalog-schema-list catalog))))
+
+(defmethod sqltype-list ((column column) &key)
+  "Return COLUMN's sqltype wrapped in a list (callers APPEND results), else NIL."
+  (when (typep (column-type-name column) 'sqltype)
+    (list (column-type-name column))))
+
+(defmethod sqltype-list ((table table) &key)
+  "Return the list of sqltypes found in the column list of TABLE."
+  (apply #'append (mapcar #'sqltype-list (table-column-list table))))
+
+(defmethod sqltype-list ((schema schema) &key)
+  "Return the list of sqltypes for SCHEMA."
+  (append (schema-sqltype-list schema)
+          (apply #'append
+                 (mapcar #'sqltype-list (schema-table-list schema)))))
+
+(defmethod sqltype-list ((catalog catalog) &key)
+  "Return the list of sqltypes for CATALOG."
+  (remove-duplicates
+   (apply #'append (mapcar #'sqltype-list (catalog-schema-list catalog)))
+   :test #'string-equal :key #'sqltype-name))
+
 (defmethod table-list ((schema schema) &key)
   "Return the list of tables for SCHEMA."
   (schema-table-list schema))
@@ -212,6 +263,17 @@
                              :in-search-path in-search-path)))
     (push-to-end schema (catalog-schema-list catalog))))
 
+(defmethod add-extension ((schema schema) extension-name &key)
+  "Add EXTENSION-NAME to SCHEMA and return the new extension instance."
+  (let ((extension
+         (make-extension :name extension-name
+                         :schema schema)))
+    (push-to-end extension (schema-extension-list schema))))
+
+(defmethod add-sqltype ((schema schema) sqltype &key)
+  "Add SQLTYPE instance to SCHEMA and return SQLTYPE."
+  (push-to-end sqltype (schema-sqltype-list schema)))
+
 (defmethod add-table ((schema schema) table-name &key comment oid)
   "Add TABLE-NAME to SCHEMA and return the new table instance."
   (let ((table
@@ -238,6 +300,11 @@
   (find schema-name (catalog-schema-list catalog)
         :key #'schema-source-name :test 'string=))
 
+(defmethod find-extension ((schema schema) extension-name &key)
+  "Find EXTENSION-NAME in SCHEMA and return the EXTENSION object of this name."
+  (find extension-name (schema-extension-list schema)
+        :key #'extension-name :test 'string=))
+
 (defmethod find-table ((schema schema) table-name &key)
   "Find TABLE-NAME in SCHEMA and return the TABLE object of this name."
   (find table-name (schema-table-list schema)
@@ -254,6 +321,12 @@
   (let ((schema (find-schema catalog schema-name)))
     (or schema (add-schema catalog schema-name))))
 
+(defmethod maybe-add-extension ((schema schema) extension-name &key)
+  "Add EXTENSION-NAME to the extension-list for SCHEMA, or return the existing
+   extension of the same name if it already exists in that list."
+  (let ((extension (find-extension schema extension-name)))
+    (or extension (add-extension schema extension-name))))
+
 (defmethod maybe-add-table ((schema schema) table-name &key comment oid)
   "Add TABLE-NAME to the table-list for SCHEMA, or return the existing table
    of the same name if it already exists in the schema table-list."
diff --git a/test/archive.load b/test/archive.load
index de0f6f5..3d97e14 100644
--- a/test/archive.load
+++ b/test/archive.load
@@ -8,7 +8,8 @@
  */
 
 LOAD ARCHIVE
-   FROM http://pgsql.tapoueh.org/temp/foo.zip
+   -- FROM http://pgsql.tapoueh.org/temp/foo.zip
+   FROM http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip
    INTO postgresql:///ip4r
 
    BEFORE LOAD
diff --git a/test/pgsql-source.load b/test/pgsql-source.load
new file mode 100644
index 0000000..7e74bc3
--- /dev/null
+++ b/test/pgsql-source.load
@@ -0,0 +1,6 @@
+load database
+     from pgsql://localhost/pgloader
+     into pgsql://localhost/copy
+
+  -- including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
+  ;

From d3bfb1db31cad2c10ff185ae8d891718c197163b Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 20 Aug 2018 11:50:50 +0200
Subject: [PATCH 06/69] Bugfix previous commit: filter list format changed.

We now accept the more general string and regex match rules, but the code to
generate including and excluding lists from the catalogs had not been updated.
---
 src/pgsql/pgsql-schema.lisp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pgsql/pgsql-schema.lisp b/src/pgsql/pgsql-schema.lisp
index e5ce1af..b47c4e5 100644
--- a/src/pgsql/pgsql-schema.lisp
+++ b/src/pgsql/pgsql-schema.lisp
@@ -100,7 +100,7 @@
 (defun format-table-name-as-including-exp (table)
   "Return a table name suitable for a catalog lookup using ~ operator."
   (let ((table-name (table-name table)))
-    (format nil "^~a$" (ensure-unquoted table-name))))
+    (make-string-match-rule :target (ensure-unquoted table-name))))
 
 (defun query-table-schema (table)
   "Get PostgreSQL schema name where to locate TABLE-NAME by following the

From cb633aa092e83aa95b1e0483d6d6ce731bf6bdfe Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 20 Aug 2018 11:52:59 +0200
Subject: [PATCH 07/69] Refrain from some introspections on non-PGDG PostgreSQL
 variants.

When dealing with PostgreSQL protocol compatible databases, often enough
they don't support the same catalogs as PostgreSQL itself. Redshift for
instance lacks foreign key support.
---
 src/load/load-file.lisp      |  4 +++-
 src/pgsql/pgsql-schema.lisp  | 30 ++++++++++++++++++------------
 src/sources/pgsql/pgsql.lisp | 34 ++++++++++++++++++----------------
 3 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/src/load/load-file.lisp b/src/load/load-file.lisp
index 19819e5..7d36d12 100644
--- a/src/load/load-file.lisp
+++ b/src/load/load-file.lisp
@@ -42,7 +42,9 @@
     (handler-case
         (with-pgsql-connection (pgconn)
           (setf pgsql-catalog
-                (fetch-pgsql-catalog (db-name pgconn) :table (target copy)))
+                (fetch-pgsql-catalog (db-name pgconn)
+                                     :table (target copy)
+                                     :variant (pgconn-variant pgconn)))
 
           ;; if the user didn't tell us the column list of the table, now is
           ;; a proper time to set it in the copy object
diff --git a/src/pgsql/pgsql-schema.lisp b/src/pgsql/pgsql-schema.lisp
index b47c4e5..9ea3d59 100644
--- a/src/pgsql/pgsql-schema.lisp
+++ b/src/pgsql/pgsql-schema.lisp
@@ -5,7 +5,12 @@
 (in-package :pgloader.pgsql)
 
 (defun fetch-pgsql-catalog (dbname
-                            &key table source-catalog including excluding)
+                            &key
+                              table
+                              source-catalog
+                              including
+                              excluding
+                              (variant :pgdg))
   "Fetch PostgreSQL catalogs for the target database. A PostgreSQL
    connection must be opened."
   (let* ((*identifier-case* :quote)
@@ -18,10 +23,10 @@
 
                           (t
                            including))))
-
-    (list-all-sqltypes catalog
-                       :including including
-                       :excluding excluding)
+    (when (eq :pgdg variant)
+      (list-all-sqltypes catalog
+                         :including including
+                         :excluding excluding))
 
     (list-all-columns catalog
                       :table-type :table
@@ -32,14 +37,15 @@
                       :including including
                       :excluding excluding)
 
-    (list-all-fkeys catalog
-                    :including including
-                    :excluding excluding)
+    (when (eq :pgdg variant)
+      (list-all-fkeys catalog
+                      :including including
+                      :excluding excluding)
 
-    ;; fetch fkey we depend on with UNIQUE indexes but that have been
-    ;; excluded from the target list, we still need to take care of them to
-    ;; be able to DROP then CREATE those indexes again
-    (list-missing-fk-deps catalog)
+      ;; fetch fkey we depend on with UNIQUE indexes but that have been
+      ;; excluded from the target list, we still need to take care of them to
+      ;; be able to DROP then CREATE those indexes again
+      (list-missing-fk-deps catalog))
 
     (log-message :debug "fetch-pgsql-catalog: ~d tables, ~d indexes, ~d+~d fkeys"
                  (count-tables catalog)
diff --git a/src/sources/pgsql/pgsql.lisp b/src/sources/pgsql/pgsql.lisp
index e8cab7b..8a45a58 100644
--- a/src/sources/pgsql/pgsql.lisp
+++ b/src/sources/pgsql/pgsql.lisp
@@ -64,27 +64,29 @@
                           :use-result-as-read t
                           :section :pre)
     (with-pgsql-transaction (:pgconn (source-db pgsql))
-      (list-all-sqltypes catalog
+      (let ((variant (pgconn-variant (source-db pgsql))))
+       (when (eq :pgdg variant)
+         (list-all-sqltypes catalog
+                            :including including
+                            :excluding excluding))
+
+       (list-all-columns catalog
                          :including including
                          :excluding excluding)
 
-      (list-all-columns catalog
-                        :including including
-                        :excluding excluding)
+       (when create-indexes
+         (list-all-indexes catalog
+                           :including including
+                           :excluding excluding))
 
-      (when create-indexes
-        (list-all-indexes catalog
-                          :including including
-                          :excluding excluding))
+       (when (and (eq :pgdg variant) foreign-keys)
+         (list-all-fkeys catalog
+                         :including including
+                         :excluding excluding))
 
-      (when foreign-keys
-        (list-all-fkeys catalog
-                        :including including
-                        :excluding excluding))
-
-      ;; return how many objects we're going to deal with in total
-      ;; for stats collection
-      (+ (count-tables catalog) (count-indexes catalog))))
+       ;; return how many objects we're going to deal with in total
+       ;; for stats collection
+       (+ (count-tables catalog) (count-indexes catalog)))))
 
   ;; be sure to return the catalog itself
   catalog)

From c9b905b7ac3fa008d7dfeaf7bde539b228ad3f3e Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 20 Aug 2018 11:55:47 +0200
Subject: [PATCH 08/69] Simplify our ASD system definition by using :serial t.

This allows us to drop the manually maintained lists of file dependencies;
they are now implied by the order in which we list the files.
---
 pgloader.asd | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/pgloader.asd b/pgloader.asd
index 89db8c8..12b1684 100644
--- a/pgloader.asd
+++ b/pgloader.asd
@@ -149,40 +149,30 @@
                                         ;(:file "syslog") ; experimental...
 
                        (:module "sqlite"
+                                :serial t
                                 :depends-on ("common")
                                 :components
                                 ((:file "sqlite-cast-rules")
-                                 (:file "sqlite-schema"
-                                        :depends-on ("sqlite-cast-rules"))
-                                 (:file "sqlite"
-                                        :depends-on ("sqlite-cast-rules"
-                                                     "sqlite-schema"))))
+                                 (:file "sqlite-schema")
+                                 (:file "sqlite")))
 
                        (:module "mssql"
+                                :serial t
                                 :depends-on ("common")
                                 :components
                                 ((:file "mssql-cast-rules")
-                                 (:file "mssql-schema"
-                                        :depends-on ("mssql-cast-rules"))
-                                 (:file "mssql"
-                                        :depends-on ("mssql-cast-rules"
-                                                     "mssql-schema"))
-                                 (:file "mssql-index-filters"
-                                        :depends-on ("mssql"))))
+                                 (:file "mssql-schema")
+                                 (:file "mssql")
+                                 (:file "mssql-index-filters")))
 
                        (:module "mysql"
+                                :serial t
                                 :depends-on ("common")
                                 :components
                                 ((:file "mysql-cast-rules")
                                  (:file "mysql-connection")
-                                 (:file "mysql-schema"
-                                        :depends-on ("mysql-connection"
-                                                     "mysql-cast-rules"))
-                                 ;; (:file "mysql-csv"
-                                 ;;        :depends-on ("mysql-schema"))
-                                 (:file "mysql"
-                                        :depends-on ("mysql-cast-rules"
-                                                     "mysql-schema"))))
+                                 (:file "mysql-schema")
+                                 (:file "mysql")))
 
                        (:module "pgsql"
                                 :serial t

From 4fbfd9e5223855690f6b30b876d8e4eb658aeb8e Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 22 Aug 2018 10:52:01 +0200
Subject: [PATCH 09/69] Refrain from using regexp_match() function, introduced
 in Pg10.

Instead use the substring() function which has been there all along.

See #813.
---
 src/pgsql/sql/list-all-columns.sql | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/pgsql/sql/list-all-columns.sql b/src/pgsql/sql/list-all-columns.sql
index 8875c4d..11be443 100644
--- a/src/pgsql/sql/list-all-columns.sql
+++ b/src/pgsql/sql/list-all-columns.sql
@@ -9,9 +9,9 @@ with seqattr as
           adnum,
           adsrc,
           case when adsrc ~~ 'nextval'
-               then (regexp_match(pg_get_expr(d.adbin, d.adrelid),
-                                  '''([^'']+)''')
-                    )[1]::regclass::oid
+               then substring(pg_get_expr(d.adbin, d.adrelid)
+                              from '''([^'']+)'''
+                    )::regclass::oid
                else null::oid
            end as seqoid
      from pg_attrdef d

From 0f58a3c84d3694fda01ba1fbf0ccc4f2ea205461 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Fri, 31 Aug 2018 22:51:41 -0700
Subject: [PATCH 10/69] Assorted fixes: catalogs SQLtypes and MySQL decoding
 as.

It turns out that when trying to debug "decoding as" the SQLtype listing
support in sqltype-list was found broken, so this patch fixes it. Then goes
on to fix the DECODING AS filters support, which we have switched to using
the better regexp-or-string filter struct but forgot to update the matching
code accordingly.

Fixes #665.
---
 src/sources/mysql/mysql.lisp | 8 +-------
 src/utils/catalog.lisp       | 6 ++++--
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/sources/mysql/mysql.lisp b/src/sources/mysql/mysql.lisp
index 1710c8c..7cc9555 100644
--- a/src/sources/mysql/mysql.lisp
+++ b/src/sources/mysql/mysql.lisp
@@ -235,13 +235,7 @@ Illegal ~a character starting at position ~a~@[: ~a~].~%"
 (defun apply-decoding-as-filters (table-name filters)
   "Return a generialized boolean which is non-nil only if TABLE-NAME matches
    one of the FILTERS."
-  (flet ((apply-filter (filter)
-           ;; we close over table-name here.
-           (typecase filter
-             (string (string-equal filter table-name))
-             (list   (destructuring-bind (type val) filter
-                       (ecase type
-                         (:regex (cl-ppcre:scan val table-name))))))))
+  (flet ((apply-filter (filter) (matches filter table-name)))
     (some #'apply-filter filters)))
 
 (defmethod instanciate-table-copy-object ((copy copy-mysql) (table table))
diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp
index 76a4857..c61ce8f 100644
--- a/src/utils/catalog.lisp
+++ b/src/utils/catalog.lisp
@@ -204,7 +204,7 @@
 
 (defmethod sqltype-list ((table table) &key)
   "Return the list of sqltypes for SCHEMA."
-  (apply #'append (mapcar #'sqltype-list (table-column-list table))))
+  (mapcar #'sqltype-list (table-column-list table)))
 
 (defmethod sqltype-list ((schema schema) &key)
   "Return the list of sqltypes for SCHEMA."
@@ -215,7 +215,9 @@
 (defmethod sqltype-list ((catalog catalog) &key)
   "Return the list of sqltypes for CATALOG."
   (remove-duplicates
-   (apply #'append (mapcar #'sqltype-list (catalog-schema-list catalog)))
+   (remove-if #'null
+              (apply #'append
+                     (mapcar #'sqltype-list (catalog-schema-list catalog))))
    :test #'string-equal :key #'sqltype-name))
 
 (defmethod table-list ((schema schema) &key)

From 5119d864f4107f4d4d2e4e850fe4b44dc33a0bbc Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 4 Sep 2018 11:49:21 +0200
Subject: [PATCH 11/69] Assorted bug fixes in the context of Redshift support
 as a source.

The catalog queries used in pgloader have to be adjusted for Redshift
because this thing forked PostgreSQL 8.0, which is a long time ago now.
Also, we had a couple bugs here and there that were not really related to
Redshift support but were shown in that context.

Fixes #813.
---
 src/parsers/command-pgsql.lisp          |  6 ++++--
 src/pgsql/sql/list-all-columns.sql      |  8 ++++----
 src/pgsql/sql/list-all-indexes.sql      |  9 +++++----
 src/sources/pgsql/pgsql-cast-rules.lisp | 23 ++++++++++++++++++++++-
 src/utils/transforms.lisp               |  2 +-
 5 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/src/parsers/command-pgsql.lisp b/src/parsers/command-pgsql.lisp
index 2a09fd7..3650534 100644
--- a/src/parsers/command-pgsql.lisp
+++ b/src/parsers/command-pgsql.lisp
@@ -45,7 +45,8 @@
       (cons schema filter-list))))
 
 (defrule including-matching-in-schema
-    (and including-in-schema (* including-in-schema))
+    (and including-matching-in-schema-filter
+         (* including-matching-in-schema-filter))
   (:lambda (source)
     (destructuring-bind (inc1 incs) source
       (cons :including (list* inc1 incs)))))
@@ -58,7 +59,8 @@
       (cons schema filter-list))))
 
 (defrule excluding-matching-in-schema
-    (and excluding-in-schema (* excluding-in-schema))
+    (and excluding-matching-in-schema-filter
+         (* excluding-matching-in-schema-filter))
   (:lambda (source)
     (destructuring-bind (excl1 excls) source
       (cons :excluding (list* excl1 excls)))))
diff --git a/src/pgsql/sql/list-all-columns.sql b/src/pgsql/sql/list-all-columns.sql
index 11be443..75f8a52 100644
--- a/src/pgsql/sql/list-all-columns.sql
+++ b/src/pgsql/sql/list-all-columns.sql
@@ -11,9 +11,9 @@ with seqattr as
           case when adsrc ~~ 'nextval'
                then substring(pg_get_expr(d.adbin, d.adrelid)
                               from '''([^'']+)'''
-                    )::regclass::oid
-               else null::oid
-           end as seqoid
+                    )
+               else null
+           end as seqname
      from pg_attrdef d
  )
     select nspname, relname, c.oid, attname,
@@ -24,7 +24,7 @@ with seqattr as
             end as typmod,
            attnotnull,
            case when atthasdef then def.adsrc end as default,
-           case when s.seqoid is not null then 'auto_increment' end as extra
+           case when s.seqname is not null then 'auto_increment' end as extra
       from pg_class c
            join pg_namespace n on n.oid = c.relnamespace
            left join pg_attribute a on c.oid = a.attrelid
diff --git a/src/pgsql/sql/list-all-indexes.sql b/src/pgsql/sql/list-all-indexes.sql
index 320a6e0..bfffbf7 100644
--- a/src/pgsql/sql/list-all-indexes.sql
+++ b/src/pgsql/sql/list-all-indexes.sql
@@ -17,10 +17,11 @@
          join pg_class r ON r.oid = x.indrelid
          join pg_namespace n ON n.oid = i.relnamespace
          join pg_namespace rn ON rn.oid = r.relnamespace
-         left join pg_constraint c ON c.conindid = i.oid
-                                  and c.conrelid = r.oid
-                                  -- filter out self-fkeys
-                                  and c.confrelid <> r.oid
+         left join pg_depend d on d.classid = 'pg_class'::regclass
+                              and d.objid = i.oid
+                              and d.refclassid = 'pg_constraint'::regclass
+                              and d.deptype = 'i'
+         left join pg_constraint c ON c.oid = d.refobjid
    where n.nspname !~~ '^pg_' and n.nspname <> 'information_schema'
          ~:[~*~;and (~{~a~^~&~10t or ~})~]
          ~:[~*~;and (~{~a~^~&~10t and ~})~]
diff --git a/src/sources/pgsql/pgsql-cast-rules.lisp b/src/sources/pgsql/pgsql-cast-rules.lisp
index 2ef0373..6ac37ee 100644
--- a/src/sources/pgsql/pgsql-cast-rules.lisp
+++ b/src/sources/pgsql/pgsql-cast-rules.lisp
@@ -9,7 +9,10 @@
      :target (:type "serial" :drop-default t))
 
     (:source (:type "bigint" :auto-increment t)
-     :target (:type "bigserial" :drop-default t)))
+     :target (:type "bigserial" :drop-default t))
+
+    (:source (:type "character varying")
+     :target (:type "text" :drop-typemod t)))
   "Data Type Casting to migrate from PostgtreSQL to PostgreSQL")
 
 (defmethod pgsql-column-ctype ((column column))
@@ -45,4 +48,22 @@
       (setf (column-transform-default pgcol)
             (column-transform-default field))
 
+      ;; Redshift may be using DEFAULT getdate() instead of now()
+      (let ((default (column-default pgcol)))
+        (setf (column-default pgcol)
+              (cond
+                ((and (stringp default) (string= "NULL" default))
+                 :null)
+
+                ((and (stringp default)
+                      (or (string= "getdate()" default)))
+                 :current-timestamp)
+
+                (t (column-default pgcol))))
+
+        ;; we usually trust defaults that come from PostgreSQL... but we
+        ;; also have support for Redshift.
+        (when (member (column-default pgcol) '(:null :current-timestamp))
+          (setf (column-transform-default pgcol) t)))
+
       pgcol)))
diff --git a/src/utils/transforms.lisp b/src/utils/transforms.lisp
index dbc39b9..4d77c71 100644
--- a/src/utils/transforms.lisp
+++ b/src/utils/transforms.lisp
@@ -53,7 +53,7 @@
 	      (string= "set" data-type))
     (let ((start-1 (position #\( column-type))	; just before start position
 	  (end     (position #\) column-type)))	; just before end position
-      (when start-1
+      (when (and start-1 (< (+ 1 start-1) end))
 	(destructuring-bind (a &optional b)
 	    (mapcar #'parse-integer
 		    (sq:split-sequence #\, column-type

From d356bd501b557b41502a45ac7471e055112962f5 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 10 Sep 2018 19:33:39 +0200
Subject: [PATCH 12/69] Accept even more ragged date format input.

When parsing a date string from a date format, accept that the ms or us part
be completely missing, rather than just missing some digits.

Fixed #828.
---
 src/parsers/date-format.lisp             | 7 ++++---
 test/csv-parse-date.load                 | 1 +
 test/regress/expected/csv-parse-date.out | 1 +
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/parsers/date-format.lisp b/src/parsers/date-format.lisp
index d7b7fc6..45ba4d7 100644
--- a/src/parsers/date-format.lisp
+++ b/src/parsers/date-format.lisp
@@ -38,11 +38,12 @@
                 :for ragged-end := (when end
                                      (cond ((member name '(:msecs :usecs))
                                             ;; take any number of digits up to
-                                            ;; the specified field lenght
+                                            ;; the specified field length
                                             ;; (less digits are allowed)
-                                            (min end (length date-string)))
+                                            (when (<= start (length date-string))
+                                              (min end (length date-string))))
                                            (t end)))
-                :when (and start end)
+                :when (and start ragged-end)
                 :append (list name (subseq date-string start ragged-end)))
            (if (or (string= year  "0000")
                    (string= month "00")
diff --git a/test/csv-parse-date.load b/test/csv-parse-date.load
index 318df8a..9d74e22 100644
--- a/test/csv-parse-date.load
+++ b/test/csv-parse-date.load
@@ -28,3 +28,4 @@ LOAD CSV
 1,10-02-1999 00-33-12.123456,"00:05.02"
 2,10-02-2014 00-33-13.123,"18:25.52"
 3,10-02-2014 00-33-14.1234,13:14.15
+4,10-09-2018 19-24-59,19:24.59
diff --git a/test/regress/expected/csv-parse-date.out b/test/regress/expected/csv-parse-date.out
index 4f4e941..d21c37a 100644
--- a/test/regress/expected/csv-parse-date.out
+++ b/test/regress/expected/csv-parse-date.out
@@ -1,3 +1,4 @@
 1	1999-10-02 00:33:12.123456+02	00:05:02
 2	2014-10-02 00:33:13.123+02	18:25:52
 3	2014-10-02 00:33:14.1234+02	13:14:15
+4	2018-10-09 19:24:59+02	19:24:59

From 0957bd0efa901fd4b352cc3f3349ac044d369ad9 Mon Sep 17 00:00:00 2001
From: Jon Snell <jsnell@e-normous.com>
Date: Fri, 5 Oct 2018 05:47:54 -0500
Subject: [PATCH 13/69] Fix pgloader bug #844 by adding support for mssql real
 types (#845)

---
 src/monkey/mssql.lisp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/monkey/mssql.lisp b/src/monkey/mssql.lisp
index 9cd4e41..ce2abf6 100644
--- a/src/monkey/mssql.lisp
+++ b/src/monkey/mssql.lisp
@@ -93,6 +93,7 @@
              (:syb-int2 (unsigned-to-signed (mem-ref data :unsigned-int) 2))
              (:syb-int4 (unsigned-to-signed (mem-ref data :unsigned-int) 4))
              (:syb-int8 (mem-ref data :int8))
+             (:syb-real (mem-ref data :float))
              (:syb-flt8 (mem-ref data :double))
              ((:syb-datetime :syb-datetime4 :syb-msdate)
               (with-foreign-pointer (%buf +numeric-buf-sz+)

From 344d0ca61b3f34b565cf60f719a33f4f99f01254 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 10 Oct 2018 11:08:28 -0700
Subject: [PATCH 14/69] Implement AFTER SCHEMA sql code blocks.

This allows pgloader users to run SQL commands in between pgloader's schema
creation and the actual loading of the data.
---
 src/load/migrate-database.lisp     | 32 ++++++++++++++++++++----------
 src/package.lisp                   |  1 +
 src/parsers/command-pgsql.lisp     |  9 ++++++---
 src/parsers/command-sql-block.lisp | 31 +++++++++++++++++++----------
 4 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index 044d931..28f57c9 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -255,6 +255,7 @@
 			    (reset-sequences  t)
 			    (foreign-keys     t)
                             (reindex          nil)
+                            (after-schema     nil)
 			    only-tables
 			    including
 			    excluding
@@ -329,17 +330,26 @@
 
     ;; if asked, first drop/create the tables on the PostgreSQL side
     (handler-case
-        (prepare-pgsql-database copy
-                                catalog
-                                :truncate truncate
-                                :create-tables create-tables
-                                :create-schemas create-schemas
-                                :drop-indexes drop-indexes
-                                :drop-schema drop-schema
-                                :include-drop include-drop
-                                :foreign-keys foreign-keys
-                                :set-table-oids set-table-oids
-                                :materialize-views materialize-views)
+        (progn
+          (prepare-pgsql-database copy
+                                  catalog
+                                  :truncate truncate
+                                  :create-tables create-tables
+                                  :create-schemas create-schemas
+                                  :drop-indexes drop-indexes
+                                  :drop-schema drop-schema
+                                  :include-drop include-drop
+                                  :foreign-keys foreign-keys
+                                  :set-table-oids set-table-oids
+                                  :materialize-views materialize-views)
+
+          ;; if there's an AFTER SCHEMA DO/EXECUTE command, now is the time
+          ;; to run it.
+          (when after-schema
+            (pgloader.parser::execute-sql-code-block (target-db copy)
+                                                     :pre
+                                                     after-schema
+                                                     "after schema")))
       ;;
       ;; In case some error happens in the preparatory transaction, we
       ;; need to stop now and refrain from trying to load the data into
diff --git a/src/package.lisp b/src/package.lisp
index 8e81cdf..bc9abfe 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -812,6 +812,7 @@
   (:export #:parse-commands
            #:parse-commands-from-file
            #:initialize-context
+           #:execute-sql-code-block
 
            ;; tools to enable complete cli parsing in main.lisp
            #:process-relative-pathnames
diff --git a/src/parsers/command-pgsql.lisp b/src/parsers/command-pgsql.lisp
index 3650534..6599c4f 100644
--- a/src/parsers/command-pgsql.lisp
+++ b/src/parsers/command-pgsql.lisp
@@ -79,6 +79,7 @@
                                             excluding-matching-in-schema
                                             decoding-tables-as
                                             before-load
+                                            after-schema
                                             after-load))
   (:lambda (clauses-list)
     (alexandria:alist-plist clauses-list)))
@@ -103,11 +104,11 @@
 (defun lisp-code-for-loading-from-pgsql (pg-src-db-conn pg-dst-db-conn
                                          &key
                                            gucs
-                                           casts before after options
+                                           casts options
+                                           before after after-schema
                                            alter-table alter-schema
                                            ((:including incl))
                                            ((:excluding excl))
-                                           ((:decoding decoding-as))
                                            &allow-other-keys)
   `(lambda ()
      (let* ((*default-cast-rules* ',*pgsql-default-cast-rules*)
@@ -131,6 +132,7 @@
                       :index-names :preserve
                       :set-table-oids t
                       :on-error-stop on-error-stop
+                      :after-schema ',after-schema
                       ,@(remove-batch-control-option options))
 
        ,(sql-code-block pg-dst-db-conn :post after "after load"))))
@@ -140,7 +142,7 @@
     (destructuring-bind (pg-src-db-uri
                          pg-dst-db-uri
                          &key
-                         gucs casts before after options
+                         gucs casts before after after-schema options
                          alter-table alter-schema
                          including excluding decoding)
         source
@@ -152,6 +154,7 @@
                                                :casts casts
                                                :before before
                                                :after after
+                                               :after-schema after-schema
                                                :options options
                                                :alter-table alter-table
                                                :alter-schema alter-schema
diff --git a/src/parsers/command-sql-block.lisp b/src/parsers/command-sql-block.lisp
index dba0a4b..e99bd07 100644
--- a/src/parsers/command-sql-block.lisp
+++ b/src/parsers/command-sql-block.lisp
@@ -58,17 +58,26 @@
     (bind (((_ _ sql-list-of-list) after))
       (cons :after (apply #'append sql-list-of-list)))))
 
+(defrule after-schema (and kw-after kw-create kw-schema
+                           (+ (or load-do load-execute)))
+  (:lambda (after)
+    (bind (((_ _ _ sql-list-of-list) after))
+      (cons :after-schema (apply #'append sql-list-of-list)))))
+
 (defun sql-code-block (pgconn section commands label)
   "Return lisp code to run COMMANDS against DBNAME, updating STATE."
   (when commands
-    `(with-stats-collection (,label
-                             :dbname ,(db-name pgconn)
-                             :section ,section
-                             :use-result-as-read t
-                             :use-result-as-rows t)
-       (log-message :notice "Executing SQL block for ~a" ,label)
-       (with-pgsql-transaction (:pgconn ,pgconn)
-	 (loop for command in ',commands
-	    do
-	      (pgsql-execute command :client-min-messages :error)
-            counting command)))))
+    `(execute-sql-code-block ,pgconn ,section ',commands ,label)))
+
+(defun execute-sql-code-block (pgconn section commands label)
+  "Exceute given SQL commands."
+  (with-stats-collection (label
+                          :dbname (db-name pgconn)
+                          :section section
+                          :use-result-as-read t
+                          :use-result-as-rows t)
+    (log-message :notice "Executing SQL block for ~a" label)
+    (with-pgsql-transaction (:pgconn pgconn)
+      (loop :for command :in commands
+         :do (pgsql-execute command :client-min-messages :error)
+         :counting command))))

From 381ac9d1a2378fda9317fdbae319e7cc642d3a79 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 10 Oct 2018 14:15:28 -0700
Subject: [PATCH 15/69] Add initial support for Citus distribution from
 pgloader.

The idea is for pgloader to tweak the schema from a description of the
sharding model, the distribute clause. Here's an example of such a clause:

   distribute company using id
   distribute campaign using company_id
   distribute ads using company_id from campaign
   distribute clicks using company_id from ads, campaign

Given such commands, pgloader adds the distribution key to the table when
needed, to the primary key definition of the table, and also to the foreign
keys that are pointing to the changed primary key.

Then when SELECTing the data from the source database, the idea is for
pgloader to automatically JOIN the base table with the source table in which
the distribution key is found, in case it was just added in the schema.

Finally, pgloader also calls the following Citus commands:

  SELECT create_distributed_table('company', 'id');
  SELECT create_distributed_table('campaign', 'company_id');
  SELECT create_distributed_table('ads', 'company_id');
  SELECT create_distributed_table('clicks', 'company_id');
---
 pgloader.asd                        |  3 +
 src/load/migrate-database.lisp      | 29 ++++++++--
 src/package.lisp                    | 10 ++++
 src/parsers/command-distribute.lisp | 48 ++++++++++++++++
 src/parsers/command-keywords.lisp   |  3 +
 src/parsers/command-pgsql.lisp      |  8 ++-
 src/pgsql/pgsql-ddl-citus.lisp      | 18 ++++++
 src/pgsql/pgsql-schema.lisp         |  4 +-
 src/pgsql/sql/list-all-indexes.sql  |  5 ++
 src/utils/catalog.lisp              |  3 +-
 src/utils/citus.lisp                | 89 +++++++++++++++++++++++++++++
 11 files changed, 209 insertions(+), 11 deletions(-)
 create mode 100644 src/parsers/command-distribute.lisp
 create mode 100644 src/pgsql/pgsql-ddl-citus.lisp
 create mode 100644 src/utils/citus.lisp

diff --git a/pgloader.asd b/pgloader.asd
index 12b1684..3d12ebd 100644
--- a/pgloader.asd
+++ b/pgloader.asd
@@ -69,6 +69,7 @@
                        (:file "quoting"     :depends-on ("utils"))
                        (:file "catalog"     :depends-on ("quoting"))
                        (:file "alter-table" :depends-on ("catalog"))
+                       (:file "citus"       :depends-on ("catalog"))
 
                        ;; State, monitoring, reporting
                        (:file "reject"  :depends-on ("state"))
@@ -95,6 +96,7 @@
                       :components
                       ((:file "connection")
                        (:file "pgsql-ddl")
+                       (:file "pgsql-ddl-citus")
                        (:file "pgsql-schema")
                        (:file "merge-catalogs" :depends-on ("pgsql-schema"))
                        (:file "pgsql-trigger")
@@ -239,6 +241,7 @@
                        (:file "command-cast-rules")
                        (:file "command-materialize-views")
                        (:file "command-alter-table")
+                       (:file "command-distribute")
                        (:file "command-mysql")
                        (:file "command-including-like")
                        (:file "command-mssql")
diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index 28f57c9..129ca5b 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -19,7 +19,8 @@
                                      set-table-oids
                                      materialize-views
                                      foreign-keys
-                                     include-drop)
+                                     include-drop
+                                     distribute)
   "Prepare the target PostgreSQL database: create tables casting datatypes
    from the MySQL definitions, prepare index definitions and create target
    tables for materialized views.
@@ -114,7 +115,15 @@
                                                        :use-result-as-rows t)
         (create-views catalog
                       :include-drop include-drop
-                      :client-min-messages :error))))
+                      :client-min-messages :error)))
+
+    ;; Citus Support
+    (when distribute
+      (with-stats-collection ("Citus Distribute Tables" :section :pre)
+        (let ((citus-sql
+               (loop :for rule :in distribute
+                  :collect (format-create-sql rule))))
+          (pgsql-execute citus-sql :client-min-messages :notice)))))
 
   ;; log the catalog we just fetched and (maybe) merged
   (log-message :data "CATALOG: ~s" catalog))
@@ -213,9 +222,10 @@
                                :reset-sequences reset-sequences))))
 
 
-(defun process-catalog (copy catalog &key alter-table alter-schema)
+(defun process-catalog (copy catalog &key alter-table alter-schema distribute)
   "Do all the PostgreSQL catalog tweaking here: casts, index WHERE clause
    rewriting, pgloader level alter schema and alter table commands."
+
   ;; cast the catalog into something PostgreSQL can work on
   (cast catalog)
 
@@ -229,7 +239,11 @@
   ;; if asked, now alter the catalog with given rules: the alter-table
   ;; keyword parameter actually contains a set of alter table rules.
   (when alter-table
-    (alter-table catalog alter-table)))
+    (alter-table catalog alter-table))
+
+  ;; we also support schema changes necessary for Citus distribution
+  (when distribute
+    (pgloader.catalog::citus-distribute-schema catalog distribute)))
 
 
 ;;;
@@ -256,6 +270,7 @@
 			    (foreign-keys     t)
                             (reindex          nil)
                             (after-schema     nil)
+                            distribute
 			    only-tables
 			    including
 			    excluding
@@ -326,7 +341,8 @@
     ;; that's CAST rules, index WHERE clause rewriting and ALTER commands
     (process-catalog copy catalog
                      :alter-table alter-table
-                     :alter-schema alter-schema)
+                     :alter-schema alter-schema
+                     :distribute distribute)
 
     ;; if asked, first drop/create the tables on the PostgreSQL side
     (handler-case
@@ -341,7 +357,8 @@
                                   :include-drop include-drop
                                   :foreign-keys foreign-keys
                                   :set-table-oids set-table-oids
-                                  :materialize-views materialize-views)
+                                  :materialize-views materialize-views
+                                  :distribute distribute)
 
           ;; if there's an AFTER SCHEMA DO/EXECUTE command, now is the time
           ;; to run it.
diff --git a/src/package.lisp b/src/package.lisp
index bc9abfe..8d32d64 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -98,6 +98,7 @@
            #:table-index-list
            #:table-fkey-list
            #:table-trigger-list
+           #:table-citus-rule
 
            #:extension-name
            #:extension-schema
@@ -208,6 +209,15 @@
            #:match-rule-action
            #:match-rule-args
 
+           #:citus-reference-table
+           #:citus-distributed-table
+           #:make-citus-reference-table
+           #:make-citus-distributed-table
+           #:citus-reference-table-table
+           #:citus-distributed-table-table
+           #:citus-distributed-table-using
+           #:citus-distributed-table-from
+
            #:format-table-name))
 
 (defpackage #:pgloader.state
diff --git a/src/parsers/command-distribute.lisp b/src/parsers/command-distribute.lisp
new file mode 100644
index 0000000..0a642b7
--- /dev/null
+++ b/src/parsers/command-distribute.lisp
@@ -0,0 +1,48 @@
+#|
+   distribute billers using id
+   distribute bills using biller_id
+   distribute receivable_accounts using biller_id
+   distribute payments using biller_id
+
+   distribute splits using biller_id
+                      from receivable_accounts
+
+   distribute ach_accounts as reference table
+|#
+
+(in-package :pgloader.parser)
+
+(defun create-table-from-dsn-table-name (dsn-table-name
+                                         &optional (schema-name "public"))
+  (let ((table (create-table (cdr (second dsn-table-name)))))
+    (unless (table-schema table)
+      (setf (table-schema table)
+            (make-schema :catalog nil
+                         :source-name schema-name
+                         :name (apply-identifier-case schema-name))))
+    table))
+
+(defrule distribute-reference (and kw-distribute dsn-table-name
+                                   kw-as kw-reference kw-table)
+  (:lambda (d-r)
+    (make-citus-reference-table :table (create-table-from-dsn-table-name d-r))))
+
+(defrule distribute-using (and kw-distribute dsn-table-name
+                               kw-using maybe-quoted-namestring)
+  (:lambda (d-u)
+    (make-citus-distributed-table :table (create-table-from-dsn-table-name d-u)
+                                  :using (make-column :name (fourth d-u)))))
+
+(defrule distribute-using-from (and kw-distribute dsn-table-name
+                                    kw-using maybe-quoted-namestring
+                                    kw-from (+ maybe-quoted-namestring))
+  (:lambda (d-u-f)
+    (make-citus-distributed-table :table (create-table-from-dsn-table-name d-u-f)
+                                  :using (make-column :name (fourth d-u-f))
+                                  :from (apply #'create-table (sixth d-u-f)))))
+
+(defrule distribute-commands (+ (or distribute-using-from
+                                    distribute-using
+                                    distribute-reference))
+  (:lambda (commands)
+    (cons :distribute commands)))
diff --git a/src/parsers/command-keywords.lisp b/src/parsers/command-keywords.lisp
index a2454cd..9a4dcea 100644
--- a/src/parsers/command-keywords.lisp
+++ b/src/parsers/command-keywords.lisp
@@ -103,6 +103,9 @@
   (def-keyword-rule "trim")
   (def-keyword-rule "unquoted")
   (def-keyword-rule "delimiter")
+  ;; option for Citus support
+  (def-keyword-rule "distribute")
+  (def-keyword-rule "reference")
   ;; option for MySQL imports
   (def-keyword-rule "schema")
   (def-keyword-rule "schemas")
diff --git a/src/parsers/command-pgsql.lisp b/src/parsers/command-pgsql.lisp
index 6599c4f..f5f7996 100644
--- a/src/parsers/command-pgsql.lisp
+++ b/src/parsers/command-pgsql.lisp
@@ -80,7 +80,8 @@
                                             decoding-tables-as
                                             before-load
                                             after-schema
-                                            after-load))
+                                            after-load
+                                            distribute-commands))
   (:lambda (clauses-list)
     (alexandria:alist-plist clauses-list)))
 
@@ -109,6 +110,7 @@
                                            alter-table alter-schema
                                            ((:including incl))
                                            ((:excluding excl))
+                                           distribute
                                            &allow-other-keys)
   `(lambda ()
      (let* ((*default-cast-rules* ',*pgsql-default-cast-rules*)
@@ -133,6 +135,7 @@
                       :set-table-oids t
                       :on-error-stop on-error-stop
                       :after-schema ',after-schema
+                      :distribute ',distribute
                       ,@(remove-batch-control-option options))
 
        ,(sql-code-block pg-dst-db-conn :post after "after load"))))
@@ -143,7 +146,7 @@
                          pg-dst-db-uri
                          &key
                          gucs casts before after after-schema options
-                         alter-table alter-schema
+                         alter-table alter-schema distribute
                          including excluding decoding)
         source
       (cond (*dry-run*
@@ -158,6 +161,7 @@
                                                :options options
                                                :alter-table alter-table
                                                :alter-schema alter-schema
+                                               :distribute distribute
                                                :including including
                                                :excluding excluding
                                                :decoding decoding))))))
diff --git a/src/pgsql/pgsql-ddl-citus.lisp b/src/pgsql/pgsql-ddl-citus.lisp
new file mode 100644
index 0000000..f74ade5
--- /dev/null
+++ b/src/pgsql/pgsql-ddl-citus.lisp
@@ -0,0 +1,18 @@
+;;;
+;;; PostgreSQL Citus support for calling functions.
+;;;
+
+(in-package :pgloader.pgsql)
+
+(defmethod format-create-sql ((rule citus-reference-table)
+                              &key (stream nil) if-not-exists)
+  (declare (ignore if-not-exists))
+  (format stream "SELECT create_reference_table('~a');"
+          (format-table-name (citus-reference-table-table rule))))
+
+(defmethod format-create-sql ((rule citus-distributed-table)
+                              &key (stream nil) if-not-exists)
+  (declare (ignore if-not-exists))
+  (format stream "SELECT create_distributed_table('~a', '~a');"
+          (format-table-name (citus-distributed-table-table rule))
+          (column-name (citus-distributed-table-using rule))))
diff --git a/src/pgsql/pgsql-schema.lisp b/src/pgsql/pgsql-schema.lisp
index 9ea3d59..72da2ac 100644
--- a/src/pgsql/pgsql-schema.lisp
+++ b/src/pgsql/pgsql-schema.lisp
@@ -198,7 +198,7 @@
   (loop
      :for (schema-name name oid
                        table-schema table-name
-                       primary unique sql conname condef)
+                       primary unique cols sql conname condef)
      :in (query nil
                 (format nil
                         (sql "/pgsql/list-all-indexes.sql")
@@ -222,7 +222,7 @@
                              :table table
                              :primary primary
                              :unique unique
-                             :columns nil
+                             :columns (split-sequence:split-sequence #\, cols)
                              :sql sql
                              :conname (unless (eq :null conname)
                                         (ensure-quoted conname))
diff --git a/src/pgsql/sql/list-all-indexes.sql b/src/pgsql/sql/list-all-indexes.sql
index bfffbf7..1f655fa 100644
--- a/src/pgsql/sql/list-all-indexes.sql
+++ b/src/pgsql/sql/list-all-indexes.sql
@@ -9,6 +9,11 @@
          r.relname,
          indisprimary,
          indisunique,
+         (select string_agg(attname, ',')
+            from pg_attribute
+           where attrelid = r.oid
+             and array[attnum::integer] <@ indkey::integer[]
+         ) as cols,
          pg_get_indexdef(indexrelid),
          c.conname,
          pg_get_constraintdef(c.oid)
diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp
index c61ce8f..46ddbc6 100644
--- a/src/utils/catalog.lisp
+++ b/src/utils/catalog.lisp
@@ -50,7 +50,8 @@
 (defstruct table source-name name schema oid comment storage-parameter-list
            ;; field is for SOURCE
            ;; column is for TARGET
-           field-list column-list index-list fkey-list trigger-list)
+           ;; citus is an extra slot for citus support
+           field-list column-list index-list fkey-list trigger-list citus-rule)
 
 ;;;
 ;;; When migrating from PostgreSQL to PostgreSQL we might have to install
diff --git a/src/utils/citus.lisp b/src/utils/citus.lisp
new file mode 100644
index 0000000..b080afb
--- /dev/null
+++ b/src/utils/citus.lisp
@@ -0,0 +1,89 @@
+;;;
+;;; Citus support in pgloader allows to declare what needs to change in the
+;;; source schema in terms of Citus concepts: reference and distributed
+;;; table.
+;;;
+
+#|
+   distribute billers using id
+   distribute bills using biller_id
+   distribute receivable_accounts using biller_id
+   distribute payments using biller_id
+
+   distribute splits using biller_id
+                      from receivable_accounts
+
+   distribute ach_accounts as reference table
+|#
+
+
+(in-package #:pgloader.catalog)
+
+(defstruct citus-reference-table table)
+(defstruct citus-distributed-table table using from)
+
+(defun citus-distribute-schema (catalog distribution-rules)
+  "Distribute a CATALOG with given user provided DISTRIBUTION-RULES."
+  (loop :for rule :in distribution-rules
+     :do (let ((table (citus-find-table catalog (citus-rule-table rule))))
+           (apply-citus-rule rule table))))
+
+(defun citus-rule-table (rule)
+  (etypecase rule
+    (citus-reference-table (citus-reference-table-table rule))
+    (citus-distributed-table (citus-distributed-table-table rule))))
+
+(defun citus-find-table (catalog table)
+  (let* ((table-name  (table-name table))
+         (schema-name (schema-name (table-schema table))))
+    (find-table (find-schema catalog schema-name) table-name)))
+
+(defgeneric apply-citus-rule (rule table)
+  (:documentation "Apply a Citus distribution RULE to given TABLE."))
+
+(defmethod apply-citus-rule ((rule citus-reference-table) (table table))
+  ;; for a reference table, we have nothing to do really.
+  (setf (table-citus-rule table) rule))
+
+(defmethod apply-citus-rule ((rule citus-distributed-table) (table table))
+  (setf (table-citus-rule table) rule)
+
+  ;; ok now we need to check if the USING column exists or if we need to add
+  ;; it to our model
+  (let ((column (find (column-name (citus-distributed-table-using rule))
+                      (table-field-list table)
+                      :test #'string=
+                      :key #'column-name)))
+    (assert (not (null column)))
+
+    (if column
+
+        ;; add it to the PKEY definition, in first position
+        (let* ((index  (find-if #'index-primary (table-index-list table)))
+               (idxcol (find (column-name (citus-distributed-table-using rule))
+                             (index-columns index)
+                             :test #'string=)))
+          (assert (not (null index)))
+          (unless idxcol
+            ;; add a new column
+            (push (column-name (citus-distributed-table-using rule))
+                  (index-columns index))
+            ;; now remove origin schema sql and condef, we need to redo them
+            (setf (index-sql index) nil)
+            (setf (index-condef index) nil)))
+
+        ;; the column doesn't exist, we need to find it in the :FROM rule
+        (let* ((from-table
+                (citus-find-table (schema-catalog (table-schema table))
+                                  (citus-distributed-table-from rule)))
+               (column-definition
+                (find (column-name (citus-distributed-table-using rule))
+                      (table-field-list from-table)
+                      :test #'string=
+                      :key #'column-name)))
+          (assert (not (null from-table)))
+          (push (make-column :name (column-name column-definition)
+                             :type-name (column-type-name column-definition)
+                             :nullable (column-nullable column-definition)
+                             :transform (column-transform column-definition))
+                (table-column-list table))))))

From 760763be4bb4cc7b45130727a2c303a79943c112 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 10 Oct 2018 15:44:21 -0700
Subject: [PATCH 16/69] Use the constraint name when we have it.

That's important for Citus, which doesn't know how to ADD a constraint
without a name.
---
 src/pgsql/pgsql-ddl.lisp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pgsql/pgsql-ddl.lisp b/src/pgsql/pgsql-ddl.lisp
index 580618e..662bc95 100644
--- a/src/pgsql/pgsql-ddl.lisp
+++ b/src/pgsql/pgsql-ddl.lisp
@@ -204,8 +204,9 @@
                 ;; don't use the index schema name here, PostgreSQL doesn't
                 ;; like it, might be implicit from the table's schema
                 ;; itself...
-                "ALTER TABLE ~a ADD ~a USING INDEX ~a;"
+                "ALTER TABLE ~a ADD~@[ CONSTRAINT ~a~] ~a USING INDEX ~a;"
                 (format-table-name table)
+                (index-conname index)
                 (cond ((index-primary index) "PRIMARY KEY")
                       ((index-unique index) "UNIQUE"))
                 index-name)))

From 8112a9b54fc8124ec849324803ebfdb67c1eda2d Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 16 Oct 2018 18:53:41 +0200
Subject: [PATCH 17/69] Improve Citus Distribution Support.

With this patch it's now actually possible to backfill the data on the fly
when using the new "distribute" commands. The schema is modified to add the
distribution key where specified, and changes to the primary and foreign
keys happen automatically. Then a JOIN is generated to get the data directly
during the COPY streaming to the Citus cluster.
---
 src/load/migrate-database.lisp      |  20 ++--
 src/package.lisp                    |   4 +
 src/parsers/command-distribute.lisp |  29 ++++-
 src/pgsql/pgsql-create-schema.lisp  |  11 ++
 src/pgsql/pgsql-schema.lisp         |  13 ++-
 src/pgsql/sql/list-all-fkeys.sql    |   7 +-
 src/sources/pgsql/pgsql.lisp        |  30 ++++--
 src/utils/catalog.lisp              |   2 +-
 src/utils/citus.lisp                | 160 +++++++++++++++++++++++-----
 9 files changed, 234 insertions(+), 42 deletions(-)

diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index 129ca5b..d99efbf 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -115,15 +115,21 @@
                                                        :use-result-as-rows t)
         (create-views catalog
                       :include-drop include-drop
-                      :client-min-messages :error)))
+                      :client-min-messages :error))))
 
-    ;; Citus Support
-    (when distribute
+  ;; Citus Support
+  ;;
+  ;; We need a separate transaction here in some cases, because of the
+  ;; distributed DDL support from Citus, to avoid the following error:
+  ;;
+  ;; ERROR Database error 25001: cannot establish a new connection for
+  ;; placement 2299, since DDL has been executed on a connection that is in
+  ;; use
+  ;;
+  (when distribute
+    (with-pgsql-transaction (:pgconn (target-db copy))
       (with-stats-collection ("Citus Distribute Tables" :section :pre)
-        (let ((citus-sql
-               (loop :for rule :in distribute
-                  :collect (format-create-sql rule))))
-          (pgsql-execute citus-sql :client-min-messages :notice)))))
+        (create-distributed-table distribute))))
 
   ;; log the catalog we just fetched and (maybe) merged
   (log-message :data "CATALOG: ~s" catalog))
diff --git a/src/package.lisp b/src/package.lisp
index 8d32d64..e1e74bf 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -217,6 +217,8 @@
            #:citus-distributed-table-table
            #:citus-distributed-table-using
            #:citus-distributed-table-from
+           #:citus-format-sql-select
+           #:citus-backfill-table-p
 
            #:format-table-name))
 
@@ -433,6 +435,8 @@
            #:reset-sequences
            #:comment-on-tables-and-columns
 
+           #:create-distributed-table
+
            ;; finalizing catalogs support (redshift and other variants)
            #:finalize-catalogs
            #:adjust-data-types
diff --git a/src/parsers/command-distribute.lisp b/src/parsers/command-distribute.lisp
index 0a642b7..6ae0b66 100644
--- a/src/parsers/command-distribute.lisp
+++ b/src/parsers/command-distribute.lisp
@@ -33,13 +33,38 @@
     (make-citus-distributed-table :table (create-table-from-dsn-table-name d-u)
                                   :using (make-column :name (fourth d-u)))))
 
+;;;
+;;; The namestring rule allows for commas and we use them as a separator
+;;; here, so we need to have our own table name parsing. That's a bummer,
+;;; maybe we should revisit the whole table names parsing code?
+;;;
+(defrule distribute-from-tablename
+    (or double-quoted-namestring
+        quoted-namestring
+        (and (or #\_ (alpha-char-p character))
+             (* (or (alpha-char-p character)
+                    (digit-char-p character)))))
+  (:text t))
+
+(defrule maybe-qualified-dist-from-table-name
+    (and distribute-from-tablename (? (and "." distribute-from-tablename)))
+  (:lambda (name)
+    (if (second name)
+        (cons (first name) (second (second name)))
+        (cons "public" (first name)))))
+
+(defrule distribute-from-list (+ (and maybe-qualified-dist-from-table-name
+                                      (? (and "," ignore-whitespace))))
+  (:lambda (from-list)
+    (mapcar #'first from-list)))
+
 (defrule distribute-using-from (and kw-distribute dsn-table-name
                                     kw-using maybe-quoted-namestring
-                                    kw-from (+ maybe-quoted-namestring))
+                                    kw-from distribute-from-list)
   (:lambda (d-u-f)
     (make-citus-distributed-table :table (create-table-from-dsn-table-name d-u-f)
                                   :using (make-column :name (fourth d-u-f))
-                                  :from (apply #'create-table (sixth d-u-f)))))
+                                  :from (mapcar #'create-table (sixth d-u-f)))))
 
 (defrule distribute-commands (+ (or distribute-using-from
                                     distribute-using
diff --git a/src/pgsql/pgsql-create-schema.lisp b/src/pgsql/pgsql-create-schema.lisp
index e6154e5..b06c31d 100644
--- a/src/pgsql/pgsql-create-schema.lisp
+++ b/src/pgsql/pgsql-create-schema.lisp
@@ -465,3 +465,14 @@ $$; " tables)))
                                  (column-name column)
                                  quote (column-comment column) quote)))))
     (pgsql-execute-with-timing section label sql-list)))
+
+
+
+;;;
+;;; Citus Disitribution support
+;;;
+(defun create-distributed-table (distribute-rules)
+  (let ((citus-sql
+         (loop :for rule :in distribute-rules
+            :collect (format-create-sql rule))))
+    (pgsql-execute citus-sql)))
diff --git a/src/pgsql/pgsql-schema.lisp b/src/pgsql/pgsql-schema.lisp
index 72da2ac..59f33f0 100644
--- a/src/pgsql/pgsql-schema.lisp
+++ b/src/pgsql/pgsql-schema.lisp
@@ -235,7 +235,7 @@
   "Get the list of PostgreSQL index definitions per table."
   (loop
      :for (schema-name table-name fschema-name ftable-name
-                       conoid conname condef
+                       conoid pkeyoid conname condef
                        cols fcols
                        updrule delrule mrule deferrable deferred)
      :in (query nil
@@ -277,9 +277,13 @@
                   (table    (find-table schema table-name))
                   (fschema  (find-schema catalog fschema-name))
                   (ftable   (find-table fschema ftable-name))
+                  (pkey     (find pkeyoid (table-index-list ftable)
+                                  :test #'=
+                                  :key #'index-oid))
                   (fk
                    (make-fkey :name (ensure-quoted conname)
                               :oid conoid
+                              :pkey pkey
                               :condef condef
                               :table table
                               :columns (split-sequence:split-sequence #\, cols)
@@ -290,6 +294,13 @@
                               :match-rule (pg-fk-match-rule-to-match-clause mrule)
                               :deferrable deferrable
                               :initially-deferred deferred)))
+             ;; add the fkey reference to the pkey index too
+             (unless (find conoid
+                           (index-fk-deps pkey)
+                           :test #'=
+                           :key #'fkey-oid)
+               (push-to-end fk (index-fk-deps pkey)))
+             ;; check that both tables are in pgloader's scope
              (if (and table ftable)
                  (add-fkey table fk)
                  (log-message :notice "Foreign Key ~a is ignored, one of its table is missing from pgloader table selection"
diff --git a/src/pgsql/sql/list-all-fkeys.sql b/src/pgsql/sql/list-all-fkeys.sql
index 8ebe8b5..bc666d1 100644
--- a/src/pgsql/sql/list-all-fkeys.sql
+++ b/src/pgsql/sql/list-all-fkeys.sql
@@ -7,7 +7,9 @@
 --         excluding (ftable)
 --         filter-list-to-where-clause for excluding
  select n.nspname, c.relname, nf.nspname, cf.relname as frelname,
-        r.oid, conname,
+        r.oid,
+        d.refobjid as pkeyoid,
+        conname,
         pg_catalog.pg_get_constraintdef(r.oid, true) as condef,
         (select string_agg(attname, ',')
            from pg_attribute
@@ -26,6 +28,9 @@
         JOIN pg_namespace n on c.relnamespace = n.oid
         JOIN pg_class cf on r.confrelid = cf.oid
         JOIN pg_namespace nf on cf.relnamespace = nf.oid
+        JOIN pg_depend d on d.classid = 'pg_constraint'::regclass
+                        and d.objid = r.oid
+                        and d.refobjsubid = 0
    where r.contype = 'f'
          AND c.relkind in ('r', 'f', 'p')
          AND cf.relkind in ('r', 'f', 'p')
diff --git a/src/sources/pgsql/pgsql.lisp b/src/sources/pgsql/pgsql.lisp
index 8a45a58..da6d611 100644
--- a/src/sources/pgsql/pgsql.lisp
+++ b/src/sources/pgsql/pgsql.lisp
@@ -41,12 +41,30 @@
                       (funcall process-row-fn row)))))))
 
     (with-pgsql-connection ((source-db pgsql))
-      (let* ((cols   (mapcar #'column-name (fields pgsql)))
-             (sql
-              (format nil "SELECT ~{~s::text~^, ~} FROM ~s.~s" cols
-                      (schema-source-name (table-schema (source pgsql)))
-                      (table-source-name (source pgsql)))))
-        (cl-postgres:exec-query pomo:*database* sql map-reader)))))
+      (if (citus-backfill-table-p (target pgsql))
+          ;;
+          ;; SELECT dist_key, * FROM source JOIN dist ON ...
+          ;;
+          (let ((sql (citus-format-sql-select (source pgsql) (target pgsql))))
+            (log-message :sql "~a" sql)
+            (cl-postgres:exec-query pomo:*database* sql map-reader))
+
+          ;;
+          ;; No JOIN to add to backfill data in the SQL query here.
+          ;;
+          (let* ((cols   (mapcar #'column-name (fields pgsql)))
+                 (sql
+                  (format nil
+                          "SELECT ~{~s::text~^, ~} FROM ~s.~s"
+                          cols
+                          (schema-source-name (table-schema (source pgsql)))
+                          (table-source-name (source pgsql)))))
+            (log-message :sql "~a" sql)
+            (cl-postgres:exec-query pomo:*database* sql map-reader))))))
+
+(defmethod copy-column-list ((pgsql copy-pgsql))
+  "We are sending the data in the MySQL columns ordering here."
+  (mapcar #'column-name (fields pgsql)))
 
 (defmethod fetch-metadata ((pgsql copy-pgsql)
                            (catalog catalog)
diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp
index 46ddbc6..6b29aad 100644
--- a/src/utils/catalog.lisp
+++ b/src/utils/catalog.lisp
@@ -78,7 +78,7 @@
 ;;; Index and Foreign Keys
 ;;;
 (defstruct fkey
-  name oid table columns foreign-table foreign-columns condef
+  name oid table columns pkey foreign-table foreign-columns condef
   update-rule delete-rule match-rule deferrable initially-deferred)
 
 ;;;
diff --git a/src/utils/citus.lisp b/src/utils/citus.lisp
index b080afb..0cdcc9b 100644
--- a/src/utils/citus.lisp
+++ b/src/utils/citus.lisp
@@ -48,42 +48,154 @@
 (defmethod apply-citus-rule ((rule citus-distributed-table) (table table))
   (setf (table-citus-rule table) rule)
 
+  ;;
+  ;; Replace the TABLE placeholders in the :FROM slot of the rule with the
+  ;; tables from the catalogs.
+  ;;
+  (when (citus-distributed-table-from rule)
+    (let ((catalog (schema-catalog (table-schema table))))
+     (map-into (citus-distributed-table-from rule)
+               (lambda (from) (citus-find-table catalog from))
+               (citus-distributed-table-from rule))))
+
   ;; ok now we need to check if the USING column exists or if we need to add
   ;; it to our model
   (let ((column (find (column-name (citus-distributed-table-using rule))
                       (table-field-list table)
                       :test #'string=
                       :key #'column-name)))
-    (assert (not (null column)))
-
     (if column
 
         ;; add it to the PKEY definition, in first position
-        (let* ((index  (find-if #'index-primary (table-index-list table)))
-               (idxcol (find (column-name (citus-distributed-table-using rule))
-                             (index-columns index)
-                             :test #'string=)))
-          (assert (not (null index)))
-          (unless idxcol
-            ;; add a new column
-            (push (column-name (citus-distributed-table-using rule))
-                  (index-columns index))
-            ;; now remove origin schema sql and condef, we need to redo them
-            (setf (index-sql index) nil)
-            (setf (index-condef index) nil)))
+        (add-column-to-pkey table
+                            (column-name (citus-distributed-table-using rule)))
 
-        ;; the column doesn't exist, we need to find it in the :FROM rule
-        (let* ((from-table
-                (citus-find-table (schema-catalog (table-schema table))
-                                  (citus-distributed-table-from rule)))
+        ;; The column doesn't exist, we need to find it in the :FROM rule's
+        ;; list. The :FROM slot of the rule is a list of tables to
+        ;; "traverse" when backfilling the data. The list follows the
+        ;; foreign-key relationships from TABLE to the source of the
+        ;; distribution key.
+        ;;
+        ;; To find the column definition to add to the current TABLE, look
+        ;; it up in the last entry of the FROM rule's list.
+        (let* ((last-from-rule (car (last (citus-distributed-table-from rule))))
                (column-definition
                 (find (column-name (citus-distributed-table-using rule))
-                      (table-field-list from-table)
+                      (table-field-list last-from-rule)
                       :test #'string=
-                      :key #'column-name)))
-          (assert (not (null from-table)))
-          (push (make-column :name (column-name column-definition)
+                      :key #'column-name))
+               (new-column
+                (make-column :name (column-name column-definition)
                              :type-name (column-type-name column-definition)
                              :nullable (column-nullable column-definition)
-                             :transform (column-transform column-definition))
-                (table-column-list table))))))
+                             :transform (column-transform column-definition))))
+          ;;
+          ;; Here also we need to add the new column to the PKEY definition,
+          ;; in first position.
+          ;;
+          (add-column-to-pkey table (column-name new-column))
+
+          ;;
+          ;; We need to backfill the distribution key in the data, which
+          ;; we're implementing with a JOIN when we SELECT from the source
+          ;; table. We add the new field here.
+          ;;
+          (push new-column (table-field-list table))
+          (push new-column (table-column-list table))))))
+
+
+(defun add-column-to-pkey (table column-name)
+  "Add COLUMN in the first position of the TABLE's primary key index."
+  (let* ((index  (find-if #'index-primary (table-index-list table)))
+         (idxcol (find column-name (index-columns index) :test #'string=)))
+    (assert (not (null index)))
+    (unless idxcol
+      ;; add a new column
+      (push column-name (index-columns index))
+      ;; now remove origin schema sql and condef, we need to redo them
+      (setf (index-sql index) nil)
+      (setf (index-condef index) nil)
+
+      ;; now tweak the fkey definitions that are using this index
+      (loop :for fkey :in (index-fk-deps index)
+         :do (push column-name (fkey-columns fkey))
+         :do (push column-name (fkey-foreign-columns fkey))
+         :do (setf (fkey-condef fkey) nil)))))
+
+
+(defun format-citus-join-clause (table distribution-rule)
+  "Format a JOIN clause to backfill the distribution key data in tables that
+   are referencing (even indirectly) the main distribution table."
+  (with-output-to-string (s)
+    (loop :for current-table := table :then rel
+       :for rel :in (citus-distributed-table-from distribution-rule)
+       :do (let* ((fkey
+                   (find (ensure-unquoted (table-name rel))
+                         (table-fkey-list current-table)
+                         :test #'string=
+                         :key (lambda (fkey)
+                                (ensure-unquoted
+                                 (table-name (fkey-foreign-table fkey))))))
+                  (ftable (fkey-foreign-table fkey)))
+             (format s
+                     " JOIN ~s.~s"
+                     (schema-source-name (table-schema ftable))
+                     (table-source-name ftable))
+             ;;
+             ;; Skip the first column in the fkey definition, that's the
+             ;; distribution key that was just added by pgloader: we don't
+             ;; have it on the source database, we are going to create it on
+             ;; the target database.
+             ;;
+             (loop :for first := t :then nil
+                :for c :in (cdr (fkey-columns fkey))
+                :for fc :in (cdr (fkey-foreign-columns fkey))
+                :do (format s
+                            " ~:[AND~;ON~] ~a.~a = ~a.~a"
+                            first
+                            (table-source-name (fkey-table fkey))
+                            c
+                            (table-source-name (fkey-foreign-table fkey))
+                            fc))))))
+
+(defun citus-format-sql-select (source-table target-table)
+  "Return the SQL statement to use to fetch data from the COPY context,
+   including backfilling the distribution key in related tables."
+
+  ;;
+  ;; SELECT from.id, id, ... from source join from-table ...
+  ;;
+  ;; So we must be careful to prefix the column names with the
+  ;; proper table name, because of the join(s), and the first column
+  ;; in the output is taken from the main FROM table (the last one
+  ;; in the rule).
+  ;;
+  (let* ((last-from-rule
+          (car (last (citus-distributed-table-from
+                      (table-citus-rule target-table)))))
+         (cols
+          (append (list
+                   (format nil "~a.~a"
+                           (table-name last-from-rule)
+                           (column-name (first (table-field-list source-table)))))
+                  (mapcar (lambda (field)
+                            (format nil "~a.~a"
+                                    (table-name source-table)
+                                    (column-name field)))
+                          (rest (table-field-list source-table)))))
+         (joins
+          (format-citus-join-clause source-table
+                                    (table-citus-rule target-table))))
+    (format nil
+            "SELECT ~{~a::text~^, ~} FROM ~s.~s ~a"
+            cols
+            (schema-source-name (table-schema source-table))
+            (table-source-name source-table)
+            joins)))
+
+(defun citus-backfill-table-p (table)
+  "Returns non-nil when given TABLE should be backfilled with the
+   distribution key."
+  (and (table-citus-rule table)
+       (typep (table-citus-rule table) 'citus-distributed-table)
+       (not (null (citus-distributed-table-from (table-citus-rule table))))))

From d3b21ac54d090f5d8293b4332309775e820c42b1 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Thu, 18 Oct 2018 15:31:29 +0200
Subject: [PATCH 18/69] Implement automatic discovery of the Citus distribution
 rules.

With this patch, the following distribution rule

   distribute companies using id

is equivalent to the following distribution rule set, given foreign keys in
the source schema:

   distribute companies using id
   distribute campaigns using company_id
   distribute ads using company_id from campaigns
   distribute clicks using company_id from ads, campaigns
   distribute impressions using company_id from ads, campaigns

In the current code (of this patch) pgloader walks the foreign-key
dependency tree and knows how to automatically derive distribution rules
from a single rule and the foreign keys.
---
 src/load/migrate-database.lisp      |  13 +-
 src/package.lisp                    |  33 +++--
 src/parsers/command-distribute.lisp |  12 +-
 src/pgsql/pgsql-ddl-citus.lisp      |  10 +-
 src/utils/catalog.lisp              |   2 +-
 src/utils/citus.lisp                | 210 ++++++++++++++++++++++------
 6 files changed, 212 insertions(+), 68 deletions(-)

diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index d99efbf..0a5452e 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -19,8 +19,7 @@
                                      set-table-oids
                                      materialize-views
                                      foreign-keys
-                                     include-drop
-                                     distribute)
+                                     include-drop)
   "Prepare the target PostgreSQL database: create tables casting datatypes
    from the MySQL definitions, prepare index definitions and create target
    tables for materialized views.
@@ -126,10 +125,10 @@
   ;; placement 2299, since DDL has been executed on a connection that is in
   ;; use
   ;;
-  (when distribute
+  (when (catalog-distribution-rules catalog)
     (with-pgsql-transaction (:pgconn (target-db copy))
       (with-stats-collection ("Citus Distribute Tables" :section :pre)
-        (create-distributed-table distribute))))
+        (create-distributed-table (catalog-distribution-rules catalog)))))
 
   ;; log the catalog we just fetched and (maybe) merged
   (log-message :data "CATALOG: ~s" catalog))
@@ -249,7 +248,8 @@
 
   ;; we also support schema changes necessary for Citus distribution
   (when distribute
-    (pgloader.catalog::citus-distribute-schema catalog distribute)))
+    (setf (catalog-distribution-rules catalog)
+          (citus-distribute-schema catalog distribute))))
 
 
 ;;;
@@ -363,8 +363,7 @@
                                   :include-drop include-drop
                                   :foreign-keys foreign-keys
                                   :set-table-oids set-table-oids
-                                  :materialize-views materialize-views
-                                  :distribute distribute)
+                                  :materialize-views materialize-views)
 
           ;; if there's an AFTER SCHEMA DO/EXECUTE command, now is the time
           ;; to run it.
diff --git a/src/package.lisp b/src/package.lisp
index e1e74bf..0d3e5b6 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -77,6 +77,7 @@
            #:catalog-name
            #:catalog-schema-list
            #:catalog-types-without-btree
+           #:catalog-distribution-rules
 
            #:schema-name
            #:schema-catalog
@@ -209,14 +210,14 @@
            #:match-rule-action
            #:match-rule-args
 
-           #:citus-reference-table
-           #:citus-distributed-table
-           #:make-citus-reference-table
-           #:make-citus-distributed-table
-           #:citus-reference-table-table
-           #:citus-distributed-table-table
-           #:citus-distributed-table-using
-           #:citus-distributed-table-from
+           #:citus-reference-rule
+           #:citus-distributed-rule
+           #:make-citus-reference-rule
+           #:make-citus-distributed-rule
+           #:citus-reference-rule-rule
+           #:citus-distributed-rule-table
+           #:citus-distributed-rule-using
+           #:citus-distributed-rule-from
            #:citus-format-sql-select
            #:citus-backfill-table-p
 
@@ -288,6 +289,16 @@
   (:export #:*queries*
            #:sql))
 
+(defpackage #:pgloader.citus
+  (:use #:cl
+        #:pgloader.params
+        #:pgloader.catalog
+        #:pgloader.quoting
+        #:pgloader.monitor)
+  (:export #:citus-distribute-schema
+           #:citus-format-sql-select
+           #:citus-backfill-table-p))
+
 (defpackage #:pgloader.utils
   (:use #:cl
         #:pgloader.params
@@ -295,7 +306,8 @@
         #:pgloader.quoting
         #:pgloader.catalog
         #:pgloader.monitor
-        #:pgloader.state)
+        #:pgloader.state
+        #:pgloader.citus)
   (:import-from #:alexandria
                 #:appendf
                 #:read-file-into-string)
@@ -326,7 +338,8 @@
   (cl-user::export-inherited-symbols "pgloader.quoting" "pgloader.utils")
   (cl-user::export-inherited-symbols "pgloader.catalog" "pgloader.utils")
   (cl-user::export-inherited-symbols "pgloader.monitor" "pgloader.utils")
-  (cl-user::export-inherited-symbols "pgloader.state"   "pgloader.utils"))
+  (cl-user::export-inherited-symbols "pgloader.state"   "pgloader.utils")
+  (cl-user::export-inherited-symbols "pgloader.citus"   "pgloader.utils"))
 
 
 ;;
diff --git a/src/parsers/command-distribute.lisp b/src/parsers/command-distribute.lisp
index 6ae0b66..70b7c37 100644
--- a/src/parsers/command-distribute.lisp
+++ b/src/parsers/command-distribute.lisp
@@ -25,13 +25,13 @@
 (defrule distribute-reference (and kw-distribute dsn-table-name
                                    kw-as kw-reference kw-table)
   (:lambda (d-r)
-    (make-citus-reference-table :table (create-table-from-dsn-table-name d-r))))
+    (make-citus-reference-rule :table (create-table-from-dsn-table-name d-r))))
 
 (defrule distribute-using (and kw-distribute dsn-table-name
                                kw-using maybe-quoted-namestring)
   (:lambda (d-u)
-    (make-citus-distributed-table :table (create-table-from-dsn-table-name d-u)
-                                  :using (make-column :name (fourth d-u)))))
+    (make-citus-distributed-rule :table (create-table-from-dsn-table-name d-u)
+                                 :using (make-column :name (fourth d-u)))))
 
 ;;;
 ;;; The namestring rule allows for commas and we use them as a separator
@@ -62,9 +62,9 @@
                                     kw-using maybe-quoted-namestring
                                     kw-from distribute-from-list)
   (:lambda (d-u-f)
-    (make-citus-distributed-table :table (create-table-from-dsn-table-name d-u-f)
-                                  :using (make-column :name (fourth d-u-f))
-                                  :from (mapcar #'create-table (sixth d-u-f)))))
+    (make-citus-distributed-rule :table (create-table-from-dsn-table-name d-u-f)
+                                 :using (make-column :name (fourth d-u-f))
+                                 :from (mapcar #'create-table (sixth d-u-f)))))
 
 (defrule distribute-commands (+ (or distribute-using-from
                                     distribute-using
diff --git a/src/pgsql/pgsql-ddl-citus.lisp b/src/pgsql/pgsql-ddl-citus.lisp
index f74ade5..d5cd050 100644
--- a/src/pgsql/pgsql-ddl-citus.lisp
+++ b/src/pgsql/pgsql-ddl-citus.lisp
@@ -4,15 +4,15 @@
 
 (in-package :pgloader.pgsql)
 
-(defmethod format-create-sql ((rule citus-reference-table)
+(defmethod format-create-sql ((rule citus-reference-rule)
                               &key (stream nil) if-not-exists)
   (declare (ignore if-not-exists))
   (format stream "SELECT create_reference_table('~a');"
-          (format-table-name (citus-reference-table-table rule))))
+          (format-table-name (citus-reference-rule-table rule))))
 
-(defmethod format-create-sql ((rule citus-distributed-table)
+(defmethod format-create-sql ((rule citus-distributed-rule)
                               &key (stream nil) if-not-exists)
   (declare (ignore if-not-exists))
   (format stream "SELECT create_distributed_table('~a', '~a');"
-          (format-table-name (citus-distributed-table-table rule))
-          (column-name (citus-distributed-table-using rule))))
+          (format-table-name (citus-distributed-rule-table rule))
+          (column-name (citus-distributed-rule-using rule))))
diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp
index 6b29aad..baca81a 100644
--- a/src/utils/catalog.lisp
+++ b/src/utils/catalog.lisp
@@ -42,7 +42,7 @@
 ;;; Column structures details depend on the specific source type and are
 ;;; implemented in each source separately.
 ;;;
-(defstruct catalog name schema-list types-without-btree)
+(defstruct catalog name schema-list types-without-btree distribution-rules)
 
 (defstruct schema source-name name catalog in-search-path
            table-list view-list extension-list sqltype-list)
diff --git a/src/utils/citus.lisp b/src/utils/citus.lisp
index 0cdcc9b..c1b73b8 100644
--- a/src/utils/citus.lisp
+++ b/src/utils/citus.lisp
@@ -17,58 +17,182 @@
 |#
 
 
-(in-package #:pgloader.catalog)
+(in-package #:pgloader.citus)
 
-(defstruct citus-reference-table table)
-(defstruct citus-distributed-table table using from)
+;;;
+;;; Main data structures to host our distribution rules.
+;;;
+(defstruct citus-reference-rule table)
+(defstruct citus-distributed-rule table using from)
 
 (defun citus-distribute-schema (catalog distribution-rules)
-  "Distribute a CATALOG with given user provided DISTRIBUTION-RULES."
-  (loop :for rule :in distribution-rules
-     :do (let ((table (citus-find-table catalog (citus-rule-table rule))))
-           (apply-citus-rule rule table))))
+  "Distribute a CATALOG with given user provided DISTRIBUTION-RULES. Return
+   the list of rules applied."
+  (let ((processed-rules '())
+        (derived-rules
+         (loop :for rule :in distribution-rules
+            :append (progn
+                      (citus-set-table rule catalog)
+                      (compute-foreign-rules rule (citus-rule-table rule))))))
 
-(defun citus-rule-table (rule)
-  (etypecase rule
-    (citus-reference-table (citus-reference-table-table rule))
-    (citus-distributed-table (citus-distributed-table-table rule))))
+    ;;
+    ;; Apply rules only once.
+    ;;
+    ;; ERROR Database error 42P16: table "campaigns" is already distributed
+    ;;
+    (loop :for rule :in (append distribution-rules derived-rules)
+       :unless (member (table-oid (citus-rule-table rule))
+                       processed-rules
+                       :key (lambda (rule)
+                              (table-oid (citus-rule-table rule))))
+       :collect (progn
+                  (push rule processed-rules)
+                  (apply-citus-rule rule)
+                  rule))))
 
 (defun citus-find-table (catalog table)
   (let* ((table-name  (table-name table))
          (schema-name (schema-name (table-schema table))))
     (find-table (find-schema catalog schema-name) table-name)))
 
-(defgeneric apply-citus-rule (rule table)
+(defgeneric citus-rule-table (rule)
+  (:documentation "Returns the RULE's table.")
+  (:method ((rule citus-reference-rule))   (citus-reference-rule-table rule))
+  (:method ((rule citus-distributed-rule)) (citus-distributed-rule-table rule)))
+
+(defgeneric citus-set-table (rule catalog)
+  (:documentation "Find citus RULE table in CATALOG and update the
+  placeholder with the table found there.")
+  (:method ((rule citus-reference-rule) (catalog catalog))
+    (let ((table (citus-reference-rule-table rule)))
+      (setf (citus-reference-rule-table rule)
+            (citus-find-table catalog table))))
+
+  (:method ((rule citus-distributed-rule) (catalog catalog))
+    (let ((table (citus-distributed-rule-table rule)))
+      (map-into (citus-distributed-rule-from rule)
+                (lambda (from) (citus-find-table catalog from))
+                (citus-distributed-rule-from rule))
+      (setf (citus-distributed-rule-table rule)
+            (citus-find-table catalog table)))))
+
+(defmethod print-object ((table citus-reference-rule) stream)
+  (print-unreadable-object (table stream :type t :identity t)
+    (with-slots (table) table
+      (format stream "distribute ~a as reference" (format-table-name table)))))
+
+(defmethod print-object ((table citus-distributed-rule) stream)
+  (print-unreadable-object (table stream :type t :identity t)
+    (with-slots (table using from) table
+      (format stream
+              "distribute ~a :using ~a~@[ :from ~{~a~^, ~}~]"
+              (format-table-name table)
+              (column-name using)
+              (mapcar #'format-table-name from)))))
+
+
+;;;
+;;; When distributing a table on a given key, we can follow foreign keys
+;;; pointing to this table. For instance, when computing the following
+;;; rule:
+;;;
+;;;    distribute companies using id
+;;;
+;;; we then want to add the set of rules that we find walking the foreign
+;;; keys:
+;;;
+;;;   distribute campaigns using company_id
+;;;   distribute ads using company_id from campaigns
+;;;   distribute clicks using company_id from ads, campaigns
+;;;   distribute impressions using company_id from ads, campaigns
+;;;
+(defgeneric compute-foreign-rules (rule table &key)
+  (:documentation
+   "Compute rules to apply that derive from the distribution rule RULE when
+    following foreign-keys from TABLE."))
+
+(defmethod compute-foreign-rules ((rule citus-reference-rule)
+                                  (table table)
+                                  &key)
+  "There's nothing to do here, reference table doesn't impact the schema."
+  nil)
+
+(defmethod compute-foreign-rules ((rule citus-distributed-rule)
+                                  (table table)
+                                  &key fkey-list)
+  "Find every foreign key that points to TABLE and return a list of new
+   rules for the source of those foreign keys."
+  (let ((pkey  (find-if #'index-primary (table-index-list table))))
+
+    (when (and pkey (member (column-name (citus-distributed-rule-using rule))
+                            (index-columns pkey)
+                            :test #'string=))
+      (loop :for fkey :in (index-fk-deps pkey)
+         :for new-fkey-list := (cons fkey fkey-list)
+         :for new-rule := (make-distributed-table-from-fkey rule new-fkey-list)
+         :collect new-rule :into new-rule-list
+         :collect (compute-foreign-rules rule (fkey-table fkey)
+                                         :fkey-list new-fkey-list)
+         :into dep-rule-list
+         :finally (return (append new-rule-list
+                                  ;; flatten sub-lists as we go
+                                  (apply #'append dep-rule-list)))))))
+
+(defun make-distributed-table-from-fkey (rule fkey-list)
+  "Make a new Citus distributed table rule from an existing rule and a fkey
+   definition."
+  ;;
+  ;; We have a list of foreign keys pointing from a current table,
+  ;; (fkey-table fkey), to the root table that is distributed,
+  ;; (fkey-foreign-table fkey).
+  ;;
+  ;; For the distribution key name, we consider the name of the column used
+  ;; in the last entry from the fkey-list, the column name that points to
+  ;; the root.id distribution key and might be named root_id or something.
+  ;;
+  ;; Then we only need to specify the intermediate tables in the :FROM slot;
+  ;; the last entry gives us the data we need to backfill our tables.
+  ;;
+  (let* ((fkey     (car (last fkey-list)))
+         (dist-key (column-name (citus-distributed-rule-using rule)))
+         (dist-key-pos (position dist-key
+                                 (fkey-foreign-columns fkey)
+                                 :test #'string=))
+         (fkey-table-dist-key (nth dist-key-pos (fkey-columns fkey)))
+         (from-table-list (butlast (mapcar #'fkey-foreign-table fkey-list))))
+    (make-citus-distributed-rule :table (fkey-table (first fkey-list))
+                                  :using (make-column :name fkey-table-dist-key)
+                                  :from from-table-list)))
+
+
+;;;
+;;; Apply a citus distribution rule to given table, and store the rule
+;;; itself to the table-citus-rule slot so that we later know to generate a
+;;; proper SELECT query that includes the backfilling.
+;;;
+(defgeneric apply-citus-rule (rule)
   (:documentation "Apply a Citus distribution RULE to given TABLE."))
 
-(defmethod apply-citus-rule ((rule citus-reference-table) (table table))
+(defmethod apply-citus-rule ((rule citus-reference-rule))
   ;; for a reference table, we have nothing to do really.
-  (setf (table-citus-rule table) rule))
-
-(defmethod apply-citus-rule ((rule citus-distributed-table) (table table))
-  (setf (table-citus-rule table) rule)
-
-  ;;
-  ;; Replace the TABLE placeholders in the :FROM slot of the rule with the
-  ;; tables from the catalogs.
-  ;;
-  (when (citus-distributed-table-from rule)
-    (let ((catalog (schema-catalog (table-schema table))))
-     (map-into (citus-distributed-table-from rule)
-               (lambda (from) (citus-find-table catalog from))
-               (citus-distributed-table-from rule))))
+  (setf (table-citus-rule (citus-reference-rule-table rule)) rule)
+  t)
 
+(defmethod apply-citus-rule ((rule citus-distributed-rule))
   ;; ok now we need to check if the USING column exists or if we need to add
   ;; it to our model
-  (let ((column (find (column-name (citus-distributed-table-using rule))
-                      (table-field-list table)
-                      :test #'string=
-                      :key #'column-name)))
+  (setf (table-citus-rule (citus-distributed-rule-table rule)) rule)
+
+  (let* ((table   (citus-distributed-rule-table rule))
+         (column (find (column-name (citus-distributed-rule-using rule))
+                       (table-field-list table)
+                       :test #'string=
+                       :key #'column-name)))
     (if column
 
         ;; add it to the PKEY definition, in first position
         (add-column-to-pkey table
-                            (column-name (citus-distributed-table-using rule)))
+                            (column-name (citus-distributed-rule-using rule)))
 
         ;; The column doesn't exist, we need to find it in the :FROM rule's
         ;; list. The :FROM slot of the rule is a list of tables to
@@ -78,9 +202,9 @@
         ;;
         ;; To find the column definition to add to the current TABLE, look
         ;; it up in the last entry of the FROM rule's list.
-        (let* ((last-from-rule (car (last (citus-distributed-table-from rule))))
+        (let* ((last-from-rule (car (last (citus-distributed-rule-from rule))))
                (column-definition
-                (find (column-name (citus-distributed-table-using rule))
+                (find (column-name (citus-distributed-rule-using rule))
                       (table-field-list last-from-rule)
                       :test #'string=
                       :key #'column-name))
@@ -122,13 +246,18 @@
          :do (push column-name (fkey-foreign-columns fkey))
          :do (setf (fkey-condef fkey) nil)))))
 
-
+
+;;;
+;;; Format a query for backfilling the data right from pgloader:
+;;;
+;;;   SELECT dist_key, * FROM source JOIN pivot ON ...
+;;;
 (defun format-citus-join-clause (table distribution-rule)
   "Format a JOIN clause to backfill the distribution key data in tables that
    are referencing (even indirectly) the main distribution table."
   (with-output-to-string (s)
     (loop :for current-table := table :then rel
-       :for rel :in (citus-distributed-table-from distribution-rule)
+       :for rel :in (citus-distributed-rule-from distribution-rule)
        :do (let* ((fkey
                    (find (ensure-unquoted (table-name rel))
                          (table-fkey-list current-table)
@@ -171,7 +300,7 @@
   ;; in the rule).
   ;;
   (let* ((last-from-rule
-          (car (last (citus-distributed-table-from
+          (car (last (citus-distributed-rule-from
                       (table-citus-rule target-table)))))
          (cols
           (append (list
@@ -193,9 +322,12 @@
             (table-source-name source-table)
             joins)))
 
+;;;
+;;; Predicate to see if a table needs backfilling
+;;;
 (defun citus-backfill-table-p (table)
   "Returns non-nil when given TABLE should be backfilled with the
    distribution key."
   (and (table-citus-rule table)
-       (typep (table-citus-rule table) 'citus-distributed-table)
-       (not (null (citus-distributed-table-from (table-citus-rule table))))))
+       (typep (table-citus-rule table) 'citus-distributed-rule)
+       (not (null (citus-distributed-rule-from (table-citus-rule table))))))

From 7b487ddacaf38a96acac29c29e815f0d608b59b5 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Thu, 18 Oct 2018 15:42:17 +0200
Subject: [PATCH 19/69] Add a Citus distribution test case, from the citus
 tutorial.

---
 test/citus/.gitignore   |  1 +
 test/citus/Makefile     | 20 ++++++++++++
 test/citus/README.md    | 42 +++++++++++++++++++++++++
 test/citus/company.load | 12 ++++++++
 test/citus/company.sql  | 51 +++++++++++++++++++++++++++++++
 test/citus/copy.sql     |  5 +++
 test/citus/data.load    | 68 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 199 insertions(+)
 create mode 100644 test/citus/.gitignore
 create mode 100644 test/citus/Makefile
 create mode 100644 test/citus/README.md
 create mode 100644 test/citus/company.load
 create mode 100644 test/citus/company.sql
 create mode 100644 test/citus/copy.sql
 create mode 100644 test/citus/data.load

diff --git a/test/citus/.gitignore b/test/citus/.gitignore
new file mode 100644
index 0000000..16f2dc5
--- /dev/null
+++ b/test/citus/.gitignore
@@ -0,0 +1 @@
+*.csv
\ No newline at end of file
diff --git a/test/citus/Makefile b/test/citus/Makefile
new file mode 100644
index 0000000..0c4c5b9
--- /dev/null
+++ b/test/citus/Makefile
@@ -0,0 +1,20 @@
+DATASET = companies campaigns ads clicks impressions geo_ips
+CSV     = $(addsuffix .csv,$(DATASET))
+DROP = DROP TABLE IF EXISTS companies, campaigns, ads, clicks, impressions, geo_ips
+
+all: schema data ;
+
+schema:
+	psql --single-transaction -c "$(DROP)" -d hackathon
+	psql --single-transaction -f company.sql -d hackathon
+
+data: fetch
+	psql -f copy.sql -d hackathon
+	../../build/bin/pgloader ./data.load
+
+fetch: $(CSV) ;
+
+%.csv:
+	curl -O https://examples.citusdata.com/mt_ref_arch/$@
+
+.PHONY: schema data fetch
diff --git a/test/citus/README.md b/test/citus/README.md
new file mode 100644
index 0000000..499ecd6
--- /dev/null
+++ b/test/citus/README.md
@@ -0,0 +1,42 @@
+# Citus Multi-Tenant Automatic Distribution
+
+In this test case we follow the following documentation:
+
+  https://docs.citusdata.com/en/v7.5/use_cases/multi_tenant.html
+  
+We install the schema as it was before the Citus migration, and load the
+data without the backfilled column that the CSV files already contain. For
+that we use pgloader to ignore the company_id column in the tables that
+didn't have this column prior to the Citus migration effort.
+
+Then the following `company.load` file contains the pgloader command that
+runs a full migration from PostgreSQL to Citus:
+
+```
+load database
+   from pgsql:///hackathon
+   into pgsql://localhost:9700/dim
+
+   with include drop, reset no sequences
+
+   distribute companies using id;
+```
+
+Tables are marked distributed, the company_id column is added where it's
+needed, primary key and foreign key definitions are altered to the new
+model, and finally the data is backfilled automatically in the target table
+thanks to generated queries like the following:
+
+~~~
+SELECT "campaigns".company_id::text,
+       "impressions".id::text,
+       "impressions".ad_id::text,
+       "impressions".seen_at::text,
+       "impressions".site_url::text,
+       "impressions".cost_per_impression_usd::text,
+       "impressions".user_ip::text,
+       "impressions".user_data::text
+  FROM "public"."impressions"  
+        JOIN "public"."ads" ON impressions.ad_id = ads.id
+        JOIN "public"."campaigns" ON ads.campaign_id = campaigns.id
+~~~
diff --git a/test/citus/company.load b/test/citus/company.load
new file mode 100644
index 0000000..ef4af21
--- /dev/null
+++ b/test/citus/company.load
@@ -0,0 +1,12 @@
+load database
+   from pgsql:///hackathon
+   into pgsql://localhost:9700/dim
+
+   with include drop, reset no sequences
+
+   distribute companies using id
+   -- distribute campaigns using company_id
+   -- distribute ads using company_id from campaigns
+   -- distribute clicks using company_id from ads, campaigns
+   -- distribute impressions using company_id from ads, campaigns
+   ;
diff --git a/test/citus/company.sql b/test/citus/company.sql
new file mode 100644
index 0000000..dad23dc
--- /dev/null
+++ b/test/citus/company.sql
@@ -0,0 +1,51 @@
+CREATE TABLE companies (
+  id bigserial PRIMARY KEY,
+  name text NOT NULL,
+  image_url text,
+  created_at timestamp without time zone NOT NULL,
+  updated_at timestamp without time zone NOT NULL
+);
+
+CREATE TABLE campaigns (
+  id bigserial PRIMARY KEY,
+  company_id bigint REFERENCES companies (id),
+  name text NOT NULL,
+  cost_model text NOT NULL,
+  state text NOT NULL,
+  monthly_budget bigint,
+  blacklisted_site_urls text[],
+  created_at timestamp without time zone NOT NULL,
+  updated_at timestamp without time zone NOT NULL
+);
+
+CREATE TABLE ads (
+  id bigserial PRIMARY KEY,
+  campaign_id bigint REFERENCES campaigns (id),
+  name text NOT NULL,
+  image_url text,
+  target_url text,
+  impressions_count bigint DEFAULT 0,
+  clicks_count bigint DEFAULT 0,
+  created_at timestamp without time zone NOT NULL,
+  updated_at timestamp without time zone NOT NULL
+);
+
+CREATE TABLE clicks (
+  id bigserial PRIMARY KEY,
+  ad_id bigint REFERENCES ads (id),
+  clicked_at timestamp without time zone NOT NULL,
+  site_url text NOT NULL,
+  cost_per_click_usd numeric(20,10),
+  user_ip inet NOT NULL,
+  user_data jsonb NOT NULL
+);
+
+CREATE TABLE impressions (
+  id bigserial PRIMARY KEY,
+  ad_id bigint REFERENCES ads (id),
+  seen_at timestamp without time zone NOT NULL,
+  site_url text NOT NULL,
+  cost_per_impression_usd numeric(20,10),
+  user_ip inet NOT NULL,
+  user_data jsonb NOT NULL
+);
diff --git a/test/citus/copy.sql b/test/citus/copy.sql
new file mode 100644
index 0000000..684f891
--- /dev/null
+++ b/test/citus/copy.sql
@@ -0,0 +1,5 @@
+\copy companies from 'companies.csv' with csv
+\copy campaigns from 'campaigns.csv' with csv
+-- \copy ads from 'ads.csv' with csv
+-- \copy clicks from 'clicks.csv' with csv
+-- \copy impressions from 'impressions.csv' with csv
diff --git a/test/citus/data.load b/test/citus/data.load
new file mode 100644
index 0000000..cbb29b0
--- /dev/null
+++ b/test/citus/data.load
@@ -0,0 +1,68 @@
+--
+-- Ads
+--
+load csv
+  from ads.csv
+  (
+    id, company_id, campaign_id, name, image_url, target_url,
+    impressions_count, clicks_count, created_at, updated_at
+  )
+  
+  into postgresql:///hackathon
+  
+  target table ads
+  target columns
+  (
+    id, campaign_id, name, image_url, target_url,
+    impressions_count, clicks_count, created_at, updated_at
+  )
+
+  with fields optionally enclosed by '"',
+       fields escaped by double-quote,
+       fields terminated by ',';
+
+--
+-- Clicks
+--
+load csv
+  from clicks.csv
+  (
+   id, company_id, ad_id, clicked_at, site_url, cost_per_click_usd, 
+   user_ip, user_data
+  )
+  
+  into postgresql:///hackathon
+  
+  target table clicks
+  target columns
+  (
+   id, ad_id, clicked_at, site_url, cost_per_click_usd, user_ip, user_data
+  )
+
+  with fields optionally enclosed by '"',
+       fields escaped by double-quote,
+       fields terminated by ',';
+
+
+--
+-- Impressions
+--
+load csv
+  from impressions.csv
+  (
+    id, company_id, ad_id, seen_at, site_url,
+    cost_per_impression_usd, user_ip, user_data
+  )
+  
+  into postgresql:///hackathon
+  
+  target table impressions
+  target columns
+  (
+    id, ad_id, seen_at, site_url, cost_per_impression_usd, user_ip, user_data
+  )
+
+  with drop indexes,
+       fields optionally enclosed by '"',
+       fields escaped by double-quote,
+       fields terminated by ',';
\ No newline at end of file

From 0e6f599282e9f799ec47e8c32dde95d3b0c13201 Mon Sep 17 00:00:00 2001
From: Larry Gebhardt <larry@cerebris.com>
Date: Thu, 18 Oct 2018 12:55:56 -0400
Subject: [PATCH 20/69] Add Docker build instructions (#853)

---
 README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/README.md b/README.md
index d99cc2d..a3b3049 100644
--- a/README.md
+++ b/README.md
@@ -117,6 +117,16 @@ pgloader:
 
 <https://github.com/dimitri/pgloader/issues?utf8=✓&q=label%3A%22Windows%20support%22%20>
 
+### Building Docker image from sources
+
+You can build a Docker image from source using SBCL by default:
+
+  $ docker build .
+
+Or Clozure CL (CCL):
+
+  $ docker build -f Dockerfile.ccl .
+
 ## More options when building from source
 
 The `Makefile` target `pgloader` knows how to produce a Self Contained

From 6e7ea9080693c68368fc13075a5360e2bd37bec7 Mon Sep 17 00:00:00 2001
From: Jason Rigby <jasonrig@users.noreply.github.com>
Date: Fri, 19 Oct 2018 03:56:40 +1100
Subject: [PATCH 21/69] add cl-ironclad and cl-babel dependencies to docker
 builds (#854)

---
 Dockerfile     | 4 +++-
 Dockerfile.ccl | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0500aa2..ea6a08c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,6 +18,8 @@ FROM debian:stable-slim as builder
         time \
         unzip \
         wget \
+        cl-ironclad \
+        cl-babel \
       && rm -rf /var/lib/apt/lists/*
 
   COPY ./ /opt/src/pgloader
@@ -42,4 +44,4 @@ FROM debian:stable-slim
 
   COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
 
-  LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"
\ No newline at end of file
+  LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"
diff --git a/Dockerfile.ccl b/Dockerfile.ccl
index f88468a..9377fe0 100644
--- a/Dockerfile.ccl
+++ b/Dockerfile.ccl
@@ -18,6 +18,8 @@ FROM debian:stable-slim as builder
         time \
         unzip \
         wget \
+        cl-ironclad \
+        cl-babel \
       && rm -rf /var/lib/apt/lists/*
 
   RUN curl -SL https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz \
@@ -46,4 +48,4 @@ FROM debian:stable-slim
 
   COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
 
-  LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"
\ No newline at end of file
+  LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"

From f8460c17056df902a10bb08596beed610749a2af Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sat, 20 Oct 2018 19:28:19 +0200
Subject: [PATCH 22/69] Allow usernames and dbnames starting with digits
 (again).

It turns out that the rules about the names of users and databases are more
lax than pgloader would know, so it might be a good move for our DSN parsing
to accept more values and then let the source/target systems complain
when something goes wrong.

See #230 which got broken again somewhere.
---
 src/parsers/command-db-uri.lisp | 11 ++++++-----
 src/parsers/command-mysql.lisp  |  5 +----
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/parsers/command-db-uri.lisp b/src/parsers/command-db-uri.lisp
index 7420e01..339d45f 100644
--- a/src/parsers/command-db-uri.lisp
+++ b/src/parsers/command-db-uri.lisp
@@ -25,7 +25,7 @@
 (defrule doubled-at-sign (and "@@") (:constant "@"))
 (defrule doubled-colon   (and "::") (:constant ":"))
 (defrule password (+ (or (not "@") doubled-at-sign)) (:text t))
-(defrule username (and (or #\_ (alpha-char-p character))
+(defrule username (and (or #\_ (alpha-char-p character) (digit-char-p character))
                        (* (or (alpha-char-p character)
                               (digit-char-p character)
                               #\.
@@ -87,10 +87,11 @@
       (append (list :host (when host (process-hostname host)))
               port))))
 
-(defrule dsn-dbname (and "/" (? maybe-quoted-namestring))
-  (:destructure (slash dbname)
-		(declare (ignore slash))
-		(list :dbname dbname)))
+(defrule dsn-dbname (and "/" (? (* (or (alpha-char-p character)
+                                       (digit-char-p character)
+                                       punct))))
+  (:lambda (dbn)
+    (list :dbname (text (second dbn)))))
 
 (defrule dsn-option-ssl-disable "disable" (:constant :no))
 (defrule dsn-option-ssl-allow   "allow"   (:constant :try))
diff --git a/src/parsers/command-mysql.lisp b/src/parsers/command-mysql.lisp
index 703515f..bbae776 100644
--- a/src/parsers/command-mysql.lisp
+++ b/src/parsers/command-mysql.lisp
@@ -95,9 +95,6 @@
 
 (defrule mysql-prefix "mysql://" (:constant (list :type :mysql)))
 
-(defrule mysql-dsn-dbname (and "/" maybe-quoted-namestring)
-  (:lambda (m-d-d) (list :dbname (text (second m-d-d)))))
-
 (defrule mysql-dsn-option-usessl-true  "true"  (:constant :yes))
 (defrule mysql-dsn-option-usessl-false "false" (:constant :no))
 
@@ -123,7 +120,7 @@
 (defrule mysql-uri (and mysql-prefix
                         (? dsn-user-password)
                         (? dsn-hostname)
-                        mysql-dsn-dbname
+                        dsn-dbname
                         (? mysql-dsn-options))
   (:lambda (uri)
     (destructuring-bind (&key type

From 207cd82726ef5a88554bf2bf59303a876027d704 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 7 Nov 2018 11:01:06 +0100
Subject: [PATCH 23/69] Improve SQLite type names parsing.

Allow spaces in more random places, as SQLite doesn't seem to normalize the
user input. Fixes #548 again.
---
 src/parsers/command-csv.lisp            |  5 -----
 src/parsers/command-utils.lisp          |  8 ++++++++
 src/parsers/parse-sqlite-type-name.lisp | 14 ++++++++------
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/parsers/command-csv.lisp b/src/parsers/command-csv.lisp
index 1c2afa4..c7e4e11 100644
--- a/src/parsers/command-csv.lisp
+++ b/src/parsers/command-csv.lisp
@@ -231,11 +231,6 @@
     (destructuring-bind (field1 fields) source
       (list* field1 fields))))
 
-(defrule open-paren (and ignore-whitespace #\( ignore-whitespace)
-  (:constant :open-paren))
-(defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
-  (:constant :close-paren))
-
 (defrule having-fields (and kw-having kw-fields) (:constant nil))
 
 (defrule csv-source-field-list (and (? having-fields)
diff --git a/src/parsers/command-utils.lisp b/src/parsers/command-utils.lisp
index 4383ab3..ebc476d 100644
--- a/src/parsers/command-utils.lisp
+++ b/src/parsers/command-utils.lisp
@@ -57,3 +57,11 @@
                                      quoted-namestring
                                      namestring))
 
+(defrule open-paren (and ignore-whitespace #\( ignore-whitespace)
+  (:constant :open-paren))
+
+(defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
+  (:constant :close-paren))
+
+(defrule comma-separator (and ignore-whitespace #\, ignore-whitespace)
+  (:constant ","))
diff --git a/src/parsers/parse-sqlite-type-name.lisp b/src/parsers/parse-sqlite-type-name.lisp
index 81d9a0a..09eb0e8 100644
--- a/src/parsers/parse-sqlite-type-name.lisp
+++ b/src/parsers/parse-sqlite-type-name.lisp
@@ -15,14 +15,16 @@
                                (? " "))
   (:lambda (noise) (second noise)))
 
-(defrule sqlite-single-typemod (and #\( (+ (digit-char-p character)) #\))
+(defrule sqlite-single-typemod (and open-paren
+                                    (+ (digit-char-p character))
+                                    close-paren)
   (:lambda (st) (cons (parse-integer (text (second st))) nil)))
 
-(defrule sqlite-double-typemod (and #\(
+(defrule sqlite-double-typemod (and open-paren
                                     (+ (digit-char-p character))
-                                    (* (or #\, #\Space))
+                                    comma-separator
                                     (+ (digit-char-p character))
-                                    #\))
+                                    close-paren)
   (:lambda (dt) (cons (parse-integer (text (second dt)))
                       (parse-integer (text (fourth dt))))))
 
@@ -31,9 +33,9 @@
 (defrule sqlite-type-name (and (* extra-qualifiers)
                                (+ (alpha-char-p character))
                                (* extra-qualifiers)
-                               (* #\Space)
+                               ignore-whitespace
                                (? sqlite-typemod)
-                               (* #\Space)
+                               ignore-whitespace
                                (* extra-qualifiers))
   (:lambda (tn) (list (text (second tn))
                       (fifth tn)

From 794bc7fc6436ca05a4208fbb5357b8d1791a9b3a Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 7 Nov 2018 21:05:59 +0100
Subject: [PATCH 24/69] Improve redshift support: string_agg() doesn't exist
 there.

Neither do array_agg(), unnest() and other very useful PostgreSQL
functions. Redshift is from 8.0 times, so do things the old way: parse the
index definition that we get from calling pg_get_indexdef().

For that, this patch introduces the notion of SQL support that depends on
PostgreSQL major version. If no major-version specific query is found in the
pgloader source tree, then we use the generic one.

Fixes #860.
---
 src/load/load-file.lisp                |  3 +-
 src/load/migrate-database.lisp         |  8 +++--
 src/package.lisp                       |  3 +-
 src/pgsql/pgsql-schema.lisp            | 42 +++++++++++++++++++++++---
 src/pgsql/sql/8.0/README.md            |  4 +++
 src/pgsql/sql/8.0/list-all-indexes.sql | 29 ++++++++++++++++++
 src/sources/pgsql/pgsql.lisp           |  6 ++--
 src/utils/queries.lisp                 | 19 ++++++++++++
 8 files changed, 102 insertions(+), 12 deletions(-)
 create mode 100644 src/pgsql/sql/8.0/README.md
 create mode 100644 src/pgsql/sql/8.0/list-all-indexes.sql

diff --git a/src/load/load-file.lisp b/src/load/load-file.lisp
index 7d36d12..02ddb45 100644
--- a/src/load/load-file.lisp
+++ b/src/load/load-file.lisp
@@ -44,7 +44,8 @@
           (setf pgsql-catalog
                 (fetch-pgsql-catalog (db-name pgconn)
                                      :table (target copy)
-                                     :variant (pgconn-variant pgconn)))
+                                     :variant (pgconn-variant pgconn)
+                                     :pgversion (pgconn-major-version pgconn)))
 
           ;; if the user didn't tell us the column list of the table, now is
           ;; a proper time to set it in the copy object
diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index 0a5452e..3037571 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -70,9 +70,11 @@
           ;;
           ;; to be able to do that properly, get the constraints from
           ;; the pre-existing target database catalog
-          (let ((pgsql-catalog
-                 (fetch-pgsql-catalog (db-name (target-db copy))
-                                      :source-catalog catalog)))
+          (let* ((pgversion   (pgconn-major-version (target-db copy)))
+                 (pgsql-catalog
+                  (fetch-pgsql-catalog (db-name (target-db copy))
+                                       :source-catalog catalog
+                                       :pgversion pgversion)))
             (merge-catalogs catalog pgsql-catalog))
 
           ;; now the foreign keys and only then the indexes, because a
diff --git a/src/package.lisp b/src/package.lisp
index 0d3e5b6..20820ab 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -287,7 +287,8 @@
 (defpackage #:pgloader.queries
   (:use #:cl #:pgloader.params)
   (:export #:*queries*
-           #:sql))
+           #:sql
+           #:sql-url-for-variant))
 
 (defpackage #:pgloader.citus
   (:use #:cl
diff --git a/src/pgsql/pgsql-schema.lisp b/src/pgsql/pgsql-schema.lisp
index 59f33f0..0b98459 100644
--- a/src/pgsql/pgsql-schema.lisp
+++ b/src/pgsql/pgsql-schema.lisp
@@ -10,7 +10,8 @@
                               source-catalog
                               including
                               excluding
-                              (variant :pgdg))
+                              (variant :pgdg)
+                              pgversion)
   "Fetch PostgreSQL catalogs for the target database. A PostgreSQL
    connection must be opened."
   (let* ((*identifier-case* :quote)
@@ -35,7 +36,8 @@
 
     (list-all-indexes catalog
                       :including including
-                      :excluding excluding)
+                      :excluding excluding
+                      :pgversion pgversion)
 
     (when (eq :pgdg variant)
       (list-all-fkeys catalog
@@ -193,7 +195,7 @@
        (add-field table field))
      :finally (return catalog)))
 
-(defun list-all-indexes (catalog &key including excluding)
+(defun list-all-indexes (catalog &key including excluding pgversion)
   "Get the list of PostgreSQL index definitions per table."
   (loop
      :for (schema-name name oid
@@ -201,7 +203,9 @@
                        primary unique cols sql conname condef)
      :in (query nil
                 (format nil
-                        (sql "/pgsql/list-all-indexes.sql")
+                        (sql (sql-url-for-variant "pgsql"
+                                                  "list-all-indexes.sql"
+                                                  pgversion))
                         including       ; do we print the clause?
                         (filter-list-to-where-clause including
                                                      nil
@@ -215,6 +219,7 @@
      :do (let* ((schema   (find-schema catalog schema-name))
                 (tschema  (find-schema catalog table-schema))
                 (table    (find-table tschema table-name))
+                (columns  (parse-index-column-names cols sql))
                 (pg-index
                  (make-index :name (ensure-quoted name)
                              :oid oid
@@ -222,7 +227,7 @@
                              :table table
                              :primary primary
                              :unique unique
-                             :columns (split-sequence:split-sequence #\, cols)
+                             :columns columns
                              :sql sql
                              :conname (unless (eq :null conname)
                                         (ensure-quoted conname))
@@ -438,3 +443,30 @@
            ;; going to take care of creating the type.
            (add-sqltype schema sqltype)))
      :finally (return catalog)))
+
+
+
+;;;
+;;; Extra utils like parsing a list of column names from an index definition.
+;;;
+(defun parse-index-column-names (columns index-definition)
+  "Return a list of column names for the given index."
+  (if (and columns (not (eq :null columns)))
+      ;; the normal case, not much parsing to do, the data has been prepared
+      ;; for us in the SQL query
+      (split-sequence:split-sequence #\, columns)
+
+      ;; the redshift variant case, where there's no way to string_agg or
+      ;; even array_to_string(array_agg(...)) and so we need to parse the
+      ;; index-definition instead.
+      ;;
+      ;; CREATE UNIQUE INDEX pg_amproc_opc_proc_index ON pg_amproc USING btree (amopclaid, amprocsubtype, amprocnum)
+      (when index-definition
+        (let ((open-paren-pos  (position #\( index-definition))
+              (close-paren-pos (position #\) index-definition)))
+          (when (and open-paren-pos close-paren-pos)
+            (mapcar (lambda (colname) (string-trim " " colname))
+                    (split-sequence:split-sequence #\,
+                                                   index-definition
+                                                   :start (+ 1 open-paren-pos)
+                                                   :end close-paren-pos)))))))
diff --git a/src/pgsql/sql/8.0/README.md b/src/pgsql/sql/8.0/README.md
new file mode 100644
index 0000000..dc4eddb
--- /dev/null
+++ b/src/pgsql/sql/8.0/README.md
@@ -0,0 +1,4 @@
+Redshift is a fork of PostgreSQL 8.0, and our catalog queries must then
+target this old PostgreSQL version to work on Redshift. Parts of what we
+would usually implement in SQL are implemented in pgloader code instead, in
+order to support such an old PostgreSQL version.
diff --git a/src/pgsql/sql/8.0/list-all-indexes.sql b/src/pgsql/sql/8.0/list-all-indexes.sql
new file mode 100644
index 0000000..2efc8ce
--- /dev/null
+++ b/src/pgsql/sql/8.0/list-all-indexes.sql
@@ -0,0 +1,29 @@
+-- params: including
+--         filter-list-to-where-clause for including
+--         excluding
+--         filter-list-to-where-clause for excluding
+  select n.nspname,
+         i.relname,
+         i.oid,
+         rn.nspname,
+         r.relname,
+         indisprimary,
+         indisunique,
+         null,
+         pg_get_indexdef(indexrelid),
+         c.conname,
+         pg_get_constraintdef(c.oid)
+    from pg_index x
+         join pg_class i ON i.oid = x.indexrelid
+         join pg_class r ON r.oid = x.indrelid
+         join pg_namespace n ON n.oid = i.relnamespace
+         join pg_namespace rn ON rn.oid = r.relnamespace
+         left join pg_depend d on d.classid = 'pg_class'::regclass
+                              and d.objid = i.oid
+                              and d.refclassid = 'pg_constraint'::regclass
+                              and d.deptype = 'i'
+         left join pg_constraint c ON c.oid = d.refobjid
+   where n.nspname !~~ '^pg_' and n.nspname <> 'information_schema'
+         ~:[~*~;and (~{~a~^~&~10t or ~})~]
+         ~:[~*~;and (~{~a~^~&~10t and ~})~]
+order by n.nspname, r.relname;
diff --git a/src/sources/pgsql/pgsql.lisp b/src/sources/pgsql/pgsql.lisp
index da6d611..d62038c 100644
--- a/src/sources/pgsql/pgsql.lisp
+++ b/src/sources/pgsql/pgsql.lisp
@@ -82,7 +82,8 @@
                           :use-result-as-read t
                           :section :pre)
     (with-pgsql-transaction (:pgconn (source-db pgsql))
-      (let ((variant (pgconn-variant (source-db pgsql))))
+      (let ((variant   (pgconn-variant (source-db pgsql)))
+            (pgversion (pgconn-major-version (source-db pgsql))))
        (when (eq :pgdg variant)
          (list-all-sqltypes catalog
                             :including including
@@ -95,7 +96,8 @@
        (when create-indexes
          (list-all-indexes catalog
                            :including including
-                           :excluding excluding))
+                           :excluding excluding
+                           :pgversion pgversion))
 
        (when (and (eq :pgdg variant) foreign-keys)
          (list-all-fkeys catalog
diff --git a/src/utils/queries.lisp b/src/utils/queries.lisp
index b0364d1..45a3c61 100644
--- a/src/utils/queries.lisp
+++ b/src/utils/queries.lisp
@@ -66,3 +66,22 @@
     (recompute-fs-and-retry ()
       (setf *fs* (walk-sources-and-build-fs))
       (sql url))))
+
+(defun sql-url-for-variant (base filename &optional variant)
+  "Build a SQL URL for given VARIANT"
+  (flet ((sql-base-url (base filename)
+           (format nil "/~a/~a" base filename)))
+    (if variant
+        (let ((sql-variant-url
+               (format nil "/~a/~a/~a"
+                       base
+                       (string-downcase (typecase variant
+                                          (symbol (symbol-name variant))
+                                          (string variant)
+                                          (t      (princ-to-string variant))))
+                       filename)))
+          (if (gethash sql-variant-url *fs*)
+              sql-variant-url
+              (sql-base-url base filename)))
+
+        (sql-base-url base filename))))

From 6c804042490e978b1cc630395ff294be2a043914 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Fri, 9 Nov 2018 22:41:14 +0100
Subject: [PATCH 25/69] Implement support for Redshift "identity" columns.

At this stage we don't even parse the details of the Redshift identity such
as the seed and step values and consider them the same as a MySQL
auto_increment extra description field.

Fixes #860 (again).
---
 src/pgsql/pgsql-finalize-catalogs.lisp  |  6 +++++-
 src/sources/pgsql/pgsql-cast-rules.lisp | 15 ++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/pgsql/pgsql-finalize-catalogs.lisp b/src/pgsql/pgsql-finalize-catalogs.lisp
index 5a4eeef..5684100 100644
--- a/src/pgsql/pgsql-finalize-catalogs.lisp
+++ b/src/pgsql/pgsql-finalize-catalogs.lisp
@@ -15,6 +15,8 @@
 (in-package #:pgloader.pgsql)
 
 (defun finalize-catalogs (catalog variant)
+  "Finalize the target PostgreSQL catalogs, dumbing down datatypes when the
+   target actually is Redshift rather than core PostgreSQL."
   ;;
   ;; For Core PostgreSQL, we also want to find data types names that have
   ;; no Btree support and fetch alternatives. This allows for supporting
@@ -30,7 +32,9 @@
   ;;
   (adjust-data-types catalog variant))
 
-(defgeneric adjust-data-types (catalog variant))
+(defgeneric adjust-data-types (catalog variant)
+  (:documentation
+   "Adjust PostgreSQL data types depending on the variant we target."))
 
 ;;;
 ;;; Nothing needs to be done for PostgreSQL variant :pgdg, of course.
diff --git a/src/sources/pgsql/pgsql-cast-rules.lisp b/src/sources/pgsql/pgsql-cast-rules.lisp
index 6ac37ee..6c0690e 100644
--- a/src/sources/pgsql/pgsql-cast-rules.lisp
+++ b/src/sources/pgsql/pgsql-cast-rules.lisp
@@ -36,13 +36,16 @@
                pgloader.catalog::extra)
       field
     (let* ((ctype (pgsql-column-ctype field))
+           (extra (when (and (stringp (column-default field))
+                             (search "identity" (column-default field)))
+                    :auto-increment))
            (pgcol (apply-casting-rules nil
                                        pgloader.catalog::name
                                        pgloader.catalog::type-name
                                        ctype
                                        pgloader.catalog::default
                                        pgloader.catalog::nullable
-                                       pgloader.catalog::extra)))
+                                       extra)))
       ;; re-install our instruction not to transform default value: it comes
       ;; from PostgreSQL, and we trust it.
       (setf (column-transform-default pgcol)
@@ -55,10 +58,16 @@
                 ((and (stringp default) (string= "NULL" default))
                  :null)
 
-                ((and (stringp default)
-                      (or (string= "getdate()" default)))
+                ((and (stringp default) (string= "getdate()" default))
                  :current-timestamp)
 
+                ;; get rid of the identity default value, we already added
+                ;; a hint in the column-extra field.
+                ;;
+                ;; "identity"(347358, 0, ('1,1'::character varying)::text)
+                ((and (stringp default) (search "identity" default))
+                 :null)
+
                 (t (column-default pgcol))))
 
         ;; we usually trust defaults that come from PostgreSQL... but we

From 6eaad0621bc6661c314f21f5da158d27282dc02c Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Fri, 9 Nov 2018 22:42:31 +0100
Subject: [PATCH 26/69] Desultory code maintenance for MS SQL identity support.

The code expects the keyword :auto-increment rather than a string nowadays
in order to process a column's extra bits of information as meaning that we
want to cast to a serial/bigserial datatype.
---
 src/sources/mssql/mssql-cast-rules.lisp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sources/mssql/mssql-cast-rules.lisp b/src/sources/mssql/mssql-cast-rules.lisp
index 471740b..dec7ee8 100644
--- a/src/sources/mssql/mssql-cast-rules.lisp
+++ b/src/sources/mssql/mssql-cast-rules.lisp
@@ -125,7 +125,7 @@
       field
     (declare (ignore schema))           ; FIXME
     (let* ((ctype (mssql-column-ctype field))
-           (extra (when (mssql-column-identity field) "auto_increment"))
+           (extra (when (mssql-column-identity field) :auto-increment))
            (pgcol
             (apply-casting-rules table-name name type ctype default nullable extra)))
       ;; the MS SQL driver smartly maps data to the proper CL type, but the

From 656bf850752ab06435870d2e68696ff12a285362 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sat, 10 Nov 2018 20:22:04 +0100
Subject: [PATCH 27/69] Review field to column projection code emitted.

The code emitted by pgloader to transform input fields into PostgreSQL
column values was using too many optimization declarations, some of which
SBCL failed to follow through for lack of type marking in the generated
code.

As SBCL doesn't have enough information to be optimizing anyway, at least we
can make it so that we don't have a warning about it. The new code does that.

Fixes #803.
---
 src/sources/common/project-fields.lisp | 4 ++--
 test/allcols.load                      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/sources/common/project-fields.lisp b/src/sources/common/project-fields.lisp
index 0ab57ff..f82d46c 100644
--- a/src/sources/common/project-fields.lisp
+++ b/src/sources/common/project-fields.lisp
@@ -20,7 +20,6 @@
 		       nil
 		       col))
 		 (lambda (col)
-		   (declare (optimize speed))
 		   (if (string= null-as col) nil col))))
 
 	   (field-name-as-symbol (field-name-or-list)
@@ -120,7 +119,8 @@
 		    (destructuring-bind (&optional ,@args &rest extra) row
 		      (declare (ignorable ,@args) (ignore extra))
                       (let ,values
-                        (declare (ignorable ,@args))
+                        (declare (ignorable ,@args)
+                                 (type vector ,@args))
                         (vector ,@newrow)))))))))
       ;; allow for some debugging
       (if compile (compile nil projection) projection))))
diff --git a/test/allcols.load b/test/allcols.load
index e5c4e29..2598466 100644
--- a/test/allcols.load
+++ b/test/allcols.load
@@ -13,7 +13,7 @@
 
 LOAD CSV
      FROM inline (a, b, c)
-     INTO postgresql:///pgloader?allcols (a, b, c)
+     INTO postgresql:///pgloader?allcols (a, b, c text using (subseq c 0))
 
      WITH fields optionally enclosed by '"',
           fields escaped by double-quote,

From a6ef7a56a99d84d7279ebbdc24585f0aece8a3d5 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sat, 10 Nov 2018 21:01:30 +0100
Subject: [PATCH 28/69] Implement ipv6 hostname support in .pgpass rules.

A hostname can be written [::1] in .pgpass, without having to escape the
colon characters, and with a proper enclosing in square brackets, as is
common for ipv6 addresses.

Fixes #837.
---
 src/parsers/parse-pgpass.lisp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/parsers/parse-pgpass.lisp b/src/parsers/parse-pgpass.lisp
index 82efbee..5f62aba 100644
--- a/src/parsers/parse-pgpass.lisp
+++ b/src/parsers/parse-pgpass.lisp
@@ -14,8 +14,14 @@
 (defrule pgpass-escaped-char (and #\\ (or #\\ #\:))
   (:lambda (c) (second c)))
 
+(defrule pgpass-ipv6-hostname (and #\[
+                                   (+ (or (digit-char-p character) ":"))
+                                   #\])
+  (:lambda (ipv6) (text (second ipv6))))
+
 (defrule pgpass-entry (or "*"
-                          (+ (or pgpass-escaped-char
+                          (+ (or pgpass-ipv6-hostname
+                                 pgpass-escaped-char
                                  (pgpass-char-p character))))
   (:lambda (e) (text e)))
 

From 5ecf04acb910aba17cd5e2001ccb6c8faef37468 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 13 Nov 2018 21:35:48 +0100
Subject: [PATCH 29/69] Implement null if support as a WITH option.

This gives a default "null if" option to all the input columns at once, and
it's still possible to override the default per column.

In passing, fix project-fields declarations that SBCL now complains about
when they're not true, such as declaring a vector when we might have :null
or nil. As a result, remove the (declare (optimize speed)) in the generated
field processing code.
---
 src/parsers/command-csv.lisp           | 52 +++++++++++++++-----------
 src/sources/common/project-fields.lisp |  5 +--
 test/csv-null-if.load                  | 22 +++++++++++
 3 files changed, 55 insertions(+), 24 deletions(-)
 create mode 100644 test/csv-null-if.load

diff --git a/src/parsers/command-csv.lisp b/src/parsers/command-csv.lisp
index c7e4e11..0df8dd3 100644
--- a/src/parsers/command-csv.lisp
+++ b/src/parsers/command-csv.lisp
@@ -134,7 +134,8 @@
                         option-fields-terminated-by
                         option-trim-unquoted-blanks
                         option-keep-unquoted-blanks
-                        option-csv-escape-mode))
+                        option-csv-escape-mode
+                        option-null-if))
 
 (defrule csv-options (and kw-with
                              (and csv-option (* (and comma csv-option))))
@@ -429,26 +430,35 @@
        (progn
          ,(sql-code-block pg-db-conn :pre before "before load")
 
-         (let ((on-error-stop             (getf ',options :on-error-stop))
-               (truncate                  (getf ',options :truncate))
-               (disable-triggers          (getf ',options :disable-triggers))
-               (drop-indexes              (getf ',options :drop-indexes))
-               (max-parallel-create-index (getf ',options :max-parallel-create-index))
-               (source
-                (make-instance 'copy-csv
-                               :target-db  ,pg-db-conn
-                               :source     source-db
-                               :target     (create-table ',target-table-name)
-                               :encoding   ,encoding
-                               :fields    ',fields
-                               :columns   ',columns
-                               ,@(remove-batch-control-option
-                                  options :extras '(:worker-count
-                                                    :concurrency
-                                                    :truncate
-                                                    :drop-indexes
-                                                    :disable-triggers
-                                                    :max-parallel-create-index)))))
+         (let* ((on-error-stop             (getf ',options :on-error-stop))
+                (truncate                  (getf ',options :truncate))
+                (disable-triggers          (getf ',options :disable-triggers))
+                (drop-indexes              (getf ',options :drop-indexes))
+                (max-parallel-create-index (getf ',options :max-parallel-create-index))
+                (fields
+                 ',(let ((null-as (getf options :null-as)))
+                     (if null-as
+                         (mapcar (lambda (field)
+                                   (if (member :null-as field) field
+                                       (append field (list :null-as null-as))))
+                                 fields)
+                         fields)))
+                (source
+                 (make-instance 'copy-csv
+                                :target-db  ,pg-db-conn
+                                :source     source-db
+                                :target     (create-table ',target-table-name)
+                                :encoding   ,encoding
+                                :fields    fields
+                                :columns   ',columns
+                                ,@(remove-batch-control-option
+                                   options :extras '(:null-as
+                                                     :worker-count
+                                                     :concurrency
+                                                     :truncate
+                                                     :drop-indexes
+                                                     :disable-triggers
+                                                     :max-parallel-create-index)))))
            (copy-database source
                           ,@ (when worker-count
                                (list :worker-count worker-count))
diff --git a/src/sources/common/project-fields.lisp b/src/sources/common/project-fields.lisp
index f82d46c..dc47197 100644
--- a/src/sources/common/project-fields.lisp
+++ b/src/sources/common/project-fields.lisp
@@ -115,12 +115,11 @@
                                           sexp))
                               (t      sexp)))))
 		 `(lambda (row)
-		    (declare (optimize speed) (type list row))
+		    (declare (type list row))
 		    (destructuring-bind (&optional ,@args &rest extra) row
 		      (declare (ignorable ,@args) (ignore extra))
                       (let ,values
-                        (declare (ignorable ,@args)
-                                 (type vector ,@args))
+                        (declare (ignorable ,@args))
                         (vector ,@newrow)))))))))
       ;; allow for some debugging
       (if compile (compile nil projection) projection))))
diff --git a/test/csv-null-if.load b/test/csv-null-if.load
new file mode 100644
index 0000000..c35d24b
--- /dev/null
+++ b/test/csv-null-if.load
@@ -0,0 +1,22 @@
+LOAD CSV
+     FROM INLINE (id, number, data)
+     INTO postgresql:///pgloader?nullif
+
+     BEFORE LOAD DO
+      $$ drop table if exists nullif; $$,
+      $$ CREATE TABLE nullif
+         (
+            id     serial primary key,
+            number integer,
+            data   text
+         );
+      $$
+
+     WITH null if '\N',
+          fields terminated by ',',
+          fields enclosed by '"',
+          fields escaped by backslash-quote;
+
+
+"1",\N,"testing nulls"
+"2","2","another test"
\ No newline at end of file

From 16dda01f371f033e0df75d80127643605df7830f Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Thu, 15 Nov 2018 00:13:21 +0100
Subject: [PATCH 30/69] Deal with SSL verify error the wrong way.

This patch adds an option --no-ssl-cert-verification that allows bypassing
OpenSSL server certificate verification. It's hopefully a temporary measure
that we set up in order to make progress when confronted with:

  SSL verify error: 20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY

The real solution is of course to install the SSL certificates at a place
where pgloader will look for them, which defaults to
~/.postgresql/postgresql.crt at the moment. It's not clear what the story is
with the defaults from /etc/ssl, or how to make things happen in a better
way.

See #648, See #679, See #768, See #748, See #775.
---
 src/main.lisp             | 14 ++++++++++++-
 src/pgsql/connection.lisp | 43 +++++++++++++++++++++++++++++----------
 src/utils/threads.lisp    |  6 +++++-
 3 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/src/main.lisp b/src/main.lisp
index a317232..c8e43f7 100644
--- a/src/main.lisp
+++ b/src/main.lisp
@@ -51,6 +51,10 @@
     ("on-error-stop" :type boolean
                      :documentation "Refrain from handling errors properly.")
 
+    ("no-ssl-cert-verification"
+     :type boolean
+     :documentation "Instruct OpenSSL to bypass verifying certificates.")
+
     (("context" #\C) :type string :documentation "Command Context Variables")
 
     (("with") :type string :list t :optional t
@@ -197,6 +201,7 @@
 				client-min-messages log-min-messages summary
 				root-dir self-upgrade
                                 with set field cast type encoding before after
+                                no-ssl-cert-verification
                                 regress)
 	  options
 
@@ -249,11 +254,15 @@
                   (lisp-implementation-type)
                   (lisp-implementation-version)))
 
-	(when help
+	(when (or help)
           (usage argv))
 
 	(when (or help version) (uiop:quit +os-code-success+))
 
+        (when (null arguments)
+          (usage argv)
+          (uiop:quit +os-code-error-usage+))
+
 	(when list-encodings
 	  (show-encodings)
 	  (uiop:quit +os-code-success+))
@@ -316,6 +325,9 @@
                                  (uiop:native-namestring *log-filename*))
                     (log-message :log "Data errors in '~a'~%" *root-dir*)
 
+                    (when no-ssl-cert-verification
+                      (setf cl+ssl:*make-ssl-client-stream-verify-default* nil))
+
                     (cond
                       ((and regress (= 1 (length arguments)))
                        (process-regression-test (first arguments)))
diff --git a/src/pgsql/connection.lisp b/src/pgsql/connection.lisp
index 63af2af..9896559 100644
--- a/src/pgsql/connection.lisp
+++ b/src/pgsql/connection.lisp
@@ -118,7 +118,19 @@
                                          (uiop:native-namestring crt-file)))
          (pomo::*ssl-key-file*         (when (and (ssl-enable-p pgconn)
                                                   (probe-file key-file))
-                                         (uiop:native-namestring key-file))))
+                                         (uiop:native-namestring key-file)))
+         ;;
+         ;; It's ok to set :verify-mode to NONE here because
+         ;; cl+ssl:*make-ssl-client-stream-verify-default* defaults to
+         ;; :require and takes precedence.
+         ;;
+         ;; Only when --no-ssl-cert-verification is passed as a command line
+         ;; option do we set cl+ssl:*make-ssl-client-stream-verify-default*
+         ;; to NIL, then allowing the NONE behaviour set here.
+         ;;
+         (ssl-context
+          (CL+SSL:MAKE-CONTEXT :disabled-protocols nil
+                               :verify-mode CL+SSL:+SSL-VERIFY-NONE+)))
     (flet ((connect (pgconn username)
              (handler-case
                  ;; in some cases (client_min_messages set to debug5
@@ -128,20 +140,29 @@
                                  #'(lambda (w)
                                      (log-message :warning "~a" w)
                                      (muffle-warning))))
-                   (pomo:connect (db-name pgconn)
-                                 (or username (db-user pgconn))
-                                 (db-pass pgconn)
-                                 (let ((host (db-host pgconn)))
-                                   (if (and (consp host) (eq :unix (car host)))
-                                       :unix
-                                       host))
-                                 :port (db-port pgconn)
-                                 :use-ssl (or (pgconn-use-ssl pgconn) :no)))
+                   (CL+SSL:WITH-GLOBAL-CONTEXT (ssl-context :auto-free-p t)
+                    (pomo:connect (db-name pgconn)
+                                  (or username (db-user pgconn))
+                                  (db-pass pgconn)
+                                  (let ((host (db-host pgconn)))
+                                    (if (and (consp host) (eq :unix (car host)))
+                                        :unix
+                                        host))
+                                  :port (db-port pgconn)
+                                  :use-ssl (or (pgconn-use-ssl pgconn) :no))))
+
                ((or too-many-connections configuration-limit-exceeded) (e)
                  (log-message :error
                               "Failed to connect to ~a: ~a; will try again in ~fs"
                               pgconn e *retry-connect-delay*)
-                 (sleep *retry-connect-delay*)))))
+                (sleep *retry-connect-delay*))
+
+               (CL+SSL:SSL-ERROR-VERIFY (e)
+                 (log-message :error
+                              "Connecting to PostgreSQL ~a: ~a"
+                              (db-host pgconn) e)
+                 (log-message :log "You may try --no-ssl-cert-verification")
+                 (error e)))))
       (loop :while (null (conn-handle pgconn))
          :repeat *retry-connect-times*
          :do (setf (conn-handle pgconn) (connect pgconn username))))
diff --git a/src/utils/threads.lisp b/src/utils/threads.lisp
index a2776ce..2581579 100644
--- a/src/utils/threads.lisp
+++ b/src/utils/threads.lisp
@@ -28,6 +28,10 @@
                             ;; bindings updates for libs
                             ;; CFFI is used by the SQLite lib
                             (cffi:*default-foreign-encoding*
-                             . ,cffi:*default-foreign-encoding*))))
+                             . ,cffi:*default-foreign-encoding*)
+
+                            ;; CL+SSL can be picky about verifying certs
+                            (cl+ssl:*make-ssl-client-stream-verify-default*
+                             . ,cl+ssl:*make-ssl-client-stream-verify-default*))))
   "Wrapper around lparallel:make-kernel that sets our usual bindings."
   (lp:make-kernel worker-count :bindings bindings))

From e291c502ba97358e15d27fbd1635ba19c8f495f5 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Thu, 15 Nov 2018 23:59:51 +0100
Subject: [PATCH 31/69] Install a call to cl+ssl:reload at image startup time,
 again.

Testing shows that it's not just Debian that needs it: it's always
necessary. Just re-add our tweak now.

See #866, see #816, see #807, #794.
---
 src/hooks.lisp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/hooks.lisp b/src/hooks.lisp
index f34d405..bb27ea2 100644
--- a/src/hooks.lisp
+++ b/src/hooks.lisp
@@ -30,10 +30,8 @@
     ;; handles some context and things around loading with CFFI.
     (cl+ssl:reload)))
 
-#|
 #+ccl  (push #'open-foreign-libs *lisp-startup-functions*)
 #+sbcl (push #'open-foreign-libs sb-ext:*init-hooks*)
-|#
 
 #+ccl  (push #'close-foreign-libs *save-exit-functions*)
 #+sbcl (push #'close-foreign-libs sb-ext:*save-hooks*)

From 8b1acbae877b1e5a2c5274ff8937d2ffaef8b922 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Fri, 16 Nov 2018 00:03:31 +0100
Subject: [PATCH 32/69] Make sure the image knows how to print circular data
 structures.

Our catalogs representation is designed to be circular, which helps
navigating the graph from anywhere when processing it. This means that we
need to have *print-circle* set to t in the pgloader image, otherwise we
might run into Control stack exhausted when trying to print out debug
information...

Fixes #865, #800, #810, #859, #824.
---
 src/hooks.lisp         | 7 +++++++
 src/utils/threads.lisp | 4 +++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/hooks.lisp b/src/hooks.lisp
index bb27ea2..62c878b 100644
--- a/src/hooks.lisp
+++ b/src/hooks.lisp
@@ -12,6 +12,13 @@
 ;; So that we can #+pgloader-image some code away, see main.lisp
 (push :pgloader-image *features*)
 
+;;;
+;;; We need to support *print-circle* for the debug traces of the catalogs,
+;;; and while at it let's enforce *print-pretty* too.
+;;;
+(setf *print-circle* t *print-pretty* t)
+
+
 (in-package #:cl-user)
 
 (defun close-foreign-libs ()
diff --git a/src/utils/threads.lisp b/src/utils/threads.lisp
index 2581579..bfa6876 100644
--- a/src/utils/threads.lisp
+++ b/src/utils/threads.lisp
@@ -7,7 +7,9 @@
 
 (defun make-kernel (worker-count
 		    &key (bindings
-			  `((*monitoring-queue*   . ,*monitoring-queue*)
+			  `((*print-circle*       . ,*print-circle*)
+                            (*print-pretty*       . ,*print-pretty*)
+                            (*monitoring-queue*   . ,*monitoring-queue*)
                             (*copy-batch-rows*    . ,*copy-batch-rows*)
                             (*copy-batch-size*    . ,*copy-batch-size*)
                             (*rows-per-range*     . ,*rows-per-range*)

From 1fd0576ace1de169a6bdc01f017519e9ed796cde Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Fri, 16 Nov 2018 00:08:27 +0100
Subject: [PATCH 33/69] Fix Citus support related debug print instructions.

---
 src/utils/citus.lisp | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/utils/citus.lisp b/src/utils/citus.lisp
index c1b73b8..65743af 100644
--- a/src/utils/citus.lisp
+++ b/src/utils/citus.lisp
@@ -76,14 +76,16 @@
       (setf (citus-distributed-rule-table rule)
             (citus-find-table catalog table)))))
 
-(defmethod print-object ((table citus-reference-rule) stream)
-  (print-unreadable-object (table stream :type t :identity t)
-    (with-slots (table) table
-      (format stream "distribute ~a as reference" (format-table-name table)))))
+(defmethod print-object ((rule citus-reference-rule) stream)
+  (print-unreadable-object (rule stream :type t :identity t)
+    (with-slots (table) rule
+      (format stream
+              "distribute ~a as reference"
+              (format-table-name table)))))
 
-(defmethod print-object ((table citus-distributed-rule) stream)
-  (print-unreadable-object (table stream :type t :identity t)
-    (with-slots (table using from) table
+(defmethod print-object ((rule citus-distributed-rule) stream)
+  (print-unreadable-object (rule stream :type t :identity t)
+    (with-slots (table using from) rule
       (format stream
               "distribute ~a :using ~a~@[ :from ~{~a~^, ~}~]"
               (format-table-name table)
@@ -231,9 +233,9 @@
 (defun add-column-to-pkey (table column-name)
   "Add COLUMN in the first position of the TABLE's primary key index."
   (let* ((index  (find-if #'index-primary (table-index-list table)))
-         (idxcol (find column-name (index-columns index) :test #'string=)))
-    (assert (not (null index)))
-    (unless idxcol
+         (idxcol (when index
+                   (find column-name (index-columns index) :test #'string=))))
+    (when (and index (null idxcol))
       ;; add a new column
       (push column-name (index-columns index))
       ;; now remove origin schema sql and condef, we need to redo them

From f07ac6126966b570c912fc326678d1acdb2f8763 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sun, 18 Nov 2018 17:46:41 +0100
Subject: [PATCH 34/69] Fix default/serial handling of pgsql as a source.

In the recent patch that added support for Redshift "identity" columns, we
broke support for PostgreSQL sequences. Unbreak that.
---
 src/sources/pgsql/pgsql-cast-rules.lisp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/sources/pgsql/pgsql-cast-rules.lisp b/src/sources/pgsql/pgsql-cast-rules.lisp
index 6c0690e..ea1ef04 100644
--- a/src/sources/pgsql/pgsql-cast-rules.lisp
+++ b/src/sources/pgsql/pgsql-cast-rules.lisp
@@ -36,9 +36,10 @@
                pgloader.catalog::extra)
       field
     (let* ((ctype (pgsql-column-ctype field))
-           (extra (when (and (stringp (column-default field))
-                             (search "identity" (column-default field)))
-                    :auto-increment))
+           (extra (or pgloader.catalog::extra
+                      (when (and (stringp (column-default field))
+                                 (search "identity" (column-default field)))
+                        :auto-increment)))
            (pgcol (apply-casting-rules nil
                                        pgloader.catalog::name
                                        pgloader.catalog::type-name

From aa8ae159e2c5714bd1913d8dc381a74b460719a9 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sun, 18 Nov 2018 18:21:51 +0100
Subject: [PATCH 35/69] Improve error handling when applying Citus distribution
 rules.

Make it so that we generate a proper error message to the user when failing
to figure out the PATH to the distribution key, rather than failing with an
internal error such as "The value NIL is not of type
PGLOADER.CATALOG:TABLE".
---
 src/load/migrate-database.lisp | 13 ++++---
 src/package.lisp               |  3 +-
 src/utils/citus.lisp           | 65 +++++++++++++++++++++++-----------
 3 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index 3037571..db7d096 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -347,10 +347,15 @@
 
     ;; apply catalog level transformations to support the database migration
     ;; that's CAST rules, index WHERE clause rewriting and ALTER commands
-    (process-catalog copy catalog
-                     :alter-table alter-table
-                     :alter-schema alter-schema
-                     :distribute distribute)
+    (handler-case
+        (process-catalog copy catalog
+                         :alter-table alter-table
+                         :alter-schema alter-schema
+                         :distribute distribute)
+
+      (citus-rule-is-missing-from-list (e)
+        (log-message :fatal "~a" e)
+        (return-from copy-database)))
 
     ;; if asked, first drop/create the tables on the PostgreSQL side
     (handler-case
diff --git a/src/package.lisp b/src/package.lisp
index 20820ab..307eb3d 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -298,7 +298,8 @@
         #:pgloader.monitor)
   (:export #:citus-distribute-schema
            #:citus-format-sql-select
-           #:citus-backfill-table-p))
+           #:citus-backfill-table-p
+           #:citus-rule-is-missing-from-list))
 
 (defpackage #:pgloader.utils
   (:use #:cl
diff --git a/src/utils/citus.lisp b/src/utils/citus.lisp
index 65743af..0bcf329 100644
--- a/src/utils/citus.lisp
+++ b/src/utils/citus.lisp
@@ -172,6 +172,18 @@
 ;;; itself to the table-citus-rule slot so that we later know to generate a
 ;;; proper SELECT query that includes the backfilling.
 ;;;
+(define-condition citus-rule-is-missing-from-list (error)
+  ((rule  :initarg :rule :accessor citus-rule))
+  (:report
+   (lambda (err stream)
+     (let ((*print-circle* nil))
+       (format stream
+               "Failed to add column ~s to table ~a for lack of a FROM clause in the distribute rule:~%    distribute ~a using ~a from ?"
+               (column-name (citus-distributed-rule-using (citus-rule err)))
+               (format-table-name (citus-distributed-rule-table (citus-rule err)))
+               (format-table-name (citus-distributed-rule-table (citus-rule err)))
+               (column-name (citus-distributed-rule-using (citus-rule err))))))))
+
 (defgeneric apply-citus-rule (rule)
   (:documentation "Apply a Citus distribution RULE to given TABLE."))
 
@@ -206,28 +218,41 @@
         ;; it up in the last entry of the FROM rule's list.
         (let* ((last-from-rule (car (last (citus-distributed-rule-from rule))))
                (column-definition
-                (find (column-name (citus-distributed-rule-using rule))
-                      (table-field-list last-from-rule)
-                      :test #'string=
-                      :key #'column-name))
+                (when last-from-rule
+                  (find (column-name (citus-distributed-rule-using rule))
+                        (table-field-list last-from-rule)
+                        :test #'string=
+                        :key #'column-name)))
                (new-column
-                (make-column :name (column-name column-definition)
-                             :type-name (column-type-name column-definition)
-                             :nullable (column-nullable column-definition)
-                             :transform (column-transform column-definition))))
-          ;;
-          ;; Here also we need to add the new column to the PKEY definition,
-          ;; in first position.
-          ;;
-          (add-column-to-pkey table (column-name new-column))
+                (when column-definition
+                  (make-column :name (column-name column-definition)
+                               :type-name (column-type-name column-definition)
+                               :nullable (column-nullable column-definition)
+                               :transform (column-transform column-definition)))))
 
-          ;;
-          ;; We need to backfill the distribution key in the data, which
-          ;; we're implementing with a JOIN when we SELECT from the source
-          ;; table. We add the new field here.
-          ;;
-          (push new-column (table-field-list table))
-          (push new-column (table-column-list table))))))
+          (if column-definition
+              (progn
+                ;;
+                ;; Here also we need to add the new column to the PKEY
+                ;; definition, in first position.
+                ;;
+                (add-column-to-pkey table (column-name new-column))
+
+                ;;
+                ;; We need to backfill the distribution key in the data,
+                ;; which we're implementing with a JOIN when we SELECT from
+                ;; the source table. We add the new field here.
+                ;;
+                (push new-column (table-field-list table))
+                (push new-column (table-column-list table)))
+
+              ;;
+              ;; We don't have any table-field-list in the citus rule,
+              ;; meaning that the distribute ... using ... clause is lacking
+              ;; the FROM part, and we need it.
+              ;;
+              (error
+               (make-condition 'citus-rule-is-missing-from-list :rule rule)))))))
 
 
 (defun add-column-to-pkey (table column-name)

From 3f2f10eef1f1899ec2b1d93cd9d77353b14cf6f0 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 19 Nov 2018 19:33:37 +0100
Subject: [PATCH 36/69] Finish implementation of CAST rules for PostgreSQL
 source databases.

Add a link to the table from the internal catalogs for columns so that we
can match table-source-name in cast rules when migrating from PostgreSQL.
---
 src/pgsql/pgsql-schema.lisp             | 3 ++-
 src/sources/pgsql/pgsql-cast-rules.lisp | 5 +++--
 src/utils/catalog.lisp                  | 2 +-
 test/citus/company.load                 | 2 ++
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/pgsql/pgsql-schema.lisp b/src/pgsql/pgsql-schema.lisp
index 0b98459..b2d7a27 100644
--- a/src/pgsql/pgsql-schema.lisp
+++ b/src/pgsql/pgsql-schema.lisp
@@ -185,7 +185,8 @@
      :do
      (let* ((schema    (maybe-add-schema catalog schema-name))
             (table     (maybe-add-table schema table-name :oid table-oid))
-            (field     (make-column :name name
+            (field     (make-column :table table
+                                    :name name
                                     :type-name type
                                     :type-mod typmod
                                     :nullable (not notnull)
diff --git a/src/sources/pgsql/pgsql-cast-rules.lisp b/src/sources/pgsql/pgsql-cast-rules.lisp
index ea1ef04..d7b003c 100644
--- a/src/sources/pgsql/pgsql-cast-rules.lisp
+++ b/src/sources/pgsql/pgsql-cast-rules.lisp
@@ -26,7 +26,8 @@
 (defmethod cast ((field column) &key &allow-other-keys)
   "Return the PostgreSQL type definition from the given PostgreSQL column
    definition"
-  (with-slots (pgloader.catalog::name
+  (with-slots (pgloader.catalog::table
+               pgloader.catalog::name
                pgloader.catalog::type-name
                pgloader.catalog::type-mod
                pgloader.catalog::nullable
@@ -40,7 +41,7 @@
                       (when (and (stringp (column-default field))
                                  (search "identity" (column-default field)))
                         :auto-increment)))
-           (pgcol (apply-casting-rules nil
+           (pgcol (apply-casting-rules (table-source-name pgloader.catalog::table)
                                        pgloader.catalog::name
                                        pgloader.catalog::type-name
                                        ctype
diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp
index baca81a..8b0cd62 100644
--- a/src/utils/catalog.lisp
+++ b/src/utils/catalog.lisp
@@ -71,7 +71,7 @@
 ;;; produce, so that we know how to CREATE TABLEs in PostgreSQL whatever the
 ;;; source is.
 ;;;
-(defstruct column name type-name type-mod nullable default comment
+(defstruct column table name type-name type-mod nullable default comment
            transform extra (transform-default t))
 
 ;;;
diff --git a/test/citus/company.load b/test/citus/company.load
index ef4af21..c2f7ad0 100644
--- a/test/citus/company.load
+++ b/test/citus/company.load
@@ -4,6 +4,8 @@ load database
 
    with include drop, reset no sequences
 
+   cast column impressions.seen_at to "timestamp with time zone"
+
    distribute companies using id
    -- distribute campaigns using company_id
    -- distribute ads using company_id from campaigns

From 1c18b41cd72300abf12d67abfd411f9edf2bcad9 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 20 Nov 2018 22:59:43 +0100
Subject: [PATCH 37/69] Implement a new way of building pgloader: make save.

This time we directly call into the save-lisp-and-die feature of the
implementation. As pgloader only supports SBCL and CCL for the time being,
doing things without an abstraction layer is easy enough.

This needs more testing and a special version for the bundle case too. One
step at a time, etc.
---
 Makefile      |  3 +++
 src/save.lisp | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 src/save.lisp

diff --git a/Makefile b/Makefile
index 06c381a..5708b8d 100644
--- a/Makefile
+++ b/Makefile
@@ -164,6 +164,9 @@ pgloader-standalone:
 test: $(PGLOADER)
 	$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
 
+save: ./src/save.lisp $(LISP_SRC)
+	sbcl --no-userinit --load ./src/save.lisp
+
 clean-bundle:
 	rm -rf $(BUNDLEDIR)
 	rm -rf $(BUNDLETESTD)/$(BUNDLENAME)/*
diff --git a/src/save.lisp b/src/save.lisp
new file mode 100644
index 0000000..3da0967
--- /dev/null
+++ b/src/save.lisp
@@ -0,0 +1,70 @@
+;;;
+;;; Create a build/bin/pgloader executable from the source code, using
+;;; Quicklisp to load pgloader and its dependencies.
+;;;
+
+(in-package #:cl-user)
+
+;; ccl provides an implementation of getenv already.
+#+sbcl
+(defun getenv (name &optional default)
+  "Return the current value for the environment variable NAME, or default
+   when unset."
+  (or (sb-ext:posix-getenv name) default))
+
+(require :asdf)                         ; should work in SBCL and CCL
+
+(defvar *quicklisp.lisp* "http://beta.quicklisp.org/quicklisp.lisp")
+
+(let* ((cwd        (uiop:getcwd))
+       (build-dir  (uiop:merge-pathnames* "build/" cwd))
+       (ql.lisp    (uiop:merge-pathnames* "quicklisp.lisp" build-dir))
+       (qldir      (uiop:merge-pathnames* "quicklisp/" build-dir))
+       (qlsetup    (uiop:merge-pathnames* "setup.lisp" qldir)))
+  ;;
+  ;; We might have to install Quicklisp in build/quicklisp
+  ;;
+  (unless (probe-file qlsetup)
+    (format t "File ~a is not found, installing Quicklisp from ~a~%"
+            qlsetup *quicklisp.lisp*)
+    (uiop:run-program (format nil "curl -o ~a ~a" ql.lisp *quicklisp.lisp*))
+    (load ql.lisp)
+    (let* ((quickstart (find-package "QUICKLISP-QUICKSTART"))
+           (ql-install (find-symbol "INSTALL" quickstart)))
+      (funcall ql-install :path qldir :proxy (getenv "http_proxy"))))
+
+  ;;
+  ;; Now that we have Quicklisp, load it and push our copy of pgloader in
+  ;; ql:*local-project-directories* where Quicklisp will find it.
+  ;;
+  (format t "Loading file ~a~%" qlsetup)
+  (load qlsetup)
+
+  (let* ((ql        (find-package "QL"))
+         (lpd       (find-symbol "*LOCAL-PROJECT-DIRECTORIES*" ql))
+         (quickload (find-symbol "QUICKLOAD" ql)))
+    (push cwd (symbol-value lpd))
+
+    ;;
+    ;; And finally load pgloader and its image-based hooks
+    ;;
+    (format t "Loading system pgloader~%")
+    (funcall quickload :pgloader)
+    (load (asdf:system-relative-pathname :pgloader "src/hooks.lisp"))))
+
+(defun pgloader-image-main ()
+  (let ((argv #+sbcl sb-ext:*posix-argv*
+              #+ccl ccl:*command-line-argument-list*))
+    (pgloader::main argv)))
+
+(let ((image-filename "/Users/dim/dev/pgloader/build/bin/pgloader"))
+  #+ccl
+  (ccl:save-application image-filename
+                        :toplevel-function #'cl-user::pgloader-image-main
+                        :prepend-kernel t)
+  #+sbcl
+  (sb-ext:save-lisp-and-die image-filename
+                            :toplevel #'cl-user::pgloader-image-main
+                            :executable t
+                            :save-runtime-options t
+                            :compression t))

From 743769d750d933eef70f4f148a3b7337630f4dc6 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 21 Nov 2018 10:38:19 +0100
Subject: [PATCH 38/69] Improve handling of errors when fetching the source
 catalogs.

We might have MS SQL failures at this stage, or even Redshift or other
PostgreSQL variants failing to execute our catalog queries. Handle
conditions by cleanly logging them and returning from copy-database without
doing anything. That's the best we can do here.

Fixes #605, fixes #757.
---
 src/load/migrate-database.lisp | 39 ++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index db7d096..2dee25b 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -319,19 +319,32 @@
 
          (copy-kernel  (make-kernel worker-count))
          (copy-channel (let ((lp:*kernel* copy-kernel)) (lp:make-channel)))
-         (catalog      (fetch-metadata
-                        copy
-                        (make-catalog
-                         :name (typecase (source-db copy)
-                                 (db-connection (db-name (source-db copy)))
-                                 (fd-connection (pathname-name
-                                                 (fd-path (source-db copy))))))
-                        :materialize-views materialize-views
-                        :create-indexes create-indexes
-                        :foreign-keys foreign-keys
-                        :only-tables only-tables
-                        :including including
-                        :excluding excluding))
+         (catalog      (handler-case
+                           (fetch-metadata
+                            copy
+                            (make-catalog
+                             :name (typecase (source-db copy)
+                                     (db-connection
+                                      (db-name (source-db copy)))
+                                     (fd-connection
+                                      (pathname-name
+                                       (fd-path (source-db copy))))))
+                            :materialize-views materialize-views
+                            :create-indexes create-indexes
+                            :foreign-keys foreign-keys
+                            :only-tables only-tables
+                            :including including
+                            :excluding excluding)
+                         (mssql::mssql-error (e)
+                           (log-message :error "MSSQL ERROR: ~a" e)
+                           (log-message :log "You might need to review the FreeTDS protocol version in your freetds.conf file, see http://www.freetds.org/userguide/choosingtdsprotocol.htm")
+                           (return-from copy-database))
+                         (condition (e)
+                           (log-message :error
+                                        "ERROR ~a: ~a"
+                                        (conn-type (source-db copy))
+                                        e)
+                           (return-from copy-database))))
          pkeys
          (writers-count (make-hash-table :size (count-tables catalog)))
          (max-indexes   (when create-indexes

From 4ab26e5387066199bd7ab7fe4f7ea18dbdccbc84 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 21 Nov 2018 17:31:11 +0100
Subject: [PATCH 39/69] Handle other conditions in process-catalogs.

It might be that some random condition is signaled during process-catalogs,
causing the errors reported so far, which I can't reproduce. Let's add
some handler-case protection to have more clues about what could be
happening.

See #865, #800, #810, #859, #824.
---
 src/load/migrate-database.lisp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index 2dee25b..689894d 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -341,7 +341,7 @@
                            (return-from copy-database))
                          (condition (e)
                            (log-message :error
-                                        "ERROR ~a: ~a"
+                                        "~a: ~a"
                                         (conn-type (source-db copy))
                                         e)
                            (return-from copy-database))))
@@ -368,6 +368,10 @@
 
       (citus-rule-is-missing-from-list (e)
         (log-message :fatal "~a" e)
+        (return-from copy-database))
+
+      (condition (e)
+        (log-message :fatal "Failed to process catalogs: ~a" e)
         (return-from copy-database)))
 
     ;; if asked, first drop/create the tables on the PostgreSQL side

From 18bcf109037f434859d9f2dc80bb44f9b0a8eeab Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 21 Nov 2018 18:17:34 +0100
Subject: [PATCH 40/69] Blind fix for a strange use-case.

A user reported a case where pgloader fails to find, in its own catalogs,
the table that an index has been created on. That's a weird case. For now,
just issue a warning about the situation and skip the index.
---
 src/sources/mssql/mssql-schema.lisp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/sources/mssql/mssql-schema.lisp b/src/sources/mssql/mssql-schema.lisp
index 97d3d36..89be6ff 100644
--- a/src/sources/mssql/mssql-schema.lisp
+++ b/src/sources/mssql/mssql-schema.lisp
@@ -144,8 +144,14 @@
                                     :columns nil
                                     :filter filter))
             (index
-             (maybe-add-index table index-name pg-index :key #'index-name)))
-       (add-column index colname))
+             (when table
+               (maybe-add-index table index-name pg-index :key #'index-name))))
+       (unless table
+         (log-message :warning
+                      "Failed to find table ~s in schema ~s for index ~s, skipping the index"
+                      table-name schema-name index-name))
+       (when index
+         (add-column index colname)))
      :finally (return catalog)))
 
 (defun list-all-fkeys (catalog &key including excluding)

From 6e325f67e0d3dfacae36c1c81b9478ec6eaebb98 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 21 Nov 2018 21:44:56 +0100
Subject: [PATCH 41/69] Implement the save.lisp idea for the bundle.

This should make it easier to build pgloader with CCL rather than SBCL, all
from the bundle distribution, and also easier to support windows.

In passing, add a new file in the bundle distribution: version.sexp should
contain a CL string containing the pgloader version string.
---
 Makefile         | 10 +++++++---
 bundle/Makefile  |  3 +++
 bundle/save.lisp | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 src/save.lisp    |  8 ++++++--
 4 files changed, 63 insertions(+), 5 deletions(-)
 create mode 100644 bundle/save.lisp

diff --git a/Makefile b/Makefile
index 5708b8d..83c523a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 # pgloader build tool
 APP_NAME   = pgloader
-VERSION    = 3.5.2
+VERSION    = 3.6.0
 
 # use either sbcl or ccl
 CL	   = sbcl
@@ -24,7 +24,7 @@ QLDIR      = $(BUILDDIR)/quicklisp
 MANIFEST   = $(BUILDDIR)/manifest.ql
 LATEST     = $(BUILDDIR)/pgloader-latest.tgz
 
-BUNDLEDIST = 2018-04-30
+BUNDLEDIST = 2018-10-18
 BUNDLENAME = pgloader-bundle-$(VERSION)
 BUNDLEDIR  = $(BUILDDIR)/bundle/$(BUNDLENAME)
 BUNDLE     = $(BUILDDIR)/$(BUNDLENAME).tgz
@@ -182,8 +182,12 @@ $(BUNDLEDIR):
              --eval '(defvar *ql-dist* "$(BUNDLEDIST)")' \
              --load bundle/ql.lisp
 
-$(BUNDLE): $(BUNDLEDIR)
+$(BUNDLEDIR)/version.sexp: $(BUNDLEDIR)
+	echo "\"$(VERSION)\"" > $@
+
+$(BUNDLE): $(BUNDLEDIR) $(BUNDLEDIR)/version.sexp
 	cp bundle/README.md $(BUNDLEDIR)
+	cp bundle/save.lisp $(BUNDLEDIR)
 	sed -e s/%VERSION%/$(VERSION)/ < bundle/Makefile > $(BUNDLEDIR)/Makefile
 	git archive --format=tar --prefix=pgloader-$(VERSION)/ master \
 	     | tar -C $(BUNDLEDIR)/local-projects/ -xf -
diff --git a/bundle/Makefile b/bundle/Makefile
index 246438f..9102bd4 100644
--- a/bundle/Makefile
+++ b/bundle/Makefile
@@ -61,4 +61,7 @@ $(PGLOADER): $(BUILDAPP)
 test: $(PGLOADER)
 	$(MAKE) PGLOADER=$(realpath $(PGLOADER)) -C $(SRCDIR)/test regress
 
+save:
+	sbcl --no-userinit --load ./save.lisp
+
 check: test ;
diff --git a/bundle/save.lisp b/bundle/save.lisp
new file mode 100644
index 0000000..d955b6c
--- /dev/null
+++ b/bundle/save.lisp
@@ -0,0 +1,47 @@
+;;;
+;;; Create a build/bin/pgloader executable from the source code, using
+;;; Quicklisp to load pgloader and its dependencies.
+;;;
+
+(in-package #:cl-user)
+
+(require :asdf)                         ; should work in SBCL and CCL
+
+(let* ((cwd             (uiop:getcwd))
+       (bundle.lisp     (uiop:merge-pathnames* "bundle.lisp" cwd))
+       (version-file    (uiop:merge-pathnames* "version.sexp" cwd))
+       (version-string  (uiop:read-file-form version-file))
+       (asdf:*central-registry* (list cwd)))
+
+  (format t "Loading bundle.lisp~%")
+  (load bundle.lisp)
+
+  (format t "Loading system pgloader ~a~%" version-string)
+  (asdf:load-system :pgloader :verbose nil)
+  (load (asdf:system-relative-pathname :pgloader "src/hooks.lisp"))
+
+  (let* ((pgl            (find-package "PGLOADER"))
+         (version-symbol (find-symbol "*VERSION-STRING*" pgl)))
+    (setf (symbol-value version-symbol) version-string)))
+
+(defun pgloader-image-main ()
+  (let ((argv #+sbcl sb-ext:*posix-argv*
+              #+ccl ccl:*command-line-argument-list*))
+    (pgloader::main argv)))
+
+(let* ((cwd          (uiop:getcwd))
+       (bin-dir      (uiop:merge-pathnames* "bin/" cwd))
+       (bin-filename (uiop:merge-pathnames* "pgloader" bin-dir)))
+
+  (ensure-directories-exist bin-dir)
+
+  #+ccl
+  (ccl:save-application bin-filename
+                        :toplevel-function #'cl-user::pgloader-image-main
+                        :prepend-kernel t)
+  #+sbcl
+  (sb-ext:save-lisp-and-die bin-filename
+                            :toplevel #'cl-user::pgloader-image-main
+                            :executable t
+                            :save-runtime-options t
+                            :compression t))
diff --git a/src/save.lisp b/src/save.lisp
index 3da0967..43b0de2 100644
--- a/src/save.lisp
+++ b/src/save.lisp
@@ -27,7 +27,9 @@
   (unless (probe-file qlsetup)
     (format t "File ~a is not found, installing Quicklisp from ~a~%"
             qlsetup *quicklisp.lisp*)
-    (uiop:run-program (format nil "curl -o ~a ~a" ql.lisp *quicklisp.lisp*))
+    (let ((command (format nil "curl -o ~a ~a" ql.lisp *quicklisp.lisp*)))
+      (format t "Running command: ~a~%" command)
+      (uiop:run-program command))
     (load ql.lisp)
     (let* ((quickstart (find-package "QUICKLISP-QUICKSTART"))
            (ql-install (find-symbol "INSTALL" quickstart)))
@@ -57,7 +59,9 @@
               #+ccl ccl:*command-line-argument-list*))
     (pgloader::main argv)))
 
-(let ((image-filename "/Users/dim/dev/pgloader/build/bin/pgloader"))
+(let* ((cwd            (uiop:getcwd))
+       (build-dir      (uiop:merge-pathnames* "build/bin/" cwd))
+       (image-filename (uiop:merge-pathnames* "pgloader" build-dir)))
   #+ccl
   (ccl:save-application image-filename
                         :toplevel-function #'cl-user::pgloader-image-main

From 801d8a6e0939579b4a00de5011712af4d5738cd0 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Fri, 23 Nov 2018 10:43:58 +0100
Subject: [PATCH 42/69] Add support for MS SQL time data type.

As for the other datetime types we have to use CONVERT at the SQL level in
order to get a format that PostgreSQL understands. This time the magic
number for it is 114.
---
 src/monkey/mssql.lisp               | 2 +-
 src/sources/mssql/mssql-schema.lisp | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/monkey/mssql.lisp b/src/monkey/mssql.lisp
index ce2abf6..5790b17 100644
--- a/src/monkey/mssql.lisp
+++ b/src/monkey/mssql.lisp
@@ -95,7 +95,7 @@
              (:syb-int8 (mem-ref data :int8))
              (:syb-real (mem-ref data :float))
              (:syb-flt8 (mem-ref data :double))
-             ((:syb-datetime :syb-datetime4 :syb-msdate)
+             ((:syb-datetime :syb-datetime4 :syb-msdate :syb-mstime)
               (with-foreign-pointer (%buf +numeric-buf-sz+)
                 (let ((count
                        (%dbconvert %dbproc
diff --git a/src/sources/mssql/mssql-schema.lisp b/src/sources/mssql/mssql-schema.lisp
index 89be6ff..c743647 100644
--- a/src/sources/mssql/mssql-schema.lisp
+++ b/src/sources/mssql/mssql-schema.lisp
@@ -201,6 +201,7 @@
 
    Mostly we just use the name, and make try to avoid parsing dates."
   (case (intern (string-upcase type) "KEYWORD")
+    (:time           (format nil "convert(varchar, [~a], 114)" name))
     (:datetime       (format nil "convert(varchar, [~a], 126)" name))
     (:smalldatetime  (format nil "convert(varchar, [~a], 126)" name))
     (:date           (format nil "convert(varchar, [~a], 126)" name))

From ab2cadff24f58c933b2c6afd29604c5e938eb8c7 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Fri, 30 Nov 2018 15:38:31 +0100
Subject: [PATCH 43/69] Simplify the regular expresion parsing the PostgreSQL
 version string.

The debian/Ubuntu packaging would defeat the quite simple regexp parsing
PostgreSQL version string that we have in pgloader. To make it more robust,
make it more open to unforeseen strings.

See #800, see #810.
---
 src/pgsql/connection.lisp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pgsql/connection.lisp b/src/pgsql/connection.lisp
index 9896559..404b451 100644
--- a/src/pgsql/connection.lisp
+++ b/src/pgsql/connection.lisp
@@ -410,10 +410,11 @@
 ;;;
 ;;;  PostgreSQL 8.0.2 on i686-pc-linux-gnu, compiled by GCC gcc (GCC) 3.4.2 20041017 (Red Hat 3.4.2-6.fc3), Redshift 1.0.2058
 ;;;  PostgreSQL 10.1 on x86_64-apple-darwin14.5.0, compiled by Apple LLVM version 7.0.0 (clang-700.1.76), 64-bit
+;;;  PostgreSQL 10.6 (Ubuntu 10.6-1.pgdg14.04+1) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 4.8.4-2ubuntu1~14.04.4) 4.8.4, 64-bit
 (defun parse-postgresql-version-string (version-string)
   "Parse PostgreSQL select version() output."
   (cl-ppcre:register-groups-bind (full-version maybe-variant)
-      ("PostgreSQL ([0-9.]+) on .*, [^,]+, (.*)" version-string)
+      ("PostgreSQL ([0-9.]+) [^,]+, [^,]+, (.*)" version-string)
     (let* ((version-dots  (split-sequence:split-sequence #\. full-version))
            (major-version (if (= 3 (length version-dots))
                               (format nil "~a.~a"

From a939d20dff19aa3fbfb0ea3d4360f43dbd6537ae Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sat, 1 Dec 2018 21:27:26 +0100
Subject: [PATCH 44/69] Unquote names when searching for an index column name
 in its table.

If the source database is using a keyword (such as "order") as a column
name, then pgloader is going to quote this column name in its internal
catalogs. In that case, unquote the column in the pgloader catalogs when
matching it against the unquoted column name we have in the index
definition.

Fixes #872.
---
 src/pgsql/pgsql-ddl.lisp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/pgsql/pgsql-ddl.lisp b/src/pgsql/pgsql-ddl.lisp
index 662bc95..008fe7d 100644
--- a/src/pgsql/pgsql-ddl.lisp
+++ b/src/pgsql/pgsql-ddl.lisp
@@ -266,7 +266,9 @@
                                  :collect (column-type-name
                                            (find idx-col tbl-cols
                                                  :test #'string-equal
-                                                 :key #'column-name))))
+                                                 :key (lambda (col)
+                                                        (ensure-unquoted
+                                                         (column-name col)))))))
                   (nobtree (catalog-types-without-btree
                             (schema-catalog (table-schema (index-table index))))))
              (let* ((idx-type (first idx-types))

From af2995b91804aa52e90f3845b8a46cd46ac66504 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sun, 2 Dec 2018 00:17:26 +0100
Subject: [PATCH 45/69] Apply quoting rules to SQLite index column names.

The previous fix was wrong for missing the point: rather than unquote column
names in the table definition when matching the column names in the index
definition, we should in the first place have quoted the index column names
when needed.

Fixes #872 for real this time.
---
 src/pgsql/pgsql-ddl.lisp              | 4 +---
 src/sources/sqlite/sqlite-schema.lisp | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/pgsql/pgsql-ddl.lisp b/src/pgsql/pgsql-ddl.lisp
index 008fe7d..662bc95 100644
--- a/src/pgsql/pgsql-ddl.lisp
+++ b/src/pgsql/pgsql-ddl.lisp
@@ -266,9 +266,7 @@
                                  :collect (column-type-name
                                            (find idx-col tbl-cols
                                                  :test #'string-equal
-                                                 :key (lambda (col)
-                                                        (ensure-unquoted
-                                                         (column-name col)))))))
+                                                 :key #'column-name))))
                   (nobtree (catalog-types-without-btree
                             (schema-catalog (table-schema (index-table index))))))
              (let* ((idx-type (first idx-types))
diff --git a/src/sources/sqlite/sqlite-schema.lisp b/src/sources/sqlite/sqlite-schema.lisp
index 7174fea..17f05ba 100644
--- a/src/sources/sqlite/sqlite-schema.lisp
+++ b/src/sources/sqlite/sqlite-schema.lisp
@@ -150,7 +150,7 @@
   "Return the list of columns in INDEX-NAME."
   (let ((sql (format nil (sql "/sqlite/list-index-cols.sql") index-name)))
     (loop :for (index-pos table-pos col-name) :in (sqlite:execute-to-list db sql)
-       :collect col-name)))
+       :collect (apply-identifier-case col-name))))
 
 (defun list-indexes (table &optional (db *sqlite-db*))
   "Return the list of indexes attached to TABLE."

From 56d24de67a3a31a92f514ff1db5eb686ec2d0a63 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 11 Dec 2018 14:25:08 +0900
Subject: [PATCH 46/69] Update documentation with new features.

We have a lot of new features to document. This is a first patch about that,
some more work is to be done. That said, it's better than nothing already.
---
 docs/index.rst                     |   4 +
 docs/intro.rst                     |   8 +
 docs/pgloader.rst                  |  28 +++
 docs/ref/mysql.rst                 |  15 +-
 docs/ref/pgsql-citus-target.rst    |  77 ++++++
 docs/ref/pgsql-redshift-source.rst |  12 +
 docs/ref/pgsql-redshift-target.rst |  10 +
 docs/ref/pgsql.rst                 | 371 +++++++++++++++++++++++++++++
 8 files changed, 521 insertions(+), 4 deletions(-)
 create mode 100644 docs/ref/pgsql-citus-target.rst
 create mode 100644 docs/ref/pgsql-redshift-source.rst
 create mode 100644 docs/ref/pgsql-redshift-target.rst
 create mode 100644 docs/ref/pgsql.rst

diff --git a/docs/index.rst b/docs/index.rst
index d69915e..3fb2f9a 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -22,6 +22,10 @@ Welcome to pgloader's documentation!
    ref/mysql
    ref/sqlite
    ref/mssql
+   ref/pgsql
+   ref/pgsql-citus-target
+   ref/pgsql-redshift-source
+   ref/pgsql-redshift-target
    ref/transforms
    bugreport
 
diff --git a/docs/intro.rst b/docs/intro.rst
index 0dc75e2..2a098d9 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -17,6 +17,14 @@ pgloader knows how to read data from different kind of sources:
     * SQLite
     * MySQL
     * MS SQL Server
+    * PostgreSQL
+    * Redshift
+
+pgloader knows how to target different products using the PostgreSQL protocol:
+
+  * PostgreSQL
+  * `Citus <https://www.citusdata.com>`_
+  * Redshift
 
 The level of automation provided by pgloader depends on the data source
 type. In the case of CSV and Fixed Format files, a full description of the
diff --git a/docs/pgloader.rst b/docs/pgloader.rst
index 4a8cade..00fa186 100644
--- a/docs/pgloader.rst
+++ b/docs/pgloader.rst
@@ -154,6 +154,18 @@ Those options are meant to tweak `pgloader` behavior when loading data.
     machine code) another version of itself, usually a newer one like a very
     recent git checkout.
 
+  * `--no-ssl-cert-verification`
+
+    Uses the OpenSSL option to accept a locally issued server-side
+    certificate, avoiding the following error message::
+
+      SSL verify error: 20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY
+
+    The right way to fix the SSL issue is to use a trusted certificate, of
+    course. Sometimes though it's useful to make progress with the pgloader
+    setup while the certificate chain of trust is being fixed, maybe by
+    another team. That's when this option is useful.
+
 Command Line Only Operations
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -552,6 +564,22 @@ queries from a SQL file. Implements support for PostgreSQL dollar-quoting
 and the `\i` and `\ir` include facilities as in `psql` batch mode (where
 they are the same thing).
 
+AFTER CREATE SCHEMA DO
+^^^^^^^^^^^^^^^^^^^^^^
+
+Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that
+section are executed once the schema has been created by pgloader, and
+before the data is loaded. It's the right time to ALTER TABLE or do some
+custom implementation on-top of what pgloader does, like maybe partitioning.
+
+AFTER CREATE SCHEMA EXECUTE
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Same behaviour as in the *AFTER CREATE SCHEMA DO* clause. Allows you to read
+the SQL queries from a SQL file. Implements support for PostgreSQL
+dollar-quoting and the `\i` and `\ir` include facilities as in `psql` batch
+mode (where they are the same thing).
+
 Connection String
 ^^^^^^^^^^^^^^^^^
 
diff --git a/docs/ref/mysql.rst b/docs/ref/mysql.rst
index dec33a9..a55f5d7 100644
--- a/docs/ref/mysql.rst
+++ b/docs/ref/mysql.rst
@@ -1,10 +1,9 @@
 Migrating a MySQL Database to PostgreSQL
 ========================================
 
-This command instructs pgloader to load data from a database connection. The
-only supported database source is currently *MySQL*, and pgloader supports
-dynamically converting the schema of the source database and the indexes
-building.
+This command instructs pgloader to load data from a database connection.
+pgloader supports dynamically converting the schema of the source database
+and the indexes building.
 
 A default set of casting rules are provided and might be overloaded and
 appended to by the command.
@@ -609,6 +608,14 @@ Date::
     to timestamptz drop default
 	using zero-dates-to-null
 
+  type datetime with extra on update current timestamp when not null
+    to timestamptz drop not null drop default
+       using zero-dates-to-null
+
+  type datetime with extra on update current timestamp
+    to timestamptz drop default
+       using zero-dates-to-null
+
   type timestamp when default "0000-00-00 00:00:00" and not null
     to timestamptz drop not null drop default
 	using zero-dates-to-null
diff --git a/docs/ref/pgsql-citus-target.rst b/docs/ref/pgsql-citus-target.rst
new file mode 100644
index 0000000..257e081
--- /dev/null
+++ b/docs/ref/pgsql-citus-target.rst
@@ -0,0 +1,77 @@
+Migrating a PostgreSQL Database to Citus
+========================================
+
+This command instructs pgloader to load data from a database connection.
+Automatic discovery of the schema is supported, including build of the
+indexes, primary and foreign keys constraints. A default set of casting
+rules are provided and might be overloaded and appended to by the command.
+
+Automatic distribution column backfilling is supported, either from commands
+that specify what is the distribution column in every table, or only in the
+main table, then relying on foreign key constraints to discover the other
+distribution keys.
+
+Here's a short example of migrating a database from a PostgreSQL server to
+another:
+
+::
+
+   load database
+   from pgsql:///hackathon
+   into pgsql://localhost:9700/dim
+
+   with include drop, reset no sequences
+
+   cast column impressions.seen_at to "timestamp with time zone"
+
+   distribute companies using id
+   -- distribute campaigns using company_id
+   -- distribute ads using company_id from campaigns
+   -- distribute clicks using company_id from ads, campaigns
+   -- distribute impressions using company_id from ads, campaigns
+   ;
+
+Everything works exactly the same way as when doing a PostgreSQL to
+PostgreSQL migration, with the added functionality of this new `distribute`
+command.
+
+Distribute Command
+^^^^^^^^^^^^^^^^^^
+
+The distribute command syntax is as follows::
+
+  distribute <table name> using <column name>
+  distribute <table name> using <column name> from <table> [, <table>, ...]
+  distribute <table name> as reference table
+
+When using the distribute command, the following steps are added to pgloader
+operations when migrating the schema:
+
+  - if the distribution column does not exist in the table, it is added as
+    the first column of the table
+
+  - if the distribution column does not exist in the primary key of the
+    table, it is added as the first column of the primary key of the table
+
+  - all the foreign keys that point to the table are added the distribution
+    key automatically too, including the source tables of the foreign key
+    constraints
+  
+  - once the schema has been created on the target database, pgloader then
+    issues Citus specific command `create_reference_table()
+    <http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-reference-table>`_
+    and `create_distributed_table()
+    <http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-distributed-table>`_
+    to make the tables distributed
+
+Those operations are done in the schema section of pgloader, before the data
+is loaded. When the data is loaded, the newly added columns need to be
+backfilled from referenced data. pgloader knows how to do that by generating
+a query like the following and importing the result set of such a query
+rather than the raw data from the source table.
+
+Citus Migration: Limitations
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The way pgloader implements *reset sequences* does not work with Citus at
+this point, so sequences need to be taken care of separately for now.
diff --git a/docs/ref/pgsql-redshift-source.rst b/docs/ref/pgsql-redshift-source.rst
new file mode 100644
index 0000000..b69b6d9
--- /dev/null
+++ b/docs/ref/pgsql-redshift-source.rst
@@ -0,0 +1,12 @@
+Migrating a Redshift Database to PostgreSQL
+===========================================
+
+This command instructs pgloader to load data from a database connection.
+Automatic discovery of the schema is supported, including build of the
+indexes, primary and foreign keys constraints. A default set of casting
+rules are provided and might be overloaded and appended to by the command.
+
+The command and behavior are the same as when migrating from a PostgreSQL
+database source. pgloader automatically discovers that it's talking to a
+Redshift database by parsing the output of the `SELECT version()` SQL query.
+
diff --git a/docs/ref/pgsql-redshift-target.rst b/docs/ref/pgsql-redshift-target.rst
new file mode 100644
index 0000000..50cc356
--- /dev/null
+++ b/docs/ref/pgsql-redshift-target.rst
@@ -0,0 +1,10 @@
+Migrating a PostgreSQL Database to Redshift
+===========================================
+
+This command instructs pgloader to load data from a database connection.
+Automatic discovery of the schema is supported, including build of the
+indexes, primary and foreign keys constraints. A default set of casting
+rules are provided and might be overloaded and appended to by the command.
+
+
+TODO: add details about S3 credentials and bucket configuration.
diff --git a/docs/ref/pgsql.rst b/docs/ref/pgsql.rst
new file mode 100644
index 0000000..d233ffa
--- /dev/null
+++ b/docs/ref/pgsql.rst
@@ -0,0 +1,371 @@
+Migrating a PostgreSQL Database to PostgreSQL
+=============================================
+
+This command instructs pgloader to load data from a database connection.
+Automatic discovery of the schema is supported, including build of the
+indexes, primary and foreign keys constraints. A default set of casting
+rules are provided and might be overloaded and appended to by the command.
+
+Here's a short example of migrating a database from a PostgreSQL server to
+another:
+
+::
+
+   load database
+     from pgsql://localhost/pgloader
+     into pgsql://localhost/copy
+  
+   including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
+   including only table names matching ~/geolocations/ in schema 'public'
+   ;
+
+PostgreSQL Database Source Specification: FROM
+----------------------------------------------
+
+Must be a connection URL pointing to a PostgreSQL database.
+
+See the `SOURCE CONNECTION STRING` section above for details on how to write
+the connection string. 
+
+::
+
+    pgsql://[user[:password]@][netloc][:port][/dbname][?option=value&...]
+
+
+PostgreSQL Database Migration Options: WITH
+-------------------------------------------
+
+When loading from a `PostgreSQL` database, the following options are
+supported, and the default *WITH* clause is: *no truncate*, *create schema*,
+*create tables*, *include drop*, *create indexes*, *reset sequences*,
+*foreign keys*, *downcase identifiers*, *uniquify index names*, *reindex*.
+
+  - *include drop*
+
+    When this option is listed, pgloader drops all the tables in the target
+    PostgreSQL database whose names appear in the source database. This
+    option allows for using the same command several times in a row until
+    you figure out all the options, starting automatically from a clean
+    environment. Please note that `CASCADE` is used to ensure that tables
+    are dropped even if there are foreign keys pointing to them. This is
+    precisely what `include drop` is intended to do: drop all target tables
+    and recreate them.
+
+    Great care needs to be taken when using `include drop`, as it will
+    cascade to *all* objects referencing the target tables, possibly
+    including other tables that are not being loaded from the source DB.
+
+  - *include no drop*
+
+    When this option is listed, pgloader will not include any `DROP`
+    statement when loading the data.
+
+  - *truncate*
+
+    When this option is listed, pgloader issues the `TRUNCATE` command
+    against each PostgreSQL table just before loading data into it.
+
+  - *no truncate*
+
+    When this option is listed, pgloader issues no `TRUNCATE` command.
+
+  - *disable triggers*
+
+    When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
+    TRIGGER ALL` command against the PostgreSQL target table before copying
+    the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
+    `COPY` is done.
+
+    This option allows loading data into a pre-existing table ignoring the
+    *foreign key constraints* and user defined triggers and may result in
+    invalid *foreign key constraints* once the data is loaded. Use with
+    care.
+
+  - *create tables*
+
+    When this option is listed, pgloader creates the table using the meta
+    data found in the source database, which must contain a list of fields
+    with their data type. A standard data type conversion from the source
+    database to PostgreSQL is done.
+
+  - *create no tables*
+
+    When this option is listed, pgloader skips the creation of table before
+    loading data, target tables must then already exist.
+
+    Also, when using *create no tables* pgloader fetches the metadata from
+    the current target database and checks type casting, then will remove
+    constraints and indexes prior to loading the data and install them back
+    again once the loading is done.
+
+  - *create indexes*
+
+    When this option is listed, pgloader gets the definitions of all the
+    indexes found in the source database and creates the same set of index
+    definitions against the PostgreSQL database.
+
+  - *create no indexes*
+
+    When this option is listed, pgloader skips creating indexes.
+        
+  - *drop indexes*
+  
+    When this option is listed, pgloader drops the indexes in the target
+    database before loading the data, and creates them again at the end
+    of the data copy.
+
+  - *reindex*
+
+    When this option is used, pgloader does both *drop indexes* before
+    loading the data and *create indexes* once data is loaded.
+
+  - *drop schema*
+  
+    When this option is listed, pgloader drops the target schema in the
+    target PostgreSQL database before creating it again and all the objects
+    it contains. The default behavior doesn't drop the target schemas.
+
+  - *foreign keys*
+
+    When this option is listed, pgloader gets the definitions of all the
+    foreign keys found in the source database and creates the same set of
+    foreign key definitions against the PostgreSQL database.
+
+  - *no foreign keys*
+
+    When this option is listed, pgloader skips creating foreign keys.
+
+  - *reset sequences*
+
+    When this option is listed, at the end of the data loading and after the
+    indexes have all been created, pgloader resets all the PostgreSQL
+    sequences created to the current maximum value of the column they are
+    attached to.
+
+    The options *schema only* and *data only* have no effects on this
+    option.
+
+  - *reset no sequences*
+
+    When this option is listed, pgloader skips resetting sequences after the
+    load.
+
+    The options *schema only* and *data only* have no effects on this
+    option.
+
+  - *downcase identifiers*
+
+    When this option is listed, pgloader converts all source identifiers
+    (table names, index names, column names) to *downcase*, except for
+    PostgreSQL *reserved* keywords.
+
+    The PostgreSQL *reserved* keywords are determined dynamically by using
+    the system function `pg_get_keywords()`.
+
+  - *quote identifiers*
+
+    When this option is listed, pgloader quotes all source identifiers so
+    that their case is respected. Note that you will then have to do the
+    same thing in your application code queries.
+
+  - *schema only*
+
+    When this option is listed pgloader refrains from migrating the data
+    over. Note that the schema in this context includes the indexes when the
+    option *create indexes* has been listed.
+
+  - *data only*
+
+    When this option is listed pgloader only issues the `COPY` statements,
+    without doing any other processing.
+
+  - *rows per range*
+  
+    How many rows are fetched per `SELECT` query when using *multiple
+    readers per thread*, see above for details.
+
+PostgreSQL Database Casting Rules
+---------------------------------
+
+The command *CAST* introduces user-defined casting rules.
+
+The cast clause allows to specify custom casting rules, either to overload
+the default casting rules or to amend them with special cases.
+
+A casting rule is expected to follow one of the forms::
+
+    type <type-name> [ <guard> ... ] to <pgsql-type-name> [ <option> ... ]
+    column <table-name>.<column-name> [ <guards> ] to ...
+
+It's possible for a *casting rule* to either match against a PostgreSQL data
+type or against a given *column name* in a given *table name*. So it's
+possible to migrate a table from a PostgreSQL database while changing an
+`int` column to a `bigint` one, automatically.
+
+The *casting rules* are applied in order: the first match prevents any
+following rule from being applied, and user defined rules are evaluated first.
+
+The supported guards are:
+
+  - *when default 'value'*
+
+    The casting rule is only applied against PostgreSQL columns of the
+    source type that have the given default *value*, which must be a
+    single-quoted or a double-quoted string.
+
+  - *when typemod expression*
+
+    The casting rule is only applied against columns of the source
+    type that have a *typemod* value matching the given *typemod
+    expression*. The *typemod* is separated into its *precision* and *scale*
+    components.
+
+    Example of a cast rule using a *typemod* guard::
+
+      type char when (= precision 1) to char keep typemod
+
+    This expression casts MySQL `char(1)` column to a PostgreSQL column of
+    type `char(1)` while allowing for the general case `char(N)` will be
+    converted by the default cast rule into a PostgreSQL type `varchar(N)`.
+
+  - *with extra auto_increment*
+
+    The casting rule is only applied against PostgreSQL columns attached to a
+    sequence. This can be the result of doing that manually, using a
+    `serial` or a `bigserial` data type, or an `identity` column.
+
+
+The supported casting options are:
+
+  - *drop default*, *keep default*
+
+    When the option *drop default* is listed, pgloader drops any
+    existing default expression in the source database for columns of the
+    source type from the `CREATE TABLE` statement it generates.
+
+    The spelling *keep default* explicitly prevents that behaviour and
+    can be used to overload the default casting rules.
+
+  - *drop not null*, *keep not null*, *set not null*
+
+    When the option *drop not null* is listed, pgloader drops any
+    existing `NOT NULL` constraint associated with the given source
+    datatype when it creates the tables in the PostgreSQL
+    database.
+
+    The spelling *keep not null* explicitly prevents that behaviour and
+    can be used to overload the default casting rules.
+
+    When the option *set not null* is listed, pgloader sets a `NOT NULL`
+    constraint on the target column regardless of whether it has been set
+    in the source column.
+
+  - *drop typemod*, *keep typemod*
+
+    When the option *drop typemod* is listed, pgloader drops any
+    existing *typemod* definition (e.g. *precision* and *scale*) from
+    the datatype definition found in the columns of the source
+    type when it creates the tables in the PostgreSQL database.
+
+    The spelling *keep typemod* explicitly prevents that behaviour and
+    can be used to overload the default casting rules.
+
+  - *using*
+
+    This option takes as its single argument the name of a function to
+    be found in the `pgloader.transforms` Common Lisp package. See above
+    for details.
+
+    It's possible to augment a default cast rule (such as one that
+    applies against `ENUM` data type for example) with a *transformation
+    function* by omitting entirely the `type` parts of the casting rule,
+    as in the following example::
+
+      column enumerate.foo using empty-string-to-null
+
+PostgreSQL Partial Migration
+----------------------------
+
+INCLUDING ONLY TABLE NAMES MATCHING
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Introduce a comma separated list of table names or *regular expression* used
+to limit the tables to migrate to a sublist.
+
+Example::
+
+  including only table names matching ~/film/, 'actor' in schema 'public'
+
+EXCLUDING TABLE NAMES MATCHING
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Introduce a comma separated list of table names or *regular expression* used
+to exclude table names from the migration. This filter only applies to the
+result of the *INCLUDING* filter.
+
+::
+  
+  excluding table names matching ~<ory> in schema 'public'
+
+PostgreSQL Schema Transformations
+---------------------------------
+    
+ALTER TABLE NAMES MATCHING
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Introduce a comma separated list of table names or *regular expressions*
+that you want to target in the pgloader *ALTER TABLE* command. The available
+actions are *SET SCHEMA*, *RENAME TO*, and *SET ()*; the first two take a
+quoted string as parameter::
+
+    ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
+     SET SCHEMA 'mv'
+   
+    ALTER TABLE NAMES MATCHING 'film' RENAME TO 'films'
+    
+    ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40')
+
+You can use as many such rules as you need. The list of tables to be
+migrated is searched in pgloader memory against the *ALTER TABLE* matching
+rules, and for each command pgloader stops at the first matching criteria
+(regexp or string).
+
+No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
+the level of the pgloader in-memory representation of your source database
+schema. In case of a name change, the mapping is kept and reused in the
+*foreign key* and *index* support.
+
+The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
+command that pgloader will run when it has to create a table.
+
+PostgreSQL Migration: limitations
+---------------------------------
+
+The only PostgreSQL objects supported at this time in pgloader are
+extensions, schema, tables, indexes and constraints. Anything else is ignored.
+
+  - Views are not migrated,
+
+    Supporting views might require implementing a full SQL parser for the
+    MySQL dialect with a porting engine to rewrite the SQL against
+    PostgreSQL, including renaming functions and changing some constructs.
+
+    While it's not theoretically impossible, don't hold your breath.
+
+  - Triggers are not migrated
+
+    The difficulty of doing so is not yet assessed.
+
+  - Stored Procedures and Functions are not migrated.
+
+
+Default PostgreSQL Casting Rules
+--------------------------------
+
+When migrating from PostgreSQL the following Casting Rules are provided::
+
+  type int with extra auto_increment to serial
+  type bigint with extra auto_increment to bigserial
+  type "character varying" to text drop typemod
+
+

From b6de8f1eadcbc55c5a80ac4f8ecdee954326b925 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 12 Dec 2018 09:34:05 +0900
Subject: [PATCH 47/69] Improve Citus documentation.

---
 docs/ref/pgsql-citus-target.rst | 119 ++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/docs/ref/pgsql-citus-target.rst b/docs/ref/pgsql-citus-target.rst
index 257e081..f6397d1 100644
--- a/docs/ref/pgsql-citus-target.rst
+++ b/docs/ref/pgsql-citus-target.rst
@@ -70,6 +70,125 @@ backfilled from referenced data. pgloader knows how to do that by generating
 a query like the following and importing the result set of such a query
 rather than the raw data from the source table.
 
+Citus Migration Example
+^^^^^^^^^^^^^^^^^^^^^^^
+
+With the migration command as above, pgloader adds the column ``company_id``
+to the tables that have a direct or indirect foreign key reference to the
+``companies`` table.
+
+We run pgloader using the following command, where the file
+`./test/citus/company.load
+<https://github.com/dimitri/pgloader/blob/master/test/citus/company.load>`_
+contains the pgloader command as shown above.
+
+::
+   
+   $ pgloader --client-min-messages sql ./test/citus/company.load
+
+The following SQL statements are all extracted from the log messages that
+the pgloader command outputs. We are going to have a look at the
+`impressions` table. It gets created with a new column `company_id` in the
+first position, as follows:
+
+::
+   
+   CREATE TABLE "public"."impressions" 
+   (
+     company_id                bigint,
+     "id"                      bigserial,
+     "ad_id"                   bigint default NULL,
+     "seen_at"                 timestamp with time zone default NULL,
+     "site_url"                text default NULL,
+     "cost_per_impression_usd" numeric(20,10) default NULL,
+     "user_ip"                 inet default NULL,
+     "user_data"               jsonb default NULL
+   );
+
+The original schema for this table does not have the `company_id` column,
+which means pgloader now needs to change the primary key definition and the
+foreign key constraint definitions from and to this table, and also to
+*backfill* the `company_id` data to this table when doing the COPY phase of
+the migration.
+
+Then once the tables have been created, pgloader executes the following SQL
+statements::
+
+  SELECT create_distributed_table('"public"."companies"', 'id');
+  SELECT create_distributed_table('"public"."campaigns"', 'company_id');
+  SELECT create_distributed_table('"public"."ads"', 'company_id');
+  SELECT create_distributed_table('"public"."clicks"', 'company_id');
+  SELECT create_distributed_table('"public"."impressions"', 'company_id');
+
+Then when copying the data from the source PostgreSQL database to the new
+Citus tables, the new column (here ``company_id``) needs to be backfilled
+from the source tables. Here's the SQL query that pgloader uses as a data
+source for the ``ads`` table in our example:
+
+::
+
+  SELECT "campaigns".company_id::text, "ads".id::text, "ads".campaign_id::text,
+         "ads".name::text, "ads".image_url::text, "ads".target_url::text,
+         "ads".impressions_count::text, "ads".clicks_count::text,
+         "ads".created_at::text, "ads".updated_at::text
+         
+    FROM       "public"."ads"
+         JOIN "public"."campaigns"
+           ON ads.campaign_id = campaigns.id    
+
+The ``impressions`` table has an indirect foreign key reference to the
+``companies`` table, which is the table where the distribution key is
+specified. pgloader will discover that itself from walking the PostgreSQL
+catalogs, and you may also use the following specification in the pgloader
+command to explicitly add the indirect dependency:
+
+::
+   
+   distribute impressions using company_id from ads, campaigns
+
+Given this schema, the SQL query used by pgloader to fetch the data for the
+`impressions` table is the following, implementing online backfilling of the
+data:
+   
+::
+   
+   SELECT "campaigns".company_id::text, "impressions".id::text,
+          "impressions".ad_id::text, "impressions".seen_at::text,
+          "impressions".site_url::text,
+          "impressions".cost_per_impression_usd::text,
+          "impressions".user_ip::text,
+          "impressions".user_data::text
+
+     FROM      "public"."impressions"
+
+          JOIN "public"."ads"
+            ON impressions.ad_id = ads.id
+
+          JOIN "public"."campaigns"
+            ON ads.campaign_id = campaigns.id
+
+When the data copying is done, pgloader also has to install the indexes
+supporting the primary keys, and add the foreign key definitions to the
+schema. Those definitions are not the same as in the source schema: because
+the distribution column has been added to the table, we also need to add
+that column to the primary key and the foreign key constraints.
+
+Here are the commands issued by pgloader for the ``impressions`` table:
+
+::
+   
+   CREATE UNIQUE INDEX "impressions_pkey"
+       ON "public"."impressions" (company_id, id);
+
+   ALTER TABLE "public"."impressions"
+     ADD CONSTRAINT "impressions_ad_id_fkey"
+        FOREIGN KEY(company_id,ad_id)
+         REFERENCES "public"."ads"(company_id,id)
+
+Given a single line of specification ``distribute companies using id`` then
+pgloader implements all the necessary schema changes on the fly when
+migrating to Citus, and also dynamically backfills the data.
+         
 Citus Migration: Limitations
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

From f72afeeae7f74fca00e58cf894a5218109ea5f62 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 12 Dec 2018 09:34:20 +0900
Subject: [PATCH 48/69] Switch the documentation to the ReadTheDocs template.

---
 docs/conf.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/conf.py b/docs/conf.py
index e5a9e4f..0e689e9 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -83,7 +83,8 @@ todo_include_todos = False
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'alabaster'
+#html_theme = 'alabaster'
+html_theme = 'sphinx_rtd_theme'
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the

From 007003647d6bd16726917f44d635c5d7902c0104 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Fri, 14 Dec 2018 18:21:34 +0900
Subject: [PATCH 49/69] Improve Redshift support documentation.

---
 docs/index.rst                     |  3 +-
 docs/intro.rst                     |  3 ++
 docs/ref/pgsql-redshift-source.rst | 12 -----
 docs/ref/pgsql-redshift-target.rst | 10 -----
 docs/ref/pgsql-redshift.rst        | 70 ++++++++++++++++++++++++++++++
 5 files changed, 74 insertions(+), 24 deletions(-)
 delete mode 100644 docs/ref/pgsql-redshift-source.rst
 delete mode 100644 docs/ref/pgsql-redshift-target.rst
 create mode 100644 docs/ref/pgsql-redshift.rst

diff --git a/docs/index.rst b/docs/index.rst
index 3fb2f9a..aac16f7 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -24,8 +24,7 @@ Welcome to pgloader's documentation!
    ref/mssql
    ref/pgsql
    ref/pgsql-citus-target
-   ref/pgsql-redshift-source
-   ref/pgsql-redshift-target
+   ref/pgsql-redshift
    ref/transforms
    bugreport
 
diff --git a/docs/intro.rst b/docs/intro.rst
index 2a098d9..ed981b7 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -10,10 +10,13 @@ the data into the server, and manages errors by filling a pair of
 pgloader knows how to read data from different kind of sources:
 
   * Files
+
     * CSV
     * Fixed Format
     * DBF
+
   * Databases
+
     * SQLite
     * MySQL
     * MS SQL Server
diff --git a/docs/ref/pgsql-redshift-source.rst b/docs/ref/pgsql-redshift-source.rst
deleted file mode 100644
index b69b6d9..0000000
--- a/docs/ref/pgsql-redshift-source.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-Migrating a Redhift Database to PostgreSQL
-==========================================
-
-This command instructs pgloader to load data from a database connection.
-Automatic discovery of the schema is supported, including build of the
-indexes, primary and foreign keys constraints. A default set of casting
-rules are provided and might be overloaded and appended to by the command.
-
-The command and behavior are the same as when migration from a PostgreSQL
-database source. pgloader automatically discovers that it's talking to a
-Redshift database by parsing the output of the `SELECT version()` SQL query.
-
diff --git a/docs/ref/pgsql-redshift-target.rst b/docs/ref/pgsql-redshift-target.rst
deleted file mode 100644
index 50cc356..0000000
--- a/docs/ref/pgsql-redshift-target.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-Migrating a PostgreSQL Database to Redshift
-===========================================
-
-This command instructs pgloader to load data from a database connection.
-Automatic discovery of the schema is supported, including build of the
-indexes, primary and foreign keys constraints. A default set of casting
-rules are provided and might be overloaded and appended to by the command.
-
-
-TODO: add details about S3 credentials and bucket configuration.
diff --git a/docs/ref/pgsql-redshift.rst b/docs/ref/pgsql-redshift.rst
new file mode 100644
index 0000000..09d73e1
--- /dev/null
+++ b/docs/ref/pgsql-redshift.rst
@@ -0,0 +1,70 @@
+Support for Redshift in pgloader
+================================
+
+The command and behavior are the same as when migrating from a PostgreSQL
+database source. pgloader automatically discovers that it's talking to a
+Redshift database by parsing the output of the `SELECT version()` SQL query.
+
+Redshift as a data source
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Redshift is a variant of PostgreSQL version 8.0.2, which allows pgloader to
+work with only a very small amount of adaptation in the catalog queries
+used. In other words, migrating from Redshift to PostgreSQL works just the
+same as when migrating from a PostgreSQL data source, including the
+connection string specification.
+
+Redshift as a data destination
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Redshift variant of PostgreSQL 8.0.2 does not have support for the
+``COPY FROM STDIN`` feature that pgloader normally relies upon. To use COPY
+with Redshift, the data must first be made available in an S3 bucket.
+
+First, pgloader must authenticate to Amazon S3. pgloader uses the following
+setup for that:
+
+  - ``~/.aws/config``
+
+    This INI formatted file contains sections with your default region and
+    other global values relevant to using the S3 API. pgloader parses it to
+    get the region when it's set up in the ``default`` INI section.
+
+    The environment variable ``AWS_DEFAULT_REGION`` can be used to override
+    the configuration file value.
+    
+  - ``~/.aws/credentials``
+
+    The INI formatted file contains your authentication setup to Amazon,
+    with the properties ``aws_access_key_id`` and ``aws_secret_access_key``
+    in the section ``default``. pgloader parses this file for those keys,
+    and uses their values when communicating with Amazon S3.
+
+    The environment variables ``AWS_ACCESS_KEY_ID`` and
+    ``AWS_SECRET_ACCESS_KEY`` can be used to override the values in this file.
+    
+  - ``AWS_S3_BUCKET_NAME``
+    
+    Finally, the value of the environment variable ``AWS_S3_BUCKET_NAME`` is
+    used by pgloader as the name of the S3 bucket where to upload the files
+    to COPY to the Redshift database. The bucket name defaults to
+    ``pgloader``.
+
+Then pgloader works as usual, see the other sections of the documentation
+for the details, depending on the data source (files, other databases, etc).
+When preparing the data for PostgreSQL, pgloader now uploads each batch into
+a single CSV file, then issues a command like the following for each batch:
+
+::
+
+  COPY <target_table_name>
+        FROM 's3://<s3 bucket>/<s3-filename-just-uploaded>'
+        FORMAT CSV
+        TIMEFORMAT 'auto'
+        REGION '<aws-region>'
+        ACCESS_KEY_ID '<aws-access-key-id>'
+        SECRET_ACCESS_KEY '<aws-secret-access-key>';
+
+This is the only difference with a PostgreSQL core version, where pgloader
+can rely on the classic ``COPY FROM STDIN`` command, which allows sending
+data through the already established connection to PostgreSQL.

From 290ad68d61f764dafb17ca52fdd175c88eee6f91 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sun, 16 Dec 2018 23:17:37 +0100
Subject: [PATCH 50/69] Implement materialize views in PostgreSQL source
 support.

---
 pgloader.asd                               |  1 +
 src/package.lisp                           |  3 +
 src/parsers/command-materialize-views.lisp |  9 +--
 src/parsers/command-pgsql.lisp             |  5 +-
 src/parsers/command-utils.lisp             |  2 +-
 src/pgsql/pgsql-schema.lisp                | 21 +++++++
 src/sources/pgsql/pgsql-schema.lisp        | 50 +++++++++++++++
 src/sources/pgsql/pgsql.lisp               | 72 +++++++++++++++-------
 test/mysql/db789.load                      |  5 +-
 test/pgsql-source.load                     |  8 +++
 10 files changed, 145 insertions(+), 31 deletions(-)
 create mode 100644 src/sources/pgsql/pgsql-schema.lisp

diff --git a/pgloader.asd b/pgloader.asd
index 3d12ebd..e120e30 100644
--- a/pgloader.asd
+++ b/pgloader.asd
@@ -180,6 +180,7 @@
                                 :serial t
                                 :depends-on ("common")
                                 :components ((:file "pgsql-cast-rules")
+                                             (:file "pgsql-schema")
                                              (:file "pgsql")))))
 
              ;; package pgloader.copy
diff --git a/src/package.lisp b/src/package.lisp
index 307eb3d..c3e76f5 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -452,6 +452,9 @@
 
            #:create-distributed-table
 
+           #:make-including-expr-from-catalog
+           #:make-including-expr-from-view-names
+
            ;; finalizing catalogs support (redshift and other variants)
            #:finalize-catalogs
            #:adjust-data-types
diff --git a/src/parsers/command-materialize-views.lisp b/src/parsers/command-materialize-views.lisp
index 6785967..e963858 100644
--- a/src/parsers/command-materialize-views.lisp
+++ b/src/parsers/command-materialize-views.lisp
@@ -6,11 +6,8 @@
 ;;;
 (in-package #:pgloader.parser)
 
-(defrule view-name (and (alpha-char-p character)
-			(* (or (alpha-char-p character)
-			       (digit-char-p character)
-			       #\_)))
-  (:text t))
+(defrule view-name (or qualified-table-name maybe-quoted-namestring)
+  (:identity t))
 
 (defrule view-sql (and kw-as dollar-quoted)
   (:destructure (as sql) (declare (ignore as)) sql))
@@ -18,7 +15,7 @@
 (defrule view-definition (and view-name (? view-sql))
   (:destructure (name sql) (cons name sql)))
 
-(defrule another-view-definition (and comma view-definition)
+(defrule another-view-definition (and comma-separator view-definition)
   (:lambda (source)
     (bind (((_ view) source)) view)))
 
diff --git a/src/parsers/command-pgsql.lisp b/src/parsers/command-pgsql.lisp
index f5f7996..a1710b0 100644
--- a/src/parsers/command-pgsql.lisp
+++ b/src/parsers/command-pgsql.lisp
@@ -110,6 +110,7 @@
                                            alter-table alter-schema
                                            ((:including incl))
                                            ((:excluding excl))
+                                           views
                                            distribute
                                            &allow-other-keys)
   `(lambda ()
@@ -129,6 +130,7 @@
        (copy-database source
                       :including ',incl
                       :excluding ',excl
+                      :materialize-views ',views
                       :alter-table ',alter-table
                       :alter-schema ',alter-schema
                       :index-names :preserve
@@ -146,7 +148,7 @@
                          pg-dst-db-uri
                          &key
                          gucs casts before after after-schema options
-                         alter-table alter-schema distribute
+                         alter-table alter-schema views distribute
                          including excluding decoding)
         source
       (cond (*dry-run*
@@ -155,6 +157,7 @@
              (lisp-code-for-loading-from-pgsql pg-src-db-uri pg-dst-db-uri
                                                :gucs gucs
                                                :casts casts
+                                               :views views
                                                :before before
                                                :after after
                                                :after-schema after-schema
diff --git a/src/parsers/command-utils.lisp b/src/parsers/command-utils.lisp
index ebc476d..4ad3a63 100644
--- a/src/parsers/command-utils.lisp
+++ b/src/parsers/command-utils.lisp
@@ -30,7 +30,7 @@
 (defrule ignore-whitespace (* whitespace)
   (:constant nil))
 
-(defrule punct (or #\, #\- #\_ #\$ #\%)
+(defrule punct (or #\- #\_ #\$ #\%)
   (:text t))
 
 (defrule namestring (and (or #\_ (alpha-char-p character))
diff --git a/src/pgsql/pgsql-schema.lisp b/src/pgsql/pgsql-schema.lisp
index b2d7a27..2bcff62 100644
--- a/src/pgsql/pgsql-schema.lisp
+++ b/src/pgsql/pgsql-schema.lisp
@@ -119,6 +119,27 @@
                                    (table-name table))
                            :single)))
 
+(defun make-including-expr-from-view-names (view-names)
+  "Turn MATERIALIZING VIEWs list of view names into an INCLUDING parameter."
+  (let (including current-schema)       ; result alist, cached current_schema()
+    (loop :for (schema-name . view-name) :in view-names
+       :do (let* ((schema-name
+                   (if schema-name
+                       (ensure-unquoted schema-name)
+                       (or                ; unqualified view name:
+                        current-schema    ; reuse the cached schema name, or
+                        (setf current-schema ; run the query and cache it
+                              (pomo:query "select current_schema()" :single)))))
+                  (table-expr
+                   (make-string-match-rule :target (ensure-unquoted view-name)))
+                  (schema-entry           ; (schema-name . rules) cell in INCLUDING,
+                   (or (assoc schema-name including :test #'string=)
+                       (progn (push (cons schema-name nil) including) ; created on demand
+                              (assoc schema-name including :test #'string=)))))
+             (push-to-end table-expr (cdr schema-entry))))
+    ;; return the alist of (schema-name . match-rules) entries
+    including))
+
 
 (defvar *table-type*
   '((:table    . ("r" "f" "p"))   ; ordinary, foreign and partitioned
diff --git a/src/sources/pgsql/pgsql-schema.lisp b/src/sources/pgsql/pgsql-schema.lisp
new file mode 100644
index 0000000..2654e45
--- /dev/null
+++ b/src/sources/pgsql/pgsql-schema.lisp
@@ -0,0 +1,50 @@
+(in-package :pgloader.source.pgsql)
+
+(defun create-pg-views (views-alist)
+  "VIEWS-ALIST associates view names, as (schema . name) conses, with their
+   SQL definition, which might be empty for already existing views. Create
+   only the views for which we have an SQL definition."
+  (unless (eq :all views-alist)
+    (let ((views (remove-if #'null views-alist :key #'cdr)))
+      (when views
+        (loop :for (name . def) :in views
+           :for sql := (destructuring-bind (schema . v-name) name
+                         (format nil "CREATE VIEW ~s.~s AS ~a"
+                                 schema v-name def))
+           :do (progn
+                 (log-message :info "PostgreSQL Source: ~a" sql)
+                 #+pgloader-image       ; pgloader-image builds: no restarts offered
+                 (pgsql-execute sql)
+                 #-pgloader-image       ; other builds: offer restarts on error
+                 (restart-case
+                     (pgsql-execute sql)
+                   (use-existing-view ()
+                     :report "Use the already existing view and continue"
+                     nil)
+                   (replace-view ()
+                     :report
+                     "Replace the view with the one from pgloader's command"
+                     (let ((drop-sql ; drop schema.view, not just the schema name
+                            (format nil "DROP VIEW ~s.~s;" (car name) (cdr name))))
+                       (log-message :info "PostgreSQL Source: ~a" drop-sql)
+                       (pgsql-execute drop-sql)
+                       (pgsql-execute sql))))))))))
+
+(defun drop-pg-views (views-alist)
+  "VIEWS-ALIST maps (schema . name) view names to their SQL definition. Issue
+   a single DROP VIEW statement for every view that has a definition."
+  (unless (eq :all views-alist)
+   (let ((views (remove-if #'null views-alist :key #'cdr)))
+     (when views
+       (let ((sql
+              (with-output-to-string (sql)
+                (format sql "DROP VIEW ")
+                (loop :for view-definition :in views
+                   :for i :from 0
+                   :do (destructuring-bind (name . def) view-definition
+                         (declare (ignore def))
+                         (format sql ; ~:[~;, ~] consumes the flag; ~@[, ~] left it for ~s
+                                 "~:[~;, ~]~s.~s"
+                                 (not (zerop i)) (car name) (cdr name)))))))
+         (log-message :info "PostgreSQL Source: ~a" sql)
+         (pgsql-execute sql))))))
diff --git a/src/sources/pgsql/pgsql.lisp b/src/sources/pgsql/pgsql.lisp
index d62038c..1fd15fc 100644
--- a/src/sources/pgsql/pgsql.lisp
+++ b/src/sources/pgsql/pgsql.lisp
@@ -76,7 +76,7 @@
                              including
                              excluding)
   "PostgreSQL introspection to prepare the migration."
-  (declare (ignore materialize-views only-tables))
+  (declare (ignore only-tables))
   (with-stats-collection ("fetch meta data"
                           :use-result-as-rows t
                           :use-result-as-read t
@@ -84,29 +84,59 @@
     (with-pgsql-transaction (:pgconn (source-db pgsql))
       (let ((variant   (pgconn-variant (source-db pgsql)))
             (pgversion (pgconn-major-version (source-db pgsql))))
-       (when (eq :pgdg variant)
-         (list-all-sqltypes catalog
+        ;;
+        ;; First, create the source views that we're going to materialize in
+        ;; the target database.
+        ;;
+        (when (and materialize-views (not (eq :all materialize-views)))
+          (create-pg-views materialize-views))
+
+        (when (eq :pgdg variant)
+          (list-all-sqltypes catalog
+                             :including including
+                             :excluding excluding))
+
+        (list-all-columns catalog
+                          :including including
+                          :excluding excluding)
+
+        (let* ((view-names (unless (eq :all materialize-views)
+                             (mapcar #'car materialize-views)))
+               (including  (make-including-expr-from-view-names view-names)))
+          (cond (view-names
+                 (list-all-columns catalog
+                                   :including including
+                                   :table-type :view))
+
+                ((eq :all materialize-views)
+                 (list-all-columns catalog :table-type :view))))
+
+        (when create-indexes
+          (list-all-indexes catalog
                             :including including
-                            :excluding excluding))
+                            :excluding excluding
+                            :pgversion pgversion))
 
-       (list-all-columns catalog
-                         :including including
-                         :excluding excluding)
+        (when (and (eq :pgdg variant) foreign-keys)
+          (list-all-fkeys catalog
+                          :including including
+                          :excluding excluding))
 
-       (when create-indexes
-         (list-all-indexes catalog
-                           :including including
-                           :excluding excluding
-                           :pgversion pgversion))
-
-       (when (and (eq :pgdg variant) foreign-keys)
-         (list-all-fkeys catalog
-                         :including including
-                         :excluding excluding))
-
-       ;; return how many objects we're going to deal with in total
-       ;; for stats collection
-       (+ (count-tables catalog) (count-indexes catalog)))))
+        ;; return how many objects we're going to deal with in total
+        ;; for stats collection
+        (+ (count-tables catalog)
+           (count-views catalog)
+           (count-indexes catalog)
+           (count-fkeys catalog)))))
 
   ;; be sure to return the catalog itself
   catalog)
+
+
+(defmethod cleanup ((pgsql copy-pgsql) (catalog catalog) &key materialize-views)
+  "When there is a PostgreSQL error at prepare-pgsql-database step, we might
+   need to clean-up any view created in the source PostgreSQL connection for
+   the migration purpose."
+  (when materialize-views               ; skip entirely when no views were requested
+    (with-pgsql-transaction (:pgconn  (source-db pgsql)) ; uses the source connection
+      (drop-pg-views materialize-views))))
diff --git a/test/mysql/db789.load b/test/mysql/db789.load
index ba456d9..42d6eee 100644
--- a/test/mysql/db789.load
+++ b/test/mysql/db789.load
@@ -4,7 +4,7 @@ LOAD DATABASE
 
   WITH data only, truncate, create no tables
 
-  MATERIALIZE VIEWS proceed
+  MATERIALIZE VIEWS proceed, foo as $$ select 1 as a; $$
 
   INCLUDING ONLY TABLE NAMES MATCHING 'proceed'
 
@@ -13,5 +13,6 @@ LOAD DATABASE
   $$ drop schema if exists db789 cascade; $$,
   $$ create schema db789; $$,
   $$ create table db789.refrain (id char(1) primary key); $$,
-  $$ create table db789.proceed (id char(1) primary key); $$;
+  $$ create table db789.proceed (id char(1) primary key); $$,
+  $$ create table db789.foo (a integer primary key); $$;
 
diff --git a/test/pgsql-source.load b/test/pgsql-source.load
index 7e74bc3..6e767df 100644
--- a/test/pgsql-source.load
+++ b/test/pgsql-source.load
@@ -3,4 +3,12 @@ load database
      into pgsql://localhost/copy
 
   -- including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
+  including only table names matching ~/geolocations/ in schema 'public'
+
+  materialize views public.some_usps
+  as $$
+    select usps, geoid, aland, awater, aland_sqmi, awater_sqmi, location
+      from districts
+     where usps in ('MT', 'DE', 'AK', 'WY', 'PR', 'VT', 'SD', 'DC', 'ND');
+  $$
   ;

From bda06f8ac06929bd7c145b146d0c822b7287c157 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 17 Dec 2018 16:31:47 +0100
Subject: [PATCH 51/69] Implement Citus support from a MySQL database.

---
 src/load/migrate-database.lisp          |  6 ++++-
 src/package.lisp                        | 14 +++++++++-
 src/parsers/command-mysql.lisp          |  9 ++++---
 src/pgsql/pgsql-ddl-citus.lisp          |  8 +++---
 src/sources/mysql/mysql-cast-rules.lisp |  3 +++
 src/utils/catalog.lisp                  |  6 +++++
 src/utils/citus.lisp                    | 34 ++++++++++++++++++++-----
 test/mysql/f1db-citus.load              | 19 ++++++++++++++
 8 files changed, 85 insertions(+), 14 deletions(-)
 create mode 100644 test/mysql/f1db-citus.load

diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index 689894d..a9f1ba4 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -232,6 +232,7 @@
 (defun process-catalog (copy catalog &key alter-table alter-schema distribute)
   "Do all the PostgreSQL catalog tweaking here: casts, index WHERE clause
    rewriting, pgloader level alter schema and alter table commands."
+  (log-message :info "Processing source catalogs")
 
   ;; cast the catalog into something PostgreSQL can work on
   (cast catalog)
@@ -250,6 +251,7 @@
 
   ;; we also support schema changes necessary for Citus distribution
   (when distribute
+    (log-message :info "Applying distribution rules")
     (setf (catalog-distribution-rules catalog)
           (citus-distribute-schema catalog distribute))))
 
@@ -366,10 +368,12 @@
                          :alter-schema alter-schema
                          :distribute distribute)
 
-      (citus-rule-is-missing-from-list (e)
+      #+pgloader-image
+      ((or citus-rule-table-not-found citus-rule-is-missing-from-list) (e)
         (log-message :fatal "~a" e)
         (return-from copy-database))
 
+      #+pgloader-image
       (condition (e)
         (log-message :fatal "Failed to process catalogs: ~a" e)
         (return-from copy-database)))
diff --git a/src/package.lisp b/src/package.lisp
index c3e76f5..c6eda82 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -190,6 +190,7 @@
            #:count-indexes
            #:count-fkeys
            #:max-indexes-per-table
+           #:field-name
 
            #:push-to-end
            #:with-schema
@@ -299,7 +300,18 @@
   (:export #:citus-distribute-schema
            #:citus-format-sql-select
            #:citus-backfill-table-p
-           #:citus-rule-is-missing-from-list))
+           #:citus-rule-table-not-found
+           #:citus-rule-is-missing-from-list
+
+           #:citus-reference-rule
+           #:citus-reference-rule-p
+           #:citus-reference-rule-table
+
+           #:citus-distributed-rule
+           #:citus-distributed-rule-p
+           #:citus-distributed-rule-table
+           #:citus-distributed-rule-using
+           #:citus-distributed-rule-from))
 
 (defpackage #:pgloader.utils
   (:use #:cl
diff --git a/src/parsers/command-mysql.lisp b/src/parsers/command-mysql.lisp
index bbae776..0e87f66 100644
--- a/src/parsers/command-mysql.lisp
+++ b/src/parsers/command-mysql.lisp
@@ -89,7 +89,8 @@
                                             excluding-matching
                                             decoding-tables-as
                                             before-load
-                                            after-load))
+                                            after-load
+                                            distribute-commands))
   (:lambda (clauses-list)
     (alexandria:alist-plist clauses-list)))
 
@@ -164,7 +165,7 @@
                                          &key
                                            gucs mysql-gucs
                                            casts views before after options
-                                           alter-table alter-schema
+                                           alter-table alter-schema distribute
                                            ((:including incl))
                                            ((:excluding excl))
                                            ((:decoding decoding-as))
@@ -191,6 +192,7 @@
                       :materialize-views ',views
                       :alter-table ',alter-table
                       :alter-schema ',alter-schema
+                      :distribute ',distribute
                       :set-table-oids t
                       :on-error-stop on-error-stop
                       ,@(remove-batch-control-option options))
@@ -203,7 +205,7 @@
                          pg-db-uri
                          &key
                          gucs mysql-gucs casts views before after options
-                         alter-table alter-schema
+                         alter-table alter-schema distribute
                          including excluding decoding)
         source
       (cond (*dry-run*
@@ -219,6 +221,7 @@
                                                :options options
                                                :alter-table alter-table
                                                :alter-schema alter-schema
+                                               :distribute distribute
                                                :including including
                                                :excluding excluding
                                                :decoding decoding))))))
diff --git a/src/pgsql/pgsql-ddl-citus.lisp b/src/pgsql/pgsql-ddl-citus.lisp
index d5cd050..2fe2028 100644
--- a/src/pgsql/pgsql-ddl-citus.lisp
+++ b/src/pgsql/pgsql-ddl-citus.lisp
@@ -13,6 +13,8 @@
 (defmethod format-create-sql ((rule citus-distributed-rule)
                               &key (stream nil) if-not-exists)
   (declare (ignore if-not-exists))
-  (format stream "SELECT create_distributed_table('~a', '~a');"
-          (format-table-name (citus-distributed-rule-table rule))
-          (column-name (citus-distributed-rule-using rule))))
+  (let* ((rule-table    (citus-distributed-rule-table rule))
+         (rule-col-name (column-name (citus-distributed-rule-using rule))))
+    (format stream "SELECT create_distributed_table('~a', '~a');"
+            (format-table-name rule-table)
+            (apply-identifier-case rule-col-name))))
diff --git a/src/sources/mysql/mysql-cast-rules.lisp b/src/sources/mysql/mysql-cast-rules.lisp
index f05ecde..ba438fa 100644
--- a/src/sources/mysql/mysql-cast-rules.lisp
+++ b/src/sources/mysql/mysql-cast-rules.lisp
@@ -186,6 +186,9 @@
 			   (table-name name comment dtype ctype default nullable extra)))
   table-name name dtype ctype default nullable extra comment)
 
+(defmethod field-name ((field mysql-column) &key)
+  (mysql-column-name field))
+
 (defun explode-mysql-enum (ctype)
   "Convert MySQL ENUM expression into a list of labels."
   (cl-ppcre:register-groups-bind (list)
diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp
index 8b0cd62..6c3ca3a 100644
--- a/src/utils/catalog.lisp
+++ b/src/utils/catalog.lisp
@@ -186,6 +186,9 @@
    "Cast a FIELD definition from a source database into a PostgreSQL COLUMN
     definition."))
 
+(defgeneric field-name (object &key)
+  (:documentation "Get the source database column name, or field-name."))
+
 
 ;;;
 ;;; Implementation of the methods
@@ -373,6 +376,9 @@
   (loop :for schema :in (catalog-schema-list catalog)
      :do (cast schema)))
 
+(defmethod field-name ((column column) &key)
+  (column-name column))
+
 ;;;
 ;;; There's no simple equivalent to array_agg() in MS SQL, so the index and
 ;;; fkey queries return a row per index|fkey column rather than per
diff --git a/src/utils/citus.lisp b/src/utils/citus.lisp
index 0bcf329..541e6e2 100644
--- a/src/utils/citus.lisp
+++ b/src/utils/citus.lisp
@@ -40,20 +40,42 @@
     ;;
     ;; ERROR Database error 42P16: table ;; "campaigns" is already distributed
     ;;
+    ;; In the PostgreSQL source case, we have the table OIDs already at this
+    ;; point, but in the general case we don't. Use the names to match what
+    ;; we did up to now.
+    ;;
     (loop :for rule :in (append distribution-rules derived-rules)
-       :unless (member (table-oid (citus-rule-table rule))
+       :unless (member (table-source-name (citus-rule-table rule))
                        processed-rules
                        :key (lambda (rule)
-                              (table-oid (citus-rule-table rule))))
+                              (table-source-name (citus-rule-table rule)))
+                       :test #'equal)
        :collect (progn
                   (push rule processed-rules)
                   (apply-citus-rule rule)
                   rule))))
 
+(define-condition citus-rule-table-not-found (error)
+  ((schema-name :initarg :schema-name
+                :accessor citus-rule-table-not-found-schema-name)
+   (table-name :initarg :table-name
+               :accessor citus-rule-table-not-found-table-name))
+  (:report
+   (lambda (err stream)
+     (let ((*print-circle* nil))
+       (with-slots (schema-name table-name)
+           err
+         (format stream
+                 "Could not find table ~s in schema ~s for distribution rules."
+                 table-name schema-name))))))
+
 (defun citus-find-table (catalog table)
-  (let* ((table-name  (table-name table))
+  (let* ((table-name  (cdr (table-source-name table)))
          (schema-name (schema-name (table-schema table))))
-    (find-table (find-schema catalog schema-name) table-name)))
+    (or (find-table (find-schema catalog schema-name) table-name)
+        (error (make-condition 'citus-rule-table-not-found
+                               :table-name table-name
+                               :schema-name schema-name)))))
 
 (defgeneric citus-rule-table (rule)
   (:documentation "Returns the RULE's table.")
@@ -197,11 +219,11 @@
   ;; it to our model
   (setf (table-citus-rule (citus-distributed-rule-table rule)) rule)
 
-  (let* ((table   (citus-distributed-rule-table rule))
+  (let* ((table  (citus-distributed-rule-table rule))
          (column (find (column-name (citus-distributed-rule-using rule))
                        (table-field-list table)
                        :test #'string=
-                       :key #'column-name)))
+                       :key #'field-name)))
     (if column
 
         ;; add it to the PKEY definition, in first position
diff --git a/test/mysql/f1db-citus.load b/test/mysql/f1db-citus.load
new file mode 100644
index 0000000..840f415
--- /dev/null
+++ b/test/mysql/f1db-citus.load
@@ -0,0 +1,19 @@
+load database
+  from mysql://root@localhost/f1db?useSSL=false
+  into pgsql://localhost:9700/dim
+
+  with reset no sequences
+
+  distribute f1db.circuits as reference table
+  distribute f1db.constructorResults using raceId
+  distribute f1db.constructors as reference table
+  distribute f1db.constructorStandings using raceId
+  distribute f1db.drivers as reference table
+  distribute f1db.driverStandings using raceId
+  distribute f1db.lapTimes using raceId
+  distribute f1db.pitStops using raceId
+  distribute f1db.qualifying using raceId
+  distribute f1db.races as reference table
+  distribute f1db.results using raceId
+  distribute f1db.seasons as reference table
+  distribute f1db.status as reference table;

From c019c16113503e94a332a0c37aea46bc977e88e3 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 19 Dec 2018 01:25:27 +0100
Subject: [PATCH 52/69] Implement MATERIALIZE VIEWS support for MS SQL, and
 distribute.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The latter is not tested yet, but should have no impact if not used. Given
how rare it is that I get a chance to play around with a MS SQL instance
anyway, it might be better to push blind changes for it when it doesn't
impact existing features…
---
 src/parsers/command-mssql.lisp          | 13 ++++--
 src/sources/mssql/mssql-cast-rules.lisp |  3 ++
 src/sources/mssql/mssql-schema.lisp     | 40 ++++++++++++++++
 src/sources/mssql/mssql.lisp            | 61 +++++++++++++++++++------
 4 files changed, 100 insertions(+), 17 deletions(-)

diff --git a/src/parsers/command-mssql.lisp b/src/parsers/command-mssql.lisp
index 41fcb8a..80b5dc4 100644
--- a/src/parsers/command-mssql.lisp
+++ b/src/parsers/command-mssql.lisp
@@ -83,6 +83,8 @@
                                             casts
                                             alter-schema
                                             alter-table
+                                            materialize-views
+                                            distribute-commands
                                             before-load
                                             after-load
                                             including-like-in-schema
@@ -139,7 +141,8 @@
 (defun lisp-code-for-loading-from-mssql (ms-db-conn pg-db-conn
                                          &key
                                            gucs mssql-gucs
-                                           casts before after options
+                                           casts before after
+                                           options distribute views
                                            alter-schema alter-table
                                            including excluding
                                            &allow-other-keys)
@@ -167,6 +170,8 @@
                       :excluding ',excluding
                       :alter-schema ',alter-schema
                       :alter-table ',alter-table
+                      :materialize-views ',views
+                      :distribute ',distribute
                       :set-table-oids t
                       :on-error-stop on-error-stop
                       ,@(remove-batch-control-option options))
@@ -177,8 +182,8 @@
   (:lambda (source)
     (bind (((ms-db-uri pg-db-uri
                        &key
-                       gucs mssql-gucs casts before after
-                       alter-schema alter-table
+                       gucs mssql-gucs casts views before after
+                       alter-schema alter-table distribute
                        including excluding options)
             source))
       (cond (*dry-run*
@@ -188,10 +193,12 @@
                                                :gucs gucs
                                                :mssql-gucs mssql-gucs
                                                :casts casts
+                                               :views views
                                                :before before
                                                :after after
                                                :alter-schema alter-schema
                                                :alter-table alter-table
+                                               :distribute distribute
                                                :options options
                                                :including including
                                                :excluding excluding))))))
diff --git a/src/sources/mssql/mssql-cast-rules.lisp b/src/sources/mssql/mssql-cast-rules.lisp
index dec7ee8..444b8a0 100644
--- a/src/sources/mssql/mssql-cast-rules.lisp
+++ b/src/sources/mssql/mssql-cast-rules.lisp
@@ -88,6 +88,9 @@
   datetime-precision
   character-set-name collation-name)
 
+(defmethod field-name ((field mssql-column) &key)
+  (mssql-column-name field))
+
 (defmethod mssql-column-ctype ((col mssql-column))
   "Build the ctype definition from the full mssql-column information."
   (let ((type (mssql-column-type col)))
diff --git a/src/sources/mssql/mssql-schema.lisp b/src/sources/mssql/mssql-schema.lisp
index c743647..9f83c86 100644
--- a/src/sources/mssql/mssql-schema.lisp
+++ b/src/sources/mssql/mssql-schema.lisp
@@ -213,3 +213,43 @@
   (loop :for col :in columns
      :collect (with-slots (name type) col
                 (get-column-sql-expression name type))))
+
+
+
+;;;
+;;; Materialize Views support
+;;;
+(defun create-ms-views (views-alist)
+  "VIEWS-ALIST associates view names with their SQL definition, which might
+   be empty for already existing views. Create only the views for which we
+   have an SQL definition."
+  (unless (eq :all views-alist)
+    (let ((views (remove-if #'null views-alist :key #'cdr)))
+      (when views
+        (loop :for (name . def) :in views
+           :for sql := (destructuring-bind (schema . v-name) name
+                         (format nil
+                                 "CREATE VIEW ~s.~s AS ~a"
+                                 schema v-name def))
+           :do (progn
+                 (log-message :info "MS SQL: ~a" sql)
+                 (mssql-query sql)))))))
+
+(defun drop-ms-views (views-alist)
+  "See `create-ms-views' for VIEWS-ALIST description. This time we DROP the
+   views (only those we created from an SQL definition) in one statement."
+  (unless (eq :all views-alist)
+   (let ((views (remove-if #'null views-alist :key #'cdr)))
+     (when views
+       (let ((sql
+              (with-output-to-string (sql)
+                (format sql "DROP VIEW ")
+                (loop :for view-definition :in views
+                   :for i :from 0
+                   :do (destructuring-bind (name . def) view-definition
+                         (declare (ignore def))
+                         (format sql
+                                 "~:[~;, ~]~s.~s"
+                                 (not (zerop i)) (car name) (cdr name)))))))
+         (log-message :info "MS SQL: ~a" sql)
+         (mssql-query sql))))))
diff --git a/src/sources/mssql/mssql.lisp b/src/sources/mssql/mssql.lisp
index cd71141..1f1b507 100644
--- a/src/sources/mssql/mssql.lisp
+++ b/src/sources/mssql/mssql.lisp
@@ -72,30 +72,63 @@
                              including
                              excluding)
   "MS SQL introspection to prepare the migration."
-  (declare (ignore materialize-views only-tables))
+  (declare (ignore only-tables))
   (with-stats-collection ("fetch meta data"
                           :use-result-as-rows t
                           :use-result-as-read t
                           :section :pre)
-      (with-connection (*mssql-db* (source-db mssql))
-        (list-all-columns catalog
-                          :including including
-                          :excluding excluding)
+    (with-connection (*mssql-db* (source-db mssql))
+      ;; If asked to MATERIALIZE VIEWS, now is the time to create them in MS
+      ;; SQL, when given definitions rather than existing view names.
+      (when (and materialize-views (not (eq :all materialize-views)))
+        (create-ms-views materialize-views))
 
-        (when create-indexes
-          (list-all-indexes catalog
-                            :including including
-                            :excluding excluding))
+      (list-all-columns catalog
+                        :including including
+                        :excluding excluding)
 
-        (when foreign-keys
-          (list-all-fkeys catalog
+      ;; fetch view (and their columns) metadata, covering comments too
+      (let* ((view-names (unless (eq :all materialize-views)
+                           (mapcar #'car materialize-views)))
+             (including
+              (loop :for (schema-name . view-name) :in view-names
+                 :do (let* ((schema-name (or schema-name "dbo"))
+                            (schema-entry
+                             (or (assoc schema-name including :test #'string=)
+                                 (progn (push (cons schema-name nil) including)
+                                        (assoc schema-name including
+                                               :test #'string=)))))
+                       (push-to-end view-name (cdr schema-entry))))))
+        (cond (view-names
+               (list-all-columns catalog
+                                 :including including
+                                 :table-type :view))
+
+              ((eq :all materialize-views)
+               (list-all-columns catalog :table-type :view))))
+
+      (when create-indexes
+        (list-all-indexes catalog
                           :including including
                           :excluding excluding))
 
-        ;; return how many objects we're going to deal with in total
-        ;; for stats collection
-        (+ (count-tables catalog) (count-indexes catalog))))
+      (when foreign-keys
+        (list-all-fkeys catalog
+                        :including including
+                        :excluding excluding))
+
+      ;; return how many objects we're going to deal with in total
+      ;; for stats collection
+      (+ (count-tables catalog) (count-indexes catalog))))
 
   ;; be sure to return the catalog itself
   catalog)
 
+
+(defmethod cleanup ((mssql copy-mssql) (catalog catalog) &key materialize-views)
+  "When there is a PostgreSQL error at prepare-pgsql-database step, we might
+   need to clean-up any view created in the MS SQL connection for the
+   migration purpose."
+  (when materialize-views
+    (with-connection (*mssql-db* (source-db mssql))
+      (drop-ms-views materialize-views))))

From 2cafa8360c23769e96ac4cabe882546458c52134 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 19 Dec 2018 10:51:04 +0100
Subject: [PATCH 53/69] Document newly added MATERIALIZE VIEWS for new sources.

Now it's possible to use this clause with a PostgreSQL or an MS SQL database
source.

Fixes #817.
---
 docs/ref/mssql.rst | 38 +++++++++++++++++++++++++++++++++++---
 docs/ref/pgsql.rst | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/docs/ref/mssql.rst b/docs/ref/mssql.rst
index 47cc834..e686d1a 100644
--- a/docs/ref/mssql.rst
+++ b/docs/ref/mssql.rst
@@ -31,7 +31,7 @@ MS SQL Database Migration Options: WITH
 ---------------------------------------
 
 When loading from a `MS SQL` database, the same options as when loading a
-`MySQL` database are supported. Please refer to the MySQL section. The
+`MySQL` database are supported. Please refer to the MySQL section. The
 following options are added:
 
   - *create schemas*
@@ -53,7 +53,39 @@ CAST
 The cast clause allows to specify custom casting rules, either to overload
 the default casting rules or to amend them with special cases.
 
-Please refer to the MySQL CAST clause for details.
+Please refer to the MySQL CAST clause for details.
+
+MS SQL Views Support
+--------------------
+
+MS SQL views support allows pgloader to migrate views as if they were base
+tables. This feature then allows for on-the-fly transformation from MS SQL
+to PostgreSQL, as the view definition is used rather than the base data.
+
+MATERIALIZE VIEWS
+^^^^^^^^^^^^^^^^^
+
+This clause allows you to implement custom data processing at the data
+source by providing a *view definition* against which pgloader will query
+the data. It's not possible to just allow for plain `SQL` because we want to
+know a lot about the exact data types of each column involved in the query
+output.
+
+This clause expects a comma separated list of view definitions, each one
+being either the name of an existing view in your database or the following
+expression::
+
+  *name* `AS` `$$` *sql query* `$$`
+
+The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
+the beginning of the data loading, and the resulting view will then be
+dropped at the end of the data loading.
+
+MATERIALIZE ALL VIEWS
+^^^^^^^^^^^^^^^^^^^^^
+
+Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
+returned by MS SQL rather than asking the user to specify the list.
 
 MS SQL Partial Migration
 ------------------------
@@ -96,7 +128,7 @@ schema 'public' in the target database with this command::
 ALTER TABLE NAMES MATCHING ... IN SCHEMA '...'
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-See the MySQL explanation for this clause above. It works the same in the
+See the MySQL explanation for this clause above. It works the same in the
 context of migrating from MS SQL, only with the added option to specify the
 name of the schema where to find the definition of the target tables.
 
diff --git a/docs/ref/pgsql.rst b/docs/ref/pgsql.rst
index d233ffa..53e9dcb 100644
--- a/docs/ref/pgsql.rst
+++ b/docs/ref/pgsql.rst
@@ -283,6 +283,38 @@ The supported casting options are:
 
       column enumerate.foo using empty-string-to-null
 
+PostgreSQL Views Support
+------------------------
+
+PostgreSQL views support allows pgloader to migrate views as if they were
+base tables. This feature then allows for on-the-fly transformation of the
+source schema, as the view definition is used rather than the base data.
+
+MATERIALIZE VIEWS
+^^^^^^^^^^^^^^^^^
+
+This clause allows you to implement custom data processing at the data
+source by providing a *view definition* against which pgloader will query
+the data. It's not possible to just allow for plain `SQL` because we want to
+know a lot about the exact data types of each column involved in the query
+output.
+
+This clause expects a comma separated list of view definitions, each one
+being either the name of an existing view in your database or the following
+expression::
+
+  *name* `AS` `$$` *sql query* `$$`
+
+The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
+the beginning of the data loading, and the resulting view will then be
+dropped at the end of the data loading.
+
+MATERIALIZE ALL VIEWS
+^^^^^^^^^^^^^^^^^^^^^
+
+Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
+returned by PostgreSQL rather than asking the user to specify the list.
+
 PostgreSQL Partial Migration
 ----------------------------
 

From ec071af0ad0330d2f20d780e9fc889a8177b54bc Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 19 Dec 2018 15:27:44 +0100
Subject: [PATCH 54/69] Add a Feature Matrix to the documentation.

That helps having both an overview of what pgloader is capable of doing with
a database migration, and also documenting that some sources don't have the
full support for some features yet.
---
 docs/index.rst | 12 ++++++++++++
 docs/intro.rst | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/docs/index.rst b/docs/index.rst
index aac16f7..ca5e672 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -6,6 +6,18 @@
 Welcome to pgloader's documentation!
 ====================================
 
+pgloader loads data from various sources into PostgreSQL. It can transform
+the data it reads on the fly and submit raw SQL before and after the
+loading. It uses the `COPY` PostgreSQL protocol to stream the data into the
+server, and manages errors by filling a pair of *reject.dat* and
+*reject.log* files.
+
+Thanks to being able to load data directly from a database source, pgloader
+also supports migrations from other products to PostgreSQL. In this
+mode of operations, pgloader handles both the schema and data parts of the
+migration, in a single unmanned command, allowing to implement **Continuous
+Migration**.
+
 .. toctree::
    :maxdepth: 2
    :caption: Table Of Contents:
diff --git a/docs/intro.rst b/docs/intro.rst
index ed981b7..f73c64d 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -53,6 +53,47 @@ PostgreSQL <http://mysqltopgsql.com/project/>`_ webpage.
 In order to be able to follow this great methodology, you need tooling to
 implement the third step in a fully automated way. That's pgloader.
 
+Features Matrix
+---------------
+
+Here's a comparison of the features supported depending on the source
+database engine. Most features that are not supported can be added to
+pgloader, it's just that nobody had the need to do so yet.
+
+==========================   =======  ======  ======  ===========  =========
+Feature                      SQLite   MySQL   MS SQL  PostgreSQL   Redshift 
+==========================   =======  ======  ======  ===========  =========
+One-command migration           ✓       ✓       ✓           ✓          ✓
+Continuous Migration            ✓       ✓       ✓           ✓          ✓
+Schema discovery                ✓       ✓       ✓           ✓          ✓
+Partial Migrations              ✓       ✓       ✓           ✓          ✓
+Schema only                     ✓       ✓       ✓           ✓          ✓
+Data only                       ✓       ✓       ✓           ✓          ✓
+Repeatable (DROP+CREATE)        ✓       ✓       ✓           ✓          ✓
+User defined casting rules      ✓       ✓       ✓           ✓          ✓
+Encoding Overrides              ✗       ✓       ✗            ✗          ✗
+On error stop                   ✓       ✓       ✓           ✓          ✓
+On error resume next            ✓       ✓       ✓           ✓          ✓
+Pre/Post SQL commands           ✓       ✓       ✓           ✓          ✓
+Post-Schema SQL commands        ✗       ✓       ✓           ✓          ✓
+Primary key support             ✓       ✓       ✓           ✓          ✓
+Foreign key support             ✓       ✓       ✓           ✓          ✗
+Incremental data loading        ✓       ✓       ✓           ✓          ✓
+Online ALTER schema             ✓       ✓       ✓           ✓          ✓
+Materialized views              ✗       ✓       ✓           ✓          ✓
+Distribute to Citus             ✗       ✓       ✓           ✓          ✓
+==========================   =======  ======  ======  ===========  =========
+
+For more details about what the features are about, see the specific
+reference pages for your database source.
+
+For some of the features, missing support only means that the feature is not
+needed for the other sources, such as the capability to override MySQL
+encoding metadata about a table or a column. Only MySQL in this list is left
+completely unable to guarantee text encoding. Or Redshift not having foreign
+keys.
+
+
 Commands
 --------
 

From eab1cbf326c6667ad356f9e2fa875dc582827241 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 19 Dec 2018 22:40:32 +0100
Subject: [PATCH 55/69] More docs improvements.

Explain the feature list of pgloader better for improving discoverability of
what can be achieved with our nice little tool.
---
 docs/index.rst                     | 215 +++++++++++++++++++++++++++++
 docs/intro.rst                     |  29 +---
 docs/{tutorial => }/quickstart.rst |  18 +--
 docs/tutorial/tutorial.rst         |   3 +-
 4 files changed, 231 insertions(+), 34 deletions(-)
 rename docs/{tutorial => }/quickstart.rst (96%)

diff --git a/docs/index.rst b/docs/index.rst
index ca5e672..1751e71 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -18,11 +18,226 @@ mode of operations, pgloader handles both the schema and data parts of the
 migration, in a single unmanned command, allowing to implement **Continuous
 Migration**.
 
+Features Overview
+=================
+
+pgloader has two modes of operation: loading from files, migrating
+databases. In both cases, pgloader uses the PostgreSQL COPY protocol which
+implements **streaming** to send data in a very efficient way.
+
+Loading file content in PostgreSQL
+----------------------------------
+
+When loading from files, pgloader implements the following features:
+
+Many source formats supported
+    Support for a wide variety of file based formats is included in
+    pgloader: the CSV family, fixed columns formats, dBase files (``db3``),
+    and IBM IXF files.
+
+    The SQLite database engine is accounted for in the next section:
+    pgloader considers SQLite as a database source and implements schema
+    discovery from SQLite catalogs.
+
+On the fly data transformation
+    Often enough the data as read from a CSV file (or another format) needs
+    some tweaking and clean-up before being sent to PostgreSQL.
+
+    For instance in the `geolite
+    <https://github.com/dimitri/pgloader/blob/master/test/archive.load>`_
+    example we can see that integer values are being rewritten as IP address
+    ranges, allowing to target an ``ip4r`` column directly.
+
+Full Field projections
+    pgloader supports loading data into less fields than found on file, or
+    more, doing some computation on the data read before sending it to
+    PostgreSQL.
+    
+Reading files from an archive
+    Archive formats *zip*, *tar*, and *gzip* are supported by pgloader: the
+    archive is extracted in a temporary directory and expanded files are then
+    loaded.
+    
+HTTP(S) support
+    pgloader knows how to download a source file or a source archive using
+    HTTP directly. It might be better to use ``curl -O- http://... |
+    pgloader` and read the data from *standard input*, then allowing for
+    streaming of the data from its source down to PostgreSQL.
+    
+Target schema discovery
+    When loading in an existing table, pgloader takes into account the
+    existing columns and may automatically guess the CSV format for you.
+  
+On error stop / On error resume next
+    In some cases the source data is so damaged as to be impossible to
+    migrate in full, and when loading from a file then the default for
+    pgloader is to use ``on error resume next`` option, where the rows
+    rejected by PostgreSQL are saved away and the migration continues with
+    the other rows.
+
+    In other cases loading only a part of the input data might not be a
+    great idea, and in such cases it's possible to use the ``on error stop``
+    option.
+
+Pre/Post SQL commands
+    This feature allows pgloader commands to include SQL commands to run
+    before and after loading a file. It might be about creating a table
+    first, then loading the data into it, and then doing more processing
+    on-top of the data (implementing an ``ELT`` pipeline then), or creating
+    specific indexes as soon as the data has been made ready.
+    
+One-command migration to PostgreSQL
+-----------------------------------
+  
+When migrating a full database in a single command, pgloader implements the
+following features:
+
+One-command migration
+    The whole migration is started with a single command line and then runs
+    unattended. pgloader is meant to be integrated in a fully automated
+    tooling that you can repeat as many times as needed.
+
+Schema discovery
+    The source database is introspected using its SQL catalogs to get the
+    list of tables, attributes (with data types, default values, not null
+    constraints, etc), primary key constraints, foreign key constraints,
+    indexes, comments, etc. This feeds an internal database catalog of all
+    the objects to migrate from the source database to the target database.
+
+User defined casting rules
+    Some source database have ideas about their data types that might not be
+    compatible with PostgreSQL implementation of equivalent data types.
+
+    For instance, SQLite since version 3 has a `Dynamic Type System
+    <https://www.sqlite.org/datatype3.html>`_ which of course isn't
+    compatible with the idea of a `Relation
+    <https://en.wikipedia.org/wiki/Relation_(database)>`_. Or MySQL accepts
+    datetime for year zero, which doesn't exist in our calendar, and
+    doesn't have a boolean data type.
+
+    When migrating from another source database technology to PostgreSQL,
+    data type casting choices must be made. pgloader implements solid
+    defaults that you can rely upon, and a facility for **user defined data
+    type casting rules** for specific cases. The idea is to allow users to
+    specify how the migration should be done, in order for it to be
+    repeatable and included in a *Continuous Migration* process.
+
+On the fly data transformations
+    The user defined casting rules come with on the fly rewrite of the data.
+    For instance zero dates (it's not just the year, MySQL accepts
+    ``0000-00-00`` as a valid datetime) are rewritten to NULL values by
+    default.
+    
+Partial Migrations
+    It is possible to include only a partial list of the source database
+    tables in the migration, or to exclude some of the tables on the source
+    database.
+
+Schema only, Data only
+    This is the **ORM compatibility** feature of pgloader, where it is
+    possible to create the schema using your ORM and then have pgloader
+    migrate the data targeting this already created schema.
+
+    When doing this, it is possible for pgloader to *reindex* the target
+    schema: before loading the data from the source database into PostgreSQL
+    using COPY, pgloader DROPs the indexes and constraints, and reinstalls
+    the exact same definitions of them once the data has been loaded.
+
+    The reason for operating that way is of course data load performance.
+    
+Repeatable (DROP+CREATE)
+    By default, pgloader issues DROP statements in the target PostgreSQL
+    database before issing any CREATE statement, so that you can repeat the
+    migration as many times as necessary until migration specifications and
+    rules are bug free.
+    
+On error stop / On error resume next
+    The default behavior of pgloader when migrating from a database is ``on
+    error stop``. The idea is to let the user fix either the migration
+    specifications or the source data, and run the process again, until it
+    works.
+
+    In some cases the source data is so damaged as to be impossible to
+    migrate in full, and it might be necessary to then resort to the ``on
+    error resume next`` option, where the rows rejected by PostgreSQL are
+    saved away and the migration continues with the other rows.
+
+Pre/Post SQL commands, Post-Schema SQL commands
+    While pgloader takes care of rewriting the schema to PostgreSQL
+    expectations, and even provides *user-defined data type casting rules*
+    support to that end, sometimes it is necessary to add some specific SQL
+    commands around the migration. It's of course supported right from
+    pgloader itself, without having to script around it.
+    
+Online ALTER schema
+    At times migrating to PostgreSQL is also a good opportunity to review
+    and fix bad decisions that were made in the past, or simply that are not
+    relevant to PostgreSQL.
+
+    The pgloader command syntax allows to ALTER pgloader's internal
+    representation of the target catalogs so that the target schema can be
+    created a little different from the source one. Changes supported
+    include targeting a different *schema* or *table* name.
+    
+Materialized Views, or schema rewrite on-the-fly
+    In some cases the schema rewriting goes deeper than just renaming the
+    SQL objects to being a full normalization exercise. Because PostgreSQL
+    is great at running a normalized schema in production under most
+    workloads.
+
+    pgloader implements full flexibility in on-the-fly schema rewriting, by
+    making it possible to migrate from a view definition. The view attribute
+    list becomes a table definition in PostgreSQL, and the data is fetched
+    by querying the view on the source system.
+
+    A SQL view allows to implement both content filtering at the column
+    level using the SELECT projection clause, and at the row level using the
+    WHERE restriction clause. And backfilling from reference tables thanks
+    to JOINs.
+    
+Distribute to Citus
+    When migrating from PostgreSQL to Citus, an important part of the process
+    consists of adjusting the schema to the distribution key. Read
+    `Preparing Tables and Ingesting Data
+    <https://docs.citusdata.com/en/v8.0/use_cases/multi_tenant.html>`_ in
+    the Citus documentation for a complete example showing how to do that.
+
+    When using pgloader it's possible to specify the distribution keys and
+    reference tables and let pgloader take care of adjusting the table,
+    indexes, primary keys and foreign key definitions all by itself.
+
+Encoding Overrides
+    MySQL doesn't actually enforce the encoding of the data in the database
+    to match the encoding known in the metadata, defined at the database,
+    table, or attribute level. Sometimes, it's necessary to override the
+    metadata in order to make sense of the text, and pgloader makes it easy
+    to do so.
+
+
+Continuous Migration
+--------------------
+
+pgloader is meant to migrate a whole database in a single command line and
+without any manual intervention. The goal is to be able to setup a
+*Continuous Integration* environment as described in the `Project
+Methodology <http://mysqltopgsql.com/project/>`_ document of the `MySQL to
+PostgreSQL <http://mysqltopgsql.com/project/>`_ webpage.
+
+  1. Setup your target PostgreSQL Architecture
+  2. Fork a Continuous Integration environment that uses PostgreSQL
+  3. Migrate the data over and over again every night, from production
+  4. As soon as the CI is all green using PostgreSQL, schedule the D-Day
+  5. Migrate without surprise and enjoy!
+
+In order to be able to follow this great methodology, you need tooling to
+implement the third step in a fully automated way. That's pgloader.
+
 .. toctree::
    :maxdepth: 2
    :caption: Table Of Contents:
 
    intro
+   quickstart
    tutorial/tutorial
    pgloader
    ref/csv
diff --git a/docs/intro.rst b/docs/intro.rst
index f73c64d..f733b72 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -35,30 +35,14 @@ expected input properties must be given to pgloader. In the case of a
 database, pgloader connects to the live service and knows how to fetch the
 metadata it needs directly from it.
 
-Continuous Migration
---------------------
-
-pgloader is meant to migrate a whole database in a single command line and
-without any manual intervention. The goal is to be able to setup a
-*Continuous Integration* environment as described in the `Project
-Methodology <http://mysqltopgsql.com/project/>`_ document of the `MySQL to
-PostgreSQL <http://mysqltopgsql.com/project/>`_ webpage.
-
-  1. Setup your target PostgreSQL Architecture
-  2. Fork a Continuous Integration environment that uses PostgreSQL
-  3. Migrate the data over and over again every night, from production
-  4. As soon as the CI is all green using PostgreSQL, schedule the D-Day
-  5. Migrate without suprise and enjoy! 
-
-In order to be able to follow this great methodology, you need tooling to
-implement the third step in a fully automated way. That's pgloader.
-
 Features Matrix
 ---------------
 
 Here's a comparison of the features supported depending on the source
-database engine. Most features that are not supported can be added to
-pgloader, it's just that nobody had the need to do so yet.
+database engine. Some features that are not supported can be added to
+pgloader, it's just that nobody had the need to do so yet. Those features
+are marked with ✗. Empty cells are used when the feature doesn't make sense
+for the selected source database.
 
 ==========================   =======  ======  ======  ===========  =========
 Feature                      SQLite   MySQL   MS SQL  PostgreSQL   Redshift 
@@ -71,14 +55,13 @@ Schema only                     ✓       ✓       ✓           ✓          
 Data only                       ✓       ✓       ✓           ✓          ✓
 Repeatable (DROP+CREATE)        ✓       ✓       ✓           ✓          ✓
 User defined casting rules      ✓       ✓       ✓           ✓          ✓
-Encoding Overrides              ✗       ✓       ✗            ✗          ✗
+Encoding Overrides                      ✓
 On error stop                   ✓       ✓       ✓           ✓          ✓
 On error resume next            ✓       ✓       ✓           ✓          ✓
 Pre/Post SQL commands           ✓       ✓       ✓           ✓          ✓
 Post-Schema SQL commands        ✗       ✓       ✓           ✓          ✓
 Primary key support             ✓       ✓       ✓           ✓          ✓
-Foreign key support             ✓       ✓       ✓           ✓          ✗
-Incremental data loading        ✓       ✓       ✓           ✓          ✓
+Foreign key support             ✓       ✓       ✓           ✓
 Online ALTER schema             ✓       ✓       ✓           ✓          ✓
 Materialized views              ✗       ✓       ✓           ✓          ✓
 Distribute to Citus             ✗       ✓       ✓           ✓          ✓
diff --git a/docs/tutorial/quickstart.rst b/docs/quickstart.rst
similarity index 96%
rename from docs/tutorial/quickstart.rst
rename to docs/quickstart.rst
index abd303c..912a095 100644
--- a/docs/tutorial/quickstart.rst
+++ b/docs/quickstart.rst
@@ -1,10 +1,10 @@
-PgLoader Quick Start
---------------------
+Pgloader Quick Start
+====================
 
 In simple cases, pgloader is very easy to use.
 
 CSV
-^^^
+---
 
 Load data from a CSV file into a pre-existing table in your database::
 
@@ -26,7 +26,7 @@ For documentation about the available syntaxes for the `--field` and
 Note also that the PostgreSQL URI includes the target *tablename*.
 
 Reading from STDIN
-^^^^^^^^^^^^^^^^^^
+------------------
 
 File based pgloader sources can be loaded from the standard input, as in the
 following example::
@@ -46,7 +46,7 @@ pgloader with this technique, using the Unix pipe::
     gunzip -c source.gz | pgloader --type csv ... - pgsql:///target?foo
 
 Loading from CSV available through HTTP
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+---------------------------------------
 
 The same command as just above can also be run if the CSV file happens to be
 found on a remote HTTP location::
@@ -84,7 +84,7 @@ Also notice that the same command will work against an archived version of
 the same data.
 
 Streaming CSV data from an HTTP compressed file
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+-----------------------------------------------
 
 Finally, it's important to note that pgloader first fetches the content from
 the HTTP URL it to a local file, then expand the archive when it's
@@ -110,7 +110,7 @@ and the commands and pgloader will take care of streaming the data down to
 PostgreSQL.
 
 Migrating from SQLite
-^^^^^^^^^^^^^^^^^^^^^
+---------------------
 
 The following command will open the SQLite database, discover its tables
 definitions including indexes and foreign keys, migrate those definitions
@@ -121,7 +121,7 @@ and then migrate the data over::
     pgloader ./test/sqlite/sqlite.db postgresql:///newdb
 
 Migrating from MySQL
-^^^^^^^^^^^^^^^^^^^^
+--------------------
 
 Just create a database where to host the MySQL data and definitions and have
 pgloader do the migration for you in a single command line::
@@ -130,7 +130,7 @@ pgloader do the migration for you in a single command line::
     pgloader mysql://user@localhost/sakila postgresql:///pagila
 
 Fetching an archived DBF file from a HTTP remote location
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+---------------------------------------------------------
 
 It's possible for pgloader to download a file from HTTP, unarchive it, and
 only then open it to discover the schema then load the data::
diff --git a/docs/tutorial/tutorial.rst b/docs/tutorial/tutorial.rst
index d542d12..8c6a4b2 100644
--- a/docs/tutorial/tutorial.rst
+++ b/docs/tutorial/tutorial.rst
@@ -1,7 +1,6 @@
-PgLoader Tutorial
+Pgloader Tutorial
 =================
 
-.. include:: quickstart.rst
 .. include:: csv.rst
 .. include:: fixed.rst
 .. include:: geolite.rst

From 3d089967778bc18156d8af0f8f6684734e6eadce Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Thu, 20 Dec 2018 10:05:54 +0100
Subject: [PATCH 56/69] Review the new documentation material.

---
 docs/index.rst | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index 1751e71..2a79f34 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -61,7 +61,7 @@ Reading files from an archive
 HTTP(S) support
     pgloader knows how to download a source file or a source archive using
     HTTP directly. It might be better to use ``curl -O- http://... |
-    pgloader` and read the data from *standard input*, then allowing for
+    pgloader`` and read the data from *standard input*, then allowing for
     streaming of the data from its source down to PostgreSQL.
     
 Target schema discovery
@@ -83,7 +83,7 @@ Pre/Post SQL commands
     This feature allows pgloader commands to include SQL commands to run
     before and after loading a file. It might be about creating a table
     first, then loading the data into it, and then doing more processing
-    on-top of the data (implementing an ``ELT`` pipeline then), or creating
+    on-top of the data (implementing an *ELT* pipeline then), or creating
     specific indexes as soon as the data has been made ready.
     
 One-command migration to PostgreSQL
@@ -150,12 +150,16 @@ Repeatable (DROP+CREATE)
     database before issing any CREATE statement, so that you can repeat the
     migration as many times as necessary until migration specifications and
     rules are bug free.
-    
-On error stop / On error resume next
-    The default behavior of pgloader when migrating from a database is ``on
-    error stop``. The idea is to let the user fix either the migration
-    specifications or the source data, and run the process again, until it
-    works.
+
+    Then schedule the data migration to run every night (or even more often!)
+    for the whole duration of the code migration project. See the
+    `Continuous Migration <https://pgloader.io/blog/continuous-migration/>`_
+    methodology for more details about the approach.
+
+On error stop / On error resume next The default behavior of pgloader when
+    migrating from a database is ``on error stop``. The idea is to let the
+    user fix either the migration specifications or the source data, and run
+    the process again, until it works.
 
     In some cases the source data is so damaged as to be impossible to
     migrate in full, and it might be necessary to then resort to the ``on

From 65d323e4a3fef383149b9a881aeb4b1dd8d7f639 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Sun, 23 Dec 2018 20:51:36 +0100
Subject: [PATCH 57/69] Refrain from matching typemod expression to NIL
 typemod.

Fixes #879.
---
 src/sources/common/casting-rules.lisp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/sources/common/casting-rules.lisp b/src/sources/common/casting-rules.lisp
index 3856c9c..f893db4 100644
--- a/src/sources/common/casting-rules.lisp
+++ b/src/sources/common/casting-rules.lisp
@@ -58,7 +58,8 @@
              ;; otherwide, we do the full dance
              (and
               (or (and t-s-p (string= type rule-source-type)))
-              (or (null tm-s-p) (typemod-expr-matches-p typemod-expr typemod))
+              (or (null tm-s-p) (when typemod
+                                  (typemod-expr-matches-p typemod-expr typemod)))
               (or (null d-s-p)  (string= default rule-source-default))
               (or (null u-s-p)  (eq unsigned rule-unsigned))
               (or (null n-s-p)  (eq not-null rule-source-not-null))

From b8e8cf7d183d6beb81b84fc70c34834ac279a047 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Fri, 28 Dec 2018 10:53:01 +0100
Subject: [PATCH 58/69] Fix bugs in the recent extended support for
 materialized views.

Materialized views without an explicit schema name are supported, but then
would raise an error when trying to use destructuring-bind on a string
rather than the (cons schema-name table-name). This patch fixes that.
---
 src/parsers/command-materialize-views.lisp | 5 ++++-
 src/sources/mssql/mssql-schema.lisp        | 4 ++--
 src/sources/pgsql/pgsql-schema.lisp        | 4 ++--
 test/pgsql-source.load                     | 2 +-
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/parsers/command-materialize-views.lisp b/src/parsers/command-materialize-views.lisp
index e963858..9c75a92 100644
--- a/src/parsers/command-materialize-views.lisp
+++ b/src/parsers/command-materialize-views.lisp
@@ -7,7 +7,10 @@
 (in-package #:pgloader.parser)
 
 (defrule view-name (or qualified-table-name maybe-quoted-namestring)
-  (:identity t))
+  (:lambda (vn)
+    (etypecase vn
+      (cons   vn)                       ; already a (schema-name . table-name) cons
+      (string (cons nil vn)))))         ; bare view name: use a NIL schema
 
 (defrule view-sql (and kw-as dollar-quoted)
   (:destructure (as sql) (declare (ignore as)) sql))
diff --git a/src/sources/mssql/mssql-schema.lisp b/src/sources/mssql/mssql-schema.lisp
index 9f83c86..7286ff6 100644
--- a/src/sources/mssql/mssql-schema.lisp
+++ b/src/sources/mssql/mssql-schema.lisp
@@ -229,7 +229,7 @@
         (loop :for (name . def) :in views
            :for sql := (destructuring-bind (schema . v-name) name
                          (format nil
-                                 "CREATE VIEW ~s.~s AS ~a"
+                                 "CREATE VIEW ~@[~s.~]~s AS ~a"
                                  schema v-name def))
            :do (progn
                  (log-message :info "MS SQL: ~a" sql)
@@ -249,7 +249,7 @@
                    :do (destructuring-bind (name . def) view-definition
                          (declare (ignore def))
                          (format sql
-                                 "~@[, ~]~s.~s"
+                                 "~@[, ~]~@[~s.~]~s"
                                  (not (zerop i)) (car name) (cdr name)))))))
          (log-message :info "PostgreSQL Source: ~a" sql)
          (mssql-query sql))))))
diff --git a/src/sources/pgsql/pgsql-schema.lisp b/src/sources/pgsql/pgsql-schema.lisp
index 2654e45..c96178a 100644
--- a/src/sources/pgsql/pgsql-schema.lisp
+++ b/src/sources/pgsql/pgsql-schema.lisp
@@ -10,7 +10,7 @@
         (loop :for (name . def) :in views
            :for sql := (destructuring-bind (schema . v-name) name
                          (format nil
-                                 "CREATE VIEW ~s.~s AS ~a"
+                                 "CREATE VIEW ~@[~s.~]~s AS ~a"
                                  schema v-name def))
            :do (progn
                  (log-message :info "PostgreSQL Source: ~a" sql)
@@ -44,7 +44,7 @@
                    :do (destructuring-bind (name . def) view-definition
                          (declare (ignore def))
                          (format sql
-                                 "~@[, ~]~s.~s"
+                                 "~@[, ~]~@[~s.~]~s"
                                  (not (zerop i)) (car name) (cdr name)))))))
          (log-message :info "PostgreSQL Source: ~a" sql)
          (pgsql-execute sql))))))
diff --git a/test/pgsql-source.load b/test/pgsql-source.load
index 6e767df..69bb292 100644
--- a/test/pgsql-source.load
+++ b/test/pgsql-source.load
@@ -5,7 +5,7 @@ load database
   -- including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
   including only table names matching ~/geolocations/ in schema 'public'
 
-  materialize views public.some_usps
+  materialize views some_usps
   as $$
     select usps, geoid, aland, awater, aland_sqmi, awater_sqmi, location
       from districts

From 13bdb2d1185cf89435c8212a74bb03854177e822 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Wirtel?= <stephane@wirtel.be>
Date: Mon, 7 Jan 2019 20:24:59 +0100
Subject: [PATCH 59/69] Fix section in rest (#883)

---
 docs/tutorial/mysql.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/tutorial/mysql.rst b/docs/tutorial/mysql.rst
index 6443b36..2518b9f 100644
--- a/docs/tutorial/mysql.rst
+++ b/docs/tutorial/mysql.rst
@@ -127,7 +127,8 @@ It's possible to use the *MATERIALIZE VIEWS* clause and give both the name
 and the SQL (in MySQL dialect) definition of view, then pgloader creates the
 view before loading the data, then drops it again at the end.
 
-## Loading the data
+Loading the data
+^^^^^^^^^^^^^^^^
 
 Let's start the `pgloader` command with our `sakila.load` command file::
 

From 9ce4088b484ee84d9658ceb2e1bb05a260d2544f Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 7 Jan 2019 20:42:12 +0100
Subject: [PATCH 60/69] Improvements to the `make save` facility.

---
 Makefile      |  3 +++
 src/save.lisp | 10 ++++++++++
 2 files changed, 13 insertions(+)

diff --git a/Makefile b/Makefile
index 83c523a..38d8a38 100644
--- a/Makefile
+++ b/Makefile
@@ -167,6 +167,9 @@ test: $(PGLOADER)
 save: ./src/save.lisp $(LISP_SRC)
 	sbcl --no-userinit --load ./src/save.lisp
 
+check-saved: save
+	$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
+
 clean-bundle:
 	rm -rf $(BUNDLEDIR)
 	rm -rf $(BUNDLETESTD)/$(BUNDLENAME)/*
diff --git a/src/save.lisp b/src/save.lisp
index 43b0de2..94226cb 100644
--- a/src/save.lisp
+++ b/src/save.lisp
@@ -12,6 +12,16 @@
    when unset."
   (or (sb-ext:posix-getenv name) default))
 
+;; So that we can #+pgloader-image some code away, see main.lisp
+(push :pgloader-image *features*)
+
+;;;
+;;; We need to support *print-circle* for the debug traces of the catalogs,
+;;; and while at it let's enforce *print-pretty* too.
+;;;
+(setf *print-circle* t *print-pretty* t)
+
+
 (require :asdf)                         ; should work in SBCL and CCL
 
 (defvar *quicklisp.lisp* "http://beta.quicklisp.org/quicklisp.lisp")

From e4a4edb276ebde69c41943252a112972b6f9db9f Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 7 Jan 2019 20:42:36 +0100
Subject: [PATCH 61/69] Make interactive debugging easier.

It's fair game to handle errors and issue logs instead when using the
pgloader binary image, as it distracts users a lot. That said, as a
developer the interactive debugger is very useful.

In passing install some experimental thread killing behavior in case of
errors and using on-error-stop setting (default for database migrations).
---
 src/load/copy-data.lisp | 10 +++++++---
 src/utils/monitor.lisp  |  6 ++++--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/load/copy-data.lisp b/src/load/copy-data.lisp
index 67a68d6..676b814 100644
--- a/src/load/copy-data.lisp
+++ b/src/load/copy-data.lisp
@@ -74,13 +74,17 @@
              (incf task-count)))
 
       (lp:task-handler-bind
-          ((copy-init-error
+          (#+pgloader-image
+           (copy-init-error
             #'(lambda (condition)
-                ;; everything has been handled already
+                ;; stop the other tasks and then transfer the control
+                (log-message :log "COPY INIT ERROR")
+                (lp:kill-tasks :default)
                 (lp:invoke-transfer-error condition)))
            (on-error-stop
             #'(lambda (condition)
-                ;; everything has been handled already
+                (log-message :log "ON ERROR STOP")
+                (lp:kill-tasks :default)
                 (lp:invoke-transfer-error condition)))
            #+pgloader-image
            (error
diff --git a/src/utils/monitor.lisp b/src/utils/monitor.lisp
index 569e4e2..c56652d 100644
--- a/src/utils/monitor.lisp
+++ b/src/utils/monitor.lisp
@@ -147,7 +147,8 @@
                       (*summary-pathname*    . ,*summary-pathname*)
                       (*sections*            . ',*sections*)))
          (kernel      (lp:make-kernel 1 :bindings bindings))
-         (lparallel:*kernel* kernel))
+         (lparallel:*kernel* kernel)
+         (lparallel:*task-category* :monitor))
 
     ;; make our kernel and channel visible from the outside
     (setf *monitoring-kernel* kernel
@@ -155,7 +156,8 @@
           *monitoring-queue*   (lq:make-queue))
 
     (lp:task-handler-bind
-        ((error
+        (#+pgloader-image
+         (error
           #'(lambda (c)
               ;; we can't log-message a monitor thread error
               (lp:invoke-transfer-error

From 204a0119cd910a3213aaf97598a8048185196089 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 7 Jan 2019 23:51:58 +0100
Subject: [PATCH 62/69] Add another debugging guard #+pgloader-image.

---
 src/load/migrate-database.lisp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/load/migrate-database.lisp b/src/load/migrate-database.lisp
index a9f1ba4..f62c5f3 100644
--- a/src/load/migrate-database.lisp
+++ b/src/load/migrate-database.lisp
@@ -341,6 +341,7 @@
                            (log-message :error "MSSQL ERROR: ~a" e)
                            (log-message :log "You might need to review the FreeTDS protocol version in your freetds.conf file, see http://www.freetds.org/userguide/choosingtdsprotocol.htm")
                            (return-from copy-database))
+                         #+pgloader-image
                          (condition (e)
                            (log-message :error
                                         "~a: ~a"

From a4a52db5940a7997ece5be380d49c560a5ed633e Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 7 Jan 2019 23:52:29 +0100
Subject: [PATCH 63/69] Improve SQLite support for autoincrement and sequences.

It turns out that SQLite only creates an entry in its sqlite_sequence
catalogs when some data make it to a table using a sequence, not at create
table time. It means that pgloader must do some more catalog querying to
figure out if a column is "autoincrement", and apparently the only way to
get to the information is to parse the SQL statement given in the
sqlite_master table.

Fixes #882.
---
 src/sources/sqlite/sql/get-create-table.sql |  1 +
 src/sources/sqlite/sqlite-schema.lisp       | 56 +++++++++++++++++----
 2 files changed, 48 insertions(+), 9 deletions(-)
 create mode 100644 src/sources/sqlite/sql/get-create-table.sql

diff --git a/src/sources/sqlite/sql/get-create-table.sql b/src/sources/sqlite/sql/get-create-table.sql
new file mode 100644
index 0000000..04265b9
--- /dev/null
+++ b/src/sources/sqlite/sql/get-create-table.sql
@@ -0,0 +1 @@
+select sql from sqlite_master where name = '~a'
diff --git a/src/sources/sqlite/sqlite-schema.lisp b/src/sources/sqlite/sqlite-schema.lisp
index 17f05ba..c3684a3 100644
--- a/src/sources/sqlite/sqlite-schema.lisp
+++ b/src/sources/sqlite/sqlite-schema.lisp
@@ -67,6 +67,47 @@
     (loop for (name) in (sqlite:execute-to-list db sql)
        collect name)))
 
+(defun find-sequence (db table-name column-name)
+  "Return the non-zero value the find-sequence.sql query yields for
+   TABLE-NAME, or nil. COLUMN-NAME is only used in the log message."
+  (let* ((query (format nil (sql "/sqlite/find-sequence.sql") table-name))
+         (seq-value (sqlite:execute-single db query)))
+    ;; a nil or zero result means no sequence is attached to the table
+    (unless (or (null seq-value) (zerop seq-value))
+      (log-message :notice "SQLite column ~a.~a uses a sequence"
+                   table-name column-name)
+      seq-value)))
+
+(defun find-auto-increment-in-create-sql (db table-name column-name)
+  "Return T when the CREATE TABLE statement found in sqlite_master declares
+   COLUMN-NAME as autoincrement. The sqlite_sequence catalog only gets an
+   entry once rows exist, so FIND-SEQUENCE may miss such a column."
+  (let* ((blanks       '(#\Space #\Tab #\Newline #\Return))
+         (sql (format nil (sql "/sqlite/get-create-table.sql") table-name))
+         (create-table (sqlite:execute-single db sql))
+         ;; no statement, or no parens in it: there is nothing to parse
+         (open-paren   (and (stringp create-table)
+                            (position #\( create-table)))
+         (close-paren  (and open-paren
+                            (position #\) create-table :from-end t)))
+         (coldefs
+          (when (and close-paren (< open-paren close-paren))
+            (split-sequence:split-sequence #\, create-table
+                                           :start (+ 1 open-paren)
+                                           :end close-paren))))
+    (loop :for coldef :in coldefs
+       :do (let* ((words (mapcar (lambda (w) (string-trim '(#\" #\') w))
+                                 (split-sequence:split-sequence-if
+                                  (lambda (c) (member c blanks)) coldef
+                                  :remove-empty-subseqs t)))
+                  (colname (first words))
+                  (props   (rest words)))
+             (when (and colname (string= colname column-name)
+                        (member "autoincrement" props :test #'string-equal))
+               (log-message :notice "SQLite column ~a.~a is autoincrement, but has no sequence"
+                            table-name column-name)
+               (return t))))))
+
 (defun list-columns (table &key db-has-sequences (db *sqlite-db*) )
   "Return the list of columns found in TABLE-NAME."
   (let* ((table-name (table-source-name table))
@@ -85,17 +126,14 @@
                                       pk-id)))
              (when (and db-has-sequences
                         (not (zerop pk-id))
-                        (string-equal (coldef-ctype field) "integer"))
+                        (string-equal (coldef-ctype field) "integer")
+                        (or (find-sequence db table-name name)
+                            (find-auto-increment-in-create-sql db
+                                                               table-name
+                                                               name)))
                ;; then it might be an auto_increment, which we know by
                ;; looking at the sqlite_sequence catalog
-               (let* ((sql
-                       (format nil (sql "/sqlite/find-sequence.sql") table-name))
-                      (seq (sqlite:execute-single db sql)))
-                 (when (and seq (not (zerop seq)))
-                   ;; magic marker for `apply-casting-rules'
-                   (log-message :notice "SQLite column ~a.~a uses a sequence"
-                                table-name name)
-                   (setf (coldef-extra field) :auto-increment))))
+               (setf (coldef-extra field) :auto-increment))
              (add-field table field)))))
 
 (defun list-all-columns (schema

From 44514deaa7bbd5a83830542011638f16ff08f4d7 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 8 Jan 2019 22:09:13 +0100
Subject: [PATCH 64/69] Improve ALTER TABLE documentation.

---
 docs/ref/mssql.rst | 27 ++++++++++++++++++++++++---
 docs/ref/mysql.rst |  5 ++---
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/docs/ref/mssql.rst b/docs/ref/mssql.rst
index e686d1a..7797abf 100644
--- a/docs/ref/mssql.rst
+++ b/docs/ref/mssql.rst
@@ -128,9 +128,30 @@ schema 'public' in the target database with this command::
 ALTER TABLE NAMES MATCHING ... IN SCHEMA '...'
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-See the MS SQL explanation for this clause above. It works the same in the
-context of migrating from MS SQL, only with the added option to specify the
-name of the schema where to find the definition of the target tables.
+Introduce a comma separated list of table names or *regular expressions*
+that you want to target in the pgloader *ALTER TABLE* command. Available
+actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
+
+    ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
+      IN SCHEMA 'dbo'
+     SET SCHEMA 'mv'
+   
+    ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'dbo' RENAME TO 'films'
+    
+    ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET (fillfactor='40')
+
+You can use as many such rules as you need. The list of tables to be
+migrated is searched in pgloader memory against the *ALTER TABLE* matching
+rules, and for each command pgloader stops at the first matching criterion
+(regexp or string).
+
+No *ALTER TABLE* command is sent to PostgreSQL; the modification happens at
+the level of the pgloader in-memory representation of your source database
+schema. In case of a name change, the mapping is kept and reused in the
+*foreign key* and *index* support.
+
+The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
+command that pgloader will run when it has to create a table.
 
 The matching is done in pgloader itself, with a Common Lisp regular
 expression lib, so doesn't depend on the *LIKE* implementation of MS SQL,
diff --git a/docs/ref/mysql.rst b/docs/ref/mysql.rst
index a55f5d7..5b83759 100644
--- a/docs/ref/mysql.rst
+++ b/docs/ref/mysql.rst
@@ -499,9 +499,8 @@ ALTER TABLE NAMES MATCHING
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Introduce a comma separated list of table names or *regular expressions*
-that you want to target in the pgloader *ALTER TABLE* command. The only two
-available actions are *SET SCHEMA* and *RENAME TO*, both take a quoted
-string as parameter::
+that you want to target in the pgloader *ALTER TABLE* command. Available
+actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
 
     ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
      SET SCHEMA 'mv'

From f28f8e577d55070ef8041f612b6b6b212b6cfa15 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 8 Jan 2019 22:44:07 +0100
Subject: [PATCH 65/69] Review log-level for stored procedures.

Some MySQL schema level features (on update current_timestamp) are migrated
to stored procedures and triggers. We would log the CREATE PROCEDURE
statements as LOG level entries instead of SQL level entries, most likely a
stray devel/debug choice.
---
 src/pgsql/pgsql-create-schema.lisp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pgsql/pgsql-create-schema.lisp b/src/pgsql/pgsql-create-schema.lisp
index b06c31d..44c511a 100644
--- a/src/pgsql/pgsql-create-schema.lisp
+++ b/src/pgsql/pgsql-create-schema.lisp
@@ -153,7 +153,7 @@
                        :collect (format-create-sql (trigger-procedure trigger))
                        :collect (format-create-sql trigger)))))
     (pgsql-execute-with-timing section label sql-list
-                               :log-level :log
+                               :log-level :sql
                                :client-min-messages client-min-messages)))
 
 

From 2147a1d07b55e254a491cf531a4263e53e033b97 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 8 Jan 2019 22:47:45 +0100
Subject: [PATCH 66/69] Implement ALTER TABLE ... SET TABLESPACE ... as a
 pgloader clause.

This allows creating tables in any target tablespace rather than the default
one, and is supported for the various sources having support for the ALTER
TABLE clause already.
---
 docs/ref/mssql.rst                   |  5 +++++
 docs/ref/mysql.rst                   |  5 +++++
 docs/ref/pgsql.rst                   | 15 ++++++++++-----
 src/package.lisp                     |  1 +
 src/parsers/command-alter-table.lisp |  7 ++++++-
 src/parsers/command-keywords.lisp    |  1 +
 src/pgsql/pgsql-ddl.lisp             |  3 +++
 src/utils/alter-table.lisp           |  4 ++++
 src/utils/catalog.lisp               |  3 ++-
 test/mysql/my.load                   |  1 +
 10 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/docs/ref/mssql.rst b/docs/ref/mssql.rst
index 7797abf..ed024c2 100644
--- a/docs/ref/mssql.rst
+++ b/docs/ref/mssql.rst
@@ -139,6 +139,8 @@ actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
     ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'dbo' RENAME TO 'films'
     
     ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET (fillfactor='40')
+    
+    ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET TABLESPACE 'tlbspc'
 
 You can use as many such rules as you need. The list of tables to be
 migrated is searched in pgloader memory against the *ALTER TABLE* matching
@@ -153,6 +155,9 @@ schema. In case of a name change, the mapping is kept and reused in the
 The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
 command that pgloader will run when it has to create a table.
 
+The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
+`CREATE TABLE` command that pgloader will run when it has to create a table.
+
 The matching is done in pgloader itself, with a Common Lisp regular
 expression lib, so doesn't depend on the *LIKE* implementation of MS SQL,
 nor on the lack of support for regular expressions in the engine.
diff --git a/docs/ref/mysql.rst b/docs/ref/mysql.rst
index 5b83759..e54b42b 100644
--- a/docs/ref/mysql.rst
+++ b/docs/ref/mysql.rst
@@ -509,6 +509,8 @@ actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
     
     ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40')
 
+    ALTER TABLE NAMES MATCHING ~/./ SET TABLESPACE 'pg_default'
+
 You can use as many such rules as you need. The list of tables to be
 migrated is searched in pgloader memory against the *ALTER TABLE* matching
 rules, and for each command pgloader stops at the first matching criteria
@@ -522,6 +524,9 @@ schema. In case of a name change, the mapping is kept and reused in the
 The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
 command that pgloader will run when it has to create a table.
 
+The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
+`CREATE TABLE` command that pgloader will run when it has to create a table.
+
 MySQL Migration: limitations
 ----------------------------
 
diff --git a/docs/ref/pgsql.rst b/docs/ref/pgsql.rst
index 53e9dcb..06dd406 100644
--- a/docs/ref/pgsql.rst
+++ b/docs/ref/pgsql.rst
@@ -346,16 +346,18 @@ ALTER TABLE NAMES MATCHING
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Introduce a comma separated list of table names or *regular expressions*
-that you want to target in the pgloader *ALTER TABLE* command. The only two
-available actions are *SET SCHEMA* and *RENAME TO*, both take a quoted
-string as parameter::
+that you want to target in the pgloader *ALTER TABLE* command. Available
+actions are *SET SCHEMA*, *RENAME TO*, *SET*, and *SET TABLESPACE*::
 
     ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
+      IN SCHEMA 'public'
      SET SCHEMA 'mv'
    
-    ALTER TABLE NAMES MATCHING 'film' RENAME TO 'films'
+    ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'public' RENAME TO 'films'
     
-    ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40')
+    ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET (fillfactor='40')
+    
+    ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET TABLESPACE 'pg_default'
 
 You can use as many such rules as you need. The list of tables to be
 migrated is searched in pgloader memory against the *ALTER TABLE* matching
@@ -370,6 +372,9 @@ schema. In case of a name change, the mapping is kept and reused in the
 The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
 command that pgloader will run when it has to create a table.
 
+The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
+`CREATE TABLE` command that pgloader will run when it has to create a table.
+
 PostgreSQL Migration: limitations
 ---------------------------------
 
diff --git a/src/package.lisp b/src/package.lisp
index c6eda82..0de7e96 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -94,6 +94,7 @@
            #:table-oid
            #:table-comment
            #:table-storage-parameter-list
+           #:table-tablespace
            #:table-field-list
            #:table-column-list
            #:table-index-list
diff --git a/src/parsers/command-alter-table.lisp b/src/parsers/command-alter-table.lisp
index 3e11c7a..b10479b 100644
--- a/src/parsers/command-alter-table.lisp
+++ b/src/parsers/command-alter-table.lisp
@@ -47,9 +47,14 @@
     (bind (((_ _ parameters _) stmt))
       (list #'pgloader.catalog::alter-table-set-storage-parameters parameters))))
 
+(defrule set-tablespace (and kw-set kw-tablespace quoted-namestring)
+  (:destructure (kw1 kw2 name) (declare (ignore kw1 kw2))
+    (list #'pgloader.catalog::alter-table-set-tablespace name)))
+
 (defrule alter-table-action (or rename-to
                                 set-schema
-                                set-storage-parameters))
+                                set-storage-parameters
+                                set-tablespace))
 
 (defrule alter-table-command (and alter-table-names-matching
                                   (? in-schema)
diff --git a/src/parsers/command-keywords.lisp b/src/parsers/command-keywords.lisp
index 9a4dcea..84d8bb3 100644
--- a/src/parsers/command-keywords.lisp
+++ b/src/parsers/command-keywords.lisp
@@ -26,6 +26,7 @@
   (def-keyword-rule "with")
   (def-keyword-rule "when")
   (def-keyword-rule "set")
+  (def-keyword-rule "tablespace")
   (def-keyword-rule "database")
   (def-keyword-rule "messages")
   (def-keyword-rule "matches")
diff --git a/src/pgsql/pgsql-ddl.lisp b/src/pgsql/pgsql-ddl.lisp
index 662bc95..03e8962 100644
--- a/src/pgsql/pgsql-ddl.lisp
+++ b/src/pgsql/pgsql-ddl.lisp
@@ -92,6 +92,9 @@
                       (alexandria:alist-plist
                        (table-storage-parameter-list table))))
 
+            (when (table-tablespace table)
+              (format s "~%TABLESPACE ~a" (table-tablespace table)))
+
             (format s ";~%"))))
 
 (defmethod format-drop-sql ((table table) &key (stream nil) cascade (if-exists t))
diff --git a/src/utils/alter-table.lisp b/src/utils/alter-table.lisp
index 56536e4..c5da02b 100644
--- a/src/utils/alter-table.lisp
+++ b/src/utils/alter-table.lisp
@@ -75,6 +75,10 @@
   "Alter the storage parameters of TABLE."
   (setf (table-storage-parameter-list table) parameters))
 
+(defun alter-table-set-tablespace (table tablespace)
+  "Store TABLESPACE in the tablespace slot of TABLE and return TABLESPACE."
+  (setf (table-tablespace table) tablespace))
+
 
 ;;;
 ;;; Apply the match rules as given by the parser to a table name.
diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp
index 6c3ca3a..8489394 100644
--- a/src/utils/catalog.lisp
+++ b/src/utils/catalog.lisp
@@ -47,7 +47,8 @@
 (defstruct schema source-name name catalog in-search-path
            table-list view-list extension-list sqltype-list)
 
-(defstruct table source-name name schema oid comment storage-parameter-list
+(defstruct table source-name name schema oid comment
+           storage-parameter-list tablespace
            ;; field is for SOURCE
            ;; column is for TARGET
            ;; citus is an extra slot for citus support
diff --git a/test/mysql/my.load b/test/mysql/my.load
index 3ac8806..158ed95 100644
--- a/test/mysql/my.load
+++ b/test/mysql/my.load
@@ -9,6 +9,7 @@ load database
       quote identifiers
 
  ALTER SCHEMA 'pgloader' RENAME TO 'mysql'
+ ALTER TABLE NAMES MATCHING ~/./ SET TABLESPACE 'pg_default'
 
  CAST column utilisateurs__Yvelines2013-06-28.sexe
           to text drop not null using empty-string-to-null,

From 1306b4c953057de56ea79c6089ed8bacc8abe08f Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Wed, 9 Jan 2019 18:57:33 +0100
Subject: [PATCH 67/69] Desultory improvements.

Killing tasks in the error handling must be done carefully, and given this
testing session it seems better to refrain from doing it when erroring out
at COPY init time (missing column is an example of that). The approach
around that is still very much ad-hoc rather than systematic.

In passing, improve the `make save` target that produces a binary image:
have the make recipe respect the CL variable. The differences in command
line options were already accounted for.
---
 Makefile                | 2 +-
 src/load/copy-data.lisp | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 38d8a38..f077847 100644
--- a/Makefile
+++ b/Makefile
@@ -165,7 +165,7 @@ test: $(PGLOADER)
 	$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
 
 save: ./src/save.lisp $(LISP_SRC)
-	sbcl --no-userinit --load ./src/save.lisp
+	$(CL) $(CL_OPTS) --load ./src/save.lisp
 
 check-saved: save
 	$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
diff --git a/src/load/copy-data.lisp b/src/load/copy-data.lisp
index 676b814..5b19c11 100644
--- a/src/load/copy-data.lisp
+++ b/src/load/copy-data.lisp
@@ -79,7 +79,6 @@
             #'(lambda (condition)
                 ;; stop the other tasks and then transfer the control
                 (log-message :log "COPY INIT ERROR")
-                (lp:kill-tasks :default)
                 (lp:invoke-transfer-error condition)))
            (on-error-stop
             #'(lambda (condition)

From dae5dec03c3618050ebbfaef788eedb00ea3269e Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Tue, 15 Jan 2019 22:39:08 +0100
Subject: [PATCH 68/69] Allow fields/columns projections when parsing header.

When using a CSV header, we might find fields in a different order than the
target table columns, and maybe not all of the fields are going to be read.
Take account of the header we read rather than expecting the header to look
like the target table definition.

Fix #888.
---
 src/load/load-file.lisp              |  2 ++
 src/sources/common/api.lisp          |  2 +-
 src/sources/common/md-methods.lisp   | 10 +++------
 src/sources/csv/csv.lisp             | 32 ++++++++++++++++++----------
 test/csv-header.load                 |  8 +++----
 test/regress/expected/csv-header.out |  4 ++--
 6 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/src/load/load-file.lisp b/src/load/load-file.lisp
index 02ddb45..de525dc 100644
--- a/src/load/load-file.lisp
+++ b/src/load/load-file.lisp
@@ -98,6 +98,8 @@
         (loop :for path-spec :in path-list
            :count t
            :do (let ((table-source (clone-copy-for copy path-spec)))
+                 (when (and (header table-source) (null (fields table-source)))
+                   (parse-header table-source))
                  (incf task-count
                        (copy-from table-source
                                   :concurrency concurrency
diff --git a/src/sources/common/api.lisp b/src/sources/common/api.lisp
index 091e3b5..d0d1a38 100644
--- a/src/sources/common/api.lisp
+++ b/src/sources/common/api.lisp
@@ -95,7 +95,7 @@
                 :initform nil))           ;
   (:documentation "pgloader Multiple Files Data Source (csv, fixed, copy)."))
 
-(defgeneric parse-header (md-copy header)
+(defgeneric parse-header (md-copy)
   (:documentation "Parse the file header and return a list of fields."))
 
 (defgeneric process-rows (md-copy stream process-fn)
diff --git a/src/sources/common/md-methods.lisp b/src/sources/common/md-methods.lisp
index 8694dd1..0a8eacc 100644
--- a/src/sources/common/md-methods.lisp
+++ b/src/sources/common/md-methods.lisp
@@ -4,7 +4,7 @@
 
 (in-package #:pgloader.sources)
 
-(defmethod parse-header ((copy md-copy) header)
+(defmethod parse-header ((copy md-copy))
   "Unsupported by default, to be implemented in each md-copy subclass."
   (error "Parsing the header of a ~s is not implemented yet." (type-of copy)))
 
@@ -59,12 +59,8 @@
      ;; about skipping the first line
       (loop :repeat (skip-lines copy) :do (read-line input nil nil))
 
-      ;; we might now have to read the fields from the header line
-      (when (header copy)
-        (setf (fields copy)
-              (parse-header copy (read-line input nil nil)))
-
-        (log-message :debug "Parsed header columns ~s" (fields copy)))
+      ;; we might now have to skip the header line
+      (when (header copy) (read-line input nil nil))
 
       ;; read in the text file, split it into columns
       (process-rows copy input process-row-fn))))
diff --git a/src/sources/csv/csv.lisp b/src/sources/csv/csv.lisp
index f58007c..4fc6495 100644
--- a/src/sources/csv/csv.lisp
+++ b/src/sources/csv/csv.lisp
@@ -57,19 +57,29 @@
 ;;;
 ;;; Read a file format in CSV format, and call given function on each line.
 ;;;
-(defmethod parse-header ((csv copy-csv) header)
+(defmethod parse-header ((csv copy-csv))
   "Parse the header line given csv setup."
   ;; a field entry is a list of field name and options
-  (mapcar #'list
-          (car                          ; parsing a single line
-           (cl-csv:read-csv header
-                            :separator (csv-separator csv)
-                            :quote (csv-quote csv)
-                            :escape (csv-escape csv)
-                            :unquoted-empty-string-is-nil t
-                            :quoted-empty-string-is-nil nil
-                            :trim-outer-whitespace (csv-trim-blanks csv)
-                            :newline (csv-newline csv)))))
+  (with-connection (cnx (source csv)
+                        :direction :input :external-format (encoding csv)
+                        :if-does-not-exist nil)
+    (let ((input (md-strm cnx)))
+      (loop :repeat (skip-lines csv) :do (read-line input nil nil))
+      (let* ((header-line (read-line input nil nil))
+             (field-name-list
+              (mapcar #'list            ; we need each field to be a list
+                      (car              ; parsing a single line
+                       (cl-csv:read-csv header-line
+                                        :separator (csv-separator csv)
+                                        :quote (csv-quote csv)
+                                        :escape (csv-escape csv)
+                                        :unquoted-empty-string-is-nil t
+                                        :quoted-empty-string-is-nil nil
+                                        :trim-outer-whitespace (csv-trim-blanks csv)
+                                        :newline (csv-newline csv))))))
+        ;; log the freshly parsed list: (fields csv) is only set just below
+        (log-message :notice "Parsed header columns ~s" field-name-list)
+        (setf (fields csv) field-name-list)))))
 
 (defmethod process-rows ((csv copy-csv) stream process-fn)
   "Process rows from STREAM according to COPY specifications and PROCESS-FN."
diff --git a/test/csv-header.load b/test/csv-header.load
index a8b32eb..45b75de 100644
--- a/test/csv-header.load
+++ b/test/csv-header.load
@@ -15,11 +15,11 @@ LOAD CSV
           "repl$grpid"   text,
           "repl$id"      text,
           another        text,
-          fields         text
+          fields         integer
        )
     $$;
 
 
-somefields,rekplcode,repl$grpid,repl$id,another,fields
-a,b,c,d,e,f
-foo,bar,baz,quux,foobar,fizzbuzz
+somefields,rekplcode,repl$grpid,repl$id,fields,another
+a,b,c,d,1,e
+foo,bar,baz,quux,2,foobar
diff --git a/test/regress/expected/csv-header.out b/test/regress/expected/csv-header.out
index 512042d..71c7ec2 100644
--- a/test/regress/expected/csv-header.out
+++ b/test/regress/expected/csv-header.out
@@ -1,2 +1,2 @@
-a	b	c	d	e	f
-foo	bar	baz	quux	foobar	fizzbuzz
+a	b	c	d	e	1
+foo	bar	baz	quux	foobar	2

From 25c937879affdfffb37210b8dc125de86b1db7e2 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 21 Jan 2019 15:02:39 +0100
Subject: [PATCH 69/69] Fix building for 3.6.1.

The pgloader-image feature must be added in the lisp image before
reading/compiling the pgloader sources for it to be useful.
---
 Makefile               | 14 ++++++++++----
 bundle/Makefile        |  5 ++++-
 src/hooks.lisp         | 10 +++++++---
 src/main.lisp          |  5 +++++
 src/params.lisp        |  6 +++---
 src/utils/monitor.lisp |  3 ++-
 6 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/Makefile b/Makefile
index f077847..42511f5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 # pgloader build tool
 APP_NAME   = pgloader
-VERSION    = 3.6.0
+VERSION    = 3.6.1
 
 # use either sbcl or ccl
 CL	   = sbcl
@@ -24,7 +24,7 @@ QLDIR      = $(BUILDDIR)/quicklisp
 MANIFEST   = $(BUILDDIR)/manifest.ql
 LATEST     = $(BUILDDIR)/pgloader-latest.tgz
 
-BUNDLEDIST = 2018-10-18
+BUNDLEDIST = 2019-01-07
 BUNDLENAME = pgloader-bundle-$(VERSION)
 BUNDLEDIR  = $(BUILDDIR)/bundle/$(BUNDLENAME)
 BUNDLE     = $(BUILDDIR)/$(BUNDLENAME).tgz
@@ -99,8 +99,11 @@ clones: $(QLDIR)/local-projects/cl-ixf \
         $(QLDIR)/local-projects/cl-csv \
         $(QLDIR)/local-projects/qmynd ;
 
-$(LIBS): $(QLDIR)/setup.lisp clones
+$(LIBS): $(QLDIR)/setup.lisp
 	$(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp                   \
+             --eval '(push :pgloader-image *features*)'               \
+             --eval '(setf *print-circle* t *print-pretty* t)'        \
+             --eval '(ql:quickload "pgloader")'                       \
              --eval '(push "$(PWD)/" ql:*local-project-directories*)' \
              --eval '(ql:quickload "pgloader")'                       \
              --eval '(quit)'
@@ -141,8 +144,11 @@ $(PGLOADER): $(MANIFEST) $(BUILDAPP) $(LISP_SRC)
                          --manifest-file $(MANIFEST)             \
                          --asdf-tree $(QLDIR)/dists              \
                          --asdf-path .                           \
-                         --load-system $(APP_NAME)               \
+                         --load-system cffi                      \
+                         --load-system cl+ssl                    \
+                         --load-system mssql                     \
                          --load src/hooks.lisp                   \
+                         --load-system $(APP_NAME)               \
                          --entry pgloader:main                   \
                          --dynamic-space-size $(DYNSIZE)         \
                          $(COMPRESS_CORE_OPT)                    \
diff --git a/bundle/Makefile b/bundle/Makefile
index 9102bd4..fbeae71 100644
--- a/bundle/Makefile
+++ b/bundle/Makefile
@@ -48,9 +48,12 @@ $(PGLOADER): $(BUILDAPP)
                          $(BUILDAPP_OPTS)                             \
                          --sbcl $(CL)                                 \
                          --asdf-tree .                                \
+                         --load-system cffi                           \
+                         --load-system cl+ssl                         \
+                         --load-system mssql                          \
+                         --load $(SRCDIR)/src/hooks.lisp              \
                          --load-system $(APP_NAME)                    \
                          --eval '(setf pgloader.params::*version-string* "$(VERSION)")' \
-                         --load $(SRCDIR)/src/hooks.lisp              \
                          --entry pgloader:main                        \
                          --dynamic-space-size $(DYNSIZE)              \
                          $(COMPRESS_CORE_OPT)                         \
diff --git a/src/hooks.lisp b/src/hooks.lisp
index 62c878b..c8cf1bc 100644
--- a/src/hooks.lisp
+++ b/src/hooks.lisp
@@ -9,6 +9,8 @@
 ;;; :cl+ssl in its system definition.
 ;;;
 
+(in-package #:cl-user)
+
 ;; So that we can #+pgloader-image some code away, see main.lisp
 (push :pgloader-image *features*)
 
@@ -18,9 +20,6 @@
 ;;;
 (setf *print-circle* t *print-pretty* t)
 
-
-(in-package #:cl-user)
-
 (defun close-foreign-libs ()
   "Close Foreign libs in use by pgloader at application save time."
   (let (#+sbcl (sb-ext:*muffled-warnings* 'style-warning))
@@ -47,6 +46,10 @@
 ;;; Register all loaded systems in the image, so that ASDF don't search for
 ;;; them again when doing --self-upgrade
 ;;;
+
+;;; FIXME: this idea kept failing.
+
+#|
 (defun register-preloaded-system (system)
   (unless (string= "pgloader" (asdf::coerce-name system))
     (let ((version (slot-value system 'asdf::version)))
@@ -64,3 +67,4 @@
                                              (asdf:find-system system-name)))
      when (typep o 'asdf:load-source-op)
      append (asdf:input-files o c)))
+|#
diff --git a/src/main.lisp b/src/main.lisp
index c8e43f7..ba75c04 100644
--- a/src/main.lisp
+++ b/src/main.lisp
@@ -243,6 +243,11 @@
 
 	;; Then process options
 	(when debug
+          (format t "pgloader version ~a~%" *version-string*)
+          #+pgloader-image
+          (format t "compiled with ~a ~a~%"
+                  (lisp-implementation-type)
+                  (lisp-implementation-version))
 	  #+sbcl
           (format t "sb-impl::*default-external-format* ~s~%"
 		  sb-impl::*default-external-format*)
diff --git a/src/params.lisp b/src/params.lisp
index 8080dd7..60a754b 100644
--- a/src/params.lisp
+++ b/src/params.lisp
@@ -40,11 +40,11 @@
 
 (in-package :pgloader.params)
 
-(defparameter *release* nil
+(defparameter *release* t
   "non-nil when this build is a release build.")
 
-(defparameter *major-version* "3.5")
-(defparameter *minor-version* "2")
+(defparameter *major-version* "3.6")
+(defparameter *minor-version* "1")
 
 (defun git-hash ()
   "Return the current abbreviated git hash of the development tree."
diff --git a/src/utils/monitor.lisp b/src/utils/monitor.lisp
index c56652d..696eb50 100644
--- a/src/utils/monitor.lisp
+++ b/src/utils/monitor.lisp
@@ -214,7 +214,8 @@
            (start
             (when (start-start-logger event)
               (pgloader.logs:start-logger))
-            (cl-log:log-message :info "Starting monitor"))
+            (cl-log:log-message :info "Starting monitor")
+            (cl-log:log-message :log "pgloader version ~s" *version-string*))
 
            (stop
             (cl-log:log-message :info "Stopping monitor")