Compare commits

..

1 Commits

Author SHA1 Message Date
Dimitri Fontaine
63ed605b19 Prepare for release 3.2.0. 2015-01-15 20:05:06 +01:00
360 changed files with 21916 additions and 33545 deletions

View File

@ -1,5 +0,0 @@
.git
.vagrant
build
Dockerfile
Dockerfile.ccl

1
.gitattributes vendored
View File

@ -1 +0,0 @@
test/**/*.sql linguist-vendored

1
.github/FUNDING.yml vendored
View File

@ -1 +0,0 @@
github: dimitri

View File

@ -1,33 +0,0 @@
name: Debian Autopkgtest
on:
pull_request: {}
push: {}
jobs:
debian-build:
# focal is too old, use jammy
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Install postgresql-common
run: sudo apt-get install -y postgresql-common
- name: Install pgapt repository
run: sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y
- name: Install build-dependencies
run: sudo apt-get build-dep -y .
- name: Build pgloader.deb
run: dpkg-buildpackage --no-sign --buildinfo-option=--version -b
- name: Install autopkgtest
run: sudo apt-get install -y autopkgtest
- name: Autopkgtest
run: sudo autopkgtest ./ ../pgloader_*_amd64.deb -- null

View File

@ -1,100 +0,0 @@
name: Docker
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
on:
push:
branches: [ master ]
# Publish semver tags as releases.
tags: [ 'v*.*.*' ]
pull_request:
branches: [ master ]
env:
# Use docker.io for Docker Hub if empty
REGISTRY: ghcr.io
# github.repository as <account>/<repo>
IMAGE_NAME: ${{ github.repository }}
jobs:
build:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
# This is used to complete the identity challenge
# with sigstore/fulcio when running outside of PRs.
id-token: write
steps:
- name: Checkout repository
uses: actions/checkout@v2
# Install the cosign tool (not used on PR, still installed)
# https://github.com/sigstore/cosign-installer
- name: Install cosign
uses: sigstore/cosign-installer@main
with:
cosign-release: 'v2.2.3'
- name: Check cosign version
run: cosign version
# Workaround: https://github.com/docker/build-push-action/issues/461
- name: Setup Docker buildx
uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf
# Login against a Docker registry except on PR
# https://github.com/docker/login-action
- name: Log into registry ${{ env.REGISTRY }}
if: github.event_name != 'pull_request'
uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Extract metadata (tags, labels) for Docker
# https://github.com/docker/metadata-action
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v3.6.2
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=raw,value=latest,enable=${{ endsWith(github.ref, github.event.repository.default_branch) }}
type=semver,pattern={{version}}
# Build and push Docker image with Buildx (don't push on PR)
# https://github.com/docker/build-push-action
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
with:
context: .
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
# Sign the resulting Docker image digest except on PRs.
# This will only write to the public Rekor transparency log when the Docker
# repository is public to avoid leaking data. If you would like to publish
# transparency data even for private images, pass --force to cosign below.
# https://github.com/sigstore/cosign
- name: Sign the published Docker image
if: ${{ github.event_name != 'pull_request' }}
# This step uses the identity token to provision an ephemeral certificate
# against the sigstore community Fulcio instance.
run: cosign sign --yes ${TAGS}
env:
TAGS: ${{ steps.meta.outputs.tags }}
# should use @${{ steps.build-and-push.outputs.digest }}
# but that leads to "entity not found in registry"
COSIGN_EXPERIMENTAL: "true"

5
.gitignore vendored
View File

@ -2,7 +2,6 @@
local-data local-data
pgloader.html pgloader.html
pgloader.pdf pgloader.pdf
debian/home/
debian/pgloader.debhelper.log debian/pgloader.debhelper.log
debian/pgloader.substvars debian/pgloader.substvars
debian/pgloader/ debian/pgloader/
@ -15,7 +14,3 @@ web/howto/mysql.html
web/howto/pgloader.1.html web/howto/pgloader.1.html
web/howto/quickstart.html web/howto/quickstart.html
web/howto/sqlite.html web/howto/sqlite.html
.DS_Store
system-index.txt
buildapp.log
docs/_build

View File

@ -1,11 +0,0 @@
version: 2
# Build from the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
# Explicitly set the version of Python and its requirements
python:
version: 3.7
install:
- requirements: docs/requirements.txt

View File

@ -1,72 +0,0 @@
#!/bin/bash
set -eu
lisp_install() {
case "$LISP" in
ccl)
ccl_checksum='08e885e8c2bb6e4abd42b8e8e2b60f257c6929eb34b8ec87ca1ecf848fac6d70'
ccl_version='1.11'
remote_file "/tmp/ccl-${ccl_version}.tgz" "https://github.com/Clozure/ccl/releases/download/v${ccl_version}/ccl-${ccl_version}-linuxx86.tar.gz" "$ccl_checksum"
tar --file "/tmp/ccl-${ccl_version}.tgz" --extract --exclude='.svn' --directory '/tmp'
sudo mv --no-target-directory '/tmp/ccl' '/usr/local/src/ccl'
sudo ln --no-dereference --force --symbolic "/usr/local/src/ccl/scripts/ccl64" '/usr/local/bin/ccl'
;;
sbcl)
sbcl_checksum='22ccd9409b2ea16d4be69235c5ad5fde833452955cb24483815312d3b1d7401c'
sbcl_version='1.5.2'
remote_file "/tmp/sbcl-${sbcl_version}.tgz" "http://prdownloads.sourceforge.net/sbcl/sbcl-${sbcl_version}-x86-64-linux-binary.tar.bz2" "$sbcl_checksum"
tar --file "/tmp/sbcl-${sbcl_version}.tgz" --extract --directory '/tmp'
( cd "/tmp/sbcl-${sbcl_version}-x86-64-linux" && sudo ./install.sh )
;;
*)
echo "Unrecognized Lisp: '$LISP'"
exit 1
;;
esac
}
pgdg_repositories() {
local sourcelist='sources.list.d/pgdg.list'
sudo tee "/etc/apt/$sourcelist" <<-repositories
deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main
deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg-testing main 10
repositories
sudo apt-key adv --keyserver 'hkp://ha.pool.sks-keyservers.net' --recv-keys 'ACCC4CF8'
sudo apt-get -o Dir::Etc::sourcelist="$sourcelist" -o Dir::Etc::sourceparts='-' -o APT::Get::List-Cleanup='0' update
}
postgresql_install() {
if [ -z "${PGVERSION:-}" ]; then
echo 'PGVERSION environment variable not set.';
exit 1
fi
xargs sudo apt-get -y install <<-packages
postgresql-${PGVERSION}
postgresql-${PGVERSION}-ip4r
packages
sudo tee /etc/postgresql/${PGVERSION}/main/pg_hba.conf > /dev/null <<-config
local all all trust
host all all 127.0.0.1/32 trust
config
sudo service postgresql restart
}
remote_file() {
local target="$1" origin="$2" sum="$3"
local check="shasum --algorithm $(( 4 * ${#sum} )) --check"
local filesum="$sum $target"
curl --location --output "$target" "$origin" && $check <<< "$filesum"
}
$1

View File

@ -1,38 +1,22 @@
language: shell language: common-lisp
os: linux
dist: xenial
env:
matrix:
- LISP=ccl PGVERSION=9.6
- LISP=ccl PGVERSION=10
- LISP=ccl PGVERSION=11
- LISP=ccl PGVERSION=12
- LISP=ccl PGVERSION=13
- LISP=sbcl PGVERSION=9.6
- LISP=sbcl PGVERSION=10
- LISP=sbcl PGVERSION=11
- LISP=sbcl PGVERSION=12
- LISP=sbcl PGVERSION=13
install: install:
- ./.travis.sh lisp_install - wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
- ./.travis.sh pgdg_repositories - echo "deb http://apt.postgresql.org/pub/repos/apt/ trusty-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
- ./.travis.sh postgresql_install - sudo apt-get update
- sudo apt-get install -y unzip libsqlite3-dev gawk freetds-dev - sudo DEBIAN_FRONTEND=noninteractive apt-get -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" dist-upgrade
- wget http://pgsql.tapoueh.org/sbcl/sbcl_1.2.0-1_amd64.deb
- sudo dpkg -i sbcl_1.2.0-1_amd64.deb
- sudo apt-get install -f
- sudo apt-get install sbcl unzip libsqlite3-dev gawk freetds-dev
- sudo apt-get install postgresql-9.1-ip4r
before_script: before_script:
- PGUSER=postgres createuser -S -R -D -E -l pgloader - sudo -u postgres createuser -S -R -D -E -l pgloader
- PGUSER=postgres createdb -E UTF8 -O pgloader pgloader - sudo -u postgres createdb -E UTF8 -O pgloader -hlocalhost pgloader
- PGUSER=postgres psql -d pgloader -c "create extension ip4r;" - sudo -u postgres psql -h localhost -d pgloader -c "create extension ip4r;"
- PGUSER=pgloader psql -d pgloader -c "create schema expected;"
- PGUSER=pgloader psql -d pgloader -c "create schema err;"
- make --version - make --version
- make "CL=$LISP" clones save - make
script: script:
- PGUSER=pgloader make "CL=$LISP" check-saved - PGUSER=pgloader make check
notifications: notifications:
email: email:
- dim@tapoueh.org - dim@tapoueh.org

View File

@ -1,53 +0,0 @@
FROM debian:bookworm-slim AS builder
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bzip2 \
ca-certificates \
curl \
freetds-dev \
gawk \
git \
libsqlite3-dev \
libssl3 \
libzip-dev \
make \
openssl \
patch \
sbcl \
time \
unzip \
wget \
cl-ironclad \
cl-babel \
&& rm -rf /var/lib/apt/lists/*
COPY ./ /opt/src/pgloader
ARG DYNSIZE=16384
RUN mkdir -p /opt/src/pgloader/build/bin \
&& cd /opt/src/pgloader \
&& make DYNSIZE=$DYNSIZE clones save
FROM debian:bookworm-slim
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
ca-certificates \
curl \
freetds-dev \
gawk \
libsqlite3-dev \
libzip-dev \
make \
sbcl \
unzip \
&& update-ca-certificates \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
ADD conf/freetds.conf /etc/freetds/freetds.conf
LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"

View File

@ -1,53 +0,0 @@
FROM debian:bookworm-slim as builder
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bzip2 \
ca-certificates \
curl \
freetds-dev \
gawk \
git \
libsqlite3-dev \
libssl3 \
libzip-dev \
make \
openssl \
patch \
time \
unzip \
wget \
cl-ironclad \
cl-babel \
&& rm -rf /var/lib/apt/lists/*
RUN curl -SL https://github.com/Clozure/ccl/releases/download/v1.12/ccl-1.12-linuxx86.tar.gz \
| tar xz -C /usr/local/src/ \
&& mv /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl
COPY ./ /opt/src/pgloader
ARG DYNSIZE=256
RUN mkdir -p /opt/src/pgloader/build/bin \
&& cd /opt/src/pgloader \
&& make CL=ccl DYNSIZE=$DYNSIZE clones save
FROM debian:bookworm-slim
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
curl \
freetds-dev \
gawk \
libsqlite3-dev \
libssl3 \
libzip-dev \
make \
sbcl \
unzip \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"

View File

@ -2,7 +2,7 @@
pgloader version 3.x is written in Common Lisp. pgloader version 3.x is written in Common Lisp.
## Dependencies ## The lisp parts
The steps depend on the OS you are currently using. The steps depend on the OS you are currently using.
@ -23,49 +23,7 @@ You will note in particular:
We need a recent enough [SBCL](http://sbcl.org/) version and that means We need a recent enough [SBCL](http://sbcl.org/) version and that means
backporting the one found in `sid` rather than using the very old one found backporting the one found in `sid` rather than using the very old one found
in current *stable* debian release. See `bootstrap-debian.sh` for details in current *stable* debian release. See `bootstrap-debian.sh` for details
about how to backport a recent enough SBCL here (1.2.5 or newer). about how to backport a recent enough SBCL here (1.1.14 or newer).
### Redhat / CentOS
To build and install pgloader the Steel Bank Common Lisp package (sbcl) from EPEL,
and the freetds packages are required.
With RHEL/CentOS 6, if the packaged version of sbcl isn't >=1.3.6, you'll need
to build it from source.
It is recommended to build the RPM yourself, see below, to ensure that all installed
files are properly tracked and that you can safely update to newer versions of
pgloader as they're released.
To do an ad hoc build and install run `bootstrap-centos.sh` for CentOS 6 or
`bootstrap-centos7.sh` for CentOS 7 to install the required dependencies.
[Build pgloader](INSTALL.md#building-pgloader).
#### rpmbuild
The spec file in the root of the pgloader repository can be used to build your
own RPM. For production deployments it is recommended that you build this RPM on
a dedicated build box and then copy the RPM to your production environment for
use; it is considered bad practice to have compilers and build tools present in
production environments.
1. Install the [EPEL repo](https://fedoraproject.org/wiki/EPEL#Quickstart).
1. Install rpmbuild dependencies:
sudo yum -y install yum-utils rpmdevtools @"Development Tools"
1. Install pgloader build dependencies:
sudo yum-builddep pgloader.spec
1. Download pgloader source:
spectool -g -R pgloader.spec
1. Build the source and binary RPMs (see `rpmbuild --help` for other build options):
rpmbuild -ba pgloader.spec
### Mac OS X ### Mac OS X
@ -79,11 +37,6 @@ You will need to install either SBCL or CCL separately, and when using
brew install sbcl brew install sbcl
brew install clozure-cl brew install clozure-cl
NOTE: Make sure you installed the universal binaries of Freetds, so that
they can be loaded correctly.
brew install freetds --universal --build-from-source
### Compiling SBCL by yourself ### Compiling SBCL by yourself
If you ended up building SBCL yourself or you just want to do that, you can If you ended up building SBCL yourself or you just want to do that, you can
@ -102,9 +55,9 @@ Now that the dependences are installed, just type make.
make make
If your `SBCL` supports core compression, the make process will use it If using Mac OS X, and depending on how you did install `SBCL` and which
to generate a smaller binary. To force disabling core compression, you version you have (the brew default did change recently), you might need to
may use: ask the Makefile to refrain from trying to compress your binary image:
make COMPRESS_CORE=no make COMPRESS_CORE=no
@ -134,12 +87,65 @@ Now the `./build/bin/pgloader` that you get only uses 1GB.
## Building a docker image ## Building a docker image
A `Dockerfile` is provided, to use it: We start with a `debian` image:
docker build -t pgloader:debian . docker run -it debian bash
docker run --rm --name pgloader pgloader:debian bash -c "pgloader --version"
The `build` step install build dependencies in a debian jessie container, And then run the following steps:
then `git clone` and build `pgloader` in `/opt/src/pgloader` and finally
copy the resulting binary image in `/usr/local/bin/pgloader` so that it's # apt-get update
easily available. # apt-get install -y wget curl make git bzip2 time libzip-dev openssl-dev
# apt-get install -y patch unzip libsqlite3-dev gawk freetds-dev
# useradd -m -s /bin/bash dim
# su - dim
Install a binary version on SBCL, which unfortunately has no support for
core compression, so only use it to build another SBCL version from sources
with proper options:
$ mkdir sbcl
$ cd sbcl
$ wget http://prdownloads.sourceforge.net/sbcl/sbcl-1.2.6-x86-64-linux-binary.tar.bz2
$ wget http://prdownloads.sourceforge.net/sbcl/sbcl-1.2.6-source.tar.bz2?download
$ mv sbcl-1.2.6-source.tar.bz2\?download sbcl-1.2.6-source.tar.bz2
$ tar xf sbcl-1.2.6-x86-64-linux-binary.tar.bz2
$ tar xf sbcl-1.2.6-source.tar.bz2
$ exit
Install SBCL as root
# cd /home/dim/sbcl/sbcl-1.2.6-x86-64-linux
# bash install.sh
Now back as the unprivileged user (dim) to compile SBCL from sources:
# su - dim
$ cd sbcl/sbcl-1.2.6
$ sh make.sh --with-sb-core-compression --with-sb-thread > build.out 2>&1
$ exit
And install the newly compiled SBCL as root:
# cd /home/dim/sbcl/sbcl-1.2.6
# sh install.sh
Now build pgloader from sources:
# su - dim
$ git clone https://github.com/dimitri/pgloader
$ cd pgloader
$ make
$ ./build/bin/pgloader --help
$ exit
Now install pgloader in `/usr/local/bin` to make it easy to use:
# cp /home/dim/pgloader/build/bin/pgloader /usr/local/bin
# pgloader --version
Commit the docker instance and push it, from the host:
$ docker login
$ docker ps -l
$ docker commit <id> dimitri/pgloader-3.1.cd52654
$ docker push dimitri/pgloader-3.1.cd52654

View File

@ -1,96 +0,0 @@
Thanks for contributing to [pgloader](https://pgloader.io) by reporting an
issue! Reporting an issue is the only way we can solve problems, fix bugs,
and improve both the software and its user experience in general.
The best bug reports follow those 3 simple steps:
1. show what you did,
2. show the result you got,
3. explain how the result is not what you expected.
In the case of pgloader, here's the information I will need to read in your
bug report. Having all of this is a big help, and often means the bug you
reported can be fixed very efficiently as soon as I get to it.
Please provide the following information:
<!-- delete text above this line -->
- [ ] pgloader --version
```
<fill pgloader version here>
```
- [ ] did you test a fresh compile from the source tree?
Compiling pgloader from sources is documented in the
[README](https://github.com/dimitri/pgloader#build-from-sources), it's
easy to do, and if patches are to be made to fix your bug, you're going
to have to build from sources to get the fix anyway…
- [ ] did you search for other similar issues?
- [ ] how can I reproduce the bug?
Include a self-contained pgloader command file.
If you're loading from a database, consider attaching a database dump to
your issue. For MySQL, use `mysqldump`. For SQLite, just send over your
source file, that's easy. Maybe not the one with your production data, of
course — rather, the one with just the sample of data that allows me to reproduce
your bug.
When using a proprietary database system as a source, consider creating
a sample database on some Cloud service or somewhere you can then give
me access to, and see my email address on my GitHub profile to send me
the credentials. Still open a public issue for tracking and as
documentation for other users.
```
--
-- EDIT THIS FILE TO MATCH YOUR BUG REPORT
--
LOAD CSV
FROM INLINE with encoding 'ascii'
INTO postgresql:///pgloader
TARGET TABLE jordane
WITH truncate,
fields terminated by '|',
fields not enclosed,
fields escaped by backslash-quote
SET work_mem to '128MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ drop table if exists jordane; $$,
$$ CREATE TABLE jordane
(
"NOM" character(20),
"PRENOM" character(20)
)
$$;
BORDET|Jordane
BORDET|Audrey
LASTNAME|"opening quote
BONNIER|testprenombe~aucouptroplong
JOURDAIN|héhé¶
```
- [ ] pgloader output you obtain
```
PASTE HERE THE OUTPUT OF THE PGLOADER COMMAND
```
- [ ] data that is being loaded, if relevant
```
PASTE HERE THE DATA THAT HAS BEEN LOADED
```
- [ ] How the data is different from what you expected, if relevant

View File

@ -1,9 +0,0 @@
pgloader
Copyright (c) 2005-2017, The PostgreSQL Global Development Group
Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies.
IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

147
Makefile
View File

@ -1,19 +1,17 @@
# pgloader build tool # pgloader build tool
APP_NAME = pgloader APP_NAME = pgloader
VERSION = 3.6.10 VERSION = 3.2.0
# use either sbcl or ccl # use either sbcl or ccl
CL = sbcl CL = sbcl
# default to 16 GB of RAM size in the image # default to 4096 MB of RAM size in the image
DYNSIZE = 16384 DYNSIZE = 4096
LISP_SRC = $(wildcard src/*lisp) \ LISP_SRC = $(wildcard src/*lisp) \
$(wildcard src/monkey/*lisp) \ $(wildcard src/monkey/*lisp) \
$(wildcard src/utils/*lisp) \ $(wildcard src/utils/*lisp) \
$(wildcard src/load/*lisp) \
$(wildcard src/parsers/*lisp) \ $(wildcard src/parsers/*lisp) \
$(wildcard src/pg-copy/*lisp) \
$(wildcard src/pgsql/*lisp) \ $(wildcard src/pgsql/*lisp) \
$(wildcard src/sources/*lisp) \ $(wildcard src/sources/*lisp) \
pgloader.asd pgloader.asd
@ -24,12 +22,6 @@ QLDIR = $(BUILDDIR)/quicklisp
MANIFEST = $(BUILDDIR)/manifest.ql MANIFEST = $(BUILDDIR)/manifest.ql
LATEST = $(BUILDDIR)/pgloader-latest.tgz LATEST = $(BUILDDIR)/pgloader-latest.tgz
BUNDLEDIST = 2022-02-20
BUNDLENAME = pgloader-bundle-$(VERSION)
BUNDLEDIR = $(BUILDDIR)/bundle/$(BUNDLENAME)
BUNDLE = $(BUILDDIR)/$(BUNDLENAME).tgz
BUNDLETESTD= $(BUILDDIR)/bundle/test
ifeq ($(OS),Windows_NT) ifeq ($(OS),Windows_NT)
EXE = .exe EXE = .exe
COMPRESS_CORE = no COMPRESS_CORE = no
@ -43,26 +35,27 @@ BUILDAPP_CCL = $(BUILDDIR)/bin/buildapp.ccl$(EXE)
BUILDAPP_SBCL = $(BUILDDIR)/bin/buildapp.sbcl$(EXE) BUILDAPP_SBCL = $(BUILDDIR)/bin/buildapp.sbcl$(EXE)
ifeq ($(CL),sbcl) ifeq ($(CL),sbcl)
BUILDAPP = $(BUILDAPP_SBCL) BUILDAPP = $(BUILDAPP_SBCL)
BUILDAPP_OPTS = --require sb-posix \ CL_OPTS = --no-sysinit --no-userinit
--require sb-bsd-sockets \
--require sb-rotate-byte
CL_OPTS = --noinform --no-sysinit --no-userinit
else else
BUILDAPP = $(BUILDAPP_CCL) BUILDAPP = $(BUILDAPP_CCL)
CL_OPTS = --no-init CL_OPTS = --no-init
endif endif
COMPRESS_CORE ?= yes
ifeq ($(CL),sbcl) ifeq ($(CL),sbcl)
COMPRESS_CORE ?= $(shell $(CL) --noinform \
--quit \
--eval '(when (member :sb-core-compression cl:*features*) (write-string "yes"))')
endif
# note: on Windows_NT, we never core-compress; see above.
ifeq ($(COMPRESS_CORE),yes) ifeq ($(COMPRESS_CORE),yes)
COMPRESS_CORE_OPT = --compress-core COMPRESS_CORE_OPT = --compress-core
else
COMPRESS_CORE_OPT =
endif
endif
ifeq ($(CL),sbcl)
BUILDAPP_OPTS = --require sb-posix \
--require sb-bsd-sockets \
--require sb-rotate-byte
endif endif
DEBUILD_ROOT = /tmp/pgloader DEBUILD_ROOT = /tmp/pgloader
@ -70,43 +63,41 @@ DEBUILD_ROOT = /tmp/pgloader
all: $(PGLOADER) all: $(PGLOADER)
clean: clean:
rm -rf $(LIBS) $(QLDIR) $(MANIFEST) $(BUILDAPP) $(PGLOADER) \ rm -rf $(LIBS) $(QLDIR) $(MANIFEST) $(BUILDAPP) $(PGLOADER)
buildapp.log build/bundle/* build/pgloader-bundle* build/quicklisp.lisp docs/_build
$(MAKE) -C test clean docs:
ronn -roff pgloader.1.md
$(QLDIR)/local-projects/qmynd: $(QLDIR)/local-projects/qmynd:
git clone --depth 1 https://github.com/qitab/qmynd.git $@ git clone https://github.com/qitab/qmynd.git $@
$(QLDIR)/local-projects/cl-ixf: $(QLDIR)/local-projects/cl-ixf:
git clone --depth 1 https://github.com/dimitri/cl-ixf.git $@ git clone https://github.com/dimitri/cl-ixf.git $@
$(QLDIR)/local-projects/cl-db3:
git clone --depth 1 https://github.com/dimitri/cl-db3.git $@
$(QLDIR)/local-projects/cl-csv: $(QLDIR)/local-projects/cl-csv:
git clone --depth 1 https://github.com/AccelerationNet/cl-csv.git $@ git clone https://github.com/AccelerationNet/cl-csv.git $@
$(QLDIR)/local-projects/esrap:
git clone -b wip-better-errors https://github.com/scymtym/esrap.git $@
$(QLDIR)/setup.lisp: $(QLDIR)/setup.lisp:
mkdir -p $(BUILDDIR) mkdir -p $(BUILDDIR)
curl -o $(BUILDDIR)/quicklisp.lisp http://beta.quicklisp.org/quicklisp.lisp curl -o $(BUILDDIR)/quicklisp.lisp http://beta.quicklisp.org/quicklisp.lisp
$(CL) $(CL_OPTS) --load $(BUILDDIR)/quicklisp.lisp \ $(CL) $(CL_OPTS) --load $(BUILDDIR)/quicklisp.lisp \
--load src/getenv.lisp \ --eval '(quicklisp-quickstart:install :path "$(BUILDDIR)/quicklisp")' \
--eval '(quicklisp-quickstart:install :path "$(BUILDDIR)/quicklisp" :proxy (getenv "http_proxy"))' \
--eval '(quit)' --eval '(quit)'
quicklisp: $(QLDIR)/setup.lisp ; quicklisp: $(QLDIR)/setup.lisp ;
clones: $(QLDIR)/local-projects/cl-ixf \ clones: $(QLDIR)/local-projects/cl-ixf \
$(QLDIR)/local-projects/cl-db3 \
$(QLDIR)/local-projects/cl-csv \ $(QLDIR)/local-projects/cl-csv \
$(QLDIR)/local-projects/qmynd ; $(QLDIR)/local-projects/qmynd \
$(QLDIR)/local-projects/esrap ;
$(LIBS): $(QLDIR)/setup.lisp $(LIBS): $(QLDIR)/setup.lisp clones
$(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp \ $(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp \
--eval '(push :pgloader-image *features*)' \ --eval '(push "$(PWD)/" asdf:*central-registry*)' \
--eval '(setf *print-circle* t *print-pretty* t)' \ --eval '(ql:quickload "pgloader")' \
--eval '(push "$(PWD)/" ql:*local-project-directories*)' \
--eval '(ql:quickload "pgloader")' \
--eval '(quit)' --eval '(quit)'
touch $@ touch $@
@ -145,11 +136,8 @@ $(PGLOADER): $(MANIFEST) $(BUILDAPP) $(LISP_SRC)
--manifest-file $(MANIFEST) \ --manifest-file $(MANIFEST) \
--asdf-tree $(QLDIR)/dists \ --asdf-tree $(QLDIR)/dists \
--asdf-path . \ --asdf-path . \
--load-system cffi \
--load-system cl+ssl \
--load-system mssql \
--load src/hooks.lisp \
--load-system $(APP_NAME) \ --load-system $(APP_NAME) \
--load src/hooks.lisp \
--entry pgloader:main \ --entry pgloader:main \
--dynamic-space-size $(DYNSIZE) \ --dynamic-space-size $(DYNSIZE) \
$(COMPRESS_CORE_OPT) \ $(COMPRESS_CORE_OPT) \
@ -160,67 +148,28 @@ $(PGLOADER): $(MANIFEST) $(BUILDAPP) $(LISP_SRC)
pgloader: $(PGLOADER) ; pgloader: $(PGLOADER) ;
pgloader-standalone: pgloader-standalone:
$(BUILDAPP) $(BUILDAPP_OPTS) \ $(BUILDAPP) --require sb-posix \
--sbcl $(CL) \ --require sb-bsd-sockets \
--load-system $(APP_NAME) \ --require sb-rotate-byte \
--load src/hooks.lisp \ --load-system pgloader \
--entry pgloader:main \ --entry pgloader:main \
--dynamic-space-size $(DYNSIZE) \ --dynamic-space-size $(DYNSIZE) \
$(COMPRESS_CORE_OPT) \ --compress-core \
--output $(PGLOADER) --output $(PGLOADER)
test: $(PGLOADER) test: $(PGLOADER)
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress $(MAKE) PGLOADER=$(realpath $(PGLOADER)) -C test regress
save: ./src/save.lisp $(LISP_SRC)
$(CL) $(CL_OPTS) --load ./src/save.lisp
check-saved:
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
clean-bundle:
rm -rf $(BUNDLEDIR)
rm -rf $(BUNDLETESTD)/$(BUNDLENAME)/*
$(BUNDLETESTD):
mkdir -p $@
$(BUNDLEDIR): quicklisp
mkdir -p $@
$(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp \
--eval '(defvar *bundle-dir* "$@")' \
--eval '(defvar *pwd* "$(PWD)/")' \
--eval '(defvar *ql-dist* "$(BUNDLEDIST)")' \
--load bundle/ql.lisp
$(BUNDLEDIR)/version.sexp: $(BUNDLEDIR)
echo "\"$(VERSION)\"" > $@
$(BUNDLE): $(BUNDLEDIR) $(BUNDLEDIR)/version.sexp
cp bundle/README.md $(BUNDLEDIR)
cp bundle/save.lisp $(BUNDLEDIR)
sed -e s/%VERSION%/$(VERSION)/ < bundle/Makefile > $(BUNDLEDIR)/Makefile
git archive --format=tar --prefix=pgloader-$(VERSION)/ master \
| tar -C $(BUNDLEDIR)/local-projects/ -xf -
make QLDIR=$(BUNDLEDIR) clones
tar -C build/bundle \
--exclude bin \
--exclude test/sqlite \
-czf $@ $(BUNDLENAME)
bundle: clean-bundle $(BUNDLE) $(BUNDLETESTD)
tar -C $(BUNDLETESTD) -xf $(BUNDLE)
make -C $(BUNDLETESTD)/$(BUNDLENAME)
$(BUNDLETESTD)/$(BUNDLENAME)/bin/pgloader --version
test-bundle:
$(MAKE) -C $(BUNDLEDIR) test
deb: deb:
# intended for use on a debian system # intended for use on a debian system
mkdir -p $(DEBUILD_ROOT) && rm -rf $(DEBUILD_ROOT)/* mkdir -p $(DEBUILD_ROOT) && rm -rf $(DEBUILD_ROOT)/*
rsync -Ca --exclude 'build' \ rsync -Ca --exclude 'build' \
--exclude '.vagrant' \ --exclude '.vagrant' \
--exclude 'test/sqlite-chinook.load' \
--exclude 'test/sqlite' \
--exclude 'test/data/2013_Gaz_113CDs_national.txt' \
--exclude 'test/data/reg2013.dbf' \
--exclude 'test/data/sakila-db.zip' \
./ $(DEBUILD_ROOT)/ ./ $(DEBUILD_ROOT)/
cd $(DEBUILD_ROOT) && make -f debian/rules orig cd $(DEBUILD_ROOT) && make -f debian/rules orig
cd $(DEBUILD_ROOT) && debuild -us -uc -sa cd $(DEBUILD_ROOT) && debuild -us -uc -sa
@ -253,4 +202,4 @@ latest:
check: test ; check: test ;
.PHONY: test pgloader-standalone docs bundle .PHONY: test pgloader-standalone

187
README.md
View File

@ -1,9 +1,5 @@
# PGLoader # PGLoader
[![Build Status](https://travis-ci.org/dimitri/pgloader.svg?branch=master)](https://travis-ci.org/dimitri/pgloader)
[![Join the chat at https://gitter.im/dimitri/pgloader](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/dimitri/pgloader?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Read The Docs Status](https://readthedocs.org/projects/pgloader/badge/?version=latest&style=plastic)](http://pgloader.readthedocs.io/en/latest/)
pgloader is a data loading tool for PostgreSQL, using the `COPY` command. pgloader is a data loading tool for PostgreSQL, using the `COPY` command.
Its main advantage over just using `COPY` or `\copy`, and over using a Its main advantage over just using `COPY` or `\copy`, and over using a
@ -20,81 +16,140 @@ being the transformation of MySQL datestamps `0000-00-00` and
`0000-00-00 00:00:00` to PostgreSQL `NULL` value (because our calendar `0000-00-00 00:00:00` to PostgreSQL `NULL` value (because our calendar
never had a *year zero*). never had a *year zero*).
## Documentation ## Versioning
Full documentation is available online, including manual pages of all the pgloader version 1.x is quite old and was devleoped in `TCL`.
pgloader sub-commands. Check out When faced with maintaining that code, the new emerging development
[https://pgloader.readthedocs.io/](https://pgloader.readthedocs.io/en/latest/). team (hi!) picked `python` instead because that made sense at the
time. So pgloader version 2.x was written in python.
``` The current version of pgloader is the 3.x series, which is written in
$ pgloader --help [Common Lisp](http://cliki.net/) for better development flexibility,
pgloader [ option ... ] SOURCE TARGET runtime performance, and support of real threading.
--help -h boolean Show usage and exit.
--version -V boolean Displays pgloader version and exit.
--quiet -q boolean Be quiet
--verbose -v boolean Be verbose
--debug -d boolean Display debug level information.
--client-min-messages string Filter logs seen at the console (default: "warning")
--log-min-messages string Filter logs seen in the logfile (default: "notice")
--summary -S string Filename where to copy the summary
--root-dir -D string Output root directory. (default: #P"/tmp/pgloader/")
--upgrade-config -U boolean Output the command(s) corresponding to .conf file for v2.x
--list-encodings -E boolean List pgloader known encodings and exit.
--logfile -L string Filename where to send the logs.
--load-lisp-file -l string Read user code from files
--dry-run boolean Only check database connections, don't load anything.
--on-error-stop boolean Refrain from handling errors properly.
--no-ssl-cert-verification boolean Instruct OpenSSL to bypass verifying certificates.
--context -C string Command Context Variables
--with string Load options
--set string PostgreSQL options
--field string Source file fields specification
--cast string Specific cast rules
--type string Force input source type
--encoding string Source expected encoding
--before string SQL script to run before loading the data
--after string SQL script to run after loading the data
--self-upgrade string Path to pgloader newer sources
--regress boolean Drive regression testing
```
## Usage The versioning is now following the Emacs model, where any X.0 release
number means you're using a development version (alpha, beta, or release
candidate). The next stable versions are going to be `3.1` then `3.2` etc.
You can either give a command file to pgloader or run it all from the When using a development snapshot rather than a released version the version
command line, see the number includes the git hash (in its abbreviated form):
[pgloader quick start](https://pgloader.readthedocs.io/en/latest/tutorial/tutorial.html#pgloader-quick-start) on
<https://pgloader.readthedocs.io> for more details.
$ ./build/bin/pgloader --help - `pgloader version "3.0.99"`
$ ./build/bin/pgloader <file.load>
Release candidate 9 for pgloader version 3.1, with a *git tag* named
`v3.0.99` so that it's easy to checkout the same sources as the
released code.
- `pgloader version "3.0.fecae2c"`
Development snapshot again *git hash* `fecae2c`. It's possible to have
the same sources on another setup with using the git command `git
checkout fecae2c`.
For example, for a full migration from SQLite: - `pgloader version "3.1.0"`
$ createdb newdb Stable release.
$ pgloader ./test/sqlite/sqlite.db postgresql:///newdb
Or for a full migration from MySQL, including schema definition (tables,
indexes, foreign keys, comments) and parallel loading of the corrected data:
$ createdb pagila
$ pgloader mysql://user@localhost/sakila postgresql:///pagila
## LICENCE ## LICENCE
pgloader is available under [The PostgreSQL pgloader is available under [The PostgreSQL Licence](http://www.postgresql.org/about/licence/).
Licence](http://www.postgresql.org/about/licence/).
## INSTALL ## INSTALL
Please see full documentation at pgloader is now a Common Lisp program, tested using the
[https://pgloader.readthedocs.io/](https://pgloader.readthedocs.io/en/latest/install.html). [SBCL](http://sbcl.org/) (>= 1.1.14) and
[Clozure CL](http://ccl.clozure.com/) implementations with
[Quicklisp](http://www.quicklisp.org/beta/).
If you're using debian, it's already available: $ apt-get install sbcl unzip libsqlite3-dev make curl gawk freetds-dev libzip-dev
$ cd /path/to/pgloader
$ make pgloader
$ ./build/bin/pgloader --help
$ apt-get install pgloader You can also fetch pre-made binary packages at
[pgloader.io](http://pgloader.io/download.html).
If you're using docker, you can use the latest version built by the CI at ## Testing a new feature
each commit to the master branch:
$ docker pull ghcr.io/dimitri/pgloader:latest Being a Common Lisp program, pgloader is able to *upgrade itself* at run
$ docker run --rm -it ghcr.io/dimitri/pgloader:latest pgloader --version time, and provides the command-line option `--self-upgrade` that just does
that.
If you want to test the current repository version (or any checkout really),
it's possible to clone the sources then load them with an older pgloader
release:
$ /usr/bin/pgloader --version
pgloader version "3.0.99"
compiled with SBCL 1.1.17
$ git clone https://github.com/dimitri/pgloader.git /tmp/pgloader
$ /usr/bin/pgloader --self-upgrade /tmp/pgloader --version
Self-upgrading from sources at "/tmp/pgloader/"
pgloader version "3.0.fecae2c"
compiled with SBCL 1.1.17
Here, the code from the *git clone* will be used at run-time. Self-upgrade
is done first, then the main program entry point is called again with the
new coded loaded in.
Please note that the *binary* file (`/usr/bin/pgloader` or
`./build/bin/pgloader`) is not modified in-place, so that if you want to run
the same upgraded code again you will have to use the `--self-upgrade`
command again. It might warrant for an option rename before `3.1.0` stable
release.
## The pgloader.lisp script
Now you can use the `#!` script or build a self-contained binary executable
file, as shown below.
./pgloader.lisp --help
Each time you run the `pgloader` command line, it will check that all its
dependencies are installed and compiled and if that's not the case fetch
them from the internet and prepare them (thanks to *Quicklisp*). So please
be patient while that happens and make sure we can actually connect and
download the dependencies.
## Build Self-Contained binary file
The `Makefile` target `pgloader` knows how to produce a Self Contained
Binary file for pgloader, named `pgloader.exe`:
$ make pgloader
By default, the `Makefile` uses [SBCL](http://sbcl.org/) to compile your
binary image, though it's possible to also build using
[CCL](http://ccl.clozure.com/).
$ make CL=ccl pgloader
Note that the `Makefile` uses the `--compress-core` option when using SBCL,
that should be enabled in your local copy of `SBCL`. If that's not the case,
it's probably because you did compile and install `SBCL` yourself, so that
you have a decently recent version to use. Then you need to compile it with
the `--with-sb-core-compression` option.
You can also remove the `--compress-core` option that way:
$ make COMPRESS_CORE=no pgloader
The `--compress-core` is unique to SBCL, so not used when `CC` is different
from the `sbcl` value.
The `make pgloader` command when successful outputs a `./build/bin/pgloader`
file for you to use.
## Usage
Give as many command files that you need to pgloader:
$ ./build/bin/pgloader --help
$ ./build/bin/pgloader <file.load>
See the documentation file `pgloader.1.md` for details. You can compile that
file into a manual page or an HTML page thanks to the `ronn` application:
$ apt-get install ruby-ronn
$ make docs

View File

@ -80,12 +80,12 @@ simpler.
### performances ### performances
- some more parallelizing options - some more parallelizing options
- support for partitioning in pgloader itself - support for partitionning in pgloader itself
### UI ### UI
- add a web controller with pretty monitoring - add a web controler with pretty monitoring
- launch new jobs from the web controller - launch new jobs from the web controler
### crazy ideas ### crazy ideas

View File

@ -1,23 +1,24 @@
#!/usr/bin/env bash #!/usr/bin/env bash
SBCL_VERSION=2.2.5 sudo yum -y install yum-utils rpmdevtools @development-tools \
sbcl sqlite-devel zlib-devel
sudo yum -y install yum-utils rpmdevtools @"Development Tools" \ # SBCL 1.1.14
sqlite-devel zlib-devel # http://www.mikeivanov.com/post/66510551125/installing-sbcl-1-1-on-rhel-centos-systems
sudo yum -y groupinstall "Development Tools"
wget http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
sudo rpm -Uvh epel-release-6*.rpm
sudo yum install -y sbcl.x86_64
# SBCL 1.3, we'll overwrite the repo version of sbcl with a more recent one wget http://downloads.sourceforge.net/project/sbcl/sbcl/1.1.14/sbcl-1.1.14-source.tar.bz2
sudo yum -y install epel-release tar xfj sbcl-1.1.14-source.tar.bz2
sudo yum install -y sbcl.x86_64 --enablerepo=epel cd sbcl-1.1.14
./make.sh --with-sb-thread --with-sb-core-compression > /dev/null 2>&1
wget http://downloads.sourceforge.net/project/sbcl/sbcl/$SBCL_VERSION/sbcl-$SBCL_VERSION-source.tar.bz2
tar xfj sbcl-$SBCL_VERSION-source.tar.bz2
cd sbcl-$SBCL_VERSION
./make.sh --with-sb-thread --with-sb-core-compression --prefix=/usr > /dev/null 2>&1
sudo sh install.sh sudo sh install.sh
cd cd
# Missing dependencies # remove the old version that we used to compile the newer one.
sudo yum -y install freetds-devel sudo yum remove -y sbcl
# prepare the rpmbuild setup # prepare the rpmbuild setup
rpmdev-setuptree rpmdev-setuptree

View File

@ -1,15 +0,0 @@
#!/usr/bin/env bash
sudo yum -y install yum-utils rpmdevtools @"Development Tools" \
sqlite-devel zlib-devel
# Enable epel for sbcl
sudo yum -y install epel-release
sudo yum -y install sbcl
# Missing dependency
sudo yum install freetds freetds-devel -y
sudo ln -s /usr/lib64/libsybdb.so.5 /usr/lib64/libsybdb.so
# prepare the rpmbuild setup
rpmdev-setuptree

View File

@ -27,12 +27,12 @@ sudo apt-key adv --recv-keys --keyserver keyserver.ubuntu.com 0xcbcb082a1bb943db
sudo add-apt-repository 'deb http://mirrors.linsrv.net/mariadb/repo/10.0/debian wheezy main' sudo add-apt-repository 'deb http://mirrors.linsrv.net/mariadb/repo/10.0/debian wheezy main'
sudo apt-get update sudo apt-get update
sudo apt-get install -y postgresql-15 \ sudo apt-get install -y postgresql-9.3 postgresql-contrib-9.3 \
postgresql-15-ip4r \ postgresql-9.3-ip4r \
sbcl \ sbcl \
git patch unzip \ git patch unzip \
devscripts pandoc \ devscripts pandoc \
freetds-dev libsqlite3-dev \ libsqlite3-dev \
gnupg gnupg-agent gnupg gnupg-agent
sudo DEBIAN_FRONTEND=noninteractive \ sudo DEBIAN_FRONTEND=noninteractive \
@ -40,9 +40,11 @@ sudo DEBIAN_FRONTEND=noninteractive \
# SBCL # SBCL
# #
# we used to need to backport SBCL, it's only the case now in wheezy, all # we need to backport SBCL from sid to have a recent enough version of the
# the later distributions are uptodate enough for our needs here. # compiler and run time we depend on
sudo apt-get -y install sbcl sudo apt-get -y build-dep sbcl
sudo apt-get source -b sbcl > /dev/null 2>&1 # too verbose
sudo dpkg -i *.deb
HBA=/etc/postgresql/9.3/main/pg_hba.conf HBA=/etc/postgresql/9.3/main/pg_hba.conf
echo "local all all trust" | sudo tee $HBA echo "local all all trust" | sudo tee $HBA

1
build/.gitignore vendored
View File

@ -2,5 +2,4 @@
* *
# Except this file # Except this file
!bin !bin
!bundle
!.gitignore !.gitignore

View File

@ -1,4 +0,0 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore

View File

@ -1,70 +0,0 @@
# pgloader build tool for bundle tarball
# only supports SBCL
CL = sbcl
APP_NAME = pgloader
VERSION = %VERSION%
ifeq ($(OS),Windows_NT)
EXE = .exe
COMPRESS_CORE = no
DYNSIZE = 1024 # support for windows 32 bits
else
DYNSIZE = 16384
EXE =
endif
BUILDDIR = bin
BUILDAPP = $(BUILDDIR)/buildapp$(EXE)
PGLOADER = ./bin/pgloader
SRCDIR = local-projects/pgloader-$(VERSION)
BUILDAPP_OPTS = --require sb-posix \
--require sb-bsd-sockets \
--require sb-rotate-byte
CL_OPTS = --noinform --no-sysinit --no-userinit
COMPRESS_CORE ?= $(shell $(CL) --noinform \
--quit \
--eval '(when (member :sb-core-compression cl:*features*) (write-string "yes"))')
ifeq ($(COMPRESS_CORE),yes)
COMPRESS_CORE_OPT = --compress-core
endif
pgloader: $(PGLOADER) ;
buildapp: $(BUILDAPP) ;
$(BUILDAPP):
mkdir -p $(BUILDDIR)
$(CL) $(CL_OPTS) --load bundle.lisp \
--eval '(asdf:load-system :buildapp)' \
--eval '(buildapp:build-buildapp "$@")' \
--eval '(quit)'
$(PGLOADER): $(BUILDAPP)
$(BUILDAPP) --logfile /tmp/pgloader-bundle-build.log \
$(BUILDAPP_OPTS) \
--sbcl $(CL) \
--asdf-tree . \
--load-system cffi \
--load-system cl+ssl \
--load-system mssql \
--load $(SRCDIR)/src/hooks.lisp \
--load-system $(APP_NAME) \
--eval '(setf pgloader.params::*version-string* "$(VERSION)")' \
--entry pgloader:main \
--dynamic-space-size $(DYNSIZE) \
$(COMPRESS_CORE_OPT) \
--output $@.tmp
# that's ugly, but necessary when building on Windows :(
mv $@.tmp $@
test: $(PGLOADER)
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) -C $(SRCDIR)/test regress
save:
sbcl --no-userinit --load ./save.lisp
check: test ;

View File

@ -1,26 +0,0 @@
# pgloader source bundle
In order to ease building pgloader for non-lisp users, the *bundle*
distribution is a tarball containing pgloader and its build dependencies.
See the the following documentation for more details:
<https://www.quicklisp.org/beta/bundles.html>
The *bundle* comes with a specific `Makefile` so that building it is as
simple as the following (which includes testing the resulting binary):
make
LANG=en_US.UTF-8 make test
The compilation might takes a while, it's because SBCL is trying hard to
generate run-time binary code that is fast and efficient. Yes you need to be
in a unicide environment to run the test suite, so that it matches with the
encoding of the test *.load files.
You can then package or use the pgloader binary:
./bin/pgloader --version
./bin/pgloader --help
Note that the SQLite test files are not included in the bundle, for weithing
too much here.

View File

@ -1,30 +0,0 @@
;;;
;;; Script used to prepare a pgloader bundle
;;;
;; fetch a list of recent candidates with
;; (subseq (ql-dist:available-versions (ql-dist:dist "quicklisp")) 0 5)
;;
;; the 2017-06-30 QL release is broken, avoid it.
;;
(defvar *ql-dist* :latest)
(defvar *ql-dist-url-format*
"http://beta.quicklisp.org/dist/quicklisp/~a/distinfo.txt")
(let ((pkgs (append '("pgloader" "buildapp")
(getf (read-from-string
(uiop:read-file-string
(uiop:merge-pathnames* "pgloader.asd" *pwd*)))
:depends-on)))
(dist (if (or (eq :latest *ql-dist*)
(string= "latest" *ql-dist*))
(cdr
;; available-versions is an alist of (date . url), and the
;; first one is the most recent one
(first
(ql-dist:available-versions (ql-dist:dist "quicklisp"))))
(format nil *ql-dist-url-format* *ql-dist*))))
(ql-dist:install-dist dist :prompt nil :replace t)
(ql:bundle-systems pkgs :to *bundle-dir*))
(quit)

View File

@ -1,47 +0,0 @@
;;;
;;; Create a build/bin/pgloader executable from the source code, using
;;; Quicklisp to load pgloader and its dependencies.
;;;
(in-package #:cl-user)
(require :asdf) ; should work in SBCL and CCL
(let* ((cwd (uiop:getcwd))
(bundle.lisp (uiop:merge-pathnames* "bundle.lisp" cwd))
(version-file (uiop:merge-pathnames* "version.sexp" cwd))
(version-string (uiop:read-file-form version-file))
(asdf:*central-registry* (list cwd)))
(format t "Loading bundle.lisp~%")
(load bundle.lisp)
(format t "Loading system pgloader ~a~%" version-string)
(asdf:load-system :pgloader :verbose nil)
(load (asdf:system-relative-pathname :pgloader "src/hooks.lisp"))
(let* ((pgl (find-package "PGLOADER"))
(version-symbol (find-symbol "*VERSION-STRING*" pgl)))
(setf (symbol-value version-symbol) version-string)))
(defun pgloader-image-main ()
(let ((argv #+sbcl sb-ext:*posix-argv*
#+ccl ccl:*command-line-argument-list*))
(pgloader::main argv)))
(let* ((cwd (uiop:getcwd))
(bin-dir (uiop:merge-pathnames* "bin/" cwd))
(bin-filename (uiop:merge-pathnames* "pgloader" bin-dir)))
(ensure-directories-exist bin-dir)
#+ccl
(ccl:save-application bin-filename
:toplevel-function #'cl-user::pgloader-image-main
:prepend-kernel t)
#+sbcl
(sb-ext:save-lisp-and-die bin-filename
:toplevel #'cl-user::pgloader-image-main
:executable t
:save-runtime-options t
:compression t))

View File

@ -1,4 +0,0 @@
[global]
tds version = 8.0
client charset = UTF-8

154
debian/changelog vendored
View File

@ -1,157 +1,3 @@
pgloader (3.6.10-2) unstable; urgency=medium
* Limit architectures to those that have sbcl available and working thread
support (notably, this excludes armel and armhf).
-- Christoph Berg <myon@debian.org> Fri, 22 Mar 2024 14:59:27 +0100
pgloader (3.6.10-1) unstable; urgency=medium
* New upstream version.
* Bump ip4r dependencies to 16. (Closes: #1052837)
-- Christoph Berg <myon@debian.org> Thu, 02 Nov 2023 17:44:07 +0100
pgloader (3.6.9-1) unstable; urgency=medium
* New upstream version.
* Bump ip4r dependencies to 15. (Closes: #1022296)
-- Christoph Berg <myon@debian.org> Mon, 24 Oct 2022 12:58:09 +0200
pgloader (3.6.8-1) unstable; urgency=medium
* New upstream version.
* Depend on libsqlite3-0.
-- Christoph Berg <myon@debian.org> Mon, 26 Sep 2022 14:24:02 +0200
pgloader (3.6.7-1) unstable; urgency=medium
* New upstream version:
* Set SBCL dynamic space size to 16 GB on 64 bit architectures.
* Improve documentation with command lines and defaults.
* SBCL compiler notes should not be fatal to pgloader.
-- Christoph Berg <myon@debian.org> Sat, 13 Aug 2022 10:32:41 +0200
pgloader (3.6.6-1) unstable; urgency=medium
* New upstream version.
* Run tests at build-time as well.
-- Christoph Berg <myon@debian.org> Mon, 27 Jun 2022 11:03:00 +0200
pgloader (3.6.4-1) unstable; urgency=medium
* New upstream version.
* debian/tests/testsuite: Run regression tests.
-- Christoph Berg <myon@debian.org> Fri, 24 Jun 2022 14:32:54 +0200
pgloader (3.6.3-1) unstable; urgency=medium
* New upstream version.
* Remove cl-pgloader, deprecated upstream.
* debian/tests/ssl: Force md5 auth if cl-postmodern is too old.
-- Christoph Berg <myon@debian.org> Tue, 21 Dec 2021 10:09:53 +0100
pgloader (3.6.2-1) unstable; urgency=medium
* New upstream version.
* debian/tests/ssl: Add --debug to get backtraces.
* debian/rules: Sync loaded systems with Makefile.
* debian/rules: Print actual compiler log.
* debian/rules: Skip dh_dwz like dh_strip as it fails on buster.
* Bump required cl-db3 version to 20200212.
* Note that we need cl-plus-ssl 20190204 or later.
* Note that we need cl-csv 20180712 or later.
* DH 13.
-- Christoph Berg <myon@debian.org> Tue, 14 Jul 2020 17:02:30 +0200
pgloader (3.6.1-1) unstable; urgency=medium
* New upstream version.
* SSL is always enabled now, drop our patch.
* Add B-D on python3-sphinx-rtd-theme.
-- Christoph Berg <christoph.berg@credativ.de> Mon, 21 Jan 2019 16:09:17 +0100
pgloader (3.5.2-3) unstable; urgency=medium
* Make cl-pgloader test depend on ca-certificates so the snakeoil
certificate is recognized as a valid CA. (Needs the /etc/ssl/certs/*.0
file.)
-- Christoph Berg <christoph.berg@credativ.de> Tue, 31 Jul 2018 16:24:03 +0200
pgloader (3.5.2-2) unstable; urgency=medium
* Install pgloader.asd into correct location. (Closes: #857226)
* Test cl-pgloader through sbcl --eval.
* Skip building and manpage generation in arch-indep builds.
-- Christoph Berg <myon@debian.org> Tue, 03 Jul 2018 22:51:48 +0200
pgloader (3.5.2-1) unstable; urgency=medium
* New upstream version.
* All included test data has been verified as free, stop building a +dfsg
tarball.
* debian/source/options: Ignore changes in src/params.lisp (release vs
non-release).
* Enable SSL in src/hooks.lisp.
* Run wrap-and-sort -st.
* Add new B-D cl-mustache, cl-yason, cl-zs3, sync Depends to cl-pgloader.
* Depend on the libssl version cl-plus-ssl depends on. (Closes: #864309)
* Build and install new sphinx docs instead.
* Build manpage using help2man.
* Priority: optional, move cl-pgloader to Section: lisp.
* Update S-V.
* Add watch file.
-- Christoph Berg <christoph.berg@credativ.de> Tue, 03 Jul 2018 16:59:07 +0200
pgloader (3.4.1+dfsg-1) unstable; urgency=medium
* New release, bugfixes and new features
-- Dimitri Fontaine <dim@tapoueh.org> Thu, 06 Jul 2017 16:51:53 +0300
pgloader (3.3.2+dfsg-1) unstable; urgency=medium
* Fixes github issue 453 (Closes: #843555)
* Maintenance release.
-- Dimitri Fontaine <dim@tapoueh.org> Sat, 03 Dec 2016 19:36:56 +0300
pgloader (3.3.1+dfsg-2) unstable; urgency=medium
* Add tzdata to build-depends (Closes: #839468)
-- Christoph Berg <christoph.berg@credativ.de> Thu, 03 Nov 2016 14:32:28 +0100
pgloader (3.3.1+dfsg-1) unstable; urgency=medium
* New release, bugfixes and new features
-- Dimitri Fontaine <dim@tapoueh.org> Sun, 28 Aug 2016 21:07:47 +0300
pgloader (3.2.2+dfsg-1) unstable; urgency=medium
* New release, lots of bugfixes, some new features
* New build dependencies
-- Dimitri Fontaine <dim@tapoueh.org> Thu, 03 Sep 2015 19:17:12 +0300
pgloader (3.2.1~preview+dfsg-2) unstable; urgency=medium
* Interim bugfix release
-- Dimitri Fontaine <dim@tapoueh.org> Thu, 22 Jan 2015 04:06:51 +0400
pgloader (3.2.0+dfsg-1) unstable; urgency=medium pgloader (3.2.0+dfsg-1) unstable; urgency=medium
* Implement COPY files support * Implement COPY files support

2
debian/cl-pgloader.dirs vendored Normal file
View File

@ -0,0 +1,2 @@
usr/share/common-lisp/source/pgloader
usr/share/common-lisp/systems

2
debian/cl-pgloader.docs vendored Normal file
View File

@ -0,0 +1,2 @@
README.md
pgloader.1.md

3
debian/cl-pgloader.install vendored Normal file
View File

@ -0,0 +1,3 @@
pgloader.asd usr/share/common-lisp/source/simple-date
pgloader.lisp usr/share/common-lisp/source/pgloader
src usr/share/common-lisp/source/pgloader

1
debian/cl-pgloader.links vendored Normal file
View File

@ -0,0 +1 @@
usr/share/common-lisp/source/pgloader/pgloader.asd usr/share/common-lisp/systems/pgloader.asd

1
debian/clean vendored
View File

@ -1 +0,0 @@
buildapp.*

1
debian/compat vendored Normal file
View File

@ -0,0 +1 @@
8

86
debian/control vendored
View File

@ -1,74 +1,34 @@
Source: pgloader Source: pgloader
Section: database Section: database
Priority: optional Priority: extra
Maintainer: Dimitri Fontaine <dim@tapoueh.org> Maintainer: Dimitri Fontaine <dim@tapoueh.org>
Uploaders: Uploaders: Christoph Berg <myon@debian.org>
Christoph Berg <myon@debian.org>, Build-Depends: debhelper (>= 8.0.0), sbcl (>= 1.1.13), ruby-ronn, buildapp (>= 1.5), cl-asdf (>= 3.0.3), cl-log, cl-postmodern, cl-simple-date, cl-qmynd, cl-split-sequence, cl-unicode, cl-interpol, cl-csv, cl-fad, cl-lparallel, cl-esrap, cl-alexandria, cl-drakma, cl-flexi-streams, cl-usocket, cl-local-time, cl-command-line-arguments, cl-abnf, cl-db3, cl-py-configparser, cl-sqlite, cl-trivial-backtrace, cl-markdown, cl-md5, cl-asdf-finalizers, cl-asdf-system-connections, cl-cffi (>= 1:0.12.0), cl-ixf, gawk, cl-bordeaux-threads (>= 0.8.3), cl-metabang-bind, cl-mssql, cl-uuid, cl-trivial-utf-8
Build-Depends: Standards-Version: 3.9.6
buildapp (>= 1.5),
cl-abnf,
cl-alexandria,
cl-asdf (>= 3.0.3),
cl-asdf-finalizers,
cl-asdf-system-connections,
cl-bordeaux-threads (>= 0.8.3),
cl-cffi (>= 1:0.12.0),
cl-command-line-arguments,
cl-csv (>= 20180712),
cl-db3 (>= 20200212),
cl-drakma,
cl-esrap,
cl-fad,
cl-flexi-streams,
cl-interpol,
cl-ixf,
cl-local-time,
cl-log,
cl-lparallel,
cl-markdown,
cl-md5,
cl-metabang-bind,
cl-mssql,
cl-mustache,
cl-plus-ssl (>= 20190204),
cl-postmodern,
cl-ppcre,
cl-py-configparser,
cl-qmynd,
cl-quri,
cl-simple-date,
cl-split-sequence,
cl-sqlite,
cl-trivial-backtrace,
cl-trivial-utf-8,
cl-unicode,
cl-usocket,
cl-utilities,
cl-uuid,
cl-yason,
cl-zs3,
debhelper-compat (= 13),
gawk,
help2man,
libsqlite3-dev,
postgresql-16-ip4r <!nocheck> | postgresql-ip4r <!nocheck>,
python3-sphinx,
python3-sphinx-rtd-theme,
sbcl (>= 1.1.13),
tzdata,
Standards-Version: 4.6.0
Homepage: https://github.com/dimitri/pgloader Homepage: https://github.com/dimitri/pgloader
Vcs-Git: https://github.com/dimitri/pgloader.git Vcs-Git: https://github.com/dimitri/pgloader.git
Vcs-Browser: https://github.com/dimitri/pgloader Vcs-Browser: https://github.com/dimitri/pgloader
Package: pgloader Package: pgloader
Architecture: amd64 arm64 i386 ppc64el powerpc ppc64 Architecture: any
Depends: Depends: ${shlibs:Depends}, ${misc:Depends}, freetds-dev
freetds-dev, Description: extract, transform and load data into PostgreSQL
${misc:Depends}, pgloader imports data from different kind of sources and COPY it into
${shlibs:Depends}, PostgreSQL.
${sqlite:Depends}, .
${ssl:Depends}, The command language is described in the manual page and allows one to
describe where to find the data source, its format, and to describe data
processing and transformation.
.
Supported source formats include CSV, fixed width flat files, dBase3 files
(DBF), and SQLite and MySQL databases. In most of those formats, pgloader
is able to auto-discover the schema and create the tables and the indexes
in PostgreSQL. In the MySQL case it's possible to edit CASTing rules from
the pgloader command directly.
Package: cl-pgloader
Architecture: all
Depends: ${misc:Depends}, cl-asdf (>= 3.0.3), cl-log, cl-postmodern, cl-simple-date, cl-qmynd, cl-split-sequence, cl-unicode, cl-interpol, cl-csv, cl-fad, cl-lparallel, cl-esrap, cl-alexandria, cl-drakma, cl-flexi-streams, cl-usocket, cl-local-time, cl-command-line-arguments, cl-abnf, cl-db3, cl-py-configparser, cl-sqlite, cl-trivial-backtrace, cl-markdown, cl-md5, cl-asdf-finalizers, cl-asdf-system-connections, cl-cffi (>= 1:0.12.0), cl-bordeaux-threads (>= 0.8.3), cl-metabang-bind, cl-uuid, cl-trivial-utf-8
Description: extract, transform and load data into PostgreSQL Description: extract, transform and load data into PostgreSQL
pgloader imports data from different kind of sources and COPY it into pgloader imports data from different kind of sources and COPY it into
PostgreSQL. PostgreSQL.

74
debian/copyright vendored
View File

@ -20,76 +20,4 @@ License: PostgreSQL
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON
AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO
PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Files: test/sqlite/Chinook*
Copyright: Copyright (c) 2008-2017 Luis Rocha
License: MIT
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
.
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS
IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Files: test/data/2013_Gaz_113CDs_national.txt
Copyright: public domain
License: us-public-domain
All U.S. Census Bureau materials, regardless of the media, are entirely in the
public domain. There are no user fees, site licenses, or any special agreements
etc for the public or private use, and or reuse of any census title. As tax
funded product, it's all in the public record.
Files: test/data/reg2013.dbf
Copyright: public comain
License: fr-public-domain
Les publications et données mises à disposition sur le présent site sont
consultables et téléchargeables gratuitement. Sauf spécification contraire,
elles peuvent être réutilisées, y compris à des fins commerciales, sans licence
et sans versement de redevances autres que celles collectées par les sociétés
de perception et de répartition des droits d'auteur régies par le titre II du
livre III du code de la propriété intellectuelle. La réutilisation est
toutefois subordonnée au respect de l'intégrité de l'information et des données
et à la mention précise des sources.
.
https://www.insee.fr/fr/information/2008466
Files: test/data/sakila-db.zip
Copyright: Copyright © 2007, 2018, Oracle and/or its affiliates. All rights reserved.
License: new-bsd-license
The contents of the sakila-schema.sql and sakila-data.sql files are licensed
under the New BSD license.
.
Information on the New BSD license can be found at
http://www.opensource.org/licenses/bsd-license.php and
http://en.wikipedia.org/wiki/BSD_License.
.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
.
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,14 +0,0 @@
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -92,11 +92,6 @@ html_theme = 'sphinx_rtd_theme'
#
# html_theme_options = {}
html_theme_options = {
- 'github_user': 'dimitri',
- 'github_repo': 'pgloader',
- 'description': 'your migration companion',
- 'travis_button': True,
- 'show_related': True,
#'sidebar_collapse': False,
}

View File

@ -1 +0,0 @@
#bionic-theme-options

View File

@ -1,2 +1,3 @@
README.md README.md
docs/_build/html pgloader.1.md
web/src/*.md

1
debian/pgloader.manpages vendored Normal file
View File

@ -0,0 +1 @@
pgloader.1

98
debian/rules vendored
View File

@ -1,87 +1,55 @@
#!/usr/bin/make -f #!/usr/bin/make -f
# -*- makefile -*-
# Sample debian/rules that uses debhelper.
# This file was originally written by Joey Hess and Craig Small.
# As a special exception, when this file is copied by dh-make into a
# dh-make output file, you may use that output file without restriction.
# This special exception was added by Craig Small in version 0.37 of dh-make.
include /usr/share/dpkg/pkg-info.mk # Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
# get libsqlite3 package name from libsqlite3-dev PKGVERS = $(shell dpkg-parsechangelog | awk -F '[:-]' '/^Version:/ { print substr($$2, 2) }')
LIBSQLITE := $(shell dpkg-query --showformat='$${Depends}' --show libsqlite3-dev | grep -o 'libsqlite[^ ]*') EXCLUDE = --exclude-vcs --exclude=debian --exclude=build --exclude=.vagrant
# make pgloader depend on the libssl package cl-plus-ssl depends on
LIBSSL := $(shell dpkg-query --showformat='$${Depends}' --show cl-plus-ssl | grep -o 'libssl[^ ]*')
BITS = $(shell dpkg-architecture -qDEB_BUILD_ARCH_BITS) BITS = $(shell dpkg-architecture -qDEB_BUILD_ARCH_BITS)
ifeq ($(BITS),32) ifeq ($(BITS),32)
SIZE=1024 SIZE=1024
else else
SIZE=16384 SIZE=4096
endif endif
MAKEFILE_VERSION = $(shell awk '/^VERSION/ { print $$3 }' Makefile)
DOC_VERSION = $(shell awk '/^release/ { print $$3 }' docs/conf.py | tr -d "'")
SPECFILE_VERSION = $(shell awk '/^Version/ { print $$2 }' pgloader.spec)
DEBIAN_VERSION = $(shell dpkg-parsechangelog -SVersion | cut -d- -f 1)
PGLOADER_MAJOR_VERSION = $(shell awk '/^.defparameter .major-version/ { print $$3 }' src/params.lisp | grep -Eo '[0-9.]+')
PGLOADER_MINOR_VERSION = $(shell awk '/^.defparameter .minor-version/ { print $$3 }' src/params.lisp | grep -Eo '[0-9.]+')
# buildd provides a build environment where $HOME is not writable, but the # buildd provides a build environment where $HOME is not writable, but the
# CL compilers here will need to fill-in a per-user cache # CL compilers here will need to fill-in a per-user cache
export HOME = $(CURDIR)/debian/home export HOME = $(CURDIR)/debian/home
override_dh_auto_clean: orig: clean
dh_auto_clean rm -rf $(HOME)
rm -rf debian/home cd .. && tar czf pgloader_$(PKGVERS).orig.tar.gz $(EXCLUDE) pgloader
# sanity checks on version number
[ "$(MAKEFILE_VERSION)" = "$(DOC_VERSION)" ] # Makefile = docs/conf.py version
[ "$(MAKEFILE_VERSION)" = "$(SPECFILE_VERSION)" ] # Makefile = pgloader.spec version
[ "$(MAKEFILE_VERSION)" = "$(DEBIAN_VERSION)" ] # Makefile = debian/changelog version
[ "$(MAKEFILE_VERSION)" = "$(PGLOADER_MAJOR_VERSION).$(PGLOADER_MINOR_VERSION)" ] # Makefile = src/params.lisp version
override_dh_auto_build-indep: override_dh_auto_build:
# do nothing make docs
override_dh_auto_build-arch:
mkdir -p build/bin mkdir -p build/bin
mkdir -p $(HOME) mkdir -p $(HOME)
buildapp --require sb-posix \ buildapp --require sb-posix \
--require sb-bsd-sockets \ --require sb-bsd-sockets \
--load /usr/share/common-lisp/source/cl-asdf/build/asdf.lisp \ --load /usr/share/common-lisp/source/cl-asdf/build/asdf.lisp \
--asdf-path . \ --asdf-path . \
--asdf-tree /usr/share/common-lisp/systems \ --asdf-tree /usr/share/common-lisp/systems \
--load-system asdf-finalizers \ --load-system asdf-finalizers \
--load-system asdf-system-connections \ --load-system asdf-system-connections \
--load-system cffi \ --load-system pgloader \
--load-system cl+ssl \ --load src/hooks.lisp \
--load-system mssql \ --entry pgloader:main \
--load src/hooks.lisp \ --dynamic-space-size $(SIZE) \
--load-system pgloader \ --compress-core \
--entry pgloader:main \ --output build/bin/pgloader
--dynamic-space-size $(SIZE) \
--compress-core \
--logfile buildapp.log \
--output build/bin/pgloader \
|| echo $$? > buildapp.fail
cat buildapp.log
test ! -f buildapp.fail
ls -l build/bin/pgloader
$(MAKE) -C docs html
override_dh_auto_test: override_dh_auto_test:
PATH=$(CURDIR)/build/bin:$(PATH) debian/tests/testsuite # no nothing
override_dh_strip override_dh_dwz: override_dh_strip:
# do nothing, sbcl doesn't write any debug info # do nothing
override_dh_installman-arch:
mkdir -p debian/pgloader/usr/share/man/man1/
PATH=debian/pgloader/usr/bin:$(PATH) \
help2man --version-string $(DEB_VERSION_UPSTREAM) \
--no-info \
--name "extract, transform and load data into PostgreSQL" \
pgloader > \
debian/pgloader/usr/share/man/man1/pgloader.1
override_dh_gencontrol:
dh_gencontrol -- \
-V"sqlite:Depends=$(LIBSQLITE)" \
-V"ssl:Depends=$(LIBSSL)"
%: %:
dh $@ dh $@

View File

@ -1,2 +0,0 @@
# ignore release/non-release status
extend-diff-ignore=src/params.lisp

13
debian/tests/control vendored
View File

@ -1,13 +0,0 @@
Depends:
ca-certificates,
cl-postmodern,
pgloader,
postgresql,
Tests: ssl
Restrictions: allow-stderr, needs-root
Depends:
pgloader,
postgresql-16-ip4r | postgresql-ip4r,
Tests: testsuite
Restrictions: allow-stderr

34
debian/tests/ssl vendored
View File

@ -1,34 +0,0 @@
#!/bin/sh
# Autopkgtest: verify pgloader can connect to PostgreSQL both over a UNIX
# socket and over an SSL-required TCP connection, using the regression
# test case test/allcols.load.
# test needs root so we have a SSL certificate
set -eux
# clean up pgloader's scratch directory even when the test fails
trap "rm -rf /tmp/pgloader" EXIT
# check if cl-postmodern is new enough to support scram-sha-256
postmodern=$(dpkg-query --show --showformat='${Version}' cl-postmodern)
if dpkg --compare-versions "$postmodern" lt 20200101; then
	# fall back to md5 host auth for old cl-postmodern versions
	AUTH="-i--auth-local=trust -i--auth-host=md5"
fi
# pg_virtualenv provisions a throw-away PostgreSQL cluster and runs the
# heredoc script against it; <<- strips the leading tabs
pg_virtualenv ${AUTH:-} <<-'EOF'
	set -eux
	# force SSL connection
	HBA=$(psql -XAtc 'SHOW hba_file')
	sed -i -e 's/^host/hostssl/' $HBA
	psql -XAtc 'SELECT pg_reload_conf()'
	createdb pgloader
	export PGDATABASE=pgloader
	psql -XAtc 'create schema expected'
	# test UNIX socket
	rm -rf /tmp/pgloader
	PGHOST=/var/run/postgresql su -c 'pgloader --debug --regress test/allcols.load' postgres
	# test SSL connection
	rm -rf /tmp/pgloader
	PGSSLMODE=require pgloader --debug --regress test/allcols.load
	EOF

View File

@ -1,11 +0,0 @@
#!/bin/sh
# Autopkgtest: run the pgloader regression suite (test/ directory) against
# a throw-away PostgreSQL cluster created by pg_virtualenv.
set -eux
# pick the superuser name: the cluster is owned by postgres when running
# as root, otherwise by the current user
case $USER in
	root) PGSUPERUSER=postgres ;;
	*)    PGSUPERUSER=$USER ;;
esac
# use trust authentication to avoid scram failures on bullseye/buster/stretch/impish/focal/bionic
PGLOADER=pgloader PGSUPERUSER=$PGSUPERUSER pg_virtualenv -i'-Atrust' make -C test prepare regress

2
debian/watch vendored
View File

@ -1,2 +0,0 @@
version=4
https://github.com/dimitri/pgloader/tags .*/v(.*).tar.gz

View File

@ -1 +0,0 @@
pgloader.org

View File

@ -1,20 +0,0 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXPROJ = pgloader
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS),
# e.g. `make html O=-W` to turn warnings into errors.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View File

@ -1,123 +0,0 @@
Batch Processing
================
To load data to PostgreSQL, pgloader uses the `COPY` streaming protocol.
While this is the faster way to load data, `COPY` has an important drawback:
as soon as PostgreSQL emits an error with any bit of data sent to it,
whatever the problem is, the whole data set is rejected by PostgreSQL.
To work around that, pgloader cuts the data into *batches* of 25000 rows
each, so that when a problem occurs it's only impacting that many rows of
data. Each batch is kept in memory while the `COPY` streaming happens, in
order to be able to handle errors should some happen.
When PostgreSQL rejects the whole batch, pgloader logs the error message
then isolates the bad row(s) from the accepted ones by retrying the batched
rows in smaller batches. To do that, pgloader parses the *CONTEXT* error
message from the failed COPY, as the message contains the line number where
the error was found in the batch, as in the following example::
CONTEXT: COPY errors, line 3, column b: "2006-13-11"
Using that information, pgloader will reload all rows in the batch before
the erroneous one, log the erroneous one as rejected, then try loading the
remainder of the batch in a single attempt, which may or may not contain
other erroneous data.
At the end of a load containing rejected rows, you will find two files in
the *root-dir* location, under a directory named the same as the target
database of your setup. The filenames are the target table, and their
extensions are `.dat` for the rejected data and `.log` for the file
containing the full PostgreSQL client side logs about the rejected data.
The `.dat` file is formatted in the PostgreSQL text COPY format as documented
in `http://www.postgresql.org/docs/9.2/static/sql-copy.html#AEN66609`.
It is possible to use the following WITH options to control pgloader batch
behavior:
- *on error stop*, *on error resume next*
This option controls if pgloader is using building batches of data at
all. The batch implementation allows pgloader to recover errors by
sending the data that PostgreSQL accepts again, and by keeping away the
data that PostgreSQL rejects.
To enable retrying the data and loading the good parts, use the option
*on error resume next*, which is the default to file based data loads
(such as CSV, IXF or DBF).
When migrating from another RDBMS technology, it's best to have a
reproducible loading process. In that case it's possible to use *on
error stop* and fix either the casting rules, the data transformation
functions or in cases the input data until your migration runs through
completion. That's why *on error resume next* is the default for SQLite,
MySQL and MS SQL source kinds.
A Note About Performance
------------------------
pgloader has been developed with performance in mind, to be able to cope
with ever growing needs in loading large amounts of data into PostgreSQL.
The basic architecture it uses is the old Unix pipe model, where a thread is
responsible for loading the data (reading a CSV file, querying MySQL, etc)
and fills pre-processed data into a queue. Another thread feeds from the
queue, applies some more *transformations* to the input data and streams the
end result to PostgreSQL using the COPY protocol.
When given a file that the PostgreSQL `COPY` command knows how to parse, and
if the file contains no erroneous data, then pgloader will never be as fast
as just using the PostgreSQL `COPY` command.
Note that while the `COPY` command is restricted to read either from its
standard input or from a local file on the server's file system, the command
line tool `psql` implements a `\copy` command that knows how to stream a
file local to the client over the network and into the PostgreSQL server,
using the same protocol as pgloader uses.
A Note About Parallelism
------------------------
pgloader uses several concurrent tasks to process the data being loaded:
- a reader task reads the data in and pushes it to a queue,
- at least one writer task feeds from the queue and formats the raw data into the
PostgreSQL COPY format in batches (so that it's possible to then retry a
failed batch without reading the data from source again), and then sends
the data to PostgreSQL using the COPY protocol.
The parameter *workers* allows to control how many worker threads are
allowed to be active at any time (that's the parallelism level); and the
parameter *concurrency* allows to control how many tasks are started to
handle the data (they may not all run at the same time, depending on the
*workers* setting).
We allow *workers* simultaneous workers to be active at the same time in the
context of a single table. A single unit of work consist of several kinds of
workers:
- a reader getting raw data from the source,
- N writers preparing and sending the data down to PostgreSQL.
The N here is setup to the *concurrency* parameter: with a *CONCURRENCY* of
2, we start (+ 1 2) = 3 concurrent tasks, with a *concurrency* of 4 we start
(+ 1 4) = 5 concurrent tasks, of which only *workers* may be active
simultaneously.
The defaults are `workers = 4, concurrency = 1` when loading from a database
source, and `workers = 8, concurrency = 2` when loading from something else
(currently, a file). Those defaults are arbitrary and waiting for feedback
from users, so please consider providing feedback if you play with the
settings.
As the `CREATE INDEX` threads started by pgloader are only waiting until
PostgreSQL is done with the real work, those threads are *NOT* counted into
the concurrency levels as detailed here.
By default, as many `CREATE INDEX` threads as the maximum number of indexes
per table are found in your source schema. It is possible to set the `max
parallel create index` *WITH* option to another number in case there's just
too many of them to create.

View File

@ -1,49 +0,0 @@
Reporting Bugs
==============
pgloader is a software and as such contains bugs. Most bugs are easy to
solve and taken care of in a short delay. For this to be possible though,
bug reports need to follow these recommendations:
- include pgloader version,
- include problematic input and output,
- include a description of the output you expected,
- explain the difference between the output you have and the one you expected,
- include a self-reproducing test-case
Test Cases to Reproduce Bugs
----------------------------
Use the *inline* source type to help reproduce a bug, as in the pgloader tests::
LOAD CSV
FROM INLINE
INTO postgresql://dim@localhost/pgloader?public."HS"
WITH truncate,
fields terminated by '\t',
fields not enclosed,
fields escaped by backslash-quote,
quote identifiers
SET work_mem to '128MB',
standard_conforming_strings to 'on',
application_name to 'my app name'
BEFORE LOAD DO
$$ create extension if not exists hstore; $$,
$$ drop table if exists "HS"; $$,
$$ CREATE TABLE "HS"
(
id serial primary key,
kv hstore
)
$$;
1 email=>foo@example.com,a=>b
2 test=>value
3 a=>b,c=>"quoted hstore value",d=>other
4 baddata

View File

@ -1,380 +0,0 @@
Command Syntax
==============
pgloader implements a Domain Specific Language allowing to setup complex
data loading scripts handling computed columns and on-the-fly sanitization
of the input data. For more complex data loading scenarios, you will be
required to learn that DSL's syntax. It's meant to look familiar to DBA by
being inspired by SQL where it makes sense, which is not that much after
all.
The pgloader commands follow the same global grammar rules. Each of them
might support only a subset of the general options and provide specific
options.
::
LOAD <source-type>
FROM <source-url>
[ HAVING FIELDS <source-level-options> ]
INTO <postgresql-url>
[ TARGET TABLE [ "<schema>" ]."<table name>" ]
[ TARGET COLUMNS <columns-and-options> ]
[ WITH <load-options> ]
[ SET <postgresql-settings> ]
[ BEFORE LOAD [ DO <sql statements> | EXECUTE <sql file> ] ... ]
[ AFTER LOAD [ DO <sql statements> | EXECUTE <sql file> ] ... ]
;
The main clauses are the `LOAD`, `FROM`, `INTO` and `WITH` clauses that each
command implements. Some command then implement the `SET` command, or some
specific clauses such as the `CAST` clause.
.. _common_clauses:
Command Clauses
---------------
The pgloader command syntax allows composing CLAUSEs together. Some clauses
are specific to the FROM source-type, most clauses are always available.
FROM
----
The *FROM* clause specifies where to read the data from, and each command
introduces its own variant of sources. For instance, the *CSV* source
supports `inline`, `stdin`, a filename, a quoted filename, and a *FILENAME
MATCHING* clause (see above); whereas the *MySQL* source only supports a
MySQL database URI specification.
INTO
----
The PostgreSQL connection URI must contain the name of the target table
where to load the data into. That table must have already been created in
PostgreSQL, and the name might be schema qualified.
Then *INTO* option also supports an optional comma separated list of target
columns, which are either the name of an input *field* or the white space
separated list of the target column name, its PostgreSQL data type and a
*USING* expression.
The *USING* expression can be any valid Common Lisp form and will be read
with the current package set to `pgloader.transforms`, so that you can use
functions defined in that package, such as functions loaded dynamically with
the `--load` command line parameter.
Each *USING* expression is compiled at runtime to native code.
This feature allows pgloader to load any number of fields in a CSV file into
a possibly different number of columns in the database, using custom code
for that projection.
WITH
----
Set of options to apply to the command, using a global syntax of either:
- *key = value*
- *use option*
- *do not use option*
See each specific command for details.
All data sources specific commands support the following options:
- *on error stop*, *on error resume next*
- *batch rows = R*
- *batch size = ... MB*
- *prefetch rows = ...*
See the section BATCH BEHAVIOUR OPTIONS for more details.
In addition, the following settings are available:
- *workers = W*
- *concurrency = C*
- *max parallel create index = I*
See section A NOTE ABOUT PARALLELISM for more details.
SET
---
This clause allows to specify session parameters to be set for all the
sessions opened by pgloader. It expects a list of parameter name, the equal
sign, then the single-quoted value as a comma separated list.
The names and values of the parameters are not validated by pgloader, they
are given as-is to PostgreSQL.
BEFORE LOAD DO
--------------
You can run SQL queries against the database before loading the data from
the `CSV` file. Most common SQL queries are `CREATE TABLE IF NOT EXISTS` so
that the data can be loaded.
Each command must be *dollar-quoted*: it must begin and end with a double
dollar sign, `$$`. Dollar-quoted queries are then comma separated. No extra
punctuation is expected after the last SQL query.
BEFORE LOAD EXECUTE
-------------------
Same behaviour as in the *BEFORE LOAD DO* clause. Allows you to read the SQL
queries from a SQL file. Implements support for PostgreSQL dollar-quoting
and the `\i` and `\ir` include facilities as in `psql` batch mode (where
they are the same thing).
AFTER LOAD DO
-------------
Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that
section are executed once the load is done. That's the right time to create
indexes and constraints, or re-enable triggers.
AFTER LOAD EXECUTE
------------------
Same behaviour as in the *AFTER LOAD DO* clause. Allows you to read the SQL
queries from a SQL file. Implements support for PostgreSQL dollar-quoting
and the `\i` and `\ir` include facilities as in `psql` batch mode (where
they are the same thing).
AFTER CREATE SCHEMA DO
----------------------
Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that
section are executed once the schema has been created by pgloader, and
before the data is loaded. It's the right time to ALTER TABLE or do some
custom implementation on-top of what pgloader does, like maybe partitioning.
AFTER CREATE SCHEMA EXECUTE
---------------------------
Same behaviour as in the *AFTER CREATE SCHEMA DO* clause. Allows you to read
the SQL queries from a SQL file. Implements support for PostgreSQL
dollar-quoting and the `\i` and `\ir` include facilities as in `psql` batch
mode (where they are the same thing).
Connection String
-----------------
The `<postgresql-url>` parameter is expected to be given as a *Connection URI*
as documented in the PostgreSQL documentation at
http://www.postgresql.org/docs/9.3/static/libpq-connect.html#LIBPQ-CONNSTRING.
::
postgresql://[user[:password]@][netloc][:port][/dbname][?option=value&...]
Where:
- *user*
Can contain any character, including colon (`:`) which must then be
doubled (`::`) and at-sign (`@`) which must then be doubled (`@@`).
When omitted, the *user* name defaults to the value of the `PGUSER`
environment variable, and if it is unset, the value of the `USER`
environment variable.
- *password*
Can contain any character, including the at sign (`@`) which must then
be doubled (`@@`). To leave the password empty, when the *user* name
ends with an at sign, you then have to use the syntax user:@.
When omitted, the *password* defaults to the value of the `PGPASSWORD`
environment variable if it is set, otherwise the password is left
unset.
When no *password* is found either in the connection URI nor in the
environment, then pgloader looks for a `.pgpass` file as documented at
https://www.postgresql.org/docs/current/static/libpq-pgpass.html. The
implementation is not that of `libpq` though. As with `libpq` you can
set the environment variable `PGPASSFILE` to point to a `.pgpass` file,
and pgloader defaults to `~/.pgpass` on unix like systems and
`%APPDATA%\postgresql\pgpass.conf` on windows. Matching rules and syntax
are the same as with `libpq`, refer to its documentation.
- *netloc*
Can be either a hostname in dotted notation, or an ipv4, or an Unix
domain socket path. Empty is the default network location, under a
system providing *unix domain socket* that method is preferred, otherwise
the *netloc* default to `localhost`.
It's possible to force the *unix domain socket* path by using the syntax
`unix:/path/to/where/the/socket/file/is`, so to force a non default
socket path and a non default port, you would have:
postgresql://unix:/tmp:54321/dbname
The *netloc* defaults to the value of the `PGHOST` environment
variable, and if it is unset, to either the default `unix` socket path
when running on a Unix system, and `localhost` otherwise.
Socket path containing colons are supported by doubling the colons
within the path, as in the following example:
postgresql://unix:/tmp/project::region::instance:5432/dbname
- *dbname*
Should be a proper identifier (letter followed by a mix of letters,
digits and the punctuation signs comma (`,`), dash (`-`) and underscore
(`_`)).
When omitted, the *dbname* defaults to the value of the environment
variable `PGDATABASE`, and if that is unset, to the *user* value as
determined above.
- *options*
The optional parameters must be supplied with the form `name=value`, and
you may use several parameters by separating them away using an
ampersand (`&`) character.
Only some options are supported here, *tablename* (which might be
qualified with a schema name) *sslmode*, *host*, *port*, *dbname*,
*user* and *password*.
The *sslmode* parameter values can be one of `disable`, `allow`,
`prefer` or `require`.
For backward compatibility reasons, it's possible to specify the
*tablename* option directly, without spelling out the `tablename=`
parts.
The options override the main URI components when both are given, and
using the percent-encoded option parameters allow using passwords
starting with a colon and bypassing other URI components parsing
limitations.
Regular Expressions
-------------------
Several clauses listed in the following accept *regular expressions* with
the following input rules:
- A regular expression begins with a tilde sign (`~`),
- is then followed with an opening sign,
- then any character is allowed and considered part of the regular
expression, except for the closing sign,
- then a closing sign is expected.
The opening and closing sign are allowed by pair, here's the complete list
of allowed delimiters::
~//
~[]
~{}
~()
~<>
~""
~''
~||
~##
Pick the set of delimiters that don't collide with the *regular expression*
you're trying to input. If your expression is such that none of the
solutions allow you to enter it, the places where such expressions are
allowed should allow for a list of expressions.
Comments
--------
Any command may contain comments, following those input rules:
- the `--` delimiter begins a comment that ends with the end of the
current line,
- the delimiters `/*` and `*/` respectively start and end a comment, which
can be found in the middle of a command or span several lines.
Any place where you could enter a *whitespace* will accept a comment too.
Batch behaviour options
-----------------------
All pgloader commands have support for a *WITH* clause that allows for
specifying options. Some options are generic and accepted by all commands,
such as the *batch behaviour options*, and some options are specific to a
data source kind, such as the CSV *skip header* option.
The global batch behaviour options are:
- *batch rows*
Takes a numeric value as argument, used as the maximum number of rows
allowed in a batch. The default is `25 000` and can be changed to try
having better performance characteristics or to control pgloader memory
usage;
- *batch size*
Takes a memory unit as argument, such as *20 MB*, its default value.
Accepted multipliers are *kB*, *MB*, *GB*, *TB* and *PB*. The case is
important so as not to be confused about bits versus bytes, we're only
talking bytes here.
- *prefetch rows*
Takes a numeric value as argument, defaults to `100000`. That's the
number of rows that pgloader is allowed to read in memory in each reader
thread. See the *workers* setting for how many reader threads are
allowed to run at the same time.
Other options are specific to each input source, please refer to specific
parts of the documentation for their listing and covering.
A batch is then closed as soon as either the *batch rows* or the *batch
size* threshold is crossed, whichever comes first. In cases when a batch has
to be closed because of the *batch size* setting, a *debug* level log
message is printed with how many rows did fit in the *oversized* batch.
Templating with Mustache
------------------------
pgloader implements the https://mustache.github.io/ templating system so
that you may have dynamic parts of your commands. See the documentation for
this template system online.
A specific feature of pgloader is the ability to fetch a variable from the
OS environment of the pgloader process, making it possible to run pgloader
as in the following example::
$ DBPATH=sqlite/sqlite.db pgloader ./test/sqlite-env.load
or in several steps::
$ export DBPATH=sqlite/sqlite.db
$ pgloader ./test/sqlite-env.load
The variable can then be used in a typical mustache fashion::
load database
from '{{DBPATH}}'
into postgresql:///pgloader;
It's also possible to prepare a INI file such as the following::
[pgloader]
DBPATH = sqlite/sqlite.db
And run the following command, feeding the INI values as a *context* for
pgloader templating system::
$ pgloader --context ./test/sqlite.ini ./test/sqlite-ini.load
The mustache templates implementation with OS environment support replaces
former `GETENV` implementation, which didn't work anyway.

View File

@ -1,118 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# pgloader documentation build configuration file, created by
# sphinx-quickstart on Tue Dec 5 19:23:32 2017.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = 'pgloader'
copyright = '2005-2022, Dimitri Fontaine'
author = 'Dimitri Fontaine'
# The short X.Y version.
version = '3.6'
# The full version, including alpha/beta/rc tags.
release = '3.6.10'
# -- General configuration ------------------------------------------------
# The master toctree document.
master_doc = 'index'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
#html_theme = 'alabaster'
html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
#html_static_path = ['_static']
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'pgloader.tex', 'pgloader Documentation',
     'Dimitri Fontaine', 'manual'),
]
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'pgloader', 'pgloader Documentation',
     [author], 1)
]
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'pgloader', 'pgloader Documentation',
     author, 'pgloader', 'One line description of project.',
     'Miscellaneous'),
]

View File

@ -1,296 +0,0 @@
.. pgloader documentation master file, created by
sphinx-quickstart on Tue Dec 5 19:23:32 2017.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to pgloader's documentation!
====================================
The `pgloader`__ project is an Open Source Software project. The development
happens at `https://github.com/dimitri/pgloader`__ and is public: everyone
is welcome to participate by opening issues, pull requests, giving feedback,
etc.
__ https://github.com/dimitri/pgloader
__ https://github.com/dimitri/pgloader
pgloader loads data from various sources into PostgreSQL. It can transform
the data it reads on the fly and submit raw SQL before and after the
loading. It uses the `COPY` PostgreSQL protocol to stream the data into the
server, and manages errors by filling a pair of *reject.dat* and
*reject.log* files.
Thanks to being able to load data directly from a database source, pgloader
also supports migrations from other products to PostgreSQL. In this
mode of operation, pgloader handles both the schema and data parts of the
migration, in a single unmanned command, allowing to implement **Continuous
Migration**.
Features Overview
=================
pgloader has two modes of operation: loading from files, migrating
databases. In both cases, pgloader uses the PostgreSQL COPY protocol which
implements a **streaming** to send data in a very efficient way.
Loading file content in PostgreSQL
----------------------------------
When loading from files, pgloader implements the following features:
Many source formats supported
Support for a wide variety of file based formats are included in
pgloader: the CSV family, fixed columns formats, dBase files (``db3``),
and IBM IXF files.
The SQLite database engine is accounted for in the next section:
pgloader considers SQLite as a database source and implements schema
discovery from SQLite catalogs.
On the fly data transformation
Often enough the data as read from a CSV file (or another format) needs
some tweaking and clean-up before being sent to PostgreSQL.
For instance in the `geolite
<https://github.com/dimitri/pgloader/blob/master/test/archive.load>`_
example we can see that integer values are being rewritten as IP address
ranges, allowing to target an ``ip4r`` column directly.
Full Field projections
pgloader supports loading data into less fields than found on file, or
more, doing some computation on the data read before sending it to
PostgreSQL.
Reading files from an archive
Archive formats *zip*, *tar*, and *gzip* are supported by pgloader: the
archive is extracted in a temporary directory and the expanded files are then
loaded.
HTTP(S) support
pgloader knows how to download a source file or a source archive using
HTTP directly. It might be better to use ``curl -O- http://... |
pgloader`` and read the data from *standard input*, then allowing for
streaming of the data from its source down to PostgreSQL.
Target schema discovery
When loading in an existing table, pgloader takes into account the
existing columns and may automatically guess the CSV format for you.
On error stop / On error resume next
In some cases the source data is so damaged as to be impossible to
migrate in full, and when loading from a file then the default for
pgloader is to use ``on error resume next`` option, where the rows
rejected by PostgreSQL are saved away and the migration continues with
the other rows.
In other cases loading only a part of the input data might not be a
great idea, and in such cases it's possible to use the ``on error stop``
option.
Pre/Post SQL commands
This feature allows pgloader commands to include SQL commands to run
before and after loading a file. It might be about creating a table
first, then loading the data into it, and then doing more processing
on-top of the data (implementing an *ELT* pipeline then), or creating
specific indexes as soon as the data has been made ready.
One-command migration to PostgreSQL
-----------------------------------
When migrating a full database in a single command, pgloader implements the
following features:
One-command migration
The whole migration is started with a single command line and then runs
unattended. pgloader is meant to be integrated in a fully automated
tooling that you can repeat as many times as needed.
Schema discovery
The source database is introspected using its SQL catalogs to get the
list of tables, attributes (with data types, default values, not null
constraints, etc), primary key constraints, foreign key constraints,
indexes, comments, etc. This feeds an internal database catalog of all
the objects to migrate from the source database to the target database.
User defined casting rules
Some source database have ideas about their data types that might not be
compatible with the PostgreSQL implementation of equivalent data types.
For instance, SQLite since version 3 has a `Dynamic Type System
<https://www.sqlite.org/datatype3.html>`_ which of course isn't
compatible with the idea of a `Relation
<https://en.wikipedia.org/wiki/Relation_(database)>`_. Or MySQL accepts
datetime for year zero, which doesn't exists in our calendar, and
doesn't have a boolean data type.
When migrating from another source database technology to PostgreSQL,
data type casting choices must be made. pgloader implements solid
defaults that you can rely upon, and a facility for **user defined data
type casting rules** for specific cases. The idea is to allow users to
specify how the migration should be done, in order for it to be
repeatable and included in a *Continuous Migration* process.
On the fly data transformations
The user defined casting rules come with on the fly rewrite of the data.
For instance zero dates (it's not just the year, MySQL accepts
``0000-00-00`` as a valid datetime) are rewritten to NULL values by
default.
Partial Migrations
It is possible to include only a partial list of the source database
tables in the migration, or to exclude some of the tables on the source
database.
Schema only, Data only
This is the **ORM compatibility** feature of pgloader, where it is
possible to create the schema using your ORM and then have pgloader
migrate the data targeting this already created schema.
When doing this, it is possible for pgloader to *reindex* the target
schema: before loading the data from the source database into PostgreSQL
using COPY, pgloader DROPs the indexes and constraints, and reinstalls
the exact same definitions of them once the data has been loaded.
The reason for operating that way is of course data load performance.
Repeatable (DROP+CREATE)
By default, pgloader issues DROP statements in the target PostgreSQL
database before issuing any CREATE statement, so that you can repeat the
migration as many times as necessary until migration specifications and
rules are bug free.
Then schedule the data migration to run every night (or even more often!)
for the whole duration of the code migration project. See the
`Continuous Migration <https://pgloader.io/blog/continuous-migration/>`_
methodology for more details about the approach.
On error stop / On error resume next
The default behavior of pgloader when migrating from a database is
``on error stop``. The idea is to let the user fix either the migration
specifications or the source data, and run the process again, until
it works.
In some cases the source data is so damaged as to be impossible to
migrate in full, and it might be necessary to then resort to the ``on
error resume next`` option, where the rows rejected by PostgreSQL are
saved away and the migration continues with the other rows.
Pre/Post SQL commands, Post-Schema SQL commands
While pgloader takes care of rewriting the schema to PostgreSQL
expectations, and even provides *user-defined data type casting rules*
support to that end, sometimes it is necessary to add some specific SQL
commands around the migration. It's of course supported right from
pgloader itself, without having to script around it.
Online ALTER schema
At times migrating to PostgreSQL is also a good opportunity to review
and fix bad decisions that were made in the past, or simply that are not
relevant to PostgreSQL.
The pgloader command syntax allows to ALTER pgloader's internal
representation of the target catalogs so that the target schema can be
created a little different from the source one. Changes supported
include target a different *schema* or *table* name.
Materialized Views, or schema rewrite on-the-fly
In some cases the schema rewriting goes deeper than just renaming the
SQL objects to being a full normalization exercise. Because PostgreSQL
is great at running a normalized schema in production under most
workloads.
pgloader implements full flexibility in on-the-fly schema rewriting, by
making it possible to migrate from a view definition. The view attribute
list becomes a table definition in PostgreSQL, and the data is fetched
by querying the view on the source system.
A SQL view allows to implement both content filtering at the column
level using the SELECT projection clause, and at the row level using the
WHERE restriction clause. And backfilling from reference tables thanks
to JOINs.
Distribute to Citus
When migrating from PostgreSQL to Citus, an important part of the process
consists of adjusting the schema to the distribution key. Read
`Preparing Tables and Ingesting Data
<https://docs.citusdata.com/en/v8.0/use_cases/multi_tenant.html>`_ in
the Citus documentation for a complete example showing how to do that.
When using pgloader it's possible to specify the distribution keys and
reference tables and let pgloader take care of adjusting the table,
indexes, primary keys and foreign key definitions all by itself.
Encoding Overrides
MySQL doesn't actually enforce the encoding of the data in the database
to match the encoding known in the metadata, defined at the database,
table, or attribute level. Sometimes, it's necessary to override the
metadata in order to make sense of the text, and pgloader makes it easy
to do so.
Continuous Migration
--------------------
pgloader is meant to migrate a whole database in a single command line and
without any manual intervention. The goal is to be able to setup a
*Continuous Integration* environment as described in the `Project
Methodology <http://mysqltopgsql.com/project/>`_ document of the `MySQL to
PostgreSQL <http://mysqltopgsql.com/project/>`_ webpage.
1. Setup your target PostgreSQL Architecture
2. Fork a Continuous Integration environment that uses PostgreSQL
3. Migrate the data over and over again every night, from production
4. As soon as the CI is all green using PostgreSQL, schedule the D-Day
5. Migrate without surprise and enjoy!
In order to be able to follow this great methodology, you need tooling to
implement the third step in a fully automated way. That's pgloader.
.. toctree::
:hidden:
:caption: Getting Started
intro
quickstart
tutorial/tutorial
install
bugreport
.. toctree::
:hidden:
:caption: Reference Manual
pgloader
command
batches
ref/transforms
.. toctree::
:hidden:
:caption: Manual for file formats
ref/csv
ref/fixed
ref/copy
ref/dbf
ref/ixf
ref/archive
.. toctree::
:maxdepth: 2
:hidden:
:caption: Manual for Database Servers
ref/mysql
ref/sqlite
ref/mssql
ref/pgsql
ref/pgsql-citus-target
ref/pgsql-redshift
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@ -1,221 +0,0 @@
Installing pgloader
===================
Several distributions are available for pgloader.
debian packages
---------------
You can install pgloader directly from `apt.postgresql.org`__ and from
official debian repositories, see `packages.debian.org/pgloader`__.
::
$ apt-get install pgloader
__ https://wiki.postgresql.org/wiki/Apt
__ https://packages.debian.org/search?keywords=pgloader
RPM packages
------------
The Postgres community repository for RPM packages is `yum.postgresql.org`__
and does include binary packages for pgloader.
__ https://yum.postgresql.org
Docker Images
-------------
Docker images are maintained for each tagged release at dockerhub, and also
built from the CI/CD integration on GitHub at each commit to the `main`
branch.
The DockerHub `dimitri/pgloader`__ repository is where the tagged releases
are made available. The image uses the Postgres version currently in debian
stable.
__ https://hub.docker.com/r/dimitri/pgloader
To use the ``dimitri/pgloader`` docker image::
$ docker run --rm -it dimitri/pgloader:latest pgloader --version
Or you can use the CI/CD integration that publishes packages from the main
branch to the GitHub docker repository::
$ docker pull ghcr.io/dimitri/pgloader:latest
$ docker run --rm -it ghcr.io/dimitri/pgloader:latest pgloader --version
$ docker run --rm -it ghcr.io/dimitri/pgloader:latest pgloader --help
Build from sources
------------------
pgloader is a Common Lisp program, tested using the `SBCL`__ (>= 1.2.5) and
`Clozure CL`__ implementations and with `Quicklisp`__ to fetch build
dependencies.
__ http://sbcl.org/
__ http://ccl.clozure.com/
__ http://www.quicklisp.org/beta/
When building from sources, you should always build from the current git
HEAD as it's basically the only source that is managed in a way to ensure it
builds against current set of dependencies versions.
The build system for pgloader uses a Makefile and the Quicklisp Common Lisp
packages distribution system.
The modern build system for pgloader is entirely written in Common Lisp,
where the historical name for our operation is `save-lisp-and-die` and can
be used that way:
::
$ make save
The legacy build system also uses Buildapp and can be used that way:
::
$ make pgloader
Building from sources on debian
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Install the build dependencies first, then use the Makefile::
$ apt-get install sbcl unzip libsqlite3-dev make curl gawk freetds-dev libzip-dev
$ cd /path/to/pgloader
$ make save
$ ./build/bin/pgloader --help
Building from sources on RedHat/CentOS
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To build and install pgloader the Steel Bank Common Lisp package (sbcl) from
EPEL, and the freetds packages are required.
It is recommended to build the RPM yourself, see below, to ensure that all
installed files are properly tracked and that you can safely update to newer
versions of pgloader as they're released.
To do an ad hoc build and install run ``bootstrap-centos.sh`` for CentOS 6 or
``bootstrap-centos7.sh`` for CentOS 7 to install the required dependencies.
Building a pgloader RPM from sources
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The spec file in the root of the pgloader repository can be used to build your
own RPM. For production deployments it is recommended that you build this RPM on
a dedicated build box and then copy the RPM to your production environment for
use; it is considered bad practice to have compilers and build tools present in
production environments.
1. Install the [EPEL repo](https://fedoraproject.org/wiki/EPEL#Quickstart).
2. Install rpmbuild dependencies::
sudo yum -y install yum-utils rpmdevtools @"Development Tools"
3. Install pgloader build dependencies::
sudo yum-builddep pgloader.spec
4. Download pgloader source::
spectool -g -R pgloader.spec
5. Build the source and binary RPMs (see `rpmbuild --help` for other build
options)::
rpmbuild -ba pgloader.spec
Building from sources on macOS
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
We suppose you already have ``git`` and ``make`` available, if that's not
the case now is the time to install those tools. The SQLite lib that comes
in MacOSX is fine, no need for extra software here.
You will need to install either SBCL or CCL separately, and when using
[brew](http://brew.sh/) it's as simple as:
::
$ brew install sbcl
$ brew install clozure-cl
NOTE: Make sure you installed the universal binaries of Freetds, so that
they can be loaded correctly.
::
$ brew install freetds --universal --build-from-source
Then use the normal build system for pgloader:
::
$ make save
$ ./build/bin/pgloader --version
Building from sources on Windows
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Building pgloader on Windows is supported (in theory), thanks to Common Lisp
implementations being available on that platform, and to the Common Lisp
Standard for making it easy to write actually portable code.
It is recommended to have a look at the `issues labelled with Windows
support`__ if you run into trouble when building pgloader, because the
development team is lacking windows user and in practice we can't maintain
the support for that Operating System:
__ https://github.com/dimitri/pgloader/issues?utf8=✓&q=label%3A%22Windows%20support%22%20>
If you need ``pgloader.exe`` on windows please consider contributing fixes
for that environment and maybe longer term support then. Specifically, a CI
integration with a windows build host would allow ensuring that we continue
to support that target.
Building Docker image from sources
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
You can build a Docker image from source using SBCL by default::
$ docker build .
Or Clozure CL (CCL)::
$ docker build -f Dockerfile.ccl .
More options when building from source
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The ``Makefile`` target ``save`` knows how to produce a Self Contained
Binary file for pgloader, found at ``./build/bin/pgloader``::
$ make save
By default, the ``Makefile`` uses `SBCL`__ to compile your binary image,
though it's possible to build using `Clozure-CL`__.
__ http://sbcl.org/
__ http://ccl.clozure.com/
::
$ make CL=ccl64 save
It is possible to tweak the default amount of memory that the pgloader
image will allow itself using when running through your data (don't ask for
more than your current RAM tho). At the moment only the legacy build system
includes support for this custom build::
$ make DYNSIZE=8192 pgloader
The ``make pgloader`` command when successful outputs a
`./build/bin/pgloader` file for you to use.

View File

@ -1,100 +0,0 @@
Introduction
============
pgloader loads data from various sources into PostgreSQL. It can
transform the data it reads on the fly and submit raw SQL before and
after the loading. It uses the `COPY` PostgreSQL protocol to stream
the data into the server, and manages errors by filling a pair of
*reject.dat* and *reject.log* files.
pgloader knows how to read data from different kind of sources:
* Files
* CSV
* Fixed Format
* Postgres COPY text format
* DBF
* IXF
* Databases
* SQLite
* MySQL
* MS SQL Server
* PostgreSQL
* Redshift
pgloader knows how to target different products using the PostgreSQL Protocol:
* PostgreSQL
* `Citus <https://www.citusdata.com>`_
* Redshift
The level of automation provided by pgloader depends on the data source
type. In the case of CSV and Fixed Format files, a full description of the
expected input properties must be given to pgloader. In the case of a
database, pgloader connects to the live service and knows how to fetch the
metadata it needs directly from it.
Features Matrix
---------------
Here's a comparison of the features supported depending on the source
database engine. Some features that are not supported can be added to
pgloader, it's just that nobody had the need to do so yet. Those features
are marked with ✗. Empty cells are used when the feature doesn't make sense
for the selected source database.
========================== ======= ====== ====== =========== =========
Feature SQLite MySQL MS SQL PostgreSQL Redshift
========================== ======= ====== ====== =========== =========
One-command migration ✓ ✓ ✓ ✓ ✓
Continuous Migration ✓ ✓ ✓ ✓ ✓
Schema discovery ✓ ✓ ✓ ✓ ✓
Partial Migrations ✓ ✓ ✓ ✓ ✓
Schema only ✓ ✓ ✓ ✓ ✓
Data only ✓ ✓ ✓ ✓ ✓
Repeatable (DROP+CREATE) ✓ ✓ ✓ ✓ ✓
User defined casting rules ✓ ✓ ✓ ✓ ✓
Encoding Overrides ✓
On error stop ✓ ✓ ✓ ✓ ✓
On error resume next ✓ ✓ ✓ ✓ ✓
Pre/Post SQL commands ✓ ✓ ✓ ✓ ✓
Post-Schema SQL commands ✗ ✓ ✓ ✓ ✓
Primary key support ✓ ✓ ✓ ✓ ✓
Foreign key support ✓ ✓ ✓ ✓
Online ALTER schema ✓ ✓ ✓ ✓ ✓
Materialized views ✗ ✓ ✓ ✓ ✓
Distribute to Citus ✗ ✓ ✓ ✓ ✓
========================== ======= ====== ====== =========== =========
For more details about what the features are about, see the specific
reference pages for your database source.
For some of the features, missing support only means that the feature is not
needed for the other sources, such as the capability to override MySQL
encoding metadata about a table or a column. Only MySQL in this list is left
completely unable to guarantee text encoding. Or Redshift not having foreign
keys.
Commands
--------
pgloader implements its own *Command Language*, a DSL that allows to specify
every aspect of the data load and migration to implement. Some of the
features provided in the language are only available for a specific source
type.
Command Line
------------
The pgloader command line accepts those two variants::
pgloader [<options>] [<command-file>]...
pgloader [<options>] SOURCE TARGET
Either you have a *command-file* containing migration specifications in the
pgloader *Command Language*, or you can give a *Source* for the data and a
PostgreSQL database connection *Target* where to load the data into.

View File

@ -1,235 +0,0 @@
Command Line
============
pgloader loads data from various sources into PostgreSQL. It can
transform the data it reads on the fly and submit raw SQL before and
after the loading. It uses the `COPY` PostgreSQL protocol to stream
the data into the server, and manages errors by filling a pair of
*reject.dat* and *reject.log* files.
pgloader operates either using commands which are read from files::
pgloader commands.load
or by using arguments and options all provided on the command line::
pgloader SOURCE TARGET
Arguments
---------
The pgloader arguments can be as many load files as needed, or a couple of
connection strings to a specific input file.
Source Connection String
^^^^^^^^^^^^^^^^^^^^^^^^
The source connection string format is as follows::
format:///absolute/path/to/file.ext
format://./relative/path/to/file.ext
Where format might be one of `csv`, `fixed`, `copy`, `dbf`, `db3` or `ixf`.::
db://user:pass@host:port/dbname
Where db might be of `sqlite`, `mysql` or `mssql`.
When using a file based source format, pgloader also support natively
fetching the file from an http location and decompressing an archive if
needed. In that case it's necessary to use the `--type` option to specify
the expected format of the file. See the examples below.
Also note that some file formats require describing some implementation
details such as columns to be read and delimiters and quoting when loading
from csv.
For more complex loading scenarios, you will need to write a full fledge
load command in the syntax described later in this document.
Target Connection String
^^^^^^^^^^^^^^^^^^^^^^^^
The target connection string format is described in details later in this
document, see Section Connection String.
Options
-------
Inquiry Options
^^^^^^^^^^^^^^^
Use these options when you want to know more about how to use pgloader, as
those options will cause pgloader not to load any data.
--help
Show command usage summary and exit.
--version
Show pgloader version string and exit.
--with-encodings
List known encodings in this version of pgloader.
--upgrade-config
Parse given files in the command line as ``pgloader.conf`` files with
the INI syntax that was in use in pgloader versions 2.x, and output the
new command syntax for pgloader on standard output.
General Options
^^^^^^^^^^^^^^^
Those options are meant to tweak pgloader behavior when loading data.
--verbose
Be verbose.
--quiet
Be quiet.
--debug
Show debug level information messages.
--root-dir
Set the root working directory (defaults to ``/tmp/pgloader``).
--logfile
Set the pgloader log file (defaults to ``/tmp/pgloader/pgloader.log``).
--log-min-messages
Minimum level of verbosity needed for log message to make it to the
logfile. One of critical, log, error, warning, notice, info or debug.
--client-min-messages
Minimum level of verbosity needed for log message to make it to the
console. One of critical, log, error, warning, notice, info or debug.
--summary
A filename where to copy the summary output. When relative, the filename
is expanded into ``*root-dir*``.
The format of the filename defaults to being *human readable*. It is
possible to have the output in machine friendly formats such as *CSV*,
*COPY* (PostgreSQL's own COPY format) or *JSON* by specifying a filename
with the extension resp. ``.csv``, ``.copy`` or ``.json``.
--load-lisp-file <file>
Specify a lisp <file> to compile and load into the pgloader image before
reading the commands, allowing to define extra transformation function.
Those functions should be defined in the ``pgloader.transforms``
package. This option can appear more than once in the command line.
--dry-run
Allow testing a ``.load`` file without actually trying to load any data.
It's useful to debug it until it's ok, in particular to fix connection
strings.
--on-error-stop
Alter pgloader behavior: rather than trying to be smart about error
handling and continue loading good data, separating away the bad one,
just stop as soon as PostgreSQL refuses anything sent to it. Useful to
debug data processing, transformation function and specific type
casting.
--self-upgrade <directory>
Specify a <directory> where to find pgloader sources so that one of the
very first things it does is dynamically loading-in (and compiling to
machine code) another version of itself, usually a newer one like a very
recent git checkout.
--no-ssl-cert-verification
Uses the OpenSSL option to accept a locally issued server-side
certificate, avoiding the following error message::
SSL verify error: 20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY
The right way to fix the SSL issue is to use a trusted certificate, of
course. Sometimes though it's useful to make progress with the pgloader
setup while the certificate chain of trust is being fixed, maybe by
another team. That's when this option is useful.
Command Line Only Operations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Those options are meant to be used when using pgloader from the command line
only, rather than using a command file and the rich command clauses and
parser. In simple cases, it can be much easier to use the *SOURCE* and
*TARGET* directly on the command line, then tweak the loading with those
options:
--with <option>
Allows setting options from the command line. You can use that option as
many times as you want. The option arguments must follow the *WITH*
clause for the source type of the ``SOURCE`` specification, as described
later in this document.
--set
Allows setting PostgreSQL configuration from the command line. Note that
the option parsing is the same as when used from the *SET* command
clause, in particular you must enclose the guc value with single-quotes.
Use ``--set "guc_name='value'"``.
--field
Allows setting a source field definition. Fields are accumulated in the
order given on the command line. It's possible to either use a
``--field`` option per field in the source file, or to separate field
definitions by a comma, as you would do in the *HAVING FIELDS* clause.
--cast <rule>
Allows setting a specific casting rule for loading the data.
--type <csv|fixed|db3|ixf|sqlite|mysql|mssql>
Allows forcing the source type, in case when the *SOURCE* parsing isn't
satisfying.
--encoding <encoding>
Set the encoding of the source file to load data from.
--before <filename>
Parse given filename for SQL queries and run them against the target
database before loading the data from the source. The queries are parsed
by pgloader itself: they need to be terminated by a semi-colon (;) and
the file may include `\i` or `\ir` commands to *include* another file.
--after <filename>
Parse given filename for SQL queries and run them against the target
database after having loaded the data from the source. The queries are
parsed in the same way as with the `--before` option, see above.
More Debug Information
^^^^^^^^^^^^^^^^^^^^^^
To get the maximum amount of debug information, you can use both the
`--verbose` and the `--debug` switches at the same time, which is equivalent
to saying `--client-min-messages data`. Then the log messages will show the
data being processed, in the cases where the code has explicit support for
it.

View File

@ -1,120 +0,0 @@
Archive (http, zip)
===================
This command instructs pgloader to load data from one or more files contained
in an archive. Currently the only supported archive format is *ZIP*, and the
archive might be downloaded from an *HTTP* URL.
Using advanced options and a load command file
----------------------------------------------
The command then would be:
::
$ pgloader archive.load
And the contents of the ``archive.load`` file could be inspired from the
following:
::
LOAD ARCHIVE
FROM /Users/dim/Downloads/GeoLiteCity-latest.zip
INTO postgresql:///ip4r
BEFORE LOAD
DO $$ create extension if not exists ip4r; $$,
$$ create schema if not exists geolite; $$,
EXECUTE 'geolite.sql'
LOAD CSV
FROM FILENAME MATCHING ~/GeoLiteCity-Location.csv/
WITH ENCODING iso-8859-1
(
locId,
country,
region null if blanks,
city null if blanks,
postalCode null if blanks,
latitude,
longitude,
metroCode null if blanks,
areaCode null if blanks
)
INTO postgresql:///ip4r?geolite.location
(
locid,country,region,city,postalCode,
location point using (format nil "(~a,~a)" longitude latitude),
metroCode,areaCode
)
WITH skip header = 2,
fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ','
AND LOAD CSV
FROM FILENAME MATCHING ~/GeoLiteCity-Blocks.csv/
WITH ENCODING iso-8859-1
(
startIpNum, endIpNum, locId
)
INTO postgresql:///ip4r?geolite.blocks
(
iprange ip4r using (ip-range startIpNum endIpNum),
locId
)
WITH skip header = 2,
fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ','
FINALLY DO
$$ create index blocks_ip4r_idx on geolite.blocks using gist(iprange); $$;
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
Archive Source Specification: FROM
----------------------------------
Filename or HTTP URI where to load the data from. When given an HTTP URL the
linked file will get downloaded locally before processing.
If the file is a `zip` file, the command line utility `unzip` is used to
expand the archive into files in `$TMPDIR`, or `/tmp` if `$TMPDIR` is unset
or set to a non-existing directory.
Then the following commands are used from the top level directory where the
archive has been expanded.
Archive Sub Commands
--------------------
- command [ *AND* command ... ]
A series of commands against the contents of the archive, at the moment
only `CSV`, `FIXED` and `DBF` commands are supported.
Note that commands are supporting the clause *FROM FILENAME MATCHING*
which allows the pgloader command not to depend on the exact names of
the archive directories.
The same clause can also be applied to several files with using the
spelling *FROM ALL FILENAMES MATCHING* and a regular expression.
The whole *matching* clause must follow the following rule::
FROM [ ALL FILENAMES | [ FIRST ] FILENAME ] MATCHING
Archive Final SQL Commands
--------------------------
- *FINALLY DO*
SQL Queries to run once the data is loaded, such as `CREATE INDEX`.

View File

@ -1,133 +0,0 @@
COPY
====
This command instructs pgloader to load from a file containing COPY TEXT
data as described in the PostgreSQL documentation.
Using advanced options and a load command file
----------------------------------------------
The command then would be:
::
$ pgloader copy.load
And the contents of the ``copy.load`` file could be inspired from the following:
::
LOAD COPY
FROM copy://./data/track.copy
(
trackid, track, album, media, genre, composer,
milliseconds, bytes, unitprice
)
INTO postgresql:///pgloader
TARGET TABLE track_full
WITH truncate
SET work_mem to '14MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ drop table if exists track_full; $$,
$$ create table track_full (
trackid bigserial,
track text,
album text,
media text,
genre text,
composer text,
milliseconds bigint,
bytes bigint,
unitprice numeric
);
$$;
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
COPY Formatted Files Source Specification: FROM
-----------------------------------------------
Filename where to load the data from. This supports local files, HTTP URLs
and zip files containing a single dbf file of the same name. Fetching such a
zip file from an HTTP address is of course supported.
- *inline*
The data is found after the end of the parsed commands. Any number of
empty lines between the end of the commands and the beginning of the
data is accepted.
- *stdin*
Reads the data from the standard input stream.
- *FILENAMES MATCHING*
The whole *matching* clause must follow the following rule::
[ ALL FILENAMES | [ FIRST ] FILENAME ]
MATCHING regexp
[ IN DIRECTORY '...' ]
The *matching* clause applies given *regular expression* (see above for
exact syntax, several options can be used here) to filenames. It's then
possible to load data from only the first match of all of them.
The optional *IN DIRECTORY* clause allows specifying which directory to
walk for finding the data files, and can be either relative to where the
command file is read from, or absolute. The given directory must exist.
COPY Formatted File Options: WITH
---------------------------------
When loading from a `COPY` file, the following options are supported:
- *delimiter*
Takes a single character as argument, which must be found inside single
quotes, and might be given as the printable character itself, the
special value \t to denote a tabulation character, or `0x` then an
hexadecimal value read as the ASCII code for the character.
This character is used as the *delimiter* when reading the data, in a
similar way to the PostgreSQL `COPY` option.
- *null*
Takes a quoted string as an argument (quotes can be either double quotes
or single quotes) and uses that string as the `NULL` representation in
the data.
This is similar to the *null* `COPY` option in PostgreSQL.
- *truncate*
When this option is listed, pgloader issues a `TRUNCATE` command against
the PostgreSQL target table before reading the data file.
- *disable triggers*
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
TRIGGER ALL` command against the PostgreSQL target table before copying
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
`COPY` is done.
This option allows loading data into a pre-existing table ignoring the
*foreign key constraints* and user defined triggers and may result in
invalid *foreign key constraints* once the data is loaded. Use with
care.
- *skip header*
Takes a numeric value as argument. Instructs pgloader to skip that many
lines at the beginning of the input file.

View File

@ -1,262 +0,0 @@
CSV
===
This command instructs pgloader to load data from a `CSV` file. Because of
the complexity of guessing the parameters of a CSV file, it's simpler to
instruct pgloader with how to parse the data in there, using the full
pgloader command syntax and CSV specifications as in the following example.
Using advanced options and a load command file
----------------------------------------------
The command then would be:
::
$ pgloader csv.load
And the contents of the ``csv.load`` file could be inspired from the following:
::
LOAD CSV
FROM 'GeoLiteCity-Blocks.csv' WITH ENCODING iso-646-us
HAVING FIELDS
(
startIpNum, endIpNum, locId
)
INTO postgresql://user@localhost:54393/dbname
TARGET TABLE geolite.blocks
TARGET COLUMNS
(
iprange ip4r using (ip-range startIpNum endIpNum),
locId
)
WITH truncate,
skip header = 2,
fields optionally enclosed by '"',
fields escaped by backslash-quote,
fields terminated by '\t'
SET work_mem to '32 MB', maintenance_work_mem to '64 MB';
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
CSV Source Specification: FROM
------------------------------
Filename where to load the data from. Accepts an *ENCODING* option. Use the
`--list-encodings` option to know which encoding names are supported.
The filename may be enclosed by single quotes, and could be one of the
following special values:
- *inline*
The data is found after the end of the parsed commands. Any number
of empty lines between the end of the commands and the beginning of
the data is accepted.
- *stdin*
Reads the data from the standard input stream.
- *FILENAME MATCHING*
The whole *matching* clause must follow the following rule::
[ ALL FILENAMES | [ FIRST ] FILENAME ]
MATCHING regexp
[ IN DIRECTORY '...' ]
The *matching* clause applies given *regular expression* (see above
for exact syntax, several options can be used here) to filenames.
It's then possible to load data from only the first match of all of
them.
The optional *IN DIRECTORY* clause allows specifying which directory
to walk for finding the data files, and can be either relative to
where the command file is read from, or absolute. The given
directory must exist.
Fields Specifications
---------------------
The *FROM* option also supports an optional comma separated list of *field*
names describing what is expected in the `CSV` data file, optionally
introduced by the clause `HAVING FIELDS`.
Each field name can be either only one name or a name following with
specific reader options for that field, enclosed in square brackets and
comma-separated. Supported per-field reader options are:
- *terminated by*
See the description of *field terminated by* below.
The processing of this option is not currently implemented.
- *date format*
When the field is expected of the date type, then this option allows
to specify the date format used in the file.
Date format strings are template strings modeled against the
PostgreSQL `to_char` template strings support, limited to the
following patterns:
- YYYY, YYY, YY for the year part
- MM for the numeric month part
- DD for the numeric day part
- HH, HH12, HH24 for the hour part
- am, AM, a.m., A.M.
- pm, PM, p.m., P.M.
- MI for the minutes part
- SS for the seconds part
- MS for the milliseconds part (4 digits)
- US for the microseconds part (6 digits)
- unparsed punctuation signs: - . * # @ T / \ and space
Here's an example of a *date format* specification::
column-name [date format 'YYYY-MM-DD HH24-MI-SS.US']
- *null if*
This option takes an argument which is either the keyword *blanks*
or a double-quoted string.
When *blanks* is used and the field value that is read contains
only space characters, then it's automatically converted to an SQL
`NULL` value.
When a double-quoted string is used and that string is read as the
field value, then the field value is automatically converted to an
SQL `NULL` value.
- *trim both whitespace*, *trim left whitespace*, *trim right whitespace*
This option allows trimming whitespace in the read data, either from
both sides of the data, or only the whitespace characters found on
the left of the string, or only those on the right of the string.
CSV Loading Options: WITH
-------------------------
When loading from a `CSV` file, the following options are supported:
- *truncate*
When this option is listed, pgloader issues a `TRUNCATE` command
against the PostgreSQL target table before reading the data file.
- *drop indexes*
When this option is listed, pgloader issues `DROP INDEX` commands
against all the indexes defined on the target table before copying
the data, then `CREATE INDEX` commands once the `COPY` is done.
In order to get the best performance possible, all the indexes are
created in parallel and when done the primary keys are built again
from the unique indexes just created. This two step process allows
creating the primary key index in parallel with the other indexes,
as only the `ALTER TABLE` command needs an *access exclusive lock*
on the target table.
- *disable triggers*
When this option is listed, pgloader issues an `ALTER TABLE ...
DISABLE TRIGGER ALL` command against the PostgreSQL target table
before copying the data, then the command `ALTER TABLE ... ENABLE
TRIGGER ALL` once the `COPY` is done.
This option allows loading data into a pre-existing table ignoring
the *foreign key constraints* and user defined triggers and may
result in invalid *foreign key constraints* once the data is loaded.
Use with care.
- *skip header*
Takes a numeric value as argument. Instructs pgloader to skip that
many lines at the beginning of the input file.
- *csv header*
Use the first line read after *skip header* as the list of csv field
names to be found in the CSV file, using the same CSV parameters as
for the CSV data.
- *trim unquoted blanks*
When reading unquoted values in the `CSV` file, remove the blanks
found in between the separator and the value. That behaviour is the
default.
- *keep unquoted blanks*
When reading unquoted values in the `CSV` file, keep blanks found in
between the separator and the value.
- *fields optionally enclosed by*
Takes a single character as argument, which must be found inside single
quotes, and might be given as the printable character itself, the
special value \t to denote a tabulation character, the special value \'
to denote a single-quote, or `0x` then an hexadecimal value read as the
ASCII code for the character.
The following options specify the same enclosing character, a single quote::
fields optionally enclosed by '\''
fields optionally enclosed by '0x27'
This character is used as the quoting character in the `CSV` file,
and defaults to double-quote.
- *fields not enclosed*
By default, pgloader will use the double-quote character as the
enclosing character. If you have a CSV file where fields are not
enclosed and are using double-quote as an expected ordinary
character, then use the option *fields not enclosed* for the CSV
parser to accept those values.
- *fields escaped by*
Takes either the special value *backslash-quote* or *double-quote*,
or any value supported by the *fields terminated by* option (see
below). This value is used to recognize escaped field separators
when they are to be found within the data fields themselves.
Defaults to *double-quote*.
- *csv escape mode*
Takes either the special value *quote* (the default) or *following*
and allows the CSV parser to parse either only escaped field
separator or any character (including CSV data) when using the
*following* value.
- *fields terminated by*
Takes a single character as argument, which must be found inside
single quotes, and might be given as the printable character itself,
the special value \t to denote a tabulation character, or `0x` then
an hexadecimal value read as the ASCII code for the character.
This character is used as the *field separator* when reading the
`CSV` data.
- *lines terminated by*
Takes a single character as argument, which must be found inside
single quotes, and might be given as the printable character itself,
the special value \t to denote a tabulation character, or `0x` then
an hexadecimal value read as the ASCII code for the character.
This character is used to recognize *end-of-line* condition when
reading the `CSV` data.

View File

@ -1,88 +0,0 @@
DBF
===
This command instructs pgloader to load data from a `DBF` file. A default
set of casting rules is provided and might be overloaded and appended to by
the command.
Using advanced options and a load command file
----------------------------------------------
Here's an example with a remote HTTP source and some user defined casting
rules. The command then would be:
::
$ pgloader dbf.load
And the contents of the ``dbf.load`` file could be inspired from the following:
::
LOAD DBF
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf
INTO postgresql://user@localhost/dbname
WITH truncate, create table
CAST column reg2013.region to integer,
column reg2013.tncc to smallint;
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
DBF Source Specification: FROM
------------------------------
Filename where to load the data from. This supports local files, HTTP URLs
and zip files containing a single dbf file of the same name. Fetching such a
zip file from an HTTP address is of course supported.
DBF Loading Options: WITH
-------------------------
When loading from a `DBF` file, the following options are supported:
- *truncate*
When this option is listed, pgloader issues a `TRUNCATE` command against
the PostgreSQL target table before reading the data file.
- *disable triggers*
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
TRIGGER ALL` command against the PostgreSQL target table before copying
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
`COPY` is done.
This option allows loading data into a pre-existing table ignoring the
*foreign key constraints* and user defined triggers and may result in
invalid *foreign key constraints* once the data is loaded. Use with
care.
- *create table*
When this option is listed, pgloader creates the table using the meta
data found in the `DBF` file, which must contain a list of fields with
their data type. A standard data type conversion from DBF to PostgreSQL
is done.
- *table name*
This option expects as its value the possibly qualified name of the
table to create.
Default DB3 Casting Rules
-------------------------
When migrating from DB3 the following Casting Rules are provided::
type C to text using db3-trim-string
type M to text using db3-trim-string
type N to numeric using db3-numeric-to-pgsql-integer
type I to numeric using db3-numeric-to-pgsql-numeric
type L to boolean using logical-to-boolean
type D to date using db3-date-to-pgsql-date

View File

@ -1,204 +0,0 @@
Fixed Columns
=============
This command instructs pgloader to load data from a text file containing
columns arranged in a *fixed size* manner.
Using advanced options and a load command file
----------------------------------------------
The command then would be:
::
$ pgloader fixed.load
And the contents of the ``fixed.load`` file could be inspired from the following:
::
LOAD FIXED
FROM inline
(
a from 0 for 10,
b from 10 for 8,
c from 18 for 8,
d from 26 for 17 [null if blanks, trim right whitespace]
)
INTO postgresql:///pgloader
TARGET TABLE fixed
(
a, b,
c time using (time-with-no-separator c),
d
)
WITH truncate
SET work_mem to '14MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ drop table if exists fixed; $$,
$$ create table fixed (
a integer,
b date,
c time,
d text
);
$$;
01234567892008052011431250firstline
01234562008052115182300left blank-padded
12345678902008052208231560another line
2345609872014092914371500
2345678902014092914371520
Note that the example comes from the test suite of pgloader, where we use
the advanced feature ``FROM inline`` that allows embedding the source data
within the command file. In most cases a more classic FROM clause loading
the data from a separate file would be used.
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
Fixed File Format Source Specification: FROM
--------------------------------------------
Filename where to load the data from. Accepts an *ENCODING* option. Use the
`--list-encodings` option to know which encoding names are supported.
The filename may be enclosed by single quotes, and could be one of the
following special values:
- *inline*
The data is found after the end of the parsed commands. Any number
of empty lines between the end of the commands and the beginning of
the data is accepted.
- *stdin*
Reads the data from the standard input stream.
- *FILENAMES MATCHING*
The whole *matching* clause must follow the following rule::
[ ALL FILENAMES | [ FIRST ] FILENAME ]
MATCHING regexp
[ IN DIRECTORY '...' ]
The *matching* clause applies given *regular expression* (see above
for exact syntax, several options can be used here) to filenames.
It's then possible to load data from only the first match of all of
them.
The optional *IN DIRECTORY* clause allows specifying which directory
to walk for finding the data files, and can be either relative to
where the command file is read from, or absolute. The given
directory must exist.
Fields Specifications
---------------------
The *FROM* option also supports an optional comma separated list of *field*
names describing what is expected in the `FIXED` data file.
Each field name is composed of the field name followed with specific reader
options for that field. Supported per-field reader options are the
following, where only *start* and *length* are required.
- *start*
Position in the line where to start reading that field's value. Can
be entered with decimal digits or `0x` then hexadecimal digits.
- *length*
How many bytes to read from the *start* position to read that
field's value. Same format as *start*.
Those optional parameters must be enclosed in square brackets and
comma-separated:
- *terminated by*
See the description of *field terminated by* below.
The processing of this option is not currently implemented.
- *date format*
When the field is expected of the date type, then this option allows
to specify the date format used in the file.
Date format strings are template strings modeled against the
PostgreSQL `to_char` template strings support, limited to the
following patterns:
- YYYY, YYY, YY for the year part
- MM for the numeric month part
- DD for the numeric day part
- HH, HH12, HH24 for the hour part
- am, AM, a.m., A.M.
- pm, PM, p.m., P.M.
- MI for the minutes part
- SS for the seconds part
- MS for the milliseconds part (4 digits)
- US for the microseconds part (6 digits)
- unparsed punctuation signs: - . * # @ T / \ and space
Here's an example of a *date format* specification::
column-name [date format 'YYYY-MM-DD HH24-MI-SS.US']
- *null if*
This option takes an argument which is either the keyword *blanks*
or a double-quoted string.
When *blanks* is used and the field value that is read contains only
space characters, then it's automatically converted to an SQL `NULL`
value.
When a double-quoted string is used and that string is read as the
field value, then the field value is automatically converted to an
SQL `NULL` value.
- *trim both whitespace*, *trim left whitespace*, *trim right whitespace*
This option allows trimming whitespace in the read data, either from
both sides of the data, or only the whitespace characters found on
the left of the string, or only those on the right of the string.
Fixed File Format Loading Options: WITH
---------------------------------------
When loading from a `FIXED` file, the following options are supported:
- *truncate*
When this option is listed, pgloader issues a `TRUNCATE` command
against the PostgreSQL target table before reading the data file.
- *disable triggers*
When this option is listed, pgloader issues an `ALTER TABLE ...
DISABLE TRIGGER ALL` command against the PostgreSQL target table
before copying the data, then the command `ALTER TABLE ... ENABLE
TRIGGER ALL` once the `COPY` is done.
This option allows loading data into a pre-existing table ignoring
the *foreign key constraints* and user defined triggers and may
result in invalid *foreign key constraints* once the data is loaded.
Use with care.
- *skip header*
Takes a numeric value as argument. Instructs pgloader to skip that
many lines at the beginning of the input file.

View File

@ -1,83 +0,0 @@
IXF
===
This command instructs pgloader to load data from an IBM `IXF` file.
Using advanced options and a load command file
----------------------------------------------
The command then would be:
::
$ pgloader ixf.load
And the contents of the ``ixf.load`` file could be inspired from the following:
::
LOAD IXF
FROM data/nsitra.test1.ixf
INTO postgresql:///pgloader
TARGET TABLE nsitra.test1
WITH truncate, create table, timezone UTC
BEFORE LOAD DO
$$ create schema if not exists nsitra; $$,
$$ drop table if exists nsitra.test1; $$;
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
IXF Source Specification: FROM
------------------------------
Filename where to load the data from. This supports local files, HTTP URLs
and zip files containing a single ixf file of the same name. Fetching such a
zip file from an HTTP address is of course supported.
IXF Loading Options: WITH
-------------------------
When loading from a `IXF` file, the following options are supported:
- *truncate*
When this option is listed, pgloader issues a `TRUNCATE` command against
the PostgreSQL target table before reading the data file.
- *disable triggers*
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
TRIGGER ALL` command against the PostgreSQL target table before copying
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
`COPY` is done.
This option allows loading data into a pre-existing table ignoring the
*foreign key constraints* and user defined triggers and may result in
invalid *foreign key constraints* once the data is loaded. Use with
care.
- *create table*
When this option is listed, pgloader creates the table using the meta
data found in the `IXF` file, which must contain a list of fields with
their data type. A standard data type conversion from IXF to PostgreSQL
is done.
- *table name*
This option expects as its value the possibly qualified name of the
table to create.
- *timezone*
This option allows specifying which timezone is used when parsing
timestamps from an IXF file, and defaults to *UTC*. Expected values are
either `UTC`, `GMT` or a single quoted location name such as
`'Universal'` or `'Europe/Paris'`.

View File

@ -1,242 +0,0 @@
MS SQL to Postgres
==================
This command instructs pgloader to load data from a MS SQL database.
Automatic discovery of the schema is supported, including build of the
indexes, primary and foreign keys constraints.
Using default settings
----------------------
Here is the simplest command line example, which might be all you need:
::
$ pgloader mssql://user@mshost/dbname pgsql://pguser@pghost/dbname
Using advanced options and a load command file
----------------------------------------------
The command then would be:
::
$ pgloader ms.load
And the contents of the command file ``ms.load`` could be inspired from the
following:
::
load database
from mssql://user@host/dbname
into postgresql:///dbname
including only table names like 'GlobalAccount' in schema 'dbo'
set work_mem to '16MB', maintenance_work_mem to '512 MB'
before load do $$ drop schema if exists dbo cascade; $$;
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
MS SQL Database Source Specification: FROM
------------------------------------------
Connection string to an existing MS SQL database server that listens for and
welcomes external TCP/IP connections. As pgloader currently piggybacks on the
FreeTDS driver, to change the port of the server please export the `TDSPORT`
environment variable.
MS SQL Database Migration Options: WITH
---------------------------------------
When loading from a `MS SQL` database, the same options as when loading a
`MYSQL` database are supported. Please refer to the MYSQL section. The
following options are added:
- *create schemas*
When this option is listed, pgloader creates the same schemas as found
on the MS SQL instance. This is the default.
- *create no schemas*
When this option is listed, pgloader refrains from creating any schemas
at all, you must then ensure that the target schema do exist.
MS SQL Database Casting Rules
-----------------------------
CAST
^^^^
The cast clause allows specifying custom casting rules, either to overload
the default casting rules or to amend them with special cases.
Please refer to the MS SQL CAST clause for details.
MS SQL Views Support
--------------------
MS SQL views support allows pgloader to migrate views as if they were base
tables. This feature then allows for on-the-fly transformation from MS SQL
to PostgreSQL, as the view definition is used rather than the base data.
MATERIALIZE VIEWS
^^^^^^^^^^^^^^^^^
This clause allows you to implement custom data processing at the data
source by providing a *view definition* against which pgloader will query
the data. It's not possible to just allow for plain `SQL` because we want to
know a lot about the exact data types of each column involved in the query
output.
This clause expects a comma separated list of view definitions, each one
being either the name of an existing view in your database or the following
expression::
*name* `AS` `$$` *sql query* `$$`
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
the beginning of the data loading, and the resulting view will then be
dropped at the end of the data loading.
MATERIALIZE ALL VIEWS
^^^^^^^^^^^^^^^^^^^^^
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
returned by MS SQL rather than asking the user to specify the list.
MS SQL Partial Migration
------------------------
INCLUDING ONLY TABLE NAMES LIKE
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table name patterns used to limit the
tables to migrate to a sublist. More than one such clause may be used, they
will be accumulated together.
Example::
including only table names like 'GlobalAccount' in schema 'dbo'
EXCLUDING TABLE NAMES LIKE
^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table name patterns used to exclude
table names from the migration. This filter only applies to the result of
the *INCLUDING* filter.
::
excluding table names matching 'LocalAccount' in schema 'dbo'
MS SQL Schema Transformations
-----------------------------
ALTER SCHEMA '...' RENAME TO '...'
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Allows renaming a schema on the fly, so that for instance the tables
found in the schema 'dbo' in your source database will get migrated into the
schema 'public' in the target database with this command::
alter schema 'dbo' rename to 'public'
ALTER TABLE NAMES MATCHING ... IN SCHEMA '...'
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expressions*
that you want to target in the pgloader *ALTER TABLE* command. Available
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
IN SCHEMA 'dbo'
SET SCHEMA 'mv'
ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'dbo' RENAME TO 'films'
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET (fillfactor='40')
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET TABLESPACE 'tlbspc'
You can use as many such rules as you need. The list of tables to be
migrated is searched in pgloader memory against the *ALTER TABLE* matching
rules, and for each command pgloader stops at the first matching criteria
(regexp or string).
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
the level of the pgloader in-memory representation of your source database
schema. In case of a name change, the mapping is kept and reused in the
*foreign key* and *index* support.
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
command that pgloader will run when it has to create a table.
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
`CREATE TABLE` command that pgloader will run when it has to create a table.
The matching is done in pgloader itself, with a Common Lisp regular
expression lib, so doesn't depend on the *LIKE* implementation of MS SQL,
nor on the lack of support for regular expressions in the engine.
MS SQL Driver setup and encoding
--------------------------------
pgloader is using the `FreeTDS` driver, and internally expects the data to
be sent in utf-8. To achieve that, you can configure the FreeTDS driver with
those defaults, in the file `~/.freetds.conf`::
[global]
tds version = 7.4
client charset = UTF-8
Default MS SQL Casting Rules
----------------------------
When migrating from MS SQL the following Casting Rules are provided:
Numbers::
type tinyint to smallint
type float to float using float-to-string
type real to real using float-to-string
type double to double precision using float-to-string
type numeric to numeric using float-to-string
type decimal to numeric using float-to-string
type money to numeric using float-to-string
type smallmoney to numeric using float-to-string
Texts::
type char to text drop typemod
type nchar to text drop typemod
type varchar to text drop typemod
type nvarchar to text drop typemod
type xml to text drop typemod
Binary::
type binary to bytea using byte-vector-to-bytea
type varbinary to bytea using byte-vector-to-bytea
Date::
type datetime to timestamptz
type datetime2 to timestamptz
Others::
type bit to boolean
type hierarchyid to bytea
type geography to bytea
type uniqueidentifier to uuid using sql-server-uniqueidentifier-to-uuid

View File

@ -1,687 +0,0 @@
MySQL to Postgres
=================
This command instructs pgloader to load data from a database connection.
pgloader supports dynamically converting the schema of the source database
and the indexes building.
A default set of casting rules is provided and might be overloaded and
appended to by the command.
Using default settings
----------------------
Here is the simplest command line example, which might be all you need:
::
$ pgloader mysql://myuser@myhost/dbname pgsql://pguser@pghost/dbname
Using advanced options and a load command file
----------------------------------------------
It might be that you want more flexibility than that and want to set
advanced options. Then the next example is using as many options as
possible, some of them even being defaults. Chances are you don't need that
complex a setup, don't copy and paste it, use it only as a reference!
The command then would be:
::
$ pgloader my.load
And the contents of the command file ``my.load`` could be inspired from the
following:
::
LOAD DATABASE
FROM mysql://root@localhost/sakila
INTO postgresql://localhost:54393/sakila
WITH include drop, create tables, create indexes, reset sequences,
workers = 8, concurrency = 1,
multiple readers per thread, rows per range = 50000
SET PostgreSQL PARAMETERS
maintenance_work_mem to '128MB',
work_mem to '12MB',
search_path to 'sakila, public, "$user"'
SET MySQL PARAMETERS
net_read_timeout = '120',
net_write_timeout = '120'
CAST type bigint when (= precision 20) to bigserial drop typemod,
type date drop not null drop default using zero-dates-to-null,
-- type tinyint to boolean using tinyint-to-boolean,
type year to integer
MATERIALIZE VIEWS film_list, staff_list
-- INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor'
-- EXCLUDING TABLE NAMES MATCHING ~<ory>
-- DECODING TABLE NAMES MATCHING ~/messed/, ~/encoding/ AS utf8
-- ALTER TABLE NAMES MATCHING 'film' RENAME TO 'films'
-- ALTER TABLE NAMES MATCHING ~/_list$/ SET SCHEMA 'mv'
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
SET SCHEMA 'mv'
ALTER TABLE NAMES MATCHING 'film' RENAME TO 'films'
ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40')
ALTER SCHEMA 'sakila' RENAME TO 'pagila'
BEFORE LOAD DO
$$ create schema if not exists pagila; $$,
$$ create schema if not exists mv; $$,
$$ alter database sakila set search_path to pagila, mv, public; $$;
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
MySQL Database Source Specification: FROM
-----------------------------------------
Must be a connection URL pointing to a MySQL database.
If the connection URI contains a table name, then only this table is
migrated from MySQL to PostgreSQL.
See the `SOURCE CONNECTION STRING` section above for details on how to write
the connection string. The MySQL connection string accepts the same
parameter *sslmode* as the PostgreSQL connection string, but the *verify*
mode is not implemented (yet).
::
mysql://[user[:password]@][netloc][:port][/dbname][?option=value&...]
MySQL connection strings support specific options:
- ``useSSL``
The same notation rules as found in the *Connection String* parts of the
documentation apply, and we have a specific MySQL option: ``useSSL``.
The value for ``useSSL`` can be either ``false`` or ``true``.
If both ``sslmode`` and ``useSSL`` are used in the same connection
string, pgloader behavior is undefined.
The MySQL connection string also accepts the *useSSL* parameter with values
being either *false* or *true*.
Environment variables described in
<http://dev.mysql.com/doc/refman/5.0/en/environment-variables.html> can be
used as default values too. If the user is not provided, then it defaults to
`USER` environment variable value. The password can be provided with the
environment variable `MYSQL_PWD`. The host can be provided with the
environment variable `MYSQL_HOST` and otherwise defaults to `localhost`. The
port can be provided with the environment variable `MYSQL_TCP_PORT` and
otherwise defaults to `3306`.
MySQL Database Migration Options: WITH
--------------------------------------
When loading from a `MySQL` database, the following options are supported,
and the default *WITH* clause is: *no truncate*, *create
tables*, *include drop*, *create indexes*, *reset sequences*, *foreign
keys*, *downcase identifiers*, *uniquify index names*.
- *include drop*
When this option is listed, pgloader drops all the tables in the target
PostgreSQL database whose names appear in the MySQL database. This
option allows for using the same command several times in a row until
you figure out all the options, starting automatically from a clean
environment. Please note that `CASCADE` is used to ensure that tables
are dropped even if there are foreign keys pointing to them. This is
precisely what `include drop` is intended to do: drop all target tables
and recreate them.
Great care needs to be taken when using `include drop`, as it will
cascade to *all* objects referencing the target tables, possibly
including other tables that are not being loaded from the source DB.
- *include no drop*
When this option is listed, pgloader will not include any `DROP`
statement when loading the data.
- *truncate*
When this option is listed, pgloader issues the `TRUNCATE` command
against each PostgreSQL table just before loading data into it.
- *no truncate*
When this option is listed, pgloader issues no `TRUNCATE` command.
- *disable triggers*
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
TRIGGER ALL` command against the PostgreSQL target table before copying
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
`COPY` is done.
This option allows loading data into a pre-existing table ignoring the
*foreign key constraints* and user defined triggers and may result in
invalid *foreign key constraints* once the data is loaded. Use with
care.
- *create tables*
When this option is listed, pgloader creates the tables using the meta
data found in the `MySQL` database, which contains a list of columns with
their data type. A standard data type conversion from MySQL to PostgreSQL
is done.
- *create no tables*
When this option is listed, pgloader skips the creation of table before
loading data, target tables must then already exist.
Also, when using *create no tables* pgloader fetches the metadata from
the current target database and checks type casting, then will remove
constraints and indexes prior to loading the data and install them back
again once the loading is done.
- *create indexes*
When this option is listed, pgloader gets the definitions of all the
indexes found in the MySQL database and creates the same set of index
definitions against the PostgreSQL database.
- *create no indexes*
When this option is listed, pgloader skips creating indexes.
- *drop indexes*
When this option is listed, pgloader drops the indexes in the target
database before loading the data, and creates them again at the end
of the data copy.
- *uniquify index names*, *preserve index names*
MySQL index names are unique per-table whereas in PostgreSQL index names
have to be unique per-schema. The default for pgloader is to change the
index name by prefixing it with `idx_OID` where `OID` is the internal
numeric identifier of the table the index is built against.
In some cases, like when the DDL is entirely left to a framework, it
might be sensible for pgloader to refrain from handling index unique
names, that is achieved by using the *preserve index names* option.
The default is to *uniquify index names*.
Even when using the option *preserve index names*, MySQL primary key
indexes named "PRIMARY" will get their names uniquified. Failing to do
so would prevent the primary keys from being created again in PostgreSQL
where the index names must be unique per schema.
- *drop schema*
When this option is listed, pgloader drops the target schema in the
target PostgreSQL database before creating it again and all the objects
it contains. The default behavior doesn't drop the target schemas.
- *foreign keys*
When this option is listed, pgloader gets the definitions of all the
foreign keys found in the MySQL database and creates the same set of
foreign key definitions against the PostgreSQL database.
- *no foreign keys*
When this option is listed, pgloader skips creating foreign keys.
- *reset sequences*
When this option is listed, at the end of the data loading and after the
indexes have all been created, pgloader resets all the PostgreSQL
sequences created to the current maximum value of the column they are
attached to.
The options *schema only* and *data only* have no effects on this
option.
- *reset no sequences*
When this option is listed, pgloader skips resetting sequences after the
load.
The options *schema only* and *data only* have no effects on this
option.
- *downcase identifiers*
When this option is listed, pgloader converts all MySQL identifiers
(table names, index names, column names) to *downcase*, except for
PostgreSQL *reserved* keywords.
The PostgreSQL *reserved* keywords are determined dynamically by using
the system function `pg_get_keywords()`.
- *quote identifiers*
When this option is listed, pgloader quotes all MySQL identifiers so
that their case is respected. Note that you will then have to do the
same thing in your application code queries.
- *schema only*
When this option is listed pgloader refrains from migrating the data
over. Note that the schema in this context includes the indexes when the
option *create indexes* has been listed.
- *data only*
When this option is listed pgloader only issues the `COPY` statements,
without doing any other processing.
- *single reader per thread*, *multiple readers per thread*
The default is *single reader per thread* and it means that each
MySQL table is read by a single thread as a whole, with a single
`SELECT` statement using no `WHERE` clause.
When using *multiple readers per thread* pgloader may be able to
divide the reading work into several threads, as many as the
*concurrency* setting, which needs to be greater than 1 for this
option to be activated.
For each source table, pgloader searches for a primary key over a
single numeric column, or a multiple-column primary key index for
which the first column is of a numeric data type (one of `integer`
or `bigint`). When such an index exists, pgloader runs a query to
find the *min* and *max* values on this column, and then split that
range into many ranges containing a maximum of *rows per range*.
When the range list we then obtain contains at least as many ranges
as our concurrency setting, then we distribute those ranges to
each reader thread.
So when all the conditions are met, pgloader then starts as many
reader threads as the *concurrency* setting, and each reader thread
issues several queries with a `WHERE id >= x AND id < y`, where `y -
x = rows per range` or less (for the last range, depending on the
max value just obtained).
- *rows per range*
How many rows are fetched per `SELECT` query when using *multiple
readers per thread*, see above for details.
- *SET MySQL PARAMETERS*
The *SET MySQL PARAMETERS* allows setting MySQL parameters using the
MySQL `SET` command each time pgloader connects to it.
MySQL Database Casting Rules
----------------------------
The command *CAST* introduces user-defined casting rules.
The cast clause allows to specify custom casting rules, either to overload
the default casting rules or to amend them with special cases.
A casting rule is expected to follow one of the forms::
type <mysql-type-name> [ <guard> ... ] to <pgsql-type-name> [ <option> ... ]
column <table-name>.<column-name> [ <guards> ] to ...
It's possible for a *casting rule* to either match against a MySQL data type
or against a given *column name* in a given *table name*. That flexibility
allows to cope with cases where the type `tinyint` might have been used as a
`boolean` in some cases but as a `smallint` in others.
The *casting rules* are applied in order, the first match prevents the
following rules from being applied, and user defined rules are evaluated first.
The supported guards are:
- *when unsigned*
The casting rule is only applied against MySQL columns of the source
type that have the keyword *unsigned* in their data type definition.
Example of a casting rule using a *unsigned* guard::
type smallint when unsigned to integer drop typemod
- *when default 'value'*
The casting rule is only applied against MySQL columns of the source
type that have given *value*, which must be a single-quoted or a
double-quoted string.
- *when typemod expression*
The casting rule is only applied against MySQL columns of the source
type that have a *typemod* value matching the given *typemod
expression*. The *typemod* is separated into its *precision* and *scale*
components.
Example of a cast rule using a *typemod* guard::
type char when (= precision 1) to char keep typemod
This expression casts MySQL `char(1)` column to a PostgreSQL column of
type `char(1)` while allowing for the general case `char(N)` will be
converted by the default cast rule into a PostgreSQL type `varchar(N)`.
- *with extra auto_increment*
The casting rule is only applied against MySQL columns having the
*extra* column `auto_increment` option set, so that it's possible to
target e.g. `serial` rather than `integer`.
The default matching behavior, when this option isn't set, is to match
both columns with the extra definition and without.
This means that if you want to implement a casting rule that target
either `serial` or `integer` from a `smallint` definition depending on
the *auto_increment* extra bit of information from MySQL, then you need
to spell out two casting rules as following::
type smallint with extra auto_increment
to serial drop typemod keep default keep not null,
type smallint
to integer drop typemod keep default keep not null
The supported casting options are:
- *drop default*, *keep default*
When the option *drop default* is listed, pgloader drops any
existing default expression in the MySQL database for columns of the
source type from the `CREATE TABLE` statement it generates.
The spelling *keep default* explicitly prevents that behaviour and
can be used to overload the default casting rules.
- *drop not null*, *keep not null*, *set not null*
When the option *drop not null* is listed, pgloader drops any
existing `NOT NULL` constraint associated with the given source
MySQL datatype when it creates the tables in the PostgreSQL
database.
The spelling *keep not null* explicitly prevents that behaviour and
can be used to overload the default casting rules.
When the option *set not null* is listed, pgloader sets a `NOT NULL`
constraint on the target column regardless whether it has been set
in the source MySQL column.
- *drop typemod*, *keep typemod*
When the option *drop typemod* is listed, pgloader drops any
existing *typemod* definition (e.g. *precision* and *scale*) from
the datatype definition found in the MySQL columns of the source
type when it created the tables in the PostgreSQL database.
The spelling *keep typemod* explicitly prevents that behaviour and
can be used to overload the default casting rules.
- *using*
This option takes as its single argument the name of a function to
be found in the `pgloader.transforms` Common Lisp package. See above
for details.
It's possible to augment a default cast rule (such as one that
applies against `ENUM` data type for example) with a *transformation
function* by omitting entirely the `type` parts of the casting rule,
as in the following example::
column enumerate.foo using empty-string-to-null
MySQL Views Support
-------------------
MySQL views support allows pgloader to migrate views as if they were base
tables. This feature then allows for on-the-fly transformation from MySQL to
PostgreSQL, as the view definition is used rather than the base data.
MATERIALIZE VIEWS
^^^^^^^^^^^^^^^^^
This clause allows you to implement custom data processing at the data
source by providing a *view definition* against which pgloader will query
the data. It's not possible to just allow for plain `SQL` because we want to
know a lot about the exact data types of each column involved in the query
output.
This clause expects a comma separated list of view definitions, each one
being either the name of an existing view in your database or the following
expression::
*name* `AS` `$$` *sql query* `$$`
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
the beginning of the data loading, and the resulting view will then be
dropped at the end of the data loading.
MATERIALIZE ALL VIEWS
^^^^^^^^^^^^^^^^^^^^^
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
returned by MySQL rather than asking the user to specify the list.
MySQL Partial Migration
-----------------------
INCLUDING ONLY TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expression* used
to limit the tables to migrate to a sublist.
Example::
including only table names matching ~/film/, 'actor'
EXCLUDING TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expression* used
to exclude table names from the migration. This filter only applies to the
result of the *INCLUDING* filter.
::
excluding table names matching ~<ory>
MySQL Encoding Support
----------------------
DECODING TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expressions*
used to force the encoding to use when processing data from MySQL. If the
data encoding known to you is different from MySQL's idea about it, this is
the option to use.
::
decoding table names matching ~/messed/, ~/encoding/ AS utf8
You can use as many such rules as you need, all with possibly different
encodings.
MySQL Schema Transformations
----------------------------
ALTER TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expressions*
that you want to target in the pgloader *ALTER TABLE* command. Available
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
SET SCHEMA 'mv'
ALTER TABLE NAMES MATCHING 'film' RENAME TO 'films'
ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40')
ALTER TABLE NAMES MATCHING ~/./ SET TABLESPACE 'pg_default'
You can use as many such rules as you need. The list of tables to be
migrated is searched in pgloader memory against the *ALTER TABLE* matching
rules, and for each command pgloader stops at the first matching criteria
(regexp or string).
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
the level of the pgloader in-memory representation of your source database
schema. In case of a name change, the mapping is kept and reused in the
*foreign key* and *index* support.
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
command that pgloader will run when it has to create a table.
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
`CREATE TABLE` command that pgloader will run when it has to create a table.
MySQL Migration: limitations
----------------------------
The `database` command currently only supports MySQL source database and has
the following limitations:
- Views are not migrated,
Supporting views might require implementing a full SQL parser for the
MySQL dialect with a porting engine to rewrite the SQL against
PostgreSQL, including renaming functions and changing some constructs.
While it's not theoretically impossible, don't hold your breath.
- Triggers are not migrated
The difficulty of doing so is not yet assessed.
- Of the geometric datatypes, only the `POINT` datatype has been covered.
The other ones should be easy enough to implement now, it's just not
done yet.
Default MySQL Casting Rules
---------------------------
When migrating from MySQL the following Casting Rules are provided:
Numbers::
type int with extra auto_increment to serial when (< precision 10)
type int with extra auto_increment to bigserial when (<= 10 precision)
type int to int when (< precision 10)
type int to bigint when (<= 10 precision)
type tinyint with extra auto_increment to serial
type smallint with extra auto_increment to serial
type mediumint with extra auto_increment to serial
type bigint with extra auto_increment to bigserial
type tinyint to boolean when (= 1 precision) using tinyint-to-boolean
type bit when (= 1 precision) to boolean drop typemod using bits-to-boolean
type bit to bit drop typemod using bits-to-hex-bitstring
type bigint when signed to bigint drop typemod
type bigint when (< 19 precision) to numeric drop typemod
type tinyint when unsigned to smallint drop typemod
type smallint when unsigned to integer drop typemod
type mediumint when unsigned to integer drop typemod
type integer when unsigned to bigint drop typemod
type tinyint to smallint drop typemod
type smallint to smallint drop typemod
type mediumint to integer drop typemod
type integer to integer drop typemod
type bigint to bigint drop typemod
type float to float drop typemod
type double to double precision drop typemod
type numeric to numeric keep typemod
type decimal to decimal keep typemod
Texts::
type char to char keep typemod using remove-null-characters
type varchar to varchar keep typemod using remove-null-characters
type tinytext to text using remove-null-characters
type text to text using remove-null-characters
type mediumtext to text using remove-null-characters
type longtext to text using remove-null-characters
Binary::
type binary to bytea using byte-vector-to-bytea
type varbinary to bytea using byte-vector-to-bytea
type tinyblob to bytea using byte-vector-to-bytea
type blob to bytea using byte-vector-to-bytea
type mediumblob to bytea using byte-vector-to-bytea
type longblob to bytea using byte-vector-to-bytea
Date::
type datetime when default "0000-00-00 00:00:00" and not null
to timestamptz drop not null drop default
using zero-dates-to-null
type datetime when default "0000-00-00 00:00:00"
to timestamptz drop default
using zero-dates-to-null
type datetime with extra on update current timestamp when not null
to timestamptz drop not null drop default
using zero-dates-to-null
type datetime with extra on update current timestamp
to timestamptz drop default
using zero-dates-to-null
type timestamp when default "0000-00-00 00:00:00" and not null
to timestamptz drop not null drop default
using zero-dates-to-null
type timestamp when default "0000-00-00 00:00:00"
to timestamptz drop default
using zero-dates-to-null
type date when default "0000-00-00" to date drop default
using zero-dates-to-null
type date to date
type datetime to timestamptz
type timestamp to timestamptz
type year to integer drop typemod
Geometric::
type geometry to point using convert-mysql-point
type point to point using convert-mysql-point
type linestring to path using convert-mysql-linestring
Enum types are declared inline in MySQL and separately with a `CREATE TYPE`
command in PostgreSQL, so each column of Enum Type is converted to a type
named after the table and column names defined with the same labels in the
same order.
When the source type definition is not matched in the default casting rules
nor in the casting rules provided in the command, then the type name with
the typemod is used.

View File

@ -1,196 +0,0 @@
PostgreSQL to Citus
===================
This command instructs pgloader to load data from a database connection.
Automatic discovery of the schema is supported, including build of the
indexes, primary and foreign keys constraints. A default set of casting
rules are provided and might be overloaded and appended to by the command.
Automatic distribution column backfilling is supported, either from commands
that specify what is the distribution column in every table, or only in the
main table, then relying on foreign key constraints to discover the other
distribution keys.
Here's a short example of migrating a database from a PostgreSQL server to
another:
::
load database
from pgsql:///hackathon
into pgsql://localhost:9700/dim
with include drop, reset no sequences
cast column impressions.seen_at to "timestamp with time zone"
distribute companies using id
-- distribute campaigns using company_id
-- distribute ads using company_id from campaigns
-- distribute clicks using company_id from ads, campaigns
-- distribute impressions using company_id from ads, campaigns
;
Everything works exactly the same way as when doing a PostgreSQL to
PostgreSQL migration, with the added functionality of this new `distribute`
command.
Distribute Command
^^^^^^^^^^^^^^^^^^
The distribute command syntax is as following::
distribute <table name> using <column name>
distribute <table name> using <column name> from <table> [, <table>, ...]
distribute <table name> as reference table
When using the distribute command, the following steps are added to pgloader
operations when migrating the schema:
- if the distribution column does not exist in the table, it is added as
the first column of the table
- if the distribution column does not exist in the primary key of the
table, it is added as the first column of the primary key of the table
- all the foreign keys that point to the table are added the distribution
key automatically too, including the source tables of the foreign key
constraints
- once the schema has been created on the target database, pgloader then
issues Citus specific command `create_reference_table()
<http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-reference-table>`_
and `create_distributed_table()
<http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-distributed-table>`_
to make the tables distributed
Those operations are done in the schema section of pgloader, before the data
is loaded. When the data is loaded, the newly added columns need to be
backfilled from referenced data. pgloader knows how to do that by generating
a query like the following and importing the result set of such a query
rather than the raw data from the source table.
Citus Migration Example
^^^^^^^^^^^^^^^^^^^^^^^
With the migration command as above, pgloader adds the column ``company_id``
to the tables that have a direct or indirect foreign key reference to the
``companies`` table.
We run pgloader using the following command, where the file
`./test/citus/company.load
<https://github.com/dimitri/pgloader/blob/master/test/citus/company.load>`_
contains the pgloader command as shown above.
::
$ pgloader --client-min-messages sql ./test/citus/company.load
The following SQL statements are all extracted from the log messages that
the pgloader command outputs. We are going to have a look at the
`impressions` table. It gets created with a new column `company_id` in the
first position, as follows:
::
CREATE TABLE "public"."impressions"
(
company_id bigint,
"id" bigserial,
"ad_id" bigint default NULL,
"seen_at" timestamp with time zone default NULL,
"site_url" text default NULL,
"cost_per_impression_usd" numeric(20,10) default NULL,
"user_ip" inet default NULL,
"user_data" jsonb default NULL
);
The original schema for this table does not have the `company_id` column,
which means pgloader now needs to change the primary key definition, the
foreign keys constraints definitions from and to this table, and also to
*backfill* the `company_id` data to this table when doing the COPY phase of
the migration.
Then once the tables have been created, pgloader executes the following SQL
statements::
SELECT create_distributed_table('"public"."companies"', 'id');
SELECT create_distributed_table('"public"."campaigns"', 'company_id');
SELECT create_distributed_table('"public"."ads"', 'company_id');
SELECT create_distributed_table('"public"."clicks"', 'company_id');
SELECT create_distributed_table('"public"."impressions"', 'company_id');
Then when copying the data from the source PostgreSQL database to the new
Citus tables, the new column (here ``company_id``) needs to be backfilled
from the source tables. Here's the SQL query that pgloader uses as a data
source for the ``ads`` table in our example:
::
SELECT "campaigns".company_id::text, "ads".id::text, "ads".campaign_id::text,
"ads".name::text, "ads".image_url::text, "ads".target_url::text,
"ads".impressions_count::text, "ads".clicks_count::text,
"ads".created_at::text, "ads".updated_at::text
FROM "public"."ads"
JOIN "public"."campaigns"
ON ads.campaign_id = campaigns.id
The ``impressions`` table has an indirect foreign key reference to the
``company`` table, which is the table where the distribution key is
specified. pgloader will discover that itself from walking the PostgreSQL
catalogs, and you may also use the following specification in the pgloader
command to explicitly add the indirect dependency:
::
distribute impressions using company_id from ads, campaigns
Given this schema, the SQL query used by pgloader to fetch the data for the
`impressions` table is the following, implementing online backfilling of the
data:
::
SELECT "campaigns".company_id::text, "impressions".id::text,
"impressions".ad_id::text, "impressions".seen_at::text,
"impressions".site_url::text,
"impressions".cost_per_impression_usd::text,
"impressions".user_ip::text,
"impressions".user_data::text
FROM "public"."impressions"
JOIN "public"."ads"
ON impressions.ad_id = ads.id
JOIN "public"."campaigns"
ON ads.campaign_id = campaigns.id
When the data copying is done, then pgloader also has to install the indexes
supporting the primary keys, and add the foreign key definitions to the
schema. Those definitions are not the same as in the source schema, because
of the adding of the distribution column to the table: we need to also add
the column to the primary key and the foreign key constraints.
Here are the commands issued by pgloader for the ``impressions`` table:
::
CREATE UNIQUE INDEX "impressions_pkey"
ON "public"."impressions" (company_id, id);
ALTER TABLE "public"."impressions"
ADD CONSTRAINT "impressions_ad_id_fkey"
FOREIGN KEY(company_id,ad_id)
REFERENCES "public"."ads"(company_id,id)
Given a single line of specification ``distribute companies using id`` then
pgloader implements all the necessary schema changes on the fly when
migrating to Citus, and also dynamically backfills the data.
Citus Migration: Limitations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The way pgloader implements *reset sequence* does not work with Citus at
this point, so sequences need to be taken care of separately at this point.

View File

@ -1,71 +0,0 @@
Redshift to Postgres
====================
The command and behavior are the same as when migration from a PostgreSQL
database source, see :ref:`migrating_to_pgsql`. pgloader automatically
discovers that it's talking to a Redshift database by parsing the output of
the ``SELECT version()`` SQL query.
Redshift as a data source
^^^^^^^^^^^^^^^^^^^^^^^^^
Redshift is a variant of PostgreSQL version 8.0.2, which allows pgloader to
work with only a very small amount of adaptation in the catalog queries
used. In other words, migrating from Redshift to PostgreSQL works just the
same as when migrating from a PostgreSQL data source, including the
connection string specification.
Redshift as a data destination
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The Redshift variant of PostgreSQL 8.0.2 does not have support for the
``COPY FROM STDIN`` feature that pgloader normally relies upon. To use COPY
with Redshift, the data must first be made available in an S3 bucket.
First, pgloader must authenticate to Amazon S3. pgloader uses the following
setup for that:
- ``~/.aws/config``
This INI formatted file contains sections with your default region and
other global values relevant to using the S3 API. pgloader parses it to
get the region when it's setup in the ``default`` INI section.
The environment variable ``AWS_DEFAULT_REGION`` can be used to override
the configuration file value.
- ``~/.aws/credentials``
The INI formatted file contains your authentication setup to Amazon,
with the properties ``aws_access_key_id`` and ``aws_secret_access_key``
in the section ``default``. pgloader parses this file for those keys,
and uses their values when communicating with Amazon S3.
The environment variables ``AWS_ACCESS_KEY_ID`` and
``AWS_SECRET_ACCESS_KEY`` can be used to override the configuration file
values.
- ``AWS_S3_BUCKET_NAME``
Finally, the value of the environment variable ``AWS_S3_BUCKET_NAME`` is
used by pgloader as the name of the S3 bucket where to upload the files
to COPY to the Redshift database. The bucket name defaults to
``pgloader``.
Then pgloader works as usual, see the other sections of the documentation
for the details, depending on the data source (files, other databases, etc).
When preparing the data for PostgreSQL, pgloader now uploads each batch into
a single CSV file, and then issues a COPY command such as the following, for each batch:
::
COPY <target_table_name>
FROM 's3://<s3 bucket>/<s3-filename-just-uploaded>'
FORMAT CSV
TIMEFORMAT 'auto'
REGION '<aws-region>'
ACCESS_KEY_ID '<aws-access-key-id>'
SECRET_ACCESS_KEY '<aws-secret-access-key>'
This is the only difference with a PostgreSQL core version, where pgloader
can rely on the classic ``COPY FROM STDIN`` command, which allows to send
data through the already established connection to PostgreSQL.

View File

@ -1,441 +0,0 @@
.. _migrating_to_pgsql:
Postgres to Postgres
====================
This command instructs pgloader to load data from a database connection.
Automatic discovery of the schema is supported, including build of the
indexes, primary and foreign keys constraints. A default set of casting
rules are provided and might be overloaded and appended to by the command.
For a complete Postgres to Postgres solution including Change Data Capture
support with Logical Decoding, see `pgcopydb`__.
__ https://pgcopydb.readthedocs.io/
Using default settings
----------------------
Here is the simplest command line example, which might be all you need:
::
$ pgloader pgsql://user@source/dbname pgsql://user@target/dbname
Using advanced options and a load command file
----------------------------------------------
Here's a short example of migrating a database from a PostgreSQL server to
another. The command would then be:
::
$ pgloader pg.load
And the contents of the command file ``pg.load`` could be inspired from the
following:
::
load database
from pgsql://localhost/pgloader
into pgsql://localhost/copy
including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
including only table names matching ~/geolocations/ in schema 'public'
;
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
PostgreSQL Database Source Specification: FROM
----------------------------------------------
Must be a connection URL pointing to a PostgreSQL database.
See the `SOURCE CONNECTION STRING` section above for details on how to write
the connection string.
::
pgsql://[user[:password]@][netloc][:port][/dbname][?option=value&...]
PostgreSQL Database Migration Options: WITH
-------------------------------------------
When loading from a `PostgreSQL` database, the following options are
supported, and the default *WITH* clause is: *no truncate*, *create schema*,
*create tables*, *include drop*, *create indexes*, *reset sequences*,
*foreign keys*, *downcase identifiers*, *uniquify index names*, *reindex*.
- *include drop*
When this option is listed, pgloader drops all the tables in the target
PostgreSQL database whose names appear in the MySQL database. This
option allows for using the same command several times in a row until
you figure out all the options, starting automatically from a clean
environment. Please note that `CASCADE` is used to ensure that tables
are dropped even if there are foreign keys pointing to them. This is
precisely what `include drop` is intended to do: drop all target tables
and recreate them.
Great care needs to be taken when using `include drop`, as it will
cascade to *all* objects referencing the target tables, possibly
including other tables that are not being loaded from the source DB.
- *include no drop*
When this option is listed, pgloader will not include any `DROP`
statement when loading the data.
- *truncate*
When this option is listed, pgloader issues the `TRUNCATE` command
against each PostgreSQL table just before loading data into it.
- *no truncate*
When this option is listed, pgloader issues no `TRUNCATE` command.
- *disable triggers*
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
TRIGGER ALL` command against the PostgreSQL target table before copying
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
`COPY` is done.
This option allows loading data into a pre-existing table ignoring the
*foreign key constraints* and user defined triggers and may result in
invalid *foreign key constraints* once the data is loaded. Use with
care.
- *create tables*
When this option is listed, pgloader creates the tables using the meta
data found in the source `PostgreSQL` database, which provides a list of
fields with their data type. A standard data type conversion from the
source to the target PostgreSQL is done.
- *create no tables*
When this option is listed, pgloader skips the creation of table before
loading data, target tables must then already exist.
Also, when using *create no tables* pgloader fetches the metadata from
the current target database and checks type casting, then will remove
constraints and indexes prior to loading the data and install them back
again once the loading is done.
- *create indexes*
When this option is listed, pgloader gets the definitions of all the
indexes found in the MySQL database and create the same set of index
definitions against the PostgreSQL database.
- *create no indexes*
When this option is listed, pgloader skips creating indexes.
- *drop indexes*
When this option is listed, pgloader drops the indexes in the target
database before loading the data, and creates them again at the end
of the data copy.
- *reindex*
When this option is used, pgloader does both *drop indexes* before
loading the data and *create indexes* once data is loaded.
- *drop schema*
When this option is listed, pgloader drops the target schema in the
target PostgreSQL database before creating it again and all the objects
it contains. The default behavior doesn't drop the target schemas.
- *foreign keys*
When this option is listed, pgloader gets the definitions of all the
foreign keys found in the MySQL database and create the same set of
foreign key definitions against the PostgreSQL database.
- *no foreign keys*
When this option is listed, pgloader skips creating foreign keys.
- *reset sequences*
When this option is listed, at the end of the data loading and after the
indexes have all been created, pgloader resets all the PostgreSQL
sequences created to the current maximum value of the column they are
attached to.
The options *schema only* and *data only* have no effects on this
option.
- *reset no sequences*
When this option is listed, pgloader skips resetting sequences after the
load.
The options *schema only* and *data only* have no effects on this
option.
- *downcase identifiers*
When this option is listed, pgloader converts all MySQL identifiers
(table names, index names, column names) to *downcase*, except for
PostgreSQL *reserved* keywords.
The PostgreSQL *reserved* keywords are determined dynamically by using
the system function `pg_get_keywords()`.
- *quote identifiers*
When this option is listed, pgloader quotes all MySQL identifiers so
that their case is respected. Note that you will then have to do the
same thing in your application code queries.
- *schema only*
When this option is listed pgloader refrains from migrating the data
over. Note that the schema in this context includes the indexes when the
option *create indexes* has been listed.
- *data only*
When this option is listed pgloader only issues the `COPY` statements,
without doing any other processing.
- *rows per range*
How many rows are fetched per `SELECT` query when using *multiple
readers per thread*, see above for details.
PostgreSQL Database Casting Rules
---------------------------------
The command *CAST* introduces user-defined casting rules.
The cast clause allows to specify custom casting rules, either to overload
the default casting rules or to amend them with special cases.
A casting rule is expected to follow one of the forms::
type <type-name> [ <guard> ... ] to <pgsql-type-name> [ <option> ... ]
column <table-name>.<column-name> [ <guards> ] to ...
It's possible for a *casting rule* to either match against a PostgreSQL data
type or against a given *column name* in a given *table name*. So it's
possible to migrate a table from a PostgreSQL database while changing an
`int` column to a `bigint` one, automatically.
The *casting rules* are applied in order, the first match prevents the
following rules from being applied, and user defined rules are evaluated first.
The supported guards are:
- *when default 'value'*
The casting rule is only applied against MySQL columns of the source
type that have given *value*, which must be a single-quoted or a
double-quoted string.
- *when typemod expression*
The casting rule is only applied against MySQL columns of the source
type that have a *typemod* value matching the given *typemod
expression*. The *typemod* is separated into its *precision* and *scale*
components.
Example of a cast rule using a *typemod* guard::
type char when (= precision 1) to char keep typemod
This expression casts MySQL `char(1)` column to a PostgreSQL column of
type `char(1)` while allowing for the general case `char(N)` will be
converted by the default cast rule into a PostgreSQL type `varchar(N)`.
- *with extra auto_increment*
The casting rule is only applied against PostgreSQL columns attached to a
sequence. This can be the result of doing that manually, using a
`serial` or a `bigserial` data type, or an `identity` column.
The supported casting options are:
- *drop default*, *keep default*
When the option *drop default* is listed, pgloader drops any
existing default expression in the MySQL database for columns of the
source type from the `CREATE TABLE` statement it generates.
The spelling *keep default* explicitly prevents that behaviour and
can be used to overload the default casting rules.
- *drop not null*, *keep not null*, *set not null*
When the option *drop not null* is listed, pgloader drops any
existing `NOT NULL` constraint associated with the given source
MySQL datatype when it creates the tables in the PostgreSQL
database.
The spelling *keep not null* explicitly prevents that behaviour and
can be used to overload the default casting rules.
When the option *set not null* is listed, pgloader sets a `NOT NULL`
constraint on the target column regardless whether it has been set
in the source MySQL column.
- *drop typemod*, *keep typemod*
When the option *drop typemod* is listed, pgloader drops any
existing *typemod* definition (e.g. *precision* and *scale*) from
the datatype definition found in the MySQL columns of the source
type when it created the tables in the PostgreSQL database.
The spelling *keep typemod* explicitly prevents that behaviour and
can be used to overload the default casting rules.
- *using*
This option takes as its single argument the name of a function to
be found in the `pgloader.transforms` Common Lisp package. See above
for details.
It's possible to augment a default cast rule (such as one that
applies against `ENUM` data type for example) with a *transformation
function* by omitting entirely the `type` parts of the casting rule,
as in the following example::
column enumerate.foo using empty-string-to-null
PostgreSQL Views Support
------------------------
PostgreSQL views support allows pgloader to migrate view as if they were
base tables. This feature then allows for on-the-fly transformation of the
source schema, as the view definition is used rather than the base data.
MATERIALIZE VIEWS
^^^^^^^^^^^^^^^^^
This clause allows you to implement custom data processing at the data
source by providing a *view definition* against which pgloader will query
the data. It's not possible to just allow for plain `SQL` because we want to
know a lot about the exact data types of each column involved in the query
output.
This clause expects a comma separated list of view definitions, each one
being either the name of an existing view in your database or the following
expression::
*name* `AS` `$$` *sql query* `$$`
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
the beginning of the data loading, and the resulting view will then be
dropped at the end of the data loading.
MATERIALIZE ALL VIEWS
^^^^^^^^^^^^^^^^^^^^^
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
returned by PostgreSQL rather than asking the user to specify the list.
PostgreSQL Partial Migration
----------------------------
INCLUDING ONLY TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expression* used
to limit the tables to migrate to a sublist.
Example::
including only table names matching ~/film/, 'actor' in schema 'public'
EXCLUDING TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expression* used
to exclude table names from the migration. This filter only applies to the
result of the *INCLUDING* filter.
::
excluding table names matching ~<ory> in schema 'public'
PostgreSQL Schema Transformations
---------------------------------
ALTER TABLE NAMES MATCHING
^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table names or *regular expressions*
that you want to target in the pgloader *ALTER TABLE* command. Available
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
IN SCHEMA 'public'
SET SCHEMA 'mv'
ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'public' RENAME TO 'films'
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET (fillfactor='40')
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET TABLESPACE 'pg_default'
You can use as many such rules as you need. The list of tables to be
migrated is searched in pgloader memory against the *ALTER TABLE* matching
rules, and for each command pgloader stops at the first matching criteria
(regexp or string).
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
the level of the pgloader in-memory representation of your source database
schema. In case of a name change, the mapping is kept and reused in the
*foreign key* and *index* support.
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
command that pgloader will run when it has to create a table.
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
`CREATE TABLE` command that pgloader will run when it has to create a table.
PostgreSQL Migration: limitations
---------------------------------
The only PostgreSQL objects supported at this time in pgloader are
extensions, schema, tables, indexes and constraints. Anything else is ignored.
- Views are not migrated,
Supporting views might require implementing a full SQL parser for the
MySQL dialect with a porting engine to rewrite the SQL against
PostgreSQL, including renaming functions and changing some constructs.
While it's not theoretically impossible, don't hold your breath.
- Triggers are not migrated
The difficulty of doing so is not yet assessed.
- Stored Procedures and Functions are not migrated.
Default PostgreSQL Casting Rules
--------------------------------
When migrating from PostgreSQL the following Casting Rules are provided::
type int with extra auto_increment to serial
type bigint with extra auto_increment to bigserial
type "character varying" to text drop typemod

View File

@ -1,230 +0,0 @@
SQLite to Postgres
==================
This command instructs pgloader to load data from a SQLite file. Automatic
discovery of the schema is supported, including build of the indexes.
Using default settings
----------------------
Here is the simplest command line example, which might be all you need:
::
$ pgloader sqlite:///path/to/file.db pgsql://pguser@pghost/dbname
Using advanced options and a load command file
----------------------------------------------
The command then would be:
::
$ pgloader db.load
Here's an example of the ``db.load`` contents then::
load database
from sqlite:///Users/dim/Downloads/lastfm_tags.db
into postgresql:///tags
with include drop, create tables, create indexes, reset sequences
set work_mem to '16MB', maintenance_work_mem to '512 MB';
Common Clauses
--------------
Please refer to :ref:`common_clauses` for documentation about common
clauses.
SQLite Database Source Specification: FROM
------------------------------------------
Path or HTTP URL to a SQLite file, might be a `.zip` file.
SQLite Database Migration Options: WITH
---------------------------------------
When loading from a `SQLite` database, the following options are
supported, and the default *WITH* clause is: *no truncate*, *create
tables*, *include drop*, *create indexes*, *reset sequences*, *downcase
identifiers*, *encoding 'utf-8'*.
- *include drop*
When this option is listed, pgloader drops all the tables in the target
PostgreSQL database whose names appear in the SQLite database. This
option allows for using the same command several times in a row until
you figure out all the options, starting automatically from a clean
environment. Please note that `CASCADE` is used to ensure that tables
are dropped even if there are foreign keys pointing to them. This is
precisely what `include drop` is intended to do: drop all target tables
and recreate them.
Great care needs to be taken when using `include drop`, as it will
cascade to *all* objects referencing the target tables, possibly
including other tables that are not being loaded from the source DB.
- *include no drop*
When this option is listed, pgloader will not include any `DROP`
statement when loading the data.
- *truncate*
When this option is listed, pgloader issues the `TRUNCATE` command
against each PostgreSQL table just before loading data into it.
- *no truncate*
When this option is listed, pgloader issues no `TRUNCATE` command.
- *disable triggers*
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
TRIGGER ALL` command against the PostgreSQL target table before copying
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
`COPY` is done.
This option allows loading data into a pre-existing table ignoring
the *foreign key constraints* and user defined triggers and may
result in invalid *foreign key constraints* once the data is loaded.
Use with care.
- *create tables*
When this option is listed, pgloader creates the table using the meta
data found in the `SQLite` file, which must contain a list of fields
with their data type. A standard data type conversion from SQLite to
PostgreSQL is done.
- *create no tables*
When this option is listed, pgloader skips the creation of table before
loading data, target tables must then already exist.
Also, when using *create no tables* pgloader fetches the metadata
from the current target database and checks type casting, then will
remove constraints and indexes prior to loading the data and install
them back again once the loading is done.
- *create indexes*
When this option is listed, pgloader gets the definitions of all the
indexes found in the SQLite database and create the same set of index
definitions against the PostgreSQL database.
- *create no indexes*
When this option is listed, pgloader skips creating indexes.
- *drop indexes*
When this option is listed, pgloader drops the indexes in the target
database before loading the data, and creates them again at the end
of the data copy.
- *reset sequences*
When this option is listed, at the end of the data loading and after
the indexes have all been created, pgloader resets all the
PostgreSQL sequences created to the current maximum value of the
column they are attached to.
- *reset no sequences*
When this option is listed, pgloader skips resetting sequences after the
load.
The options *schema only* and *data only* have no effects on this
option.
- *schema only*
When this option is listed pgloader will refrain from migrating the data
over. Note that the schema in this context includes the indexes when the
option *create indexes* has been listed.
- *data only*
When this option is listed pgloader only issues the `COPY` statements,
without doing any other processing.
- *encoding*
This option allows to control which encoding to parse the SQLite text
data with. Defaults to UTF-8.
SQLite Database Casting Rules
-----------------------------
The command *CAST* introduces user-defined casting rules.
The cast clause allows to specify custom casting rules, either to overload
the default casting rules or to amend them with special cases.
SQlite Database Partial Migrations
----------------------------------
INCLUDING ONLY TABLE NAMES LIKE
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table name patterns used to limit the
tables to migrate to a sublist.
Example::
including only table names like 'Invoice%'
EXCLUDING TABLE NAMES LIKE
^^^^^^^^^^^^^^^^^^^^^^^^^^
Introduce a comma separated list of table name patterns used to exclude
table names from the migration. This filter only applies to the result of
the *INCLUDING* filter.
::
excluding table names like 'appointments'
Default SQLite Casting Rules
----------------------------
When migrating from SQLite the following Casting Rules are provided:
Numbers::
type tinyint to smallint using integer-to-string
type integer to bigint using integer-to-string
type float to float using float-to-string
type real to real using float-to-string
type double to double precision using float-to-string
type numeric to numeric using float-to-string
type decimal to numeric using float-to-string
Texts::
type character to text drop typemod
type varchar to text drop typemod
type nvarchar to text drop typemod
type char to text drop typemod
type nchar to text drop typemod
type nvarchar to text drop typemod
type clob to text drop typemod
Binary::
type blob to bytea
Date::
type datetime to timestamptz using sqlite-timestamp-to-timestamp
type timestamp to timestamptz using sqlite-timestamp-to-timestamp
type timestamptz to timestamptz using sqlite-timestamp-to-timestamp

View File

@ -1,142 +0,0 @@
Transformation Functions
========================
Some data types are implemented in a different enough way that a
transformation function is necessary. This function must be written in
`Common lisp` and is searched in the `pgloader.transforms` package.
Some default transformation function are provided with pgloader, and you can
use the `--load` command line option to load and compile your own lisp file
into pgloader at runtime. For your functions to be found, remember to begin
your lisp file with the following form::
(in-package #:pgloader.transforms)
The provided transformation functions are:
- *zero-dates-to-null*
When the input date is all zeroes, return `nil`, which gets loaded as a
PostgreSQL `NULL` value.
- *date-with-no-separator*
Applies *zero-dates-to-null* then transform the given date into a format
that PostgreSQL will actually process::
In: "20041002152952"
Out: "2004-10-02 15:29:52"
- *time-with-no-separator*
Transform the given time into a format that PostgreSQL will actually
process::
In: "08231560"
Out: "08:23:15.60"
- *tinyint-to-boolean*
As MySQL lacks a proper boolean type, *tinyint* is often used to
implement that. This function transforms `0` to `'false'` and anything
else to `'true`'.
- *bits-to-boolean*
As MySQL lacks a proper boolean type, *BIT* is often used to implement
that. This function transforms 1-bit bit vectors from `0` to `f` and any
other value to `t`.
- *int-to-ip*
Convert an integer into a dotted representation of an ip4. ::
In: 18435761
Out: "1.25.78.177"
- *ip-range*
Converts a couple of integers given as strings into a range of ip4. ::
In: "16825344" "16825599"
Out: "1.0.188.0-1.0.188.255"
- *convert-mysql-point*
Converts from the `astext` representation of points in MySQL to the
PostgreSQL representation. ::
In: "POINT(48.5513589 7.6926827)"
Out: "(48.5513589,7.6926827)"
- *integer-to-string*
Converts an integer string or a Common Lisp integer into a string
suitable for a PostgreSQL integer. Takes care of quoted integers. ::
In: "\"0\""
Out: "0"
- *float-to-string*
Converts a Common Lisp float into a string suitable for a PostgreSQL float::
In: 100.0d0
Out: "100.0"
- *hex-to-dec*
Converts a string containing an hexadecimal representation of a number
into its decimal representation::
In: "deadbeef"
Out: "3735928559"
- *set-to-enum-array*
Converts a string representing a MySQL SET into a PostgreSQL Array of
Enum values from the set. ::
In: "foo,bar"
Out: "{foo,bar}"
- *empty-string-to-null*
Convert an empty string to a null.
- *right-trim*
Remove whitespace at end of string.
- *remove-null-characters*
Remove `NUL` characters (`0x0`) from given strings.
- *byte-vector-to-bytea*
Transform a simple array of unsigned bytes to the PostgreSQL bytea Hex
Format representation as documented at
http://www.postgresql.org/docs/9.3/interactive/datatype-binary.html
- *sqlite-timestamp-to-timestamp*
SQLite type system is quite interesting, so cope with it here to produce
timestamp literals as expected by PostgreSQL. That covers year only on 4
digits, 0 dates to null, and proper date strings.
- *sql-server-uniqueidentifier-to-uuid*
The SQL Server driver receives data of type uniqueidentifier as a byte
vector that we then need to convert to a UUID string for PostgreSQL
COPY input format to process.
- *unix-timestamp-to-timestamptz*
Converts a unix timestamp (number of seconds elapsed since beginning of
1970) into a proper PostgreSQL timestamp format.
- *varbinary-to-string*
Converts binary encoded string (such as a MySQL `varbinary` entry) to a
decoded text, using the table's encoding that may be overloaded with the
*DECODING TABLE NAMES MATCHING* clause.

View File

@ -1,4 +0,0 @@
Sphinx==4.2.0
sphinx_rtd_theme==1.0.0
docutils==0.16
readthedocs-sphinx-search==0.1.0

View File

@ -1,131 +0,0 @@
Loading SQLite files with pgloader
----------------------------------
The SQLite database is a respected solution to manage your data with. Its
embedded nature makes it a source of migrations when a project now needs to
handle more concurrency, which `PostgreSQL`__ is very good at. pgloader can help
you there.
__ http://www.postgresql.org/
In a Single Command Line
^^^^^^^^^^^^^^^^^^^^^^^^
You can ::
$ createdb chinook
$ pgloader https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite_AutoIncrementPKs.sqlite pgsql:///chinook
Done! All with the schema, data, constraints, primary keys and foreign keys,
etc. We also see an error with the Chinook schema that contains several
primary key definitions against the same table, which is not accepted by
PostgreSQL::
2017-06-20T16:18:59.019000+02:00 LOG Data errors in '/private/tmp/pgloader/'
2017-06-20T16:18:59.236000+02:00 LOG Fetching 'https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite_AutoIncrementPKs.sqlite'
2017-06-20T16:19:00.664000+02:00 ERROR Database error 42P16: multiple primary keys for table "playlisttrack" are not allowed
QUERY: ALTER TABLE playlisttrack ADD PRIMARY KEY USING INDEX idx_66873_sqlite_autoindex_playlisttrack_1;
2017-06-20T16:19:00.665000+02:00 LOG report summary reset
table name read imported errors total time
----------------------- --------- --------- --------- --------------
fetch 0 0 0 0.877s
fetch meta data 33 33 0 0.033s
Create Schemas 0 0 0 0.003s
Create SQL Types 0 0 0 0.006s
Create tables 22 22 0 0.043s
Set Table OIDs 11 11 0 0.012s
----------------------- --------- --------- --------- --------------
album 347 347 0 0.023s
artist 275 275 0 0.023s
customer 59 59 0 0.021s
employee 8 8 0 0.018s
invoice 412 412 0 0.031s
genre 25 25 0 0.021s
invoiceline 2240 2240 0 0.034s
mediatype 5 5 0 0.025s
playlisttrack 8715 8715 0 0.040s
playlist 18 18 0 0.016s
track 3503 3503 0 0.111s
----------------------- --------- --------- --------- --------------
COPY Threads Completion 33 33 0 0.313s
Create Indexes 22 22 0 0.160s
Index Build Completion 22 22 0 0.027s
Reset Sequences 0 0 0 0.017s
Primary Keys 12 0 1 0.013s
Create Foreign Keys 11 11 0 0.040s
Create Triggers 0 0 0 0.000s
Install Comments 0 0 0 0.000s
----------------------- --------- --------- --------- --------------
Total import time 15607 15607 0 1.669s
You may need to have special cases to take care of, though. In advanced
cases you can use the pgloader command.
The Command
^^^^^^^^^^^
To load data with pgloader you need to define in a *command* the operations in
some details. Here's our command::
load database
from 'sqlite/Chinook_Sqlite_AutoIncrementPKs.sqlite'
into postgresql:///pgloader
with include drop, create tables, create indexes, reset sequences
set work_mem to '16MB', maintenance_work_mem to '512 MB';
Note that here pgloader will benefit from the meta-data information found in
the SQLite file to create a PostgreSQL database capable of hosting the data
as described, then load the data.
Loading the data
^^^^^^^^^^^^^^^^
Let's start the `pgloader` command with our `sqlite.load` command file::
$ pgloader sqlite.load
... LOG Starting pgloader, log system is ready.
... LOG Parsing commands from file "/Users/dim/dev/pgloader/test/sqlite.load"
... WARNING Postgres warning: table "album" does not exist, skipping
... WARNING Postgres warning: table "artist" does not exist, skipping
... WARNING Postgres warning: table "customer" does not exist, skipping
... WARNING Postgres warning: table "employee" does not exist, skipping
... WARNING Postgres warning: table "genre" does not exist, skipping
... WARNING Postgres warning: table "invoice" does not exist, skipping
... WARNING Postgres warning: table "invoiceline" does not exist, skipping
... WARNING Postgres warning: table "mediatype" does not exist, skipping
... WARNING Postgres warning: table "playlist" does not exist, skipping
... WARNING Postgres warning: table "playlisttrack" does not exist, skipping
... WARNING Postgres warning: table "track" does not exist, skipping
table name read imported errors time
---------------------- --------- --------- --------- --------------
create, truncate 0 0 0 0.052s
Album 347 347 0 0.070s
Artist 275 275 0 0.014s
Customer 59 59 0 0.014s
Employee 8 8 0 0.012s
Genre 25 25 0 0.018s
Invoice 412 412 0 0.032s
InvoiceLine 2240 2240 0 0.077s
MediaType 5 5 0 0.012s
Playlist 18 18 0 0.008s
PlaylistTrack 8715 8715 0 0.071s
Track 3503 3503 0 0.105s
index build completion 0 0 0 0.000s
---------------------- --------- --------- --------- --------------
Create Indexes 20 20 0 0.279s
reset sequences 0 0 0 0.043s
---------------------- --------- --------- --------- --------------
Total streaming time 15607 15607 0 0.476s
We can see that `pgloader <http://pgloader.io>`_ did download the file from
its HTTP URL location then *unzipped* it before loading it.
Also, the *WARNING* messages we see here are expected as the PostgreSQL
database is empty when running the command, and pgloader is using the SQL
commands `DROP TABLE IF EXISTS` when the given command uses the `include
drop` option.
Note that the output of the command has been edited to facilitate its
browsing online.

View File

@ -1,9 +0,0 @@
Pgloader Tutorial
=================
.. include:: csv.rst
.. include:: fixed.rst
.. include:: geolite.rst
.. include:: dBase.rst
.. include:: sqlite.rst
.. include:: mysql.rst

2765
pgloader.1 Normal file

File diff suppressed because it is too large Load Diff

2107
pgloader.1.md Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,287 +1,197 @@
;;;; pgloader.asd ;;;; pgloader.asd
(asdf:defsystem #:pgloader (asdf:defsystem #:pgloader
:serial t :serial t
:description "Load data into PostgreSQL" :description "Load data into PostgreSQL"
:author "Dimitri Fontaine <dim@tapoueh.org>" :author "Dimitri Fontaine <dimitri@2ndQuadrant.fr>"
:license "The PostgreSQL Licence" :license "The PostgreSQL Licence"
:depends-on (#:uiop ; host system integration :depends-on (#:uiop ; host system integration
#:cl-log ; logging #:cl-log ; logging
#:postmodern ; PostgreSQL protocol implementation #:postmodern ; PostgreSQL protocol implementation
#:cl-postgres ; low level bits for COPY streaming #:cl-postgres ; low level bits for COPY streaming
#:simple-date ; FIXME: recheck dependency #:simple-date ; FIXME: recheck dependency
#:qmynd ; MySQL protocol implemenation #:qmynd ; MySQL protocol implemenation
#:split-sequence ; some parsing is made easy #:split-sequence ; some parsing is made easy
#:cl-csv ; full CSV reader #:cl-csv ; full CSV reader
#:cl-fad ; file and directories #:cl-fad ; file and directories
#:lparallel ; threads, workers, queues #:lparallel ; threads, workers, queues
#:esrap ; parser generator #:esrap ; parser generator
#:alexandria ; utils #:alexandria ; utils
#:drakma ; http client, download archives #:drakma ; http client, download archives
#:flexi-streams ; streams #:flexi-streams ; streams
#:usocket ; UDP / syslog #:usocket ; UDP / syslog
#:local-time ; UDP date parsing #:local-time ; UDP date parsing
#:command-line-arguments ; for the main function #:command-line-arguments ; for the main function
#:db3 ; DBF version 3 file reader #:abnf ; ABNF parser generator (for syslog)
#:ixf ; IBM IXF file format reader #:db3 ; DBF version 3 file reader
#:py-configparser ; Read old-style INI config files #:ixf ; IBM IXF file format reader
#:sqlite ; Query a SQLite file #:py-configparser ; Read old-style INI config files
#:cl-base64 ; Decode base64 data #:sqlite ; Query a SQLite file
#:trivial-backtrace ; For --debug cli usage #:cl-base64 ; Decode base64 data
#:cl-markdown ; To produce the website #:trivial-backtrace ; For --debug cli usage
#:metabang-bind ; the bind macro #:cl-markdown ; To produce the website
#:mssql ; M$ SQL connectivity #:metabang-bind ; the bind macro
#:uuid ; Transforming MS SQL unique identifiers #:mssql ; M$ SQL connectivity
#:quri ; decode URI parameters #:uuid ; Transforming MS SQL unique identifiers
#:cl-ppcre ; Perl Compatible Regular Expressions )
#:cl-mustache ; Logic-less templates :components
#:yason ; JSON routines ((:module "src"
#:closer-mop ; introspection :components
#:zs3 ; integration with AWS S3 for Redshift ((:file "params")
) (:file "package" :depends-on ("params"))
:components (:file "queue" :depends-on ("params" "package"))
((:module "src"
:components
((:file "params")
(:file "package" :depends-on ("params"))
(:module "monkey" (:module "monkey"
:components :components
((:file "bind") ((:file "bind")
(:file "mssql"))) (:file "mssql")))
(:module "utils" (:module "utils"
:depends-on ("package" "params") :depends-on ("package" "params")
:components :components
((:file "charsets") ((:file "charsets")
(:file "logs") (:file "threads")
(:file "utils") (:file "logs")
(:file "state") (:file "monitor" :depends-on ("logs"))
(:file "state")
(:file "report" :depends-on ("state"))
(:file "utils" :depends-on ("charsets" "monitor"))
(:file "archive" :depends-on ("logs"))
;; user defined transforms package and pgloader ;; those are one-package-per-file
;; provided ones (:file "transforms")
(:file "transforms") (:file "read-sql-files")))
;; PostgreSQL related utils ;; generic connection api
(:file "read-sql-files") (:file "connection" :depends-on ("utils"))
(:file "queries")
(:file "quoting" :depends-on ("utils"))
(:file "catalog" :depends-on ("quoting"))
(:file "alter-table" :depends-on ("catalog"))
(:file "citus" :depends-on ("catalog"))
;; State, monitoring, reporting ;; package pgloader.pgsql
(:file "reject" :depends-on ("state")) (:module pgsql
(:file "pretty-print-state" :depends-on ("state")) :depends-on ("package" "params" "utils" "connection")
(:file "report" :depends-on ("state" :components
"pretty-print-state" ((:file "copy-format")
"utils" (:file "queries")
"catalog")) (:file "schema")
(:file "monitor" :depends-on ("logs" (:file "pgsql"
"state" :depends-on ("copy-format"
"reject" "queries"
"report")) "schema"))))
(:file "threads" :depends-on ("monitor"))
(:file "archive" :depends-on ("monitor"))
;; generic connection api (:module "parsers"
(:file "connection" :depends-on ("monitor" :depends-on ("params" "package" "utils"
"archive")))) "pgsql" "monkey" "connection")
:serial t
:components
((:file "parse-ini")
(:file "command-utils")
(:file "command-keywords")
(:file "command-regexp")
(:file "command-db-uri")
(:file "command-source")
(:file "command-options")
(:file "command-sql-block")
(:file "command-csv")
(:file "command-ixf")
(:file "command-fixed")
(:file "command-copy")
(:file "command-dbf")
(:file "command-cast-rules")
(:file "command-mysql")
(:file "command-mssql")
(:file "command-sqlite")
(:file "command-archive")
(:file "command-parser")
(:file "date-format")))
;; package pgloader.pgsql ;; Source format specific implementations
(:module pgsql (:module sources
:depends-on ("package" "params" "utils") :depends-on ("monkey" ; mssql driver patches
:serial t "params"
:components "package"
((:file "connection") "connection"
(:file "pgsql-ddl") "pgsql"
(:file "pgsql-ddl-citus") "utils"
(:file "pgsql-schema") "parsers"
(:file "merge-catalogs" :depends-on ("pgsql-schema")) "queue")
(:file "pgsql-trigger") :components
(:file "pgsql-index-filter") ((:module "common"
(:file "pgsql-finalize-catalogs") :components
(:file "pgsql-create-schema" ((:file "api")
:depends-on ("pgsql-trigger")))) (:file "casting-rules")
(:file "files-and-pathnames")
(:file "project-fields")))
;; Source format specific implementations (:module "csv"
(:module sources :depends-on ("common")
:depends-on ("monkey" ; mssql driver patches :components
"params" ((:file "csv-guess")
"package" (:file "csv-database")
"pgsql" (:file "csv")))
"utils")
:components
((:module "common"
:serial t
:components
((:file "api")
(:file "methods")
(:file "md-methods")
(:file "matviews")
(:file "casting-rules")
(:file "files-and-pathnames")
(:file "project-fields")))
(:module "csv" (:file "fixed"
:depends-on ("common") :depends-on ("common" "csv"))
:components
((:file "csv-guess")
;; (:file "csv-database")
(:file "csv")))
(:module "fixed" (:file "copy"
:depends-on ("common") :depends-on ("common" "csv"))
:serial t
:components
((:file "fixed-guess")
(:file "fixed")))
(:file "copy" (:module "db3"
:depends-on ("common" "csv")) :depends-on ("common")
:components
((:file "db3-schema")
(:file "db3" :depends-on ("db3-schema"))))
(:module "db3" (:module "ixf"
:serial t :depends-on ("common")
:depends-on ("common" "csv") :components
:components ((:file "ixf-schema")
((:file "db3-cast-rules") (:file "ixf" :depends-on ("ixf-schema"))))
(:file "db3-connection")
(:file "db3-schema")
(:file "db3")))
(:module "ixf" ;(:file "syslog") ; experimental...
:serial t
:depends-on ("common")
:components
((:file "ixf-cast-rules")
(:file "ixf-connection")
(:file "ixf-schema")
(:file "ixf" :depends-on ("ixf-schema"))))
;(:file "syslog") ; experimental... (:module "sqlite"
:depends-on ("common")
:components
((:file "sqlite-cast-rules")
(:file "sqlite-schema"
:depends-on ("sqlite-cast-rules"))
(:file "sqlite"
:depends-on ("sqlite-cast-rules"
"sqlite-schema"))))
(:module "sqlite" (:module "mssql"
:serial t :depends-on ("common")
:depends-on ("common") :components
:components ((:file "mssql-cast-rules")
((:file "sqlite-cast-rules") (:file "mssql-schema"
(:file "sqlite-connection") :depends-on ("mssql-cast-rules"))
(:file "sqlite-schema") (:file "mssql"
(:file "sqlite"))) :depends-on ("mssql-cast-rules"
"mssql-schema"))))
(:module "mssql" (:module "mysql"
:serial t :depends-on ("common")
:depends-on ("common") :components
:components ((:file "mysql-cast-rules")
((:file "mssql-cast-rules") (:file "mysql-schema"
(:file "mssql-connection") :depends-on ("mysql-cast-rules"))
(:file "mssql-schema") (:file "mysql-csv"
(:file "mssql") :depends-on ("mysql-schema"))
(:file "mssql-index-filters"))) (:file "mysql"
:depends-on ("mysql-cast-rules"
"mysql-schema"))))))
(:module "mysql" ;; the main entry file, used when building a stand-alone
:serial t ;; executable image
:depends-on ("common") (:file "main" :depends-on ("params"
:components "package"
((:file "mysql-cast-rules") "utils"
(:file "mysql-connection") "parsers"
(:file "mysql-schema") "sources"))))
(:file "mysql")))
(:module "pgsql" ;; to produce the website
:serial t (:module "web"
:depends-on ("common") :components
:components ((:file "pgsql-cast-rules") ((:module src
(:file "pgsql"))))) :components
((:file "docs")))))))
;; package pgloader.copy
(:module "pg-copy"
:depends-on ("params"
"package"
"utils"
"pgsql"
"sources")
:serial t
:components
((:file "copy-batch")
(:file "copy-format")
(:file "copy-db-write")
(:file "copy-rows-in-stream")
(:file "copy-rows-in-batch")
(:file "copy-rows-in-batch-through-s3")
(:file "copy-retry-batch")
(:file "copy-from-queue")))
(:module "load"
:depends-on ("params"
"package"
"utils"
"pgsql"
"sources")
:serial t
:components
((:file "api")
(:file "copy-data")
(:file "load-file")
(:file "migrate-database")))
(:module "parsers"
:depends-on ("params"
"package"
"utils"
"pgsql"
"sources"
"monkey")
:serial t
:components
((:file "parse-ini")
(:file "template")
(:file "command-utils")
(:file "command-keywords")
(:file "command-regexp")
(:file "parse-pgpass")
(:file "command-db-uri")
(:file "command-source")
(:file "command-options")
(:file "command-sql-block")
(:file "command-sexp")
(:file "command-csv")
(:file "command-ixf")
(:file "command-fixed")
(:file "command-copy")
(:file "command-dbf")
(:file "command-cast-rules")
(:file "command-materialize-views")
(:file "command-alter-table")
(:file "command-distribute")
(:file "command-mysql")
(:file "command-including-like")
(:file "command-mssql")
(:file "command-sqlite")
(:file "command-pgsql")
(:file "command-archive")
(:file "command-parser")
(:file "parse-sqlite-type-name")
(:file "date-format")))
;; the main entry file, used when building a stand-alone
;; executable image
(:file "api" :depends-on ("params"
"package"
"utils"
"parsers"
"sources"))
(:module "regress"
:depends-on ("params" "package" "utils" "pgsql" "api")
:components ((:file "regress")))
(:file "main" :depends-on ("params"
"package"
"utils"
"parsers"
"sources"
"api"
"regress"))))))

24
pgloader.lisp Executable file
View File

@ -0,0 +1,24 @@
#!/bin/sh
#|
exec sbcl --script "$0" $@
|#
;;; Self-executing pgloader launcher.  The file is simultaneously a
;;; /bin/sh script -- which immediately re-executes itself through
;;; "sbcl --script" -- and a Common Lisp program: the shell lines above
;;; are hidden from the Lisp reader inside the #| ... |# block comment.
;;;
;;; load the necessary components then parse the command line
;;; and launch the work
#-quicklisp
(let ((quicklisp-init (merge-pathnames "quicklisp/setup.lisp"
                                       (user-homedir-pathname))))
  (when (probe-file quicklisp-init)
    (load quicklisp-init)))
;; now is the time to load our Quicklisp project; the quickload chatter
;; is discarded by capturing *standard-output* into an unused string.
(format t "Loading quicklisp and the pgloader project and its dependencies...")
(terpri)
(with-output-to-string (*standard-output*)
  (ql:quickload '(:pgloader)))
(in-package #:pgloader)
;;; actually call the main function, too -- hand over the raw POSIX
;;; argument vector (argv[0] included), as main expects it.
(main SB-EXT:*POSIX-ARGV*)

View File

@ -1,22 +1,11 @@
Summary: extract, transform and load data into PostgreSQL Summary: extract, transform and load data into PostgreSQL
Name: pgloader Name: pgloader
Version: 3.6.10 Version: 3.2.0
Release: 22%{?dist} Release: 22%{?dist}
License: The PostgreSQL Licence License: The PostgreSQL Licence
Group: System Environment/Base Group: System Environment/Base
Source: %{name}-%{version}.tar.gz
URL: https://github.com/dimitri/pgloader URL: https://github.com/dimitri/pgloader
Source0: %{url}/archive/v%{version}.tar.gz
BuildRequires: sbcl
BuildRequires: freetds-devel
BuildRequires: openssl-devel
BuildRequires: sqlite-devel
BuildRequires: zlib-devel
Requires: freetds
Requires: openssl-devel
Requires: sbcl
Requires: zlib
Requires: sqlite
%description %description
pgloader imports data from different kind of sources and COPY it into pgloader imports data from different kind of sources and COPY it into
@ -33,7 +22,7 @@ PostgreSQL. In the MySQL case it's possible to edit CASTing rules from the
pgloader command directly. pgloader command directly.
%prep %prep
%setup -q -n %{name}-%{version} %setup -q -n %{name}
%build %build
%define debug_package %{nil} %define debug_package %{nil}
@ -46,23 +35,11 @@ mkdir -p $RPM_BUILD_ROOT/etc/prelink.conf.d
echo '-b /usr/bin/pgloader' > $RPM_BUILD_ROOT/etc/prelink.conf.d/%{name}.conf echo '-b /usr/bin/pgloader' > $RPM_BUILD_ROOT/etc/prelink.conf.d/%{name}.conf
%files %files
%doc README.md %doc README.md pgloader.1.md
%{_bindir}/* %{_bindir}/*
/etc/prelink.conf.d/%{name}.conf /etc/prelink.conf.d/%{name}.conf
%changelog %changelog
* Sun Mar 22 2020 Michał "phoe" Herda <phoe@disroot.org> - 3.6.2
- Release 3.6.2
* Tue Sep 24 2019 Phil Ingram <pingram.au@gmail.com> - 3.6.1
- Release 3.6.1
- Use Requires and BuildRequires
- Variablise Source0
- Fix Files
* Thu Jan 22 2015 Dimitri Fontaine <dimitri@2ndQuadrant.fr> - 3.2.1.preview-22
- Release 3.2.1.preview
* Thu Jan 15 2015 Dimitri Fontaine <dimitri@2ndQuadrant.fr> - 3.2.0-22 * Thu Jan 15 2015 Dimitri Fontaine <dimitri@2ndQuadrant.fr> - 3.2.0-22
- Release 3.2.0 - Release 3.2.0
@ -74,9 +51,9 @@ echo '-b /usr/bin/pgloader' > $RPM_BUILD_ROOT/etc/prelink.conf.d/%{name}.conf
* Tue Apr 29 2014 Dimitri Fontaine <dimitri@2ndQuadrant.fr> 3.0.99 * Tue Apr 29 2014 Dimitri Fontaine <dimitri@2ndQuadrant.fr> 3.0.99
- Assorted fixes, release candidate 9 - Assorted fixes, release candidate 9
* Mon Dec 23 2013 Dimitri Fontaine <dimitri@2ndQuadrant.fr> 3.0.98 * Thu Dec 23 2013 Dimitri Fontaine <dimitri@2ndQuadrant.fr> 3.0.98
- Assorted fixes, release candidate 8 - Assorted fixes, release candidate 8
* Sun Dec 15 2013 Dimitri Fontaine <dimitri@2ndQuadrant.fr> 3.0.97 * Wed Dec 15 2013 Dimitri Fontaine <dimitri@2ndQuadrant.fr> 3.0.97
- Assorted fixes, release candidate 7 - Assorted fixes, release candidate 7
* Tue Dec 10 2013 Dimitri Fontaine <dimitri@2ndQuadrant.fr> 3.0.96 * Tue Dec 10 2013 Dimitri Fontaine <dimitri@2ndQuadrant.fr> 3.0.96
- Package as an RPM - Package as an RPM

View File

@ -1,278 +0,0 @@
;;;
;;; The main API, or an attempt at providing pgloader as a lisp usable API
;;; rather than only an end-user program.
;;;
(in-package #:pgloader)
;;;
;;; Conditions signaled by the CLI/API entry points.
;;;

;; Signaled when a source definition is incomplete (for instance a file
;; based source without any target table name); MESG carries the human
;; readable explanation shown to the user.
(define-condition source-definition-error (error)
  ((mesg :initarg :mesg :reader source-definition-error-mesg))
  (:report (lambda (err stream)
             (format stream "~a" (source-definition-error-mesg err)))))

;; Signaled once individual option parsing errors have already been
;; logged: the report only points the user at the log lines above.
(define-condition cli-parsing-error (error) ()
  (:report (lambda (err stream)
             (declare (ignore err))
             (format stream "Could not parse the command line: see above."))))

;; Signaled when some of the command files given on the command line do
;; not exist; FILENAME-LIST holds every missing file name.
(define-condition load-files-not-found-error (error)
  ((filename-list :initarg :filename-list))
  (:report (lambda (err stream)
             (format stream
                     ;; start lines with 3 spaces because of trivial-backtrace
                     "~{No such file or directory: ~s~^~% ~}"
                     (slot-value err 'filename-list)))))
;;;
;;; Helper functions to actually do things
;;;
(defun process-command-file (filename-list &key (flush-summary t))
  "Run every FILENAME in FILENAME-LIST as a pgloader command file (.load).

   Files that exist are handed to RUN-COMMANDS; missing ones are logged
   as errors right away and, once the whole list has been processed,
   reported together through a LOAD-FILES-NOT-FOUND-ERROR condition."
  (let ((missing '()))
    (dolist (filename filename-list)
      (let ((truename (probe-file filename)))
        (cond (truename
               (run-commands truename
                             :start-logger nil
                             :flush-summary flush-summary))
              (t
               (push filename missing)
               (log-message :error "Can not find file: ~s" filename)))))
    (when missing
      (error 'load-files-not-found-error
             :filename-list (nreverse missing)))))
(defun process-source-and-target (source-string target-string
                                  &optional
                                    type encoding set with field cast
                                    before after)
  "Given exactly 2 CLI arguments, process them as source and target URIs.
   Parameters here are meant to be already parsed, see parse-cli-optargs."
  ;; Parsing failures below deliberately return NIL (the value of the
  ;; log-message call) rather than aborting: the COND further down picks
  ;; the appropriate fallback or fatal message.
  (let* ((type (handler-case
                   (parse-cli-type type)
                 (condition (e)
                   (log-message :warning
                                "Could not parse --type ~s: ~a"
                                type e))))
         (source-uri (handler-case
                         (if type
                             (parse-source-string-for-type type source-string)
                             (parse-source-string source-string))
                       (condition (e)
                         (log-message :warning
                                      "Could not parse source string ~s: ~a"
                                      source-string e))))
         ;; re-derive TYPE from the parsed connection object, which is
         ;; authoritative once parsing succeeded (shadows the binding above)
         (type (when (and source-string
                          (typep source-uri 'connection))
                 (parse-cli-type (conn-type source-uri))))
         (target-uri (handler-case
                         (parse-target-string target-string)
                       (condition (e)
                         (log-message :error
                                      "Could not parse target string ~s: ~a"
                                      target-string e)))))
    ;; some verbosity about the parsing "magic"
    (log-message :info " SOURCE: ~s" source-string)
    (log-message :info "SOURCE URI: ~s" source-uri)
    (log-message :info " TARGET: ~s" target-string)
    (log-message :info "TARGET URI: ~s" target-uri)
    ;; When neither argument parses as a URI, assume both are command
    ;; files; otherwise report which side failed.
    (cond ((and (null source-uri) (null target-uri))
           (process-command-file (list source-string target-string)))
          ((or (null source-string) (null source-uri))
           (log-message :fatal
                        "Failed to parse ~s as a source URI." source-string)
           (log-message :log "You might need to use --type."))
          ((or (null target-string) (null target-uri))
           (log-message :fatal
                        "Failed to parse ~s as a PostgreSQL database URI."
                        target-string)))
    ;; Parse the remaining options, counting errors so that we can report
    ;; them all before giving up, instead of stopping at the first one.
    (let* ((nb-errors 0)
           (options (handler-case
                        (parse-cli-options type with)
                      (condition (e)
                        (incf nb-errors)
                        (log-message :error "Could not parse --with ~s:" with)
                        (log-message :error "~a" e))))
           (fields (handler-case
                       (parse-cli-fields type field)
                     (condition (e)
                       (incf nb-errors)
                       (log-message :error "Could not parse --fields ~s:" field)
                       (log-message :error "~a" e)))))
      ;; each remaining option is parsed by its own function; build a
      ;; plist of the results and destructure it right back
      (destructuring-bind (&key encoding gucs casts before after)
          (loop :for (keyword option user-string parse-fn)
             :in `((:encoding "--encoding" ,encoding ,#'parse-cli-encoding)
                   (:gucs     "--set"      ,set      ,#'parse-cli-gucs)
                   (:casts    "--cast"     ,cast     ,#'parse-cli-casts)
                   (:before   "--before"   ,before   ,#'parse-sql-file)
                   (:after    "--after"    ,after    ,#'parse-sql-file))
             :append (list keyword
                           (handler-case
                               (funcall parse-fn user-string)
                             (condition (e)
                               (incf nb-errors)
                               (log-message :error "Could not parse ~a ~s: ~a"
                                            option user-string e)))))
        (unless (= 0 nb-errors)
          (error 'cli-parsing-error))
        ;; so, we actually have all the specs for the
        ;; job on the command line now.
        (when (and source-uri target-uri (= 0 nb-errors))
          (load-data :from source-uri
                     :into target-uri
                     :encoding encoding
                     :options options
                     :gucs gucs
                     :fields fields
                     :casts casts
                     :before before
                     :after after
                     :start-logger nil))))))
;;;
;;; Helper function to run a given command
;;;
(defun run-commands (source
                     &key
                       (start-logger t)
                       (flush-summary t)
                       ;; the remaining keywords rebind pgloader's special
                       ;; variables for the dynamic extent of the call
                       ((:summary *summary-pathname*) *summary-pathname*)
                       ((:log-filename *log-filename*) *log-filename*)
                       ((:log-min-messages *log-min-messages*) *log-min-messages*)
                       ((:client-min-messages *client-min-messages*) *client-min-messages*))
  "SOURCE can be a function, which is run, a list, which is compiled as CL
   code then run, a pathname containing one or more commands that are parsed
   then run, or a commands string that is then parsed and each command run."
  (with-monitor (:start-logger start-logger)
    (let* ((*print-circle* nil)
           ;; normalize SOURCE into a list of compiled thunks; a string
           ;; naming an existing file is read as a command file, any
           ;; other string is parsed as the commands themselves
           (funcs
            (typecase source
              (function (list source))
              (list (list (compile-lisp-command source)))
              (pathname (mapcar #'compile-lisp-command
                                (parse-commands-from-file source)))
              (t (mapcar #'compile-lisp-command
                         (if (probe-file source)
                             (parse-commands-from-file source)
                             (parse-commands source)))))))
      ;; run each command in turn, flushing the summary between commands
      ;; when asked to
      (loop :for func :in funcs
         :do (funcall func)
         :do (when flush-summary
               (flush-summary :reset t))))))
(defun compile-lisp-command (source)
  "SOURCE must be lisp source code, a list form.

   Compile it to a function and return that function; compiler notes and
   warnings are captured and only shown at the DEBUG log level.  Signals
   an error when the compilation fails."
  (let (function warnings-p failure-p notes)
    ;; capture the compiler notes and warnings: rebind all three output
    ;; streams so nothing leaks to the user's terminal
    (setf notes
          (with-output-to-string (stream)
            (let ((*standard-output* stream)
                  (*error-output* stream)
                  (*trace-output* stream))
              (with-compilation-unit (:override t)
                (setf (values function warnings-p failure-p)
                      (compile nil source))))))
    ;; log the captured compiler output at the DEBUG level
    (when (and notes (string/= notes ""))
      (let ((pp-source (with-output-to-string (s) (pprint source s))))
        (log-message :debug "While compiling:~%~a~%~a" pp-source notes)))
    ;; and signal an error if we failed to compile our lisp code;
    ;; warnings alone are tolerated and the compiled function returned
    (cond
      (failure-p (error "Failed to compile code: ~a~%~a" source notes))
      (warnings-p function)
      (t function))))
;;;
;;; Main API to use from outside of pgloader.
;;;
(defun load-data (&key ((:from source)) ((:into target))
                    encoding fields target-table-name
                    options gucs casts before after
                    (start-logger t) (flush-summary t))
  "Load data from SOURCE into TARGET.

   SOURCE is a connection instance describing the data source and TARGET
   a pgsql-connection; the other keyword parameters carry already parsed
   CLI options and are handed over to LISP-CODE-FOR-LOADING.

   Signals SOURCE-DEFINITION-ERROR when a file based source (CSV, COPY
   or fixed format) comes without any target table name."
  (declare (type connection source)
           (type pgsql-connection target))
  ;; file based sources load into a single table, so we need its name
  ;; from either the command line or the target connection string.
  ;;
  ;; BUGFIX: the previous code wrote (typep source (or 'csv-connection
  ;; 'copy-connection 'fixed-connection)), where OR evaluates at runtime
  ;; to the single symbol CSV-CONNECTION, so only CSV sources were ever
  ;; checked.  Use the OR compound type specifier instead, which covers
  ;; all three file based connection classes.
  (when (and (typep source '(or csv-connection
                                copy-connection
                                fixed-connection))
             (null target-table-name)
             (null (pgconn-table-name target)))
    (error 'source-definition-error
           :mesg (format nil
                         "~a data source requires a table name target."
                         (conn-type source))))
  (with-monitor (:start-logger start-logger)
    ;; --cast rules only apply to database sources
    (when (and casts (not (member (type-of source)
                                  '(sqlite-connection
                                    mysql-connection
                                    mssql-connection))))
      (log-message :log "Cast rules are ignored for this source type."))
    ;; now generate the code for the command, then run it
    (log-message :debug "LOAD DATA FROM ~s" source)
    (let* ((target-table-name (or target-table-name
                                  (pgconn-table-name target)))
           (code (lisp-code-for-loading :from source
                                        :into target
                                        :encoding encoding
                                        :fields fields
                                        :target-table-name target-table-name
                                        :options options
                                        :gucs gucs
                                        :casts casts
                                        :before before
                                        :after after)))
      ;; resolve relative pathnames in the generated command against the
      ;; current working directory before executing it
      (run-commands (process-relative-pathnames (uiop:getcwd) code)
                    :start-logger nil
                    :flush-summary flush-summary))))
;; Dispatch table mapping a connection class name to the function that
;; generates the load command for that source type; consulted by
;; lisp-code-for-loading, which expects every supported class listed.
(defvar *get-code-for-source*
  (list (cons 'copy-connection   #'lisp-code-for-loading-from-copy)
        (cons 'fixed-connection  #'lisp-code-for-loading-from-fixed)
        (cons 'csv-connection    #'lisp-code-for-loading-from-csv)
        (cons 'dbf-connection    #'lisp-code-for-loading-from-dbf)
        (cons 'ixf-connection    #'lisp-code-for-loading-from-ixf)
        (cons 'sqlite-connection #'lisp-code-for-loading-from-sqlite)
        (cons 'mysql-connection  #'lisp-code-for-loading-from-mysql)
        (cons 'mssql-connection  #'lisp-code-for-loading-from-mssql)
        (cons 'pgsql-connection  #'lisp-code-for-loading-from-pgsql))
  "Each source type might require a different set of options.")
(defun lisp-code-for-loading (&key
                                ((:from source)) ((:into target))
                                encoding fields target-table-name
                                options gucs casts before after)
  "Generate the lisp code that loads data from SOURCE into TARGET, by
   dispatching on the source connection class through
   *get-code-for-source*."
  (let ((func (cdr (assoc (type-of source) *get-code-for-source*))))
    ;; not all functions support the same set of &key parameters,
    ;; they all have &allow-other-keys in their signature tho.
    (assert (not (null func)))          ; an unknown source class is a bug
    ;; the IF still guards FUNCALL in case the ASSERT was continued from
    ;; a restart with FUNC left NIL
    (if func
        (funcall func
                 source
                 target
                 :target-table-name target-table-name
                 :fields fields
                 :encoding (or encoding :default)
                 :gucs gucs
                 :casts casts
                 :options options
                 :before before
                 :after after
                 :allow-other-keys t))))

View File

@ -1,11 +1,8 @@
;; ;;
;; Abstract classes to define the API to connect to a data source ;; Abstrat classes to define the API to connect to a data source
;; ;;
(in-package :pgloader.connection) (in-package :pgloader.connection)
;;;
;;; Generic API
;;;
(defclass connection () (defclass connection ()
((type :initarg :type :accessor conn-type) ((type :initarg :type :accessor conn-type)
(handle :initarg :conn :accessor conn-handle :initform nil)) (handle :initarg :conn :accessor conn-handle :initform nil))
@ -21,29 +18,12 @@
(defgeneric close-connection (connection) (defgeneric close-connection (connection)
(:documentation "Close a connection to the data source.")) (:documentation "Close a connection to the data source."))
(defgeneric check-connection (connection)
(:documentation "Check that we can actually connect."))
(defgeneric clone-connection (connection)
(:documentation "Instanciate a new connection object with similar properties."))
;;;
;;; File based objects
;;;
(defclass fd-connection (connection) (defclass fd-connection (connection)
((uri :initarg :uri :accessor fd-uri) ((uri :initarg :uri :accessor fd-uri)
(arch :initarg :arch :accessor fd-arch) (arch :initarg :arch :accessor fd-arch)
(path :initarg :path :accessor fd-path)) (path :initarg :path :accessor fd-path))
(:documentation "pgloader connection parameters for a file based data source.")) (:documentation "pgloader connection parameters for a file based data source."))
(defmethod clone-connection ((fd fd-connection))
(let ((clone (make-instance 'fd-connection :type (conn-type fd))))
(loop :for slot :in '(uri arch path)
:do (when (slot-boundp fd slot)
(setf (slot-value clone slot) (slot-value fd slot))))
clone))
(define-condition fd-connection-error (connection-error) (define-condition fd-connection-error (connection-error)
((path :initarg :path :reader connection-error-path)) ((path :initarg :path :reader connection-error-path))
(:report (lambda (err stream) (:report (lambda (err stream)
@ -64,10 +44,10 @@
(format stream "~a://~a" type url))))) (format stream "~a://~a" type url)))))
(defgeneric fetch-file (fd-connection) (defgeneric fetch-file (fd-connection)
(:documentation "Support for HTTP URI for files.")) (:documentation "Suport for HTTP URI for files."))
(defgeneric expand (fd-connection) (defgeneric expand (fd-connection)
(:documentation "Support for file archives.")) (:documentation "Suport for file archives."))
(defmethod expand ((fd fd-connection)) (defmethod expand ((fd fd-connection))
"Expand the archive for the FD connection." "Expand the archive for the FD connection."
@ -89,9 +69,6 @@
(setf (fd-path fd) local-filename)))) (setf (fd-path fd) local-filename))))
fd) fd)
;;;
;;; database connections
;;;
(defclass db-connection (connection) (defclass db-connection (connection)
((name :initarg :name :accessor db-name) ((name :initarg :name :accessor db-name)
(host :initarg :host :accessor db-host) (host :initarg :host :accessor db-host)
@ -100,24 +77,10 @@
(pass :initarg :pass :accessor db-pass)) (pass :initarg :pass :accessor db-pass))
(:documentation "pgloader connection parameters for a database service.")) (:documentation "pgloader connection parameters for a database service."))
(defmethod clone-connection ((c db-connection))
(make-instance 'db-connection
:type (conn-type c)
:name (db-name c)
:host (db-host c)
:port (db-port c)
:user (db-user c)
:pass (db-pass c)))
(defmethod print-object ((c db-connection) stream) (defmethod print-object ((c db-connection) stream)
(print-unreadable-object (c stream :type t :identity t) (print-unreadable-object (c stream :type t :identity t)
(with-slots (type name host port user) c (with-slots (type name host port user) c
(let ((host (typecase host (format stream "~a://~a@~a:~a/~a" type user host port name))))
(cons (format nil "~a:~a"
(string-downcase (car host))
(cdr host)))
(t host))))
(format stream "~a://~a@~a:~a/~a" type user host port name)))))
(define-condition db-connection-error (connection-error) (define-condition db-connection-error (connection-error)
((host :initarg :host :reader connection-error-host) ((host :initarg :host :reader connection-error-host)
@ -131,29 +94,14 @@
(connection-error-user err) (connection-error-user err)
(connection-error-mesg err))))) (connection-error-mesg err)))))
(defgeneric query (db-connection sql &key) (defmacro with-connection ((var connection) &body forms)
(:documentation "Query DB-CONNECTION with SQL query"))
;;;
;;; Tools for every connection classes
;;;
(defmacro with-connection ((var connection &rest args &key &allow-other-keys)
&body forms)
"Connect to DB-CONNECTION and handle any condition when doing so, and when "Connect to DB-CONNECTION and handle any condition when doing so, and when
connected execute FORMS in a protected way so that we always disconnect connected execute FORMS in a protected way so that we always disconnect
at the end." at the end."
(let ((conn (gensym "conn"))) (let ((conn (gensym "conn")))
`(let* ((,conn ,connection) `(let* ((,conn ,connection)
(,var (handler-case (,var (handler-case
;; in some cases (client_min_messages set to debug5 (open-connection ,conn)
;; for example), PostgreSQL might send us some
;; WARNINGs already when opening a new connection
(handler-bind ((cl-postgres:postgresql-warning
#'(lambda (w)
(log-message :warning "~a" w)
(muffle-warning))))
(apply #'open-connection ,conn (list ,@args)))
(condition (e) (condition (e)
(cond ((typep ,connection 'fd-connection) (cond ((typep ,connection 'fd-connection)
(error 'fd-connection-error (error 'fd-connection-error
@ -167,36 +115,8 @@
:type (conn-type ,conn) :type (conn-type ,conn)
:host (db-host ,conn) :host (db-host ,conn)
:port (db-port ,conn) :port (db-port ,conn)
:user (db-user ,conn))) :user (db-user ,conn))))))))
(t
(error 'connection-error
:mesg (format nil "~a" e)
:type (conn-type ,conn))))))))
(unwind-protect (unwind-protect
(progn ,@forms) (progn ,@forms)
(close-connection ,var))))) (close-connection ,var)))))
(defmethod check-connection ((fd fd-connection))
"Check that it is possible to connect to db-connection C."
(log-message :log "Attempting to open ~a" fd)
(handler-case
(with-connection (cnx fd)
(log-message :log "Success, opened ~a." fd))
(condition (e)
(log-message :fatal "Failed to connect to ~a: ~a" fd e))))
(defmethod check-connection ((c db-connection))
"Check that it is possible to connect to db-connection C."
(log-message :log "Attempting to connect to ~a" c)
(handler-case
(with-connection (cnx c)
(log-message :log "Success, opened ~a." c)
(let ((sql "SELECT 1;"))
(log-message :log "Running a simple query: ~a" sql)
(handler-case
(query cnx sql)
(condition (e)
(log-message :fatal "SQL failed on ~a: ~a" c e)))))
(condition (e)
(log-message :fatal "Failed to connect to ~a: ~a" c e))))

View File

@ -1,18 +0,0 @@
;;;
;;; Export a getenv feature so that we can fetch http_proxy at build time.
;;;
;;; We can't rely on Quicklisp to have installed a modern ASDF with UIOP yet
;;; here: we need the feature to pass in the :proxy argument to
;;; quicklisp-quickstart:install.
;;;
(in-package :cl-user)
;;
;; ccl provides an implementation of getenv already.
;;
#+sbcl
(defun getenv (name &optional default)
  "Return the value of the environment variable NAME, or DEFAULT when
   NAME is not set in the environment."
  (let ((value (sb-ext:posix-getenv name)))
    (if value
        value
        default)))

View File

@ -11,20 +11,10 @@
(in-package #:cl-user) (in-package #:cl-user)
;; So that we can #+pgloader-image some code away, see main.lisp
(push :pgloader-image *features*)
;;;
;;; We need to support *print-circle* for the debug traces of the catalogs,
;;; and while at it let's enforce *print-pretty* too.
;;;
(setf *print-circle* t *print-pretty* t)
(defun close-foreign-libs () (defun close-foreign-libs ()
"Close Foreign libs in use by pgloader at application save time." "Close Foreign libs in use by pgloader at application save time."
(let (#+sbcl (sb-ext:*muffled-warnings* 'style-warning)) (let (#+sbcl (sb-ext:*muffled-warnings* 'style-warning))
(mapc #'cffi:close-foreign-library '(cl+ssl::libssl (mapc #'cffi:close-foreign-library '(cl+ssl::libssl
cl+ssl::libcrypto
mssql::sybdb)))) mssql::sybdb))))
(defun open-foreign-libs () (defun open-foreign-libs ()
@ -32,25 +22,18 @@
(let (#+sbcl (sb-ext:*muffled-warnings* 'style-warning)) (let (#+sbcl (sb-ext:*muffled-warnings* 'style-warning))
;; we specifically don't load mssql::sybdb eagerly, it's getting loaded ;; we specifically don't load mssql::sybdb eagerly, it's getting loaded
;; in only when the data source is a MS SQL database. ;; in only when the data source is a MS SQL database.
;; (cffi:load-foreign-library 'cl+ssl::libssl)))
;; and for CL+SSL, we need to call the specific reload function that
;; handles some context and things around loading with CFFI.
(cl+ssl:reload)))
#+ccl (push #'open-foreign-libs *lisp-startup-functions*) #+ccl (push #'open-foreign-libs *lisp-startup-functions*)
#+sbcl (push #'open-foreign-libs sb-ext:*init-hooks*) #+sbcl (push #'open-foreign-libs sb-ext:*save-hooks*)
#+ccl (push #'close-foreign-libs *save-exit-functions*) #+ccl (push #'close-foreign-libs *save-exit-functions*)
#+sbcl (push #'close-foreign-libs sb-ext:*save-hooks*) #+sbcl (push #'close-foreign-libs sb-ext:*init-hooks*)
;;; ;;;
;;; Register all loaded systems in the image, so that ASDF don't search for ;;; Register all loaded systems in the image, so that ASDF don't search for
;;; them again when doing --self-upgrade ;;; them again when doing --self-upgrade
;;; ;;;
;;; FIXME: this idea kept failing.
#|
(defun register-preloaded-system (system) (defun register-preloaded-system (system)
(unless (string= "pgloader" (asdf::coerce-name system)) (unless (string= "pgloader" (asdf::coerce-name system))
(let ((version (slot-value system 'asdf::version))) (let ((version (slot-value system 'asdf::version)))
@ -60,12 +43,3 @@
(setf pgloader::*self-upgrade-immutable-systems* (setf pgloader::*self-upgrade-immutable-systems*
(remove "pgloader" (asdf:already-loaded-systems) :test #'string=)) (remove "pgloader" (asdf:already-loaded-systems) :test #'string=))
(defun list-files-to-load-for-system (system-name)
(loop for (o . c) in (asdf/plan:plan-actions
(asdf/plan:make-plan 'asdf/plan:sequential-plan
'asdf:load-source-op
(asdf:find-system system-name)))
when (typep o 'asdf:load-source-op)
append (asdf:input-files o c)))
|#

View File

@ -1,71 +0,0 @@
;;;
;;; Generic API for pgloader data loading and database migrations.
;;;
(in-package :pgloader.load)
;; Entry point for streaming one source item (file, table, collection)
;; into its PostgreSQL target; implemented per source type.
(defgeneric copy-from (source &key)
  (:documentation
   "Load data from SOURCE into its target as defined by the SOURCE object."))
;; That one is more an export than a load. It always export to a single very
;; well defined format, the importing utility is defined in
;; src/pgsql-copy-format.lisp
(defgeneric copy-to (source filename)
  (:documentation
   "Load data from SOURCE and serialize it into FILENAME, using PostgreSQL
    COPY TEXT format."))
;; The next generic function is only to get instanciated for sources
;; actually containing more than a single source item (tables, collections,
;; etc)
;; Whole-database migration entry point, for sources that contain more
;; than one item (tables, collections, ...); the keyword arguments mirror
;; the pgloader load command options.
(defgeneric copy-database (source
                           &key
                             worker-count
                             concurrency
                             max-parallel-create-index
                             truncate
                             data-only
                             schema-only
                             create-tables
                             include-drop
                             foreign-keys
                             create-indexes
                             reset-sequences
                             disable-triggers
                             materialize-views
                             set-table-oids
                             including
                             excluding)
  (:documentation
   "Auto-discover source schema, convert it to PostgreSQL, migrate the data
    from the source definition to PostgreSQL for all the discovered
    items (tables, collections, etc), then reset the PostgreSQL sequences
    created by SERIAL columns in the first step.

    The target tables are automatically discovered, the only-tables
    parameter allows to filter them out."))
;; Runs before any data is copied: DDL creation or constraint dropping,
;; depending on the load options.
(defgeneric prepare-pgsql-database (db-copy catalog
                                    &key
                                      truncate
                                      create-tables
                                      create-schemas
                                      drop-indexes
                                      set-table-oids
                                      materialize-views
                                      foreign-keys
                                      include-drop)
  (:documentation "Prepare the target PostgreSQL database."))
;; Runs after all the data has been copied: re-installs indexes/keys,
;; triggers and sequences. PKEYS is the list of primary-key statements
;; collected while building indexes.
(defgeneric complete-pgsql-database (db-copy catalog pkeys
                                     &key
                                       foreign-keys
                                       create-indexes
                                       create-triggers
                                       reset-sequences)
  (:documentation "Alter load duties for database sources copy support."))

View File

@ -1,156 +0,0 @@
;;;
;;; Generic API for pgloader sources
;;;
(in-package :pgloader.load)
;;;
;;; Common API implementation
;;;
(defmethod queue-raw-data ((copy copy) rawq concurrency)
  "Stream rows as read by the MAP-ROWS method on the COPY argument into the
   RAWQ queue, then push CONCURRENCY :end-of-data markers so that every
   writer task knows the stream is over.  Returns (:reader target seconds)."
  (log-message :notice "COPY ~a ~@[with ~d rows estimated~] [~a/~a]"
               (format-table-name (target copy))
               (table-row-count-estimate (target copy))
               (lp:kernel-worker-index)
               (lp:kernel-worker-count))
  (log-message :debug "Reader started for ~a" (format-table-name (target copy)))
  (let* ((start-time (get-internal-real-time))
         (row-count 0)
         ;; choose the row callback once, outside the per-row hot path
         (process-row
          (if (or (eq :data *log-min-messages*)
                  (eq :data *client-min-messages*))
              ;; when debugging, use a lambda with debug traces
              (lambda (row)
                (log-message :data "< ~s" row)
                (lq:push-queue row rawq)
                (incf row-count))
              ;; usual non-debug case
              (lambda (row)
                (lq:push-queue row rawq)
                (incf row-count)))))
    ;; signal we are starting
    (update-stats :data (target copy) :start start-time)
    ;; call the source-specific method for reading input data
    (map-rows copy :process-row-fn process-row)
    ;; process last batches and send them to queues
    ;; and mark end of stream, one marker per consumer
    (loop :repeat concurrency :do (lq:push-queue :end-of-data rawq))
    (let ((seconds (elapsed-time-since start-time)))
      (log-message :debug "Reader for ~a is done in ~6$s"
                   (format-table-name (target copy)) seconds)
      (update-stats :data (target copy) :read row-count :rs seconds)
      ;; result consumed by the COPY Threads Completion accounting loop
      (list :reader (target copy) seconds))))
(defmethod copy-to ((copy copy) pgsql-copy-filename)
  "Extract data from the COPY source and serialize it into
   PGSQL-COPY-FILENAME, a PostgreSQL COPY TEXT formatted file.
   An existing file at that path is overwritten (:if-exists :supersede)."
  (with-open-file (text-file pgsql-copy-filename
                             :direction :output
                             :if-exists :supersede
                             :external-format :utf-8)
    ;; each source row goes through the copy transforms before being
    ;; written out in COPY TEXT format
    (let ((row-fn (lambda (row)
                    (format-vector-row text-file row (transforms copy)))))
      (map-rows copy :process-row-fn row-fn))))
(defmethod copy-from ((copy copy)
                      &key
                        (kernel nil k-s-p)
                        (channel nil c-s-p)
                        (worker-count 8)
                        (concurrency 2)
                        (multiple-readers nil)
                        (on-error-stop *on-error-stop*)
                        disable-triggers)
  "Copy data from COPY source into PostgreSQL.

   Spawns reader and writer tasks on an lparallel kernel (either the given
   KERNEL or a fresh one with WORKER-COUNT workers), connected through
   bounded queues.  Returns the number of tasks submitted; when CHANNEL was
   not supplied, also waits for the tasks and ends the kernel."
  (let* ((table-name (format-table-name (target copy)))
         (lp:*kernel* (or kernel (make-kernel worker-count)))
         (channel (or channel (lp:make-channel)))
         (readers nil)
         (task-count 0))
    ;; submit-task wraps lp:submit-task so task-count tracks how many
    ;; results the caller must receive on the channel
    (flet ((submit-task (channel function &rest args)
             (apply #'lp:submit-task channel function args)
             (incf task-count)))
      (lp:task-handler-bind
          (#+pgloader-image
           (copy-init-error
            #'(lambda (condition)
                ;; stop the other tasks and then transfer the control
                (log-message :log "COPY INIT ERROR")
                (lp:invoke-transfer-error condition)))
           (on-error-stop
            #'(lambda (condition)
                (log-message :log "ON ERROR STOP")
                (lp:kill-tasks :default)
                (lp:invoke-transfer-error condition)))
           #+pgloader-image
           (error
            #'(lambda (condition)
                (log-message :error "A thread failed with error: ~a" condition)
                (log-message :error "~a"
                             (trivial-backtrace:print-backtrace condition
                                                                :output nil))
                (lp::invoke-transfer-error condition))))
        ;; Check for Read Concurrency Support from our source
        (when (and multiple-readers (< 1 concurrency))
          (let ((label "Check Concurrency Support"))
            (with-stats-collection (label :section :pre)
              (setf readers (concurrency-support copy concurrency))
              (update-stats :pre label :read 1 :rows (if readers 1 0))
              (when readers
                (log-message :notice "Multiple Readers Enabled for ~a"
                             (format-table-name (target copy)))))))
        ;; when reader is non-nil, we have reader concurrency support!
        (if readers
            ;; here we have detected Concurrency Support: we create as many
            ;; readers as writers and create associated couples, each couple
            ;; shares its own queue
            (let ((rawqs
                   (loop :repeat concurrency :collect
                      (lq:make-queue :fixed-capacity *prefetch-rows*))))
              (log-message :info "Read Concurrency Enabled for ~s"
                           (format-table-name (target copy)))
              (loop :for rawq :in rawqs :for reader :in readers :do
                 ;; each reader pretends to be alone, pass 1 as concurrency
                 (submit-task channel #'queue-raw-data reader rawq 1)
                 (submit-task channel #'copy-rows-from-queue
                              copy rawq
                              :on-error-stop on-error-stop
                              :disable-triggers disable-triggers)))
            ;; no Read Concurrency Support detected, start a single reader
            ;; task, using a single data queue that is read by multiple
            ;; writers.
            (let ((rawq
                   (lq:make-queue :fixed-capacity *prefetch-rows*)))
              (submit-task channel #'queue-raw-data copy rawq concurrency)
              ;; start a task to transform the raw data in the copy format
              ;; and send that data down to PostgreSQL
              (loop :repeat concurrency :do
                 (submit-task channel #'copy-rows-from-queue
                              copy rawq
                              :on-error-stop on-error-stop
                              :disable-triggers disable-triggers))))
        ;; now wait until both the tasks are over, and kill the kernel —
        ;; but only when the caller did not provide its own channel
        (unless c-s-p
          (log-message :debug "waiting for ~d tasks" task-count)
          (loop :repeat task-count :do (lp:receive-result channel))
          (log-message :notice "COPY ~s done." table-name)
          (unless k-s-p (lp:end-kernel :wait t)))
        ;; return task-count, which is how many tasks we submitted to our
        ;; lparallel kernel.
        task-count))))

View File

@ -1,133 +0,0 @@
;;;
;;; Generic API for pgloader sources
;;; Methods for source types with multiple files input
;;;
(in-package :pgloader.load)
(defmethod copy-database ((copy md-copy)
                          &key
                            (on-error-stop *on-error-stop*)
                            truncate
                            disable-triggers
                            drop-indexes
                            max-parallel-create-index
                            ;; generic API, but ignored here
                            (worker-count 4)
                            (concurrency 1)
                            data-only
                            schema-only
                            create-tables
                            include-drop
                            foreign-keys
                            create-indexes
                            reset-sequences
                            materialize-views
                            set-table-oids
                            including
                            excluding)
  "Copy the contents of the COPY formatted file(s) to PostgreSQL.

   Fetches the target table definition from the PostgreSQL catalogs,
   optionally drops indexes and truncates, then loads every file matched by
   the source spec in parallel and finally re-creates the dropped indexes."
  (declare (ignore data-only schema-only
                   create-tables include-drop foreign-keys
                   create-indexes reset-sequences materialize-views
                   set-table-oids including excluding))
  (let* ((*on-error-stop* on-error-stop)
         (pgconn (target-db copy))
         pgsql-catalog)
    (handler-case
        (with-pgsql-connection (pgconn)
          (setf pgsql-catalog
                (fetch-pgsql-catalog (db-name pgconn)
                                     :table (target copy)
                                     :variant (pgconn-variant pgconn)
                                     :pgversion (pgconn-major-version pgconn)))
          ;; if the user didn't tell us the column list of the table, now is
          ;; a proper time to set it in the copy object
          (unless (and (slot-boundp copy 'columns)
                       (slot-value copy 'columns))
            (setf (columns copy)
                  (mapcar (lambda (col)
                            ;; we need to handle the md-copy format for the
                            ;; column list, which allow for user given
                            ;; options: each column is a list which car is
                            ;; the column name.
                            (list (column-name col)))
                          (table-field-list (first (table-list pgsql-catalog))))))
          (log-message :data "CATALOG: ~s" pgsql-catalog)
          ;; this sets (table-index-list (target copy))
          (maybe-drop-indexes pgsql-catalog :drop-indexes drop-indexes)
          ;; now is the proper time to truncate, before parallel operations
          (when truncate
            (truncate-tables pgsql-catalog)))
      (cl-postgres:database-error (e)
        ;; without a target table there is nothing we can load into
        (log-message :fatal "Failed to prepare target PostgreSQL table.")
        (log-message :fatal "~a" e)
        (return-from copy-database)))
    ;; Keep the PostgreSQL table target around in the copy instance,
    ;; with the following subtleties to deal with:
    ;;  1. the catalog fetching did fill-in PostgreSQL columns as fields
    ;;  2. we might target fewer pg columns than the table actually has
    (let ((table (first (table-list pgsql-catalog))))
      (setf (table-column-list table)
            (loop :for column-name :in (mapcar #'first (columns copy))
               :collect (find column-name (table-field-list table)
                              :key #'column-name
                              :test #'string=)))
      (setf (target copy) table))
    ;; expand the specs of our source, we might have to care about several
    ;; files actually.
    (let* ((lp:*kernel* (make-kernel worker-count))
           (channel (lp:make-channel))
           (path-list (expand-spec (source copy)))
           (task-count 0))
      (with-stats-collection ("Files Processed" :section :post
                                                :use-result-as-read t
                                                :use-result-as-rows t)
        (loop :for path-spec :in path-list
           :count t
           :do (let ((table-source (clone-copy-for copy path-spec)))
                 ;; a header line can provide the field list when the user
                 ;; did not give one
                 (when (and (header table-source) (null (fields table-source)))
                   (parse-header table-source))
                 (incf task-count
                       (copy-from table-source
                                  :concurrency concurrency
                                  :kernel lp:*kernel*
                                  :channel channel
                                  :on-error-stop on-error-stop
                                  :disable-triggers disable-triggers)))))
      ;; end kernel: drain one result per submitted task
      (with-stats-collection ("COPY Threads Completion" :section :post
                                                        :use-result-as-read t
                                                        :use-result-as-rows t)
        (loop :repeat task-count
           :do (handler-case
                   (destructuring-bind (task table seconds)
                       (lp:receive-result channel)
                     (log-message :debug
                                  "Finished processing ~a for ~s ~50T~6$s"
                                  task (format-table-name table) seconds))
                 (condition (e)
                   (log-message :fatal "~a" e)))
           :finally (progn
                      (lp:end-kernel :wait nil)
                      (return task-count))))
      (lp:end-kernel :wait t))
    ;; re-create the indexes from the target table entry
    (create-indexes-again (target-db copy)
                          pgsql-catalog
                          :max-parallel-create-index max-parallel-create-index
                          :drop-indexes drop-indexes)))

View File

@ -1,548 +0,0 @@
;;;
;;; Generic API for pgloader sources
;;; Methods for database source types (with introspection)
;;;
(in-package :pgloader.load)
;;;
;;; Prepare the PostgreSQL database before streaming the data into it.
;;;
(defmethod prepare-pgsql-database ((copy db-copy)
                                   (catalog catalog)
                                   &key
                                     truncate
                                     create-tables
                                     create-schemas
                                     drop-schema
                                     drop-indexes
                                     set-table-oids
                                     materialize-views
                                     foreign-keys
                                     include-drop)
  "Prepare the target PostgreSQL database: create tables casting datatypes
   from the MySQL definitions, prepare index definitions and create target
   tables for materialized views.

   That function mutates index definitions in ALL-INDEXES."
  (log-message :notice "Prepare PostgreSQL database.")
  (with-pgsql-transaction (:pgconn (target-db copy))
    (finalize-catalogs catalog (pgconn-variant (target-db copy)))
    (if create-tables
        ;; schema-creation branch: schemas, SQL types, then tables
        (progn
          (when create-schemas
            (with-stats-collection ("Create Schemas" :section :pre
                                                     :use-result-as-read t
                                                     :use-result-as-rows t)
              (create-schemas catalog
                              :include-drop drop-schema
                              :client-min-messages :error)))
          ;; create new SQL types (ENUMs, SETs) if needed and before we
          ;; get to the table definitions that will use them
          (with-stats-collection ("Create SQL Types" :section :pre
                                                     :use-result-as-read t
                                                     :use-result-as-rows t)
            ;; some SQL types come from extensions (ip4r, hstore, etc)
            (create-extensions catalog
                               :include-drop include-drop
                               :if-not-exists t
                               :client-min-messages :error)
            (create-sqltypes catalog
                             :include-drop include-drop
                             :client-min-messages :error))
          ;; now the tables
          (with-stats-collection ("Create tables" :section :pre
                                                  :use-result-as-read t
                                                  :use-result-as-rows t)
            (create-tables catalog
                           :include-drop include-drop
                           :client-min-messages :error)))
        (progn
          ;; if we're not going to create the tables, now is the time to
          ;; remove the constraints: indexes, primary keys, foreign keys
          ;;
          ;; to be able to do that properly, get the constraints from
          ;; the pre-existing target database catalog
          (let* ((pgversion (pgconn-major-version (target-db copy)))
                 (pgsql-catalog
                  (fetch-pgsql-catalog (db-name (target-db copy))
                                       :source-catalog catalog
                                       :pgversion pgversion)))
            (merge-catalogs catalog pgsql-catalog))
          ;; now the foreign keys and only then the indexes, because a
          ;; drop constraint on a primary key cascades to the drop of
          ;; any foreign key that targets the primary key
          (when foreign-keys
            (with-stats-collection ("Drop Foreign Keys" :section :pre
                                                        :use-result-as-read t
                                                        :use-result-as-rows t)
              (drop-pgsql-fkeys catalog :log-level :notice)))
          (when drop-indexes
            (with-stats-collection ("Drop Indexes" :section :pre
                                                   :use-result-as-read t
                                                   :use-result-as-rows t)
              ;; we want to error out early in case we can't DROP the
              ;; index, don't CASCADE
              (drop-indexes catalog :cascade nil :log-level :notice)))
          (when truncate
            (with-stats-collection ("Truncate" :section :pre
                                               :use-result-as-read t
                                               :use-result-as-rows t)
              (truncate-tables catalog)))))
    ;; Some database sources allow the same index name being used
    ;; against several tables, so we add the PostgreSQL table OID in the
    ;; index name, to differenciate. Set the table oids now.
    (when (and create-tables set-table-oids)
      (with-stats-collection ("Set Table OIDs" :section :pre
                                               :use-result-as-read t
                                               :use-result-as-rows t)
        (set-table-oids catalog :variant (pgconn-variant (target-db copy)))))
    ;; We might have to MATERIALIZE VIEWS
    (when (and create-tables materialize-views)
      (with-stats-collection ("Create MatViews Tables" :section :pre
                                                       :use-result-as-read t
                                                       :use-result-as-rows t)
        (create-views catalog
                      :include-drop include-drop
                      :client-min-messages :error))))
  ;; Citus Support
  ;;
  ;; We need a separate transaction here in some cases, because of the
  ;; distributed DDL support from Citus, to avoid the following error:
  ;;
  ;;   ERROR Database error 25001: cannot establish a new connection for
  ;;   placement 2299, since DDL has been executed on a connection that is in
  ;;   use
  ;;
  (when (catalog-distribution-rules catalog)
    (with-pgsql-transaction (:pgconn (target-db copy))
      (with-stats-collection ("Citus Distribute Tables" :section :pre)
        (create-distributed-table (catalog-distribution-rules catalog)))))
  ;; log the catalog we just fetched and (maybe) merged
  (log-message :data "CATALOG: ~s" catalog))
(defmethod complete-pgsql-database ((copy db-copy)
                                    (catalog catalog)
                                    pkeys
                                    &key
                                      foreign-keys
                                      create-indexes
                                      create-triggers
                                      reset-sequences)
  "After loading the data into PostgreSQL, we can now reset the sequences
   and declare foreign keys.  PKEYS is the list of ALTER TABLE statements
   turning UNIQUE indexes into PRIMARY KEYS."
  ;;
  ;; Now Reset Sequences, the good time to do that is once the whole data
  ;; has been imported and once we have the indexes in place, as max() is
  ;; able to benefit from the indexes. In particular avoid doing that step
  ;; while CREATE INDEX statements are in flight (avoid locking).
  ;;
  (log-message :notice "Completing PostgreSQL database.")
  (when reset-sequences
    (reset-sequences (clone-connection (target-db copy)) catalog))
  (handler-case
      (with-pgsql-transaction (:pgconn (clone-connection (target-db copy)))
        ;;
        ;; Turn UNIQUE indexes into PRIMARY KEYS now
        ;;
        (when create-indexes
          (pgsql-execute-with-timing :post "Primary Keys" pkeys
                                     :log-level :notice))
        ;;
        ;; Foreign Key Constraints
        ;;
        ;; We need to have finished loading both the reference and the
        ;; refering tables to be able to build the foreign keys, so wait
        ;; until all tables and indexes are imported before doing that.
        ;;
        (when foreign-keys
          (create-pgsql-fkeys catalog
                              :section :post
                              :label "Create Foreign Keys"
                              :log-level :notice))
        ;;
        ;; Triggers and stored procedures -- includes special default values
        ;;
        (when create-triggers
          (create-triggers catalog
                           :section :post
                           :label "Create Triggers"))
        ;;
        ;; Add schemas that needs to be in the search_path to the database
        ;; search_path, when using PostgreSQL. Redshift doesn't know how to
        ;; do that, unfortunately.
        ;;
        (unless (eq :redshift (pgconn-variant (target-db copy)))
          (add-to-search-path catalog
                              :section :post
                              :label "Set Search Path"))
        ;;
        ;; And now, comments on tables and columns.
        ;;
        (comment-on-tables-and-columns catalog
                                       :section :post
                                       :label "Install Comments"))
    (postgresql-unavailable (condition)
      (log-message :error "~a" condition)
      (log-message :error
                   "Complete PostgreSQL database reconnecting to PostgreSQL.")
      ;; in order to avoid Socket error in "connect": ECONNREFUSED if we
      ;; try just too soon, wait a little
      (sleep 2)
      ;;
      ;; Reset Sequence can be done several times safely, and the rest of the
      ;; operations run in a single transaction, so if the connection was lost,
      ;; nothing has been done. Retry.
      ;;
      ;; NOTE(review): the retry recurses with no attempt counter, so a
      ;; permanently-down server loops forever — confirm this is intended.
      (complete-pgsql-database copy
                               catalog
                               pkeys
                               :foreign-keys foreign-keys
                               :create-indexes create-indexes
                               :create-triggers create-triggers
                               :reset-sequences reset-sequences))))
(defun process-catalog (copy catalog &key alter-table alter-schema distribute)
  "Do all the PostgreSQL catalog tweaking here: casts, index WHERE clause
   rewriting, pgloader level alter schema and alter table commands.
   Mutates CATALOG in place; the steps below are order dependent."
  (log-message :info "Processing source catalogs")
  ;; cast the catalog into something PostgreSQL can work on
  (cast catalog)
  ;; support code for index filters (where clauses); the dialect is keyed
  ;; on the concrete class of the COPY source object
  (process-index-definitions catalog :sql-dialect (class-name (class-of copy)))
  ;; we may have to alter schemas
  (when alter-schema
    (alter-schema catalog alter-schema))
  ;; if asked, now alter the catalog with given rules: the alter-table
  ;; keyword parameter actually contains a set of alter table rules.
  (when alter-table
    (alter-table catalog alter-table))
  ;; we also support schema changes necessary for Citus distribution
  (when distribute
    (log-message :info "Applying distribution rules")
    (setf (catalog-distribution-rules catalog)
          (citus-distribute-schema catalog distribute))))
(defun optimize-table-copy-ordering (catalog)
  "Return a list of tables to copy over in optimized order: tables with the
   largest row count estimates first so the longest loads start early,
   followed by the materialized views (empty when views are unsupported).
   Works on copies of the catalog lists, so CATALOG is left untouched."
  (let ((table-list (copy-list (table-list catalog)))
        (view-list (copy-list (view-list catalog))))
    ;; when materialized views are not supported, view-list is empty here
    (cond
      ((notevery #'zerop (mapcar #'table-row-count-estimate table-list))
       ;; CL:SORT is destructive: after the call TABLE-LIST may point into
       ;; a scrambled list, so only SORTED-TABLE-LIST may be used below.
       ;; (Bug fix: the log used to iterate the clobbered TABLE-LIST, which
       ;; did not reflect the actual processing order.)
       (let ((sorted-table-list
              (sort table-list #'> :key #'table-row-count-estimate)))
         (log-message :notice
                      "Processing tables in this order: ~{~a: ~d rows~^, ~}"
                      (loop :for table :in (append sorted-table-list view-list)
                         :collect (format-table-name table)
                         :collect (table-row-count-estimate table)))
         (nconc sorted-table-list view-list)))
      (t
       ;; no estimates available: keep the catalog order
       (nconc table-list view-list)))))
;;;
;;; Generic enough implementation of the copy-database method.
;;;
(defmethod copy-database ((copy db-copy)
                          &key
                            (on-error-stop *on-error-stop*)
                            (worker-count 4)
                            (concurrency 1)
                            (multiple-readers nil)
                            max-parallel-create-index
                            (truncate nil)
                            (disable-triggers nil)
                            (data-only nil)
                            (schema-only nil)
                            (create-schemas t)
                            (create-tables t)
                            (include-drop t)
                            (drop-schema nil)
                            (create-indexes t)
                            (index-names :uniquify)
                            (reset-sequences t)
                            (foreign-keys t)
                            (reindex nil)
                            (after-schema nil)
                            distribute
                            including
                            excluding
                            set-table-oids
                            alter-table
                            alter-schema
                            materialize-views)
  "Export database source data and Import it into PostgreSQL.

   Orchestrates the whole migration: fetch source metadata, process the
   catalog, prepare the target schema, copy table data in parallel, build
   indexes as each table finishes, then complete the target database."
  (log-message :log "Migrating from ~a" (source-db copy))
  (log-message :log "Migrating into ~a" (target-db copy))
  (let* ((*on-error-stop* on-error-stop)
         ;; derive the effective schema/data flags from the user options
         (copy-data (or data-only (not schema-only)))
         (create-ddl (or schema-only (not data-only)))
         (create-tables (and create-tables create-ddl))
         (create-schemas (and create-schemas create-ddl))
         ;; foreign keys has a special meaning in data-only mode;
         ;; Redshift targets disable fkeys, index handling and sequences
         (foreign-keys (if (eq :redshift (pgconn-variant (target-db copy)))
                           nil
                           foreign-keys))
         (drop-indexes (if (eq :redshift (pgconn-variant (target-db copy)))
                           nil
                           (or reindex
                               (and include-drop create-ddl))))
         (create-indexes (if (eq :redshift (pgconn-variant (target-db copy)))
                             nil
                             (or reindex
                                 (and create-indexes drop-indexes create-ddl))))
         (reset-sequences (if (eq :redshift (pgconn-variant (target-db copy)))
                              nil
                              reset-sequences))
         (*preserve-index-names*
          (or (eq :preserve index-names)
              ;; if we didn't create the tables, we are re-installing the
              ;; pre-existing indexes
              (not create-tables)))
         ;; one kernel/channel pair for data copy tasks
         (copy-kernel (make-kernel worker-count))
         (copy-channel (let ((lp:*kernel* copy-kernel)) (lp:make-channel)))
         ;; fetch the source catalog; any metadata error aborts the load
         (catalog (handler-case
                      (fetch-metadata
                       copy
                       (make-catalog
                        :name (typecase (source-db copy)
                                (db-connection
                                 (db-name (source-db copy)))
                                (fd-connection
                                 (pathname-name
                                  (fd-path (source-db copy))))))
                       :materialize-views materialize-views
                       :create-indexes create-indexes
                       :foreign-keys foreign-keys
                       :including including
                       :excluding excluding)
                    (mssql::mssql-error (e)
                      (log-message :error "MSSQL ERROR: ~a" e)
                      (log-message :log "You might need to review the FreeTDS protocol version in your freetds.conf file, see http://www.freetds.org/userguide/choosingtdsprotocol.htm")
                      (return-from copy-database))
                    #+pgloader-image
                    (condition (e)
                      (log-message :error
                                   "~a: ~a"
                                   (conn-type (source-db copy))
                                   e)
                      (return-from copy-database))))
         ;; accumulates the "turn unique index into pkey" statements
         pkeys
         ;; per-table count of writer tasks still running, used to decide
         ;; when a table is fully copied and its indexing may start
         (writers-count (make-hash-table :size (count-tables catalog)))
         ;; a separate kernel/channel pair for CREATE INDEX tasks
         (max-indexes (when create-indexes
                        (max-indexes-per-table catalog)))
         (idx-kernel (when (and max-indexes (< 0 max-indexes))
                       (make-kernel (or max-parallel-create-index
                                        max-indexes))))
         (idx-channel (when idx-kernel
                        (let ((lp:*kernel* idx-kernel))
                          (lp:make-channel))))
         (task-count 0))
    ;; apply catalog level transformations to support the database migration
    ;; that's CAST rules, index WHERE clause rewriting and ALTER commands
    (handler-case
        (process-catalog copy catalog
                         :alter-table alter-table
                         :alter-schema alter-schema
                         :distribute distribute)
      #+pgloader-image
      ((or citus-rule-table-not-found citus-rule-is-missing-from-list) (e)
        (log-message :fatal "~a" e)
        (return-from copy-database))
      #+pgloader-image
      (condition (e)
        (log-message :fatal "Failed to process catalogs: ~a" e)
        (return-from copy-database)))
    ;; if asked, first drop/create the tables on the PostgreSQL side
    (handler-case
        (progn
          (prepare-pgsql-database copy
                                  catalog
                                  :truncate truncate
                                  :create-tables create-tables
                                  :create-schemas create-schemas
                                  :drop-indexes drop-indexes
                                  :drop-schema drop-schema
                                  :include-drop include-drop
                                  :foreign-keys foreign-keys
                                  :set-table-oids set-table-oids
                                  :materialize-views materialize-views)
          ;; if there's an AFTER SCHEMA DO/EXECUTE command, now is the time
          ;; to run it.
          (when after-schema
            (pgloader.parser::execute-sql-code-block (target-db copy)
                                                     :pre
                                                     after-schema
                                                     "after schema")))
      ;;
      ;; In case some error happens in the preparatory transaction, we
      ;; need to stop now and refrain from trying to load the data into
      ;; an incomplete schema.
      ;;
      (cl-postgres:database-error (e)
        (declare (ignore e))            ; a log has already been printed
        (log-message :fatal "Failed to create the schema, see above.")
        ;; we might have some cleanup to do...
        (cleanup copy catalog :materialize-views materialize-views)
        (return-from copy-database)))
    ;; submit one copy-from per table, biggest tables first
    (loop
       :for table :in (optimize-table-copy-ordering catalog)
       :do (let ((table-source (instanciate-table-copy-object copy table)))
             ;; first COPY the data from source to PostgreSQL, using copy-kernel
             (if (not copy-data)
                 ;; start indexing straight away then
                 (when create-indexes
                   (alexandria:appendf
                    pkeys
                    (create-indexes-in-kernel (target-db copy)
                                              table
                                              idx-kernel
                                              idx-channel)))
                 ;; prepare the writers-count hash-table, as we start
                 ;; copy-from, we have concurrency tasks writing.
                 (progn                 ; when copy-data
                   (setf (gethash table writers-count) concurrency)
                   (incf task-count
                         (copy-from table-source
                                    :concurrency concurrency
                                    :multiple-readers multiple-readers
                                    :kernel copy-kernel
                                    :channel copy-channel
                                    :on-error-stop on-error-stop
                                    :disable-triggers disable-triggers))))))
    ;; now end the kernels
    ;; and each time a table is done, launch its indexing
    (when copy-data
      (let ((lp:*kernel* copy-kernel))
        (with-stats-collection ("COPY Threads Completion" :section :post
                                                          :use-result-as-read t
                                                          :use-result-as-rows t)
          (loop :repeat task-count
             :do (destructuring-bind (task table seconds)
                     (lp:receive-result copy-channel)
                   (log-message :debug
                                "Finished processing ~a for ~s ~50T~6$s"
                                task (format-table-name table) seconds)
                   (when (eq :writer task)
                     ;;
                     ;; Start the CREATE INDEX parallel tasks only when
                     ;; the data has been fully copied over to the
                     ;; corresponding table, that's when the writers
                     ;; count is down to zero.
                     ;;
                     (decf (gethash table writers-count))
                     (log-message :debug "writers-counts[~a] = ~a"
                                  (format-table-name table)
                                  (gethash table writers-count))
                     (when (and create-indexes
                                (zerop (gethash table writers-count)))
                       (let* ((stats pgloader.monitor::*sections*)
                              (section (get-state-section stats :data))
                              (table-stats (pgstate-get-label section table))
                              (pprint-secs
                               (pgloader.state::format-interval seconds nil)))
                         ;; in CCL we have access to the *sections* dynamic
                         ;; binding from another thread, in SBCL we access
                         ;; an empty copy.
                         (log-message :notice
                                      "DONE copying ~a in ~a~@[ for ~d rows~]"
                                      (format-table-name table)
                                      pprint-secs
                                      (when table-stats
                                        (pgtable-rows table-stats))))
                       (alexandria:appendf
                        pkeys
                        (create-indexes-in-kernel (target-db copy)
                                                  table
                                                  idx-kernel
                                                  idx-channel)))))
             :finally (progn
                        (lp:end-kernel :wait nil)
                        (return worker-count))))))
    (log-message :info "Done with COPYing data, waiting for indexes")
    (when create-indexes
      (let ((lp:*kernel* idx-kernel))
        ;; wait until the indexes are done being built...
        ;; don't forget accounting for that waiting time.
        (with-stats-collection ("Index Build Completion" :section :post
                                                         :use-result-as-read t
                                                         :use-result-as-rows t)
          (loop :for count :below (count-indexes catalog)
             :do (lp:receive-result idx-channel))
          (lp:end-kernel :wait t)
          (log-message :info "Done waiting for indexes")
          (count-indexes catalog))))
    ;;
    ;; Complete the PostgreSQL database before handing over.
    ;;
    (complete-pgsql-database copy
                             catalog
                             pkeys
                             :foreign-keys foreign-keys
                             :create-indexes create-indexes
                             ;; only create triggers (for default values)
                             ;; when we've been responsible for creating the
                             ;; tables -- otherwise assume the schema is
                             ;; good as it is
                             :create-triggers create-tables
                             :reset-sequences reset-sequences)
    ;;
    ;; Time to cleanup!
    ;;
    (cleanup copy catalog :materialize-views materialize-views)))

View File

@ -1,8 +1,5 @@
(in-package #:pgloader) (in-package #:pgloader)
;;;
;;; Now some tooling
;;;
(defun log-threshold (min-message &key quiet verbose debug) (defun log-threshold (min-message &key quiet verbose debug)
"Return the internal value to use given the script parameters." "Return the internal value to use given the script parameters."
(cond ((and debug verbose) :data) (cond ((and debug verbose) :data)
@ -45,18 +42,6 @@
(("load-lisp-file" #\l) :type string :list t :optional t (("load-lisp-file" #\l) :type string :list t :optional t
:documentation "Read user code from files") :documentation "Read user code from files")
("dry-run" :type boolean
:documentation "Only check database connections, don't load anything.")
("on-error-stop" :type boolean
:documentation "Refrain from handling errors properly.")
("no-ssl-cert-verification"
:type boolean
:documentation "Instruct OpenSSL to bypass verifying certificates.")
(("context" #\C) :type string :documentation "Command Context Variables")
(("with") :type string :list t :optional t (("with") :type string :list t :optional t
:documentation "Load options") :documentation "Load options")
@ -82,17 +67,14 @@
:documentation "SQL script to run after loading the data") :documentation "SQL script to run after loading the data")
("self-upgrade" :type string :optional t ("self-upgrade" :type string :optional t
:documentation "Path to pgloader newer sources") :documentation "Path to pgloader newer sources")))
("regress" :type boolean :optional t (defun print-backtrace (condition debug stream)
:documentation "Drive regression testing")))
(defun print-backtrace (condition debug)
"Depending on DEBUG, print out the full backtrace or just a shorter "Depending on DEBUG, print out the full backtrace or just a shorter
message on STREAM for given CONDITION." message on STREAM for given CONDITION."
(if debug (if debug
(trivial-backtrace:print-backtrace condition :output nil) (trivial-backtrace:print-backtrace condition :output stream :verbose t)
(trivial-backtrace:print-condition condition nil))) (trivial-backtrace:print-condition condition stream)))
(defun mkdir-or-die (path debug &optional (stream *standard-output*)) (defun mkdir-or-die (path debug &optional (stream *standard-output*))
"Create a directory at given PATH and exit with an error message when "Create a directory at given PATH and exit with an error message when
@ -105,7 +87,7 @@
(condition (e) (condition (e)
;; any error here is a panic ;; any error here is a panic
(if debug (if debug
(format stream "PANIC: ~a~%" (print-backtrace e debug)) (print-backtrace e debug stream)
(format stream "PANIC: ~a.~%" e)) (format stream "PANIC: ~a.~%" e))
(uiop:quit)))) (uiop:quit))))
@ -128,7 +110,7 @@
(format t "~&~a [ option ... ] command-file ..." (first argv)) (format t "~&~a [ option ... ] command-file ..." (first argv))
(format t "~&~a [ option ... ] SOURCE TARGET" (first argv)) (format t "~&~a [ option ... ] SOURCE TARGET" (first argv))
(command-line-arguments:show-option-help *opt-spec*) (command-line-arguments:show-option-help *opt-spec*)
(when quit (uiop:quit +os-code-error-usage+))) (when quit (uiop:quit)))
(defvar *self-upgraded-already* nil (defvar *self-upgraded-already* nil
"Keep track if we did reload our own source code already.") "Keep track if we did reload our own source code already.")
@ -139,7 +121,7 @@
(uiop:parse-unix-namestring namestring)))) (uiop:parse-unix-namestring namestring))))
(unless pgloader-pathname (unless pgloader-pathname
(format t "No such directory: ~s~%" namestring) (format t "No such directory: ~s~%" namestring)
(uiop:quit +os-code-error+)) (uiop:quit))
;; now the real thing ;; now the real thing
(handler-case (handler-case
@ -171,7 +153,7 @@
(defvar *--load-list-file-extension-whitelist* '("lisp" "lsp" "cl" "asd") (defvar *--load-list-file-extension-whitelist* '("lisp" "lsp" "cl" "asd")
"White list of file extensions allowed with the --load option.") "White list of file extensions allowed with the --load option.")
(defun load-extra-transformation-functions (filename &optional verbose) (defun load-extra-transformation-functions (filename)
"Load an extra filename to tweak pgloader's behavior." "Load an extra filename to tweak pgloader's behavior."
(let ((pathname (uiop:parse-native-namestring filename))) (let ((pathname (uiop:parse-native-namestring filename)))
(unless (member (pathname-type pathname) (unless (member (pathname-type pathname)
@ -179,8 +161,7 @@
:test #'string=) :test #'string=)
(error "Unknown lisp file extension: ~s" (pathname-type pathname))) (error "Unknown lisp file extension: ~s" (pathname-type pathname)))
(format t "Loading code from ~s~%" pathname) (load (compile-file pathname :verbose nil :print nil))))
(load (compile-file pathname :verbose verbose :print verbose))))
(defun main (argv) (defun main (argv)
"Entry point when building an executable image with buildapp" "Entry point when building an executable image with buildapp"
@ -196,13 +177,10 @@
(destructuring-bind (&key help version quiet verbose debug logfile (destructuring-bind (&key help version quiet verbose debug logfile
list-encodings upgrade-config list-encodings upgrade-config
dry-run on-error-stop context
((:load-lisp-file load)) ((:load-lisp-file load))
client-min-messages log-min-messages summary client-min-messages log-min-messages summary
root-dir self-upgrade root-dir self-upgrade
with set field cast type encoding before after with set field cast type encoding before after)
no-ssl-cert-verification
regress)
options options
;; parse the log thresholds ;; parse the log thresholds
@ -225,11 +203,6 @@
(let ((*self-upgraded-already* t)) (let ((*self-upgraded-already* t))
(main argv)))) (main argv))))
;; --list-encodings, -E
(when list-encodings
(show-encodings)
(uiop:quit +os-code-success+))
;; First care about the root directory where pgloader is supposed to ;; First care about the root directory where pgloader is supposed to
;; output its data logs and reject files ;; output its data logs and reject files
(let ((root-dir-truename (or (probe-file root-dir) (let ((root-dir-truename (or (probe-file root-dir)
@ -239,20 +212,8 @@
;; Set parameters that come from the environement ;; Set parameters that come from the environement
(init-params-from-environment) (init-params-from-environment)
;; Read the context file (if given) and the environment
(handler-case
(initialize-context context)
(condition (e)
(format t "Couldn't read ini file ~s: ~a~%" context e)
(usage argv)))
;; Then process options ;; Then process options
(when debug (when debug
(format t "pgloader version ~a~%" *version-string*)
#+pgloader-image
(format t "compiled with ~a ~a~%"
(lisp-implementation-type)
(lisp-implementation-version))
#+sbcl #+sbcl
(format t "sb-impl::*default-external-format* ~s~%" (format t "sb-impl::*default-external-format* ~s~%"
sb-impl::*default-external-format*) sb-impl::*default-external-format*)
@ -264,14 +225,14 @@
(lisp-implementation-type) (lisp-implementation-type)
(lisp-implementation-version))) (lisp-implementation-version)))
(when (or help) (when help
(usage argv)) (usage argv))
(when (or help version) (uiop:quit +os-code-success+)) (when (or help version) (uiop:quit))
(when (null arguments) (when list-encodings
(usage argv) (show-encodings)
(uiop:quit +os-code-error-usage+)) (uiop:quit))
(when upgrade-config (when upgrade-config
(loop for filename in arguments (loop for filename in arguments
@ -281,25 +242,20 @@
(pgloader.ini:convert-ini-into-commands filename)) (pgloader.ini:convert-ini-into-commands filename))
(condition (c) (condition (c)
(when debug (invoke-debugger c)) (when debug (invoke-debugger c))
(uiop:quit +os-code-error+)))) (uiop:quit 1)))
(uiop:quit +os-code-success+)) (format t "~%~%"))
(uiop:quit))
;; Should we run in dry-run mode? (when load
(setf *dry-run* dry-run) (loop for filename in load do
(handler-case
;; Should we stop at first error? (load-extra-transformation-functions filename)
(setf *on-error-stop* on-error-stop) (condition (e)
(format *standard-output*
;; load extra lisp code provided for by the user "Failed to load lisp source file ~s~%"
(when load filename)
(loop :for filename :in load :do (format *standard-output* "~a~%" e)
(handler-case (uiop:quit 3)))))
(load-extra-transformation-functions filename debug)
((or simple-condition serious-condition) (e)
(format *error-output*
"Failed to load lisp source file ~s~%" filename)
(format *error-output* "~a~%~%" e)
(uiop:quit +os-code-error+)))))
;; Now process the arguments ;; Now process the arguments
(when arguments (when arguments
@ -307,89 +263,237 @@
(let* ((*log-filename* (log-file-name logfile)) (let* ((*log-filename* (log-file-name logfile))
(*summary-pathname* (parse-summary-filename summary debug))) (*summary-pathname* (parse-summary-filename summary debug)))
(handler-case (with-monitor ()
;; The handler-case is to catch unhandled exceptions at the ;; tell the user where to look for interesting things
;; top level. (log-message :log "Main logs in '~a'" (probe-file *log-filename*))
;; (log-message :log "Data errors in '~a'~%" *root-dir*)
;; The handler-bind below is to be able to offer a
;; meaningful backtrace to the user in case of unexpected
;; conditions being signaled.
(handler-bind
(((and serious-condition (not (or monitor-error
cli-parsing-error
source-definition-error
regression-test-error)))
#'(lambda (condition)
(format *error-output* "KABOOM!~%")
(format *error-output* "~a: ~a~%~a~%~%"
(class-name (class-of condition))
condition
(print-backtrace condition debug)))))
(with-monitor () (handler-case
;; tell the user where to look for interesting things ;; The handler-case is to catch unhandled exceptions at the
(log-message :log "Main logs in '~a'" ;; top level.
(uiop:native-namestring *log-filename*)) ;;
(log-message :log "Data errors in '~a'~%" *root-dir*) ;; The handler-bind is to be able to offer a meaningful
;; backtrace to the user in case of unexpected conditions
;; being signaled.
(handler-bind
((condition
#'(lambda (condition)
(log-message :fatal "We have a situation here.")
(print-backtrace condition debug *standard-output*))))
(when no-ssl-cert-verification ;; if there are exactly two arguments in the command
(setf cl+ssl:*make-ssl-client-stream-verify-default* nil)) ;; line, try and process them as source and target
;; arguments
(if (= 2 (length arguments))
(let* ((type (parse-cli-type type))
(source (first arguments))
(source (if type
(parse-source-string-for-type type source)
(parse-source-string source)))
(type (parse-cli-type (conn-type source)))
(target (parse-target-string (second arguments))))
(cond ;; some verbosity about the parsing "magic"
((and regress (= 1 (length arguments))) (log-message :info "SOURCE: ~s" source)
(process-regression-test (first arguments))) (log-message :info "TARGET: ~s" target)
(regress (cond ((and (null source) (null target)
(log-message :fatal "Regression testing requires a single .load file as input.")) (probe-file
(uiop:parse-unix-namestring
(first arguments)))
(probe-file
(uiop:parse-unix-namestring
(second arguments))))
(mapcar #'process-command-file arguments))
((= 2 (length arguments)) ((null source)
;; if there are exactly two arguments in the command (log-message :fatal
;; line, try and process them as source and target "Failed to parse ~s as a source URI."
;; arguments (first arguments))
(process-source-and-target (first arguments) (log-message :log "You might need to use --type."))
(second arguments)
type encoding
set with field cast
before after))
(t
;; process the files
;; other options are not going to be used here
(let ((cli-options `(("--type" ,type)
("--encoding" ,encoding)
("--set" ,set)
("--with" ,with)
("--field" ,field)
("--cast" ,cast)
("--before" ,before)
("--after" ,after))))
(loop :for (cli-option-name cli-option-value)
:in cli-options
:when cli-option-value
:do (log-message
:fatal
"Option ~s is ignored when using a load file"
cli-option-name))
;; when we issued a single error previously, do nothing ((null target)
(unless (remove-if #'null (mapcar #'second cli-options)) (log-message :fatal
(process-command-file arguments))))))) "Failed to parse ~s as a PostgreSQL database URI."
(second arguments))))
((or cli-parsing-error source-definition-error) (c) ;; so, we actually have all the specs for the
(format *error-output* "~%~a~%~%" c) ;; job on the command line now.
(uiop:quit +os-code-error-bad-source+)) (when (and source target)
(load-data :from source
:into target
:encoding (parse-cli-encoding encoding)
:options (parse-cli-options type with)
:gucs (parse-cli-gucs set)
:fields (parse-cli-fields type field)
:casts (parse-cli-casts cast)
:before (parse-sql-file before)
:after (parse-sql-file after)
:start-logger nil)))
(regression-test-error (c) ;; process the files
(format *error-output* "~%~a~%~%" c) (mapcar #'process-command-file arguments)))
(uiop:quit +os-code-error-regress+))
(monitor-error (c) (source-definition-error (c)
(format *error-output* "~a~%" c) (log-message :fatal "~a" c)
(uiop:quit +os-code-error+)) (uiop:quit 2))
(serious-condition (c) (condition (c)
(format *error-output* "~%What I am doing here?~%~%") (when debug (invoke-debugger c))
(format *error-output* "~a~%~%" c) (uiop:quit 1))))))
(uiop:quit +os-code-error+)))))
;; done. ;; done.
(uiop:quit +os-code-success+))))) (uiop:quit)))))
(defun process-command-file (filename)
"Process FILENAME as a pgloader command file (.load)."
(let ((truename (probe-file filename)))
(if truename
(run-commands truename :start-logger nil)
(log-message :error "Can not find file: ~s" filename)))
(format t "~&"))
(defun run-commands (source
&key
(start-logger t)
((:summary *summary-pathname*) *summary-pathname*)
((:log-filename *log-filename*) *log-filename*)
((:log-min-messages *log-min-messages*) *log-min-messages*)
((:client-min-messages *client-min-messages*) *client-min-messages*))
"SOURCE can be a function, which is run, a list, which is compiled as CL
code then run, a pathname containing one or more commands that are parsed
then run, or a commands string that is then parsed and each command run."
(with-monitor (:start-logger start-logger)
(let* ((funcs
(typecase source
(function (list source))
(list (list (compile nil source)))
(pathname (mapcar (lambda (expr) (compile nil expr))
(parse-commands-from-file source)))
(t (mapcar (lambda (expr) (compile nil expr))
(if (probe-file source)
(parse-commands-from-file source)
(parse-commands source)))))))
;; maybe duplicate the summary to a file
(let* ((summary-stream (when *summary-pathname*
(open *summary-pathname*
:direction :output
:if-exists :rename
:if-does-not-exist :create)))
(*report-stream* (or summary-stream *standard-output*)))
(unwind-protect
;; run the commands
(loop for func in funcs do (funcall func))
;; cleanup
(when summary-stream (close summary-stream)))))))
;;;
;;; Main API to use from outside of pgloader.
;;;
(define-condition source-definition-error (error)
((mesg :initarg :mesg :reader source-definition-error-mesg))
(:report (lambda (err stream)
(format stream "~a" (source-definition-error-mesg err)))))
(defun load-data (&key ((:from source)) ((:into target))
encoding fields options gucs casts before after
(start-logger t))
"Load data from SOURCE into TARGET."
(declare (type connection source)
(type pgsql-connection target))
;; some preliminary checks
(when (and (typep source 'csv-connection)
(not (typep source 'copy-connection))
(null fields))
(error 'source-definition-error
:mesg "This data source requires fields definitions."))
(when (and (typep source 'csv-connection) (null (pgconn-table-name target)))
(error 'source-definition-error
:mesg "This data source require a table name target."))
(when (and (typep source 'fixed-connection) (null (pgconn-table-name target)))
(error 'source-definition-error
:mesg "Fixed-width data source require a table name target."))
(with-monitor (:start-logger start-logger)
(when (and casts (not (member (type-of source)
'(sqlite-connection
mysql-connection
mssql-connection))))
(log-message :log "Cast rules are ignored for this sources."))
;; now generates the code for the command
(log-message :debug "LOAD DATA FROM ~s" source)
(run-commands
(process-relative-pathnames
(uiop:getcwd)
(typecase source
(copy-connection
(lisp-code-for-loading-from-copy source fields target
:encoding (or encoding :default)
:gucs gucs
:copy-options options
:before before
:after after))
(fixed-connection
(lisp-code-for-loading-from-fixed source fields target
:encoding encoding
:gucs gucs
:fixed-options options
:before before
:after after))
(csv-connection
(lisp-code-for-loading-from-csv source fields target
:encoding encoding
:gucs gucs
:csv-options options
:before before
:after after))
(dbf-connection
(lisp-code-for-loading-from-dbf source target
:gucs gucs
:dbf-options options
:before before
:after after))
(ixf-connection
(lisp-code-for-loading-from-ixf source target
:gucs gucs
:ixf-options options
:before before
:after after))
(sqlite-connection
(lisp-code-for-loading-from-sqlite source target
:gucs gucs
:casts casts
:sqlite-options options))
(mysql-connection
(lisp-code-for-loading-from-mysql source target
:gucs gucs
:casts casts
:mysql-options options
:before before
:after after))
(mssql-connection
(lisp-code-for-loading-from-mssql source target
:gucs gucs
:casts casts
:mssql-options options
:before before
:after after))))
:start-logger start-logger)))

View File

@ -73,63 +73,31 @@
(:syb-xml 163) (:syb-xml 163)
) )
(defun unsigned-to-signed (byte n)
(declare (type fixnum n) (type unsigned-byte byte))
(logior byte (- (mask-field (byte 1 (1- (* n 8))) byte))))
(defun sysdb-data-to-lisp (%dbproc data type len) (defun sysdb-data-to-lisp (%dbproc data type len)
(let ((syb-type (foreign-enum-keyword '%syb-value-type type))) (if (> len 0)
(case syb-type (case (foreign-enum-keyword '%syb-value-type type)
;; we accept emtpy string (len is 0) ((:syb-varchar :syb-text) (foreign-string-to-lisp data :count len))
((:syb-char :syb-varchar :syb-text :syb-msxml) (:syb-char (string-trim #(#\Space) (foreign-string-to-lisp data :count len)))
(foreign-string-to-lisp data :count len)) ((:syb-bit :syb-bitn) (mem-ref data :int))
((:syb-int1 :syb-int2 :syb-int4) (mem-ref data :int))
(otherwise (:syb-int8 (mem-ref data :int8))
;; other types must have a non-zero len now, or we just return nil. (:syb-flt8 (mem-ref data :double))
(if (> len 0) (:syb-datetime
(case syb-type (with-foreign-pointer (%buf +numeric-buf-sz+)
((:syb-bit :syb-bitn) (mem-ref data :int)) (foreign-string-to-lisp %buf
(:syb-int1 (unsigned-to-signed (mem-ref data :unsigned-int) 1)) :count (%dbconvert %dbproc type data -1 :syb-char %buf +numeric-buf-sz+))))
(:syb-int2 (unsigned-to-signed (mem-ref data :unsigned-int) 2)) ((:syb-money :syb-money4 :syb-decimal :syb-numeric)
(:syb-int4 (unsigned-to-signed (mem-ref data :unsigned-int) 4)) (with-foreign-pointer (%buf +numeric-buf-sz+)
(:syb-int8 (mem-ref data :int8)) (parse-number:parse-number
(:syb-real (mem-ref data :float)) (foreign-string-to-lisp %buf
(:syb-flt8 (mem-ref data :double)) :count (%dbconvert %dbproc type data -1 :syb-char %buf +numeric-buf-sz+)))))
((:syb-datetime ((:syb-image :syb-binary :syb-varbinary :syb-blob)
:syb-datetime4 (let ((vector (make-array len :element-type '(unsigned-byte 8))))
:syb-msdate (dotimes (i len)
:syb-mstime (setf (aref vector i) (mem-ref data :uchar i)))
:syb-msdatetime2 vector))
:syb-msdatetimeoffset) (otherwise (error "not supported type ~A"
(with-foreign-pointer (%buf +numeric-buf-sz+) (foreign-enum-keyword '%syb-value-type type))))))
(let ((count
(%dbconvert %dbproc
type
data
-1
:syb-char
%buf
+numeric-buf-sz+)))
(foreign-string-to-lisp %buf :count count))))
((:syb-money :syb-money4 :syb-decimal :syb-numeric)
(with-foreign-pointer (%buf +numeric-buf-sz+)
(let ((count
(%dbconvert %dbproc
type
data
-1
:syb-char
%buf
+numeric-buf-sz+)))
(parse-number:parse-number
(foreign-string-to-lisp %buf :count count )))))
((:syb-image :syb-binary :syb-varbinary :syb-blob)
(let ((vector (make-array len :element-type '(unsigned-byte 8))))
(dotimes (i len)
(setf (aref vector i) (mem-ref data :uchar i)))
vector))
(otherwise (error "not supported type ~A"
(foreign-enum-keyword '%syb-value-type type)))))))))
;; (defconstant +dbbuffer+ 14) ;; (defconstant +dbbuffer+ 14)

File diff suppressed because it is too large Load Diff

View File

@ -6,55 +6,41 @@
(defpackage #:pgloader.params (defpackage #:pgloader.params
(:use #:cl) (:use #:cl)
(:export #:*version-string* (:export #:*version-string*
#:*dry-run*
#:*on-error-stop*
#:on-error-stop
#:*self-upgrade-immutable-systems* #:*self-upgrade-immutable-systems*
#:*fd-path-root* #:*csv-path-root*
#:*root-dir* #:*root-dir*
#:*log-filename* #:*log-filename*
#:*summary-pathname* #:*summary-pathname*
#:*client-min-messages* #:*client-min-messages*
#:*log-min-messages* #:*log-min-messages*
#:*report-stream* #:*report-stream*
#:*pgsql-reserved-keywords*
#:*identifier-case* #:*identifier-case*
#:*preserve-index-names*
#:*copy-batch-rows* #:*copy-batch-rows*
#:*copy-batch-size* #:*copy-batch-size*
#:*rows-per-range* #:*concurrent-batches*
#:*prefetch-rows*
#:*pg-settings* #:*pg-settings*
#:*mysql-settings* #:*state*
#:*mssql-settings*
#:*default-tmpdir* #:*default-tmpdir*
#:init-params-from-environment #:init-params-from-environment
#:getenv-default #:getenv-default))
#:*context*
#:+os-code-success+
#:+os-code-error+
#:+os-code-error-usage+
#:+os-code-error-bad-source+
#:+os-code-error-regress+))
(in-package :pgloader.params) (in-package :pgloader.params)
(defparameter *release* nil (defparameter *release* t
"non-nil when this build is a release build.") "non-nil when this build is a release build.")
(defparameter *major-version* "3.6") (defparameter *major-version* "3.2")
(defparameter *minor-version* "10") (defparameter *minor-version* "0")
(defun git-hash () (defun git-hash ()
"Return the current abbreviated git hash of the development tree." "Return the current abbreviated git hash of the development tree."
(handler-case (handler-case
(let ((git-hash `("git" "--no-pager" "log" "-n1" "--format=format:%h"))) (let ((git-hash `("git" "--no-pager" "log" "-n1" "--format=format:%h")))
(multiple-value-bind (stdout stderr code) (uiop:with-current-directory ((asdf:system-source-directory :pgloader))
(uiop:run-program git-hash :output :string (multiple-value-bind (stdout stderr code)
:directory (asdf:system-source-directory :pgloader)) (uiop:run-program git-hash :output :string)
(declare (ignore code stderr)) (declare (ignore code stderr))
stdout)) stdout)))
(condition (e) (condition (e)
;; in case anything happen, just return X.Y.Z~devel ;; in case anything happen, just return X.Y.Z~devel
(declare (ignore e)) (declare (ignore e))
@ -77,27 +63,18 @@
DEFAULT if that variable isn't set" DEFAULT if that variable isn't set"
(or (uiop:getenv name) default))) (or (uiop:getenv name) default)))
(defparameter *dry-run* nil ;; we can't use pgloader.utils:make-pgstate yet because params is compiled
"Set to non-nil to only run checks about the load setup.") ;; first in the asd definition, we just make the symbol a special variable.
(defparameter *state* nil
"State of the current loading.")
(defparameter *on-error-stop* nil (defparameter *csv-path-root* nil
"Set to non-nil to for pgloader to refrain from handling errors, quitting instead.") "Where to load CSV files from, when loading from an archive.")
(define-condition on-error-stop ()
((on-condition :initarg :on-condition :reader on-error-condition
:documentation "Condition that triggered on-error-stop"))
(:report (lambda (condition stream)
(format stream
"On Error Stop: ~a"
(on-error-condition condition)))))
(defparameter *fd-path-root* nil
"Where to load files from, when loading from an archive or expanding regexps.")
(defparameter *root-dir* (defparameter *root-dir*
#+unix (uiop:parse-native-namestring "/tmp/pgloader/") #+unix (make-pathname :directory "/tmp/pgloader/")
#-unix (uiop:merge-pathnames* #-unix (uiop:merge-pathnames*
(uiop:make-pathname* :directory '(:relative "pgloader")) "pgloader/"
(uiop:ensure-directory-pathname (getenv-default "Temp"))) (uiop:ensure-directory-pathname (getenv-default "Temp")))
"Top directory where to store all data logs and reject files.") "Top directory where to store all data logs and reject files.")
@ -118,15 +95,9 @@
;;; ;;;
;;; When converting from other databases, how to deal with case sensitivity? ;;; When converting from other databases, how to deal with case sensitivity?
;;; ;;;
(defvar *pgsql-reserved-keywords* nil
"We need to always quote PostgreSQL reserved keywords")
(defparameter *identifier-case* :downcase (defparameter *identifier-case* :downcase
"Dealing with source databases casing rules.") "Dealing with source databases casing rules.")
(defparameter *preserve-index-names* nil
"Dealing with source databases index naming.")
;;; ;;;
;;; How to split batches in case of data loading errors. ;;; How to split batches in case of data loading errors.
;;; ;;;
@ -136,15 +107,10 @@
(defparameter *copy-batch-size* (* 20 1024 1024) (defparameter *copy-batch-size* (* 20 1024 1024)
"Maximum memory size allowed for a single batch.") "Maximum memory size allowed for a single batch.")
(defparameter *prefetch-rows* 100000 (defparameter *concurrent-batches* 10
"How many rows do read in advance in the reader queue.") "How many batches do we stack in the queue in advance.")
(defparameter *rows-per-range* 10000
"How many rows to read in each reader's thread, per SQL query.")
(defparameter *pg-settings* nil "An alist of GUC names and values.") (defparameter *pg-settings* nil "An alist of GUC names and values.")
(defparameter *mysql-settings* nil "An alist of GUC names and values.")
(defparameter *mssql-settings* nil "An alist of GUC names and values.")
;;; ;;;
;;; Archive processing: downloads and unzip. ;;; Archive processing: downloads and unzip.
@ -170,21 +136,3 @@
(setf *default-tmpdir* (setf *default-tmpdir*
(fad:pathname-as-directory (fad:pathname-as-directory
(getenv-default "TMPDIR" *default-tmpdir*)))) (getenv-default "TMPDIR" *default-tmpdir*))))
;;;
;;; Run time context to fill-in variable parts of the commands.
;;;
(defvar *context* nil
"Alist of (names . values) intialized from the environment at run-time,
and from a --context command line argument, then used in the commands when
they are using the Mustache templating feature.")
;;;
;;; Some command line constants for OS errors codes
;;;
(defparameter +os-code-success+ 0)
(defparameter +os-code-error+ 1)
(defparameter +os-code-error-usage+ 2)
(defparameter +os-code-error-bad-source+ 4)
(defparameter +os-code-error-regress+ 5)

View File

@ -1,95 +0,0 @@
;;;
;;; ALTER TABLE allows to change some of their properties while migrating
;;; from a source to PostgreSQL, currently only takes care of the schema.
;;;
(in-package #:pgloader.parser)
(defrule match-rule-target-regex quoted-regex
(:lambda (re) (make-regex-match-rule :target (second re))))
(defrule match-rule-target-string quoted-namestring
(:lambda (s) (make-string-match-rule :target s)))
(defrule match-rule-target (or match-rule-target-string
match-rule-target-regex))
(defrule another-match-rule-target (and comma match-rule-target)
(:lambda (x)
(bind (((_ target) x)) target)))
(defrule filter-list-matching
(and match-rule-target (* another-match-rule-target))
(:lambda (source)
(destructuring-bind (filter1 filters) source
(list* filter1 filters))))
(defrule alter-table-names-matching (and kw-alter kw-table kw-names kw-matching
filter-list-matching)
(:lambda (alter-table)
(bind (((_ _ _ _ match-rule-target-list) alter-table))
match-rule-target-list)))
(defrule in-schema (and kw-in kw-schema quoted-namestring)
(:function third))
(defrule rename-to (and kw-rename kw-to quoted-namestring)
(:lambda (stmt)
(bind (((_ _ new-name) stmt))
(list #'pgloader.catalog::alter-table-rename new-name))))
(defrule set-schema (and kw-set kw-schema quoted-namestring)
(:lambda (stmt)
(bind (((_ _ schema) stmt))
(list #'pgloader.catalog::alter-table-set-schema schema))))
(defrule set-storage-parameters (and kw-set #\( generic-option-list #\))
(:lambda (stmt)
(bind (((_ _ parameters _) stmt))
(list #'pgloader.catalog::alter-table-set-storage-parameters parameters))))
(defrule set-tablespace (and kw-set kw-tablespace quoted-namestring)
(:lambda (stmt)
(list #'pgloader.catalog::alter-table-set-tablespace (third stmt))))
(defrule alter-table-action (or rename-to
set-schema
set-storage-parameters
set-tablespace))
(defrule alter-table-command (and alter-table-names-matching
(? in-schema)
alter-table-action)
(:lambda (alter-table-command)
(destructuring-bind (rule-list schema action)
alter-table-command
(loop :for rule :in rule-list
:collect (make-match-rule
:rule rule
:schema schema
:action (first action)
:args (rest action))))))
(defrule alter-table (+ (and alter-table-command ignore-whitespace))
(:lambda (alter-table-command-list)
(cons :alter-table
(loop :for (command ws) :in alter-table-command-list
:collect command))))
;;;
;;; ALTER SCHEMA ... RENAME TO ...
;;;
;;; Useful mainly for MS SQL at the moment
;;;
(defrule alter-schema-rename-to (and kw-alter kw-schema quoted-namestring
kw-rename kw-to quoted-namestring)
(:lambda (alter-schema-command)
(bind (((_ _ current-name _ _ new-name) alter-schema-command))
(pgloader.catalog::make-match-rule
:rule (make-string-match-rule :target current-name)
:action #'pgloader.catalog::alter-schema-rename
:args (list new-name)))))
;;; currently we only support a single ALTER SCHEMA variant
(defrule alter-schema alter-schema-rename-to
(:lambda (alter-schema-rename-to)
(cons :alter-schema (list (list alter-schema-rename-to)))))

View File

@ -42,22 +42,30 @@
(when (and (or before finally) (null pg-db-conn)) (when (and (or before finally) (null pg-db-conn))
(error "When using a BEFORE LOAD DO or a FINALLY block, you must provide an archive level target database connection.")) (error "When using a BEFORE LOAD DO or a FINALLY block, you must provide an archive level target database connection."))
`(lambda () `(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn nil) (let* ((state-before (pgloader.utils:make-pgstate))
(*state* (pgloader.utils:make-pgstate))
,@(pgsql-connection-bindings pg-db-conn nil)
(state-finally ,(when finally `(pgloader.utils:make-pgstate)))
(archive-file (archive-file
, (destructuring-bind (kind url) source ,(destructuring-bind (kind url) source
(ecase kind (ecase kind
(:http `(with-stats-collection (:http `(with-stats-collection
("download" :section :pre) ("download" :state state-before)
(pgloader.archive:http-fetch-file ,url))) (pgloader.archive:http-fetch-file ,url)))
(:filename url)))) (:filename url))))
(*fd-path-root* (*csv-path-root*
(with-stats-collection ("extract" :section :pre) (with-stats-collection ("extract" :state state-before)
(pgloader.archive:expand-archive archive-file)))) (pgloader.archive:expand-archive archive-file))))
(progn (progn
,(sql-code-block pg-db-conn :pre before "before load") ,(sql-code-block pg-db-conn 'state-before before "before load")
;; import from files block ;; import from files block
,@(loop for command in commands ,@(loop for command in commands
collect `(funcall ,command)) collect `(funcall ,command))
,(sql-code-block pg-db-conn :post finally "finally"))))))) ,(sql-code-block pg-db-conn 'state-finally finally "finally")
;; reporting
(report-full-summary "Total import time" *state*
:before state-before
:finally state-finally)))))))

View File

@ -10,33 +10,19 @@
(defrule cast-default-guard (and kw-when kw-default quoted-string) (defrule cast-default-guard (and kw-when kw-default quoted-string)
(:destructure (w d value) (declare (ignore w d)) (cons :default value))) (:destructure (w d value) (declare (ignore w d)) (cons :default value)))
(defrule cast-unsigned-guard (and kw-when kw-unsigned) (defrule cast-source-guards (* (or cast-default-guard
(:constant (cons :unsigned t))) cast-typemod-guard))
(:lambda (guards)
(defrule cast-signed-guard (and kw-when kw-signed) (alexandria:alist-plist guards)))
(:constant (cons :signed t)))
;; at the moment we only know about extra auto_increment ;; at the moment we only know about extra auto_increment
(defrule cast-source-extra (and kw-with kw-extra (defrule cast-source-extra (and kw-with kw-extra kw-auto-increment)
(or kw-auto-increment (:constant (list :auto-increment t)))
kw-on-update-current-timestamp))
(:lambda (extra)
(cons (third extra) t)))
;; type names may be "double quoted" (defrule cast-source-type (and kw-type trimmed-name)
(defrule cast-type-name (or double-quoted-namestring
(and (alpha-char-p character)
(* (or (alpha-char-p character)
(digit-char-p character)
#\_))))
(:text t))
(defrule cast-source-type (and kw-type cast-type-name)
(:destructure (kw name) (declare (ignore kw)) (list :type name))) (:destructure (kw name) (declare (ignore kw)) (list :type name)))
(defrule table-column-name (and maybe-quoted-namestring (defrule table-column-name (and namestring "." namestring)
"."
maybe-quoted-namestring)
(:destructure (table-name dot column-name) (:destructure (table-name dot column-name)
(declare (ignore dot)) (declare (ignore dot))
(list :column (cons (text table-name) (text column-name))))) (list :column (cons (text table-name) (text column-name)))))
@ -45,33 +31,26 @@
;; well, we want namestring . namestring ;; well, we want namestring . namestring
(:destructure (kw name) (declare (ignore kw)) name)) (:destructure (kw name) (declare (ignore kw)) name))
(defrule cast-source-extra-or-guard (* (or cast-unsigned-guard
cast-signed-guard
cast-default-guard
cast-typemod-guard
cast-source-extra))
(:function alexandria:alist-plist))
(defrule cast-source (and (or cast-source-type cast-source-column) (defrule cast-source (and (or cast-source-type cast-source-column)
cast-source-extra-or-guard) (? cast-source-extra)
(? cast-source-guards)
ignore-whitespace)
(:lambda (source) (:lambda (source)
(bind (((name-and-type extra-and-guards) source) (bind (((name-and-type opts guards _) source)
((&key (default nil d-s-p) ((&key (default nil d-s-p)
(typemod nil t-s-p) (typemod nil t-s-p)
(signed nil s-s-p) &allow-other-keys) guards)
(unsigned nil u-s-p) ((&key (auto-increment nil ai-s-p)
(auto-increment nil ai-s-p) &allow-other-keys) opts))
(on-update-current-timestamp nil ouct-s-p)
&allow-other-keys)
extra-and-guards))
`(,@name-and-type `(,@name-and-type
,@(when t-s-p (list :typemod typemod)) ,@(when t-s-p (list :typemod typemod))
,@(when d-s-p (list :default default)) ,@(when d-s-p (list :default default))
,@(when s-s-p (list :signed signed)) ,@(when ai-s-p (list :auto-increment auto-increment))))))
,@(when u-s-p (list :unsigned unsigned))
,@(when ai-s-p (list :auto-increment auto-increment)) (defrule cast-type-name (and (alpha-char-p character)
,@(when ouct-s-p (list :on-update-current-timestamp (* (or (alpha-char-p character)
on-update-current-timestamp)))))) (digit-char-p character))))
(:text t))
(defrule cast-to-type (and kw-to cast-type-name ignore-whitespace) (defrule cast-to-type (and kw-to cast-type-name ignore-whitespace)
(:lambda (source) (:lambda (source)
@ -96,66 +75,33 @@
(defrule cast-drop-not-null (and kw-drop kw-not kw-null) (defrule cast-drop-not-null (and kw-drop kw-not kw-null)
(:constant (list :drop-not-null t))) (:constant (list :drop-not-null t)))
(defrule cast-set-not-null (and kw-set kw-not kw-null)
(:constant (list :set-not-null t)))
(defrule cast-keep-extra (and kw-keep kw-extra)
(:constant (list :keep-extra t)))
(defrule cast-drop-extra (and kw-drop kw-extra)
(:constant (list :drop-extra t)))
(defrule cast-def (+ (or cast-to-type (defrule cast-def (+ (or cast-to-type
cast-keep-default cast-keep-default
cast-drop-default cast-drop-default
cast-keep-extra
cast-drop-extra
cast-keep-typemod cast-keep-typemod
cast-drop-typemod cast-drop-typemod
cast-keep-not-null cast-keep-not-null
cast-drop-not-null cast-drop-not-null))
cast-set-not-null))
(:lambda (source) (:lambda (source)
(destructuring-bind (destructuring-bind
(&key type drop-default drop-extra drop-typemod (&key type drop-default drop-typemod drop-not-null &allow-other-keys)
drop-not-null set-not-null &allow-other-keys)
(apply #'append source) (apply #'append source)
(list :type type (list :type type
:drop-extra drop-extra
:drop-default drop-default :drop-default drop-default
:drop-typemod drop-typemod :drop-typemod drop-typemod
:drop-not-null drop-not-null :drop-not-null drop-not-null))))
:set-not-null set-not-null))))
(defun function-name-character-p (char) (defun function-name-character-p (char)
(or (member char #.(quote (coerce "/.-%" 'list))) (or (member char #.(quote (coerce "/:.-%" 'list)))
(alphanumericp char))) (alphanumericp char)))
(defrule function-name (+ (function-name-character-p character)) (defrule function-name (* (function-name-character-p character))
(:lambda (fname) (:text t))
(text fname)))
(defrule package-and-function-names (and function-name (defrule cast-function (and kw-using function-name)
(or ":" "::") (:lambda (function)
function-name) (bind (((_ fname) function))
(:lambda (pfn) (intern (string-upcase fname) :pgloader.transforms))))
(bind (((pname _ fname) pfn))
(intern (string-upcase fname) (find-package (string-upcase pname))))))
(defrule maybe-qualified-function-name (or package-and-function-names
function-name)
(:lambda (fname)
(typecase fname
(string (intern (string-upcase fname) :pgloader.transforms))
(symbol fname))))
(defrule transform-expression sexp
(:lambda (sexp)
(eval sexp)))
(defrule cast-function (and kw-using (or maybe-qualified-function-name
transform-expression))
(:destructure (using symbol) (declare (ignore using)) symbol))
(defun fix-target-type (source target) (defun fix-target-type (source target)
"When target has :type nil, steal the source :type definition." "When target has :type nil, steal the source :type definition."

View File

@ -24,57 +24,58 @@
(:lambda (source) (:lambda (source)
(bind (((_ field-defs _) source)) field-defs))) (bind (((_ field-defs _) source)) field-defs)))
(defrule option-delimiter (and kw-delimiter separator) (defrule copy-option (or option-batch-rows
(:lambda (delimiter)
(destructuring-bind (kw sep) delimiter
(declare (ignore kw))
(cons :delimiter sep))))
(defrule option-null (and kw-null quoted-string)
(:destructure (kw null) (declare (ignore kw)) (cons :null-as null)))
(defrule copy-option (or option-on-error-stop
option-on-error-resume-next
option-workers
option-concurrency
option-batch-rows
option-batch-size option-batch-size
option-prefetch-rows option-batch-concurrency
option-max-parallel-create-index
option-truncate option-truncate
option-drop-indexes option-skip-header))
option-disable-triggers
option-identifiers-case
option-skip-header
option-delimiter
option-null))
(defrule copy-options (and kw-with (defrule another-copy-option (and comma copy-option)
(and copy-option (* (and comma copy-option)))) (:lambda (source)
(:function flatten-option-list)) (bind (((_ option) source)) option)))
(defrule copy-option-list (and copy-option (* another-copy-option))
(:lambda (source)
(destructuring-bind (opt1 opts) source
(alexandria:alist-plist `(,opt1 ,@opts)))))
(defrule copy-options (and kw-with csv-option-list)
(:lambda (source)
(bind (((_ opts) source))
(cons :copy-options opts))))
(defrule copy-uri (and "copy://" filename) (defrule copy-uri (and "copy://" filename)
(:lambda (source) (:lambda (source)
(bind (((_ filename) source)) (bind (((_ filename) source))
(make-instance 'copy-connection :spec filename)))) (make-instance 'copy-connection :specs filename))))
(defrule copy-file-source (or stdin (defrule copy-file-source (or stdin
inline inline
http-uri http-uri
copy-uri copy-uri
filename-matching filename-matching
maybe-quoted-filename) maybe-quoted-filename)
(:lambda (src) (:lambda (src)
(if (typep src 'copy-connection) src (if (typep src 'copy-connection) src
(destructuring-bind (type &rest specs) src (destructuring-bind (type &rest specs) src
(case type (case type
(:stdin (make-instance 'copy-connection :spec src)) (:stdin (make-instance 'copy-connection :specs src))
(:inline (make-instance 'copy-connection :spec src)) (:inline (make-instance 'copy-connection :specs src))
(:filename (make-instance 'copy-connection :spec src)) (:filename (make-instance 'copy-connection :specs src))
(:regex (make-instance 'copy-connection :spec src)) (:regex (make-instance 'copy-connection :specs src))
(:http (make-instance 'copy-connection :uri (first specs)))))))) (:http (make-instance 'copy-connection :uri (first specs))))))))
(defrule copy-source (and kw-load kw-copy kw-from copy-file-source) (defrule get-copy-file-source-from-environment-variable (and kw-getenv name)
(:lambda (p-e-v)
(bind (((_ varname) p-e-v)
(connstring (getenv-default varname)))
(unless connstring
(error "Environment variable ~s is unset." varname))
(parse 'copy-file-source connstring))))
(defrule copy-source (and kw-load kw-copy kw-from
(or get-copy-file-source-from-environment-variable
copy-file-source))
(:lambda (src) (:lambda (src)
(bind (((_ _ _ source) src)) source))) (bind (((_ _ _ source) src)) source)))
@ -86,88 +87,61 @@
(alexandria:alist-plist clauses-list))) (alexandria:alist-plist clauses-list)))
(defrule load-copy-file-command (and copy-source (? file-encoding) (defrule load-copy-file-command (and copy-source (? file-encoding)
(? copy-source-field-list) copy-source-field-list
target target
(? csv-target-table)
(? csv-target-column-list) (? csv-target-column-list)
load-copy-file-optional-clauses) load-copy-file-optional-clauses)
(:lambda (command) (:lambda (command)
(destructuring-bind (source encoding fields pguri table-name columns clauses) (destructuring-bind (source encoding fields target columns clauses) command
command `(,source ,encoding ,fields ,target ,columns ,@clauses))))
(list* source
encoding
fields
pguri
(or table-name (pgconn-table-name pguri))
columns
clauses))))
(defun lisp-code-for-loading-from-copy (copy-conn pg-db-conn (defun lisp-code-for-loading-from-copy (copy-conn fields pg-db-conn
&key &key
(encoding :utf-8) (encoding :utf-8)
fields columns
target-table-name gucs before after
columns ((:copy-options options)))
gucs before after options
&aux
(worker-count (getf options :worker-count))
(concurrency (getf options :concurrency)))
`(lambda () `(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn gucs) (let* ((state-before (pgloader.utils:make-pgstate))
(summary (null *state*))
(*state* (or *state* (pgloader.utils:make-pgstate)))
(state-after ,(when after `(pgloader.utils:make-pgstate)))
,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options) ,@(batch-control-bindings options)
,@(identifier-case-binding options) (source-db (with-stats-collection ("fetch" :state state-before)
(source-db (with-stats-collection ("fetch" :section :pre) (expand (fetch-file ,copy-conn)))))
(expand (fetch-file ,copy-conn)))))
(progn (progn
,(sql-code-block pg-db-conn :pre before "before load") ,(sql-code-block pg-db-conn 'state-before before "before load")
(let ((on-error-stop (getf ',options :on-error-stop)) (let ((truncate ,(getf options :truncate))
(truncate (getf ',options :truncate))
(disable-triggers (getf ',options :disable-triggers))
(drop-indexes (getf ',options :drop-indexes))
(max-parallel-create-index (getf ',options :max-parallel-create-index))
(source (source
(make-instance 'copy-copy (make-instance 'pgloader.copy:copy-copy
:target-db ,pg-db-conn :target-db ,pg-db-conn
:source source-db :source source-db
:target (create-table ',target-table-name) :target ,(pgconn-table-name pg-db-conn)
:encoding ,encoding :encoding ,encoding
:fields ',fields :fields ',fields
:columns ',columns :columns ',columns
,@(remove-batch-control-option :skip-lines ,(or (getf options :skip-line) 0))))
options :extras '(:worker-count (pgloader.sources:copy-from source :truncate truncate))
:concurrency
:truncate
:drop-indexes
:disable-triggers
:max-parallel-create-index)))))
(copy-database source
,@ (when worker-count
(list :worker-count worker-count))
,@ (when concurrency
(list :concurrency concurrency))
:on-error-stop on-error-stop
:truncate truncate
:drop-indexes drop-indexes
:disable-triggers disable-triggers
:max-parallel-create-index max-parallel-create-index))
,(sql-code-block pg-db-conn :post after "after load"))))) ,(sql-code-block pg-db-conn 'state-after after "after load")
;; reporting
(when summary
(report-full-summary "Total import time" *state*
:before state-before
:finally state-after))))))
(defrule load-copy-file load-copy-file-command (defrule load-copy-file load-copy-file-command
(:lambda (command) (:lambda (command)
(bind (((source encoding fields pg-db-uri table-name columns (bind (((source encoding fields pg-db-uri columns
&key options gucs before after) command)) &key ((:copy-options options)) gucs before after) command))
(cond (*dry-run* (lisp-code-for-loading-from-copy source fields pg-db-uri
(lisp-code-for-csv-dry-run pg-db-uri)) :encoding encoding
(t :columns columns
(lisp-code-for-loading-from-copy source pg-db-uri :gucs gucs
:encoding encoding :before before
:fields fields :after after
:target-table-name table-name :copy-options options))))
:columns columns
:gucs gucs
:before before
:after after
:options options))))))

View File

@ -34,22 +34,12 @@
(bind (((_ digits) hex)) (bind (((_ digits) hex))
(code-char (parse-integer (text digits) :radix 16))))) (code-char (parse-integer (text digits) :radix 16)))))
(defrule tab-separator (and #\' #\\ #\t #\') (:constant #\Tab)) (defrule tab (and #\\ #\t) (:constant #\Tab))
(defrule backslash-separator (and #\' #\\ #\') (:constant #\\))
(defrule single-quote-separator (or (and #\' #\' #\' #\') (defrule separator (and #\' (or hex-char-code tab character ) #\')
(and #\' #\\ #\' #\'))
(:constant #\'))
(defrule other-char-separator (and #\' (or hex-char-code character) #\')
(:lambda (sep) (:lambda (sep)
(bind (((_ char _) sep)) char))) (bind (((_ char _) sep)) char)))
(defrule separator (or single-quote-separator
backslash-separator
tab-separator
other-char-separator))
;; ;;
;; Main CSV options (WITH ... in the command grammar) ;; Main CSV options (WITH ... in the command grammar)
;; ;;
@ -59,9 +49,6 @@
(bind (((_ _ _ digits) osh)) (bind (((_ _ _ digits) osh))
(cons :skip-lines (parse-integer (text digits)))))) (cons :skip-lines (parse-integer (text digits))))))
(defrule option-csv-header (and kw-csv kw-header)
(:constant (cons :header t)))
(defrule option-fields-enclosed-by (defrule option-fields-enclosed-by
(and kw-fields (? kw-optionally) kw-enclosed kw-by separator) (and kw-fields (? kw-optionally) kw-enclosed kw-by separator)
(:lambda (enc) (:lambda (enc)
@ -71,17 +58,11 @@
(defrule option-fields-not-enclosed (and kw-fields kw-not kw-enclosed) (defrule option-fields-not-enclosed (and kw-fields kw-not kw-enclosed)
(:constant (cons :quote nil))) (:constant (cons :quote nil)))
(defrule quote-quote "double-quote" (:constant #(#\" #\"))) (defrule quote-quote "double-quote" (:constant "\"\""))
(defrule backslash-quote "backslash-quote" (:constant #(#\\ #\"))) (defrule backslash-quote "backslash-quote" (:constant "\\\""))
(defrule escaped-quote-name (or quote-quote backslash-quote)) (defrule escaped-quote-name (or quote-quote backslash-quote))
(defrule escaped-quote-literal (or (and #\" #\") (and #\\ #\")) (:text t)) (defrule escaped-quote-literal (or (and #\" #\") (and #\\ #\")) (:text t))
(defrule escaped-quote (or escaped-quote-literal (defrule escaped-quote (or escaped-quote-literal escaped-quote-name))
escaped-quote-name
separator))
(defrule escape-mode-quote "quote" (:constant :quote))
(defrule escape-mode-following "following" (:constant :following))
(defrule escape-mode (or escape-mode-quote escape-mode-following))
(defrule option-fields-escaped-by (and kw-fields kw-escaped kw-by escaped-quote) (defrule option-fields-escaped-by (and kw-fields kw-escaped kw-by escaped-quote)
(:lambda (esc) (:lambda (esc)
@ -108,38 +89,32 @@
(defrule option-trim-unquoted-blanks (and kw-trim kw-unquoted kw-blanks) (defrule option-trim-unquoted-blanks (and kw-trim kw-unquoted kw-blanks)
(:constant (cons :trim-blanks t))) (:constant (cons :trim-blanks t)))
(defrule option-csv-escape-mode (and kw-csv kw-escape kw-mode escape-mode) (defrule csv-option (or option-batch-rows
(:lambda (term)
(bind (((_ _ _ escape-mode) term))
(cons :escape-mode escape-mode))))
(defrule csv-option (or option-on-error-stop
option-on-error-resume-next
option-workers
option-concurrency
option-batch-rows
option-batch-size option-batch-size
option-prefetch-rows option-batch-concurrency
option-max-parallel-create-index
option-truncate option-truncate
option-disable-triggers
option-identifiers-case
option-drop-indexes
option-skip-header option-skip-header
option-csv-header
option-lines-terminated-by option-lines-terminated-by
option-fields-not-enclosed option-fields-not-enclosed
option-fields-enclosed-by option-fields-enclosed-by
option-fields-escaped-by option-fields-escaped-by
option-fields-terminated-by option-fields-terminated-by
option-trim-unquoted-blanks option-trim-unquoted-blanks
option-keep-unquoted-blanks option-keep-unquoted-blanks))
option-csv-escape-mode
option-null-if))
(defrule csv-options (and kw-with (defrule another-csv-option (and comma csv-option)
(and csv-option (* (and comma csv-option)))) (:lambda (source)
(:function flatten-option-list)) (bind (((_ option) source)) option)))
(defrule csv-option-list (and csv-option (* another-csv-option))
(:lambda (source)
(destructuring-bind (opt1 opts) source
(alexandria:alist-plist `(,opt1 ,@opts)))))
(defrule csv-options (and kw-with csv-option-list)
(:lambda (source)
(bind (((_ opts) source))
(cons :csv-options opts))))
;; ;;
;; CSV per-field reading options ;; CSV per-field reading options
@ -203,19 +178,19 @@
(defrule csv-field-options (? csv-field-option-list)) (defrule csv-field-options (? csv-field-option-list))
(defrule csv-bare-field-name (and (or #\_ (alpha-char-p character)) (defrule csv-raw-field-name (and (or #\_ (alpha-char-p character))
(* (or (alpha-char-p character) (* (or (alpha-char-p character)
(digit-char-p character) (digit-char-p character)
#\.
#\$
#\_))) #\_)))
(:lambda (name) (:text t))
(string-downcase (text name))))
(defrule csv-quoted-field-name (or (and #\' (* (not #\')) #\') (defrule csv-bare-field-name csv-raw-field-name
(and #\" (* (not #\")) #\")) (:lambda (name)
(string-downcase name)))
(defrule csv-quoted-field-name (and #\" csv-raw-field-name #\")
(:lambda (csv-field-name) (:lambda (csv-field-name)
(bind (((_ name _) csv-field-name)) (text name)))) (bind (((_ name _) csv-field-name)) name)))
(defrule csv-field-name (or csv-quoted-field-name csv-bare-field-name)) (defrule csv-field-name (or csv-quoted-field-name csv-bare-field-name))
@ -232,6 +207,11 @@
(destructuring-bind (field1 fields) source (destructuring-bind (field1 fields) source
(list* field1 fields)))) (list* field1 fields))))
(defrule open-paren (and ignore-whitespace #\( ignore-whitespace)
(:constant :open-paren))
(defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
(:constant :close-paren))
(defrule having-fields (and kw-having kw-fields) (:constant nil)) (defrule having-fields (and kw-having kw-fields) (:constant nil))
(defrule csv-source-field-list (and (? having-fields) (defrule csv-source-field-list (and (? having-fields)
@ -247,6 +227,44 @@
(defrule column-name csv-field-name) ; same rules here (defrule column-name csv-field-name) ; same rules here
(defrule column-type csv-field-name) ; again, same rules, names only (defrule column-type csv-field-name) ; again, same rules, names only
(defun not-doublequote (char)
(not (eql #\" char)))
(defun symbol-character-p (character)
(not (member character '(#\Space #\( #\)))))
(defun symbol-first-character-p (character)
(and (symbol-character-p character)
(not (member character '(#\+ #\-)))))
(defrule sexp-symbol (and (symbol-first-character-p character)
(* (symbol-character-p character)))
(:lambda (schars)
(pgloader.transforms:intern-symbol (text schars))))
(defrule sexp-string-char (or (not-doublequote character) (and #\\ #\")))
(defrule sexp-string (and #\" (* sexp-string-char) #\")
(:destructure (q1 string q2)
(declare (ignore q1 q2))
(text string)))
(defrule sexp-integer (+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))
(:lambda (list)
(parse-integer (text list) :radix 10)))
(defrule sexp-list (and open-paren sexp (* sexp) close-paren)
(:destructure (open car cdr close)
(declare (ignore open close))
(cons car cdr)))
(defrule sexp-atom (and ignore-whitespace
(or sexp-string sexp-integer sexp-symbol))
(:lambda (atom)
(bind (((_ a) atom)) a)))
(defrule sexp (or sexp-atom sexp-list))
(defrule column-expression (and kw-using sexp) (defrule column-expression (and kw-using sexp)
(:lambda (expr) (:lambda (expr)
(bind (((_ sexp) expr)) sexp))) (bind (((_ sexp) expr)) sexp)))
@ -275,12 +293,6 @@
open-paren csv-target-columns close-paren) open-paren csv-target-columns close-paren)
(:lambda (source) (:lambda (source)
(bind (((_ _ columns _) source)) columns))) (bind (((_ _ columns _) source)) columns)))
(defrule csv-target-table (and kw-target kw-table dsn-table-name)
(:lambda (c-t-t)
;; dsn-table-name: (:table-name "schema" . "table")
(cdr (third c-t-t))))
;; ;;
;; The main command parsing ;; The main command parsing
;; ;;
@ -345,7 +357,7 @@
(defrule csv-uri (and "csv://" filename) (defrule csv-uri (and "csv://" filename)
(:lambda (source) (:lambda (source)
(bind (((_ filename) source)) (bind (((_ filename) source))
(make-instance 'csv-connection :spec filename)))) (make-instance 'csv-connection :specs filename))))
(defrule csv-file-source (or stdin (defrule csv-file-source (or stdin
inline inline
@ -357,13 +369,23 @@
(if (typep src 'csv-connection) src (if (typep src 'csv-connection) src
(destructuring-bind (type &rest specs) src (destructuring-bind (type &rest specs) src
(case type (case type
(:stdin (make-instance 'csv-connection :spec src)) (:stdin (make-instance 'csv-connection :specs src))
(:inline (make-instance 'csv-connection :spec src)) (:inline (make-instance 'csv-connection :specs src))
(:filename (make-instance 'csv-connection :spec src)) (:filename (make-instance 'csv-connection :specs src))
(:regex (make-instance 'csv-connection :spec src)) (:regex (make-instance 'csv-connection :specs src))
(:http (make-instance 'csv-connection :uri (first specs)))))))) (:http (make-instance 'csv-connection :uri (first specs))))))))
(defrule csv-source (and kw-load kw-csv kw-from csv-file-source) (defrule get-csv-file-source-from-environment-variable (and kw-getenv name)
(:lambda (p-e-v)
(bind (((_ varname) p-e-v)
(connstring (getenv-default varname)))
(unless connstring
(error "Environment variable ~s is unset." varname))
(parse 'csv-file-source connstring))))
(defrule csv-source (and kw-load kw-csv kw-from
(or get-csv-file-source-from-environment-variable
csv-file-source))
(:lambda (src) (:lambda (src)
(bind (((_ _ _ source) src)) source))) (bind (((_ _ _ source) src)) source)))
@ -386,105 +408,60 @@
(defrule load-csv-file-command (and csv-source (defrule load-csv-file-command (and csv-source
(? file-encoding) (? csv-source-field-list) (? file-encoding) (? csv-source-field-list)
target target (? csv-target-column-list)
(? csv-target-table)
(? csv-target-column-list)
load-csv-file-optional-clauses) load-csv-file-optional-clauses)
(:lambda (command) (:lambda (command)
(destructuring-bind (source encoding fields pguri table-name columns clauses) (destructuring-bind (source encoding fields target columns clauses) command
command `(,source ,encoding ,fields ,target ,columns ,@clauses))))
(list* source
encoding
fields
pguri
(or table-name (pgconn-table-name pguri))
columns
clauses))))
(defun lisp-code-for-csv-dry-run (pg-db-conn) (defun lisp-code-for-loading-from-csv (csv-conn fields pg-db-conn
`(lambda ()
;; CSV connection objects are not actually implementing the generic API
;; because they support many complex options... (the file can be a
;; pattern or standard input or inline or compressed etc).
(log-message :log "DRY RUN, only checking PostgreSQL connection.")
(check-connection ,pg-db-conn)))
(defun lisp-code-for-loading-from-csv (csv-conn pg-db-conn
&key &key
(encoding :utf-8) (encoding :utf-8)
fields
target-table-name
columns columns
gucs before after options gucs before after
&allow-other-keys ((:csv-options options)))
&aux
(worker-count (getf options :worker-count))
(concurrency (getf options :concurrency)))
`(lambda () `(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn gucs) (let* ((state-before (pgloader.utils:make-pgstate))
(summary (null *state*))
(*state* (or *state* (pgloader.utils:make-pgstate)))
(state-after ,(when after `(pgloader.utils:make-pgstate)))
,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options) ,@(batch-control-bindings options)
,@(identifier-case-binding options) (source-db (with-stats-collection ("fetch" :state state-before)
(source-db (with-stats-collection ("fetch" :section :pre) (expand (fetch-file ,csv-conn)))))
(expand (fetch-file ,csv-conn)))))
(progn (progn
,(sql-code-block pg-db-conn :pre before "before load") ,(sql-code-block pg-db-conn 'state-before before "before load")
(let* ((on-error-stop (getf ',options :on-error-stop)) (let ((truncate (getf ',options :truncate))
(truncate (getf ',options :truncate)) (source
(disable-triggers (getf ',options :disable-triggers)) (make-instance 'pgloader.csv:copy-csv
(drop-indexes (getf ',options :drop-indexes)) :target-db ,pg-db-conn
(max-parallel-create-index (getf ',options :max-parallel-create-index)) :source source-db
(fields :target ,(pgconn-table-name pg-db-conn)
',(let ((null-as (getf options :null-as))) :encoding ,encoding
(if null-as :fields ',fields
(mapcar (lambda (field) :columns ',columns
(if (member :null-as field) field ,@(remove-batch-control-option
(append field (list :null-as null-as)))) options :extras '(:truncate)))))
fields) (pgloader.sources:copy-from source :truncate truncate))
fields)))
(source
(make-instance 'copy-csv
:target-db ,pg-db-conn
:source source-db
:target (create-table ',target-table-name)
:encoding ,encoding
:fields fields
:columns ',columns
,@(remove-batch-control-option
options :extras '(:null-as
:worker-count
:concurrency
:truncate
:drop-indexes
:disable-triggers
:max-parallel-create-index)))))
(copy-database source
,@ (when worker-count
(list :worker-count worker-count))
,@ (when concurrency
(list :concurrency concurrency))
:on-error-stop on-error-stop
:truncate truncate
:drop-indexes drop-indexes
:disable-triggers disable-triggers
:max-parallel-create-index max-parallel-create-index))
,(sql-code-block pg-db-conn :post after "after load"))))) ,(sql-code-block pg-db-conn 'state-after after "after load")
;; reporting
(when summary
(report-full-summary "Total import time" *state*
:before state-before
:finally state-after))))))
(defrule load-csv-file load-csv-file-command (defrule load-csv-file load-csv-file-command
(:lambda (command) (:lambda (command)
(bind (((source encoding fields pg-db-uri table-name columns (bind (((source encoding fields pg-db-uri columns
&key options gucs before after) command)) &key ((:csv-options options)) gucs before after) command))
(cond (*dry-run* (lisp-code-for-loading-from-csv source fields pg-db-uri
(lisp-code-for-csv-dry-run pg-db-uri)) :encoding encoding
(t :columns columns
(lisp-code-for-loading-from-csv source pg-db-uri :gucs gucs
:encoding encoding :before before
:fields fields :after after
:target-table-name table-name :csv-options options))))
:columns columns
:gucs gucs
:before before
:after after
:options options))))))

View File

@ -25,15 +25,7 @@
(defrule doubled-at-sign (and "@@") (:constant "@")) (defrule doubled-at-sign (and "@@") (:constant "@"))
(defrule doubled-colon (and "::") (:constant ":")) (defrule doubled-colon (and "::") (:constant ":"))
(defrule password (+ (or (not "@") doubled-at-sign)) (:text t)) (defrule password (+ (or (not "@") doubled-at-sign)) (:text t))
(defrule username (and (or #\_ (alpha-char-p character) (digit-char-p character)) (defrule username (and namestring (? (or doubled-at-sign doubled-colon)))
(* (or (alpha-char-p character)
(digit-char-p character)
#\.
#\\
punct
doubled-at-sign
doubled-colon
)))
(:text t)) (:text t))
(defrule dsn-user-password (and username (defrule dsn-user-password (and username
@ -44,6 +36,9 @@
;; password looks like '(":" "password") ;; password looks like '(":" "password")
(list :user username :password (cadr password))))) (list :user username :password (cadr password)))))
(defun hexdigit-char-p (character)
(member character #. (quote (coerce "0123456789abcdefABCDEF" 'list))))
(defrule ipv4-part (and (digit-char-p character) (defrule ipv4-part (and (digit-char-p character)
(? (digit-char-p character)) (? (digit-char-p character))
(? (digit-char-p character)))) (? (digit-char-p character))))
@ -52,79 +47,40 @@
(:lambda (ipv4) (:lambda (ipv4)
(list :ipv4 (text ipv4)))) (list :ipv4 (text ipv4))))
(defrule ipv6 (and #\[ (+ (or (hexdigit-char-p character) ":")) #\]) ;;; socket directory is unix only, so we can forbid ":" on the parsing
(:lambda (ipv6)
(list :ipv6 (text ipv6))))
;; socket directory is unix only, so we can forbid ":" on the parsing
(defun socket-directory-character-p (char) (defun socket-directory-character-p (char)
(or (find char "/.-_") (or (member char #.(quote (coerce "/.-_" 'list)))
(alphanumericp char))) (alphanumericp char)))
(defrule socket-directory (and "unix:" (defrule socket-directory (and "unix:" (* (socket-directory-character-p character)))
(* (or (not ":") doubled-colon)))
(:destructure (unix socket-directory) (:destructure (unix socket-directory)
(declare (ignore unix)) (declare (ignore unix))
(list :unix (when socket-directory (text socket-directory))))) (list :unix (when socket-directory (text socket-directory)))))
;;; (defrule network-name (and namestring (* (and "." namestring)))
;;; See https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_hostnames
;;;
;;; The characters allowed in labels are a subset of the ASCII character
;;; set, consisting of characters a through z, A through Z, digits 0 through
;;; 9, and hyphen.
;;;
;;; This rule is known as the LDH rule (letters, digits, hyphen).
;;;
;;; - Domain names are interpreted in case-independent manner.
;;; - Labels may not start or end with a hyphen.
;;; - An additional rule requires that top-level domain names should not be
;;; all-numeric.
;;;
(defrule network-label-letters-digit (or (alpha-char-p character)
(digit-char-p character)))
(defrule network-label-with-hyphen
(and network-label-letters-digit
(+ (or (and #\- network-label-letters-digit)
network-label-letters-digit)))
(:text t))
(defrule network-label-no-hyphen (+ network-label-letters-digit)
(:text t))
(defrule network-label (or network-label-with-hyphen network-label-no-hyphen)
(:identity t))
(defrule network-hostname (and network-label (* (and "." network-label)))
(:lambda (name) (:lambda (name)
(let ((host (text name))) (let ((host (text name)))
(list :host (unless (string= "" host) host))))) (list :host (unless (string= "" host) host)))))
(defrule hostname (or ipv4 ipv6 socket-directory network-hostname) (defrule hostname (or ipv4 socket-directory network-name)
(:identity t)) (:identity t))
(defun process-hostname (hostname)
(destructuring-bind (type &optional name) hostname
(ecase type
(:unix (if name (cons :unix name) :unix))
(:ipv4 name)
(:ipv6 name)
(:host name))))
(defrule dsn-hostname (and (? hostname) (? dsn-port)) (defrule dsn-hostname (and (? hostname) (? dsn-port))
(:lambda (host-port) (:lambda (host-port)
(destructuring-bind (host &optional port) host-port (destructuring-bind (host &optional port) host-port
(append (list :host (when host (process-hostname host))) (append (list :host
(when host
(destructuring-bind (type &optional name) host
(ecase type
(:unix (if name (cons :unix name) :unix))
(:ipv4 name)
(:host name)))))
port)))) port))))
(defrule dsn-dbname (and "/" (? (or single-quoted-string (defrule dsn-dbname (and "/" (? namestring))
(* (or (alpha-char-p character) (:destructure (slash dbname)
(digit-char-p character) (declare (ignore slash))
#\. (list :dbname dbname)))
punct)))))
(:lambda (dbn)
(list :dbname (text (second dbn)))))
(defrule dsn-option-ssl-disable "disable" (:constant :no)) (defrule dsn-option-ssl-disable "disable" (:constant :no))
(defrule dsn-option-ssl-allow "allow" (:constant :try)) (defrule dsn-option-ssl-allow "allow" (:constant :try))
@ -140,25 +96,13 @@
(declare (ignore key e)) (declare (ignore key e))
(cons :use-ssl val)))) (cons :use-ssl val))))
(defun get-pgsslmode (&optional (env-var-name "PGSSLMODE") default) (defrule qualified-table-name (and namestring "." namestring)
"Get PGSSLMODE from the environment."
(let ((pgsslmode (getenv-default env-var-name default)))
(when pgsslmode
(cdr (parse 'dsn-option-ssl (format nil "sslmode=~a" pgsslmode))))))
(defrule qualified-table-name (and maybe-quoted-namestring
"."
maybe-quoted-namestring)
(:destructure (schema dot table) (:destructure (schema dot table)
(declare (ignore dot)) (declare (ignore dot))
(cons (text schema) (text table)))) (format nil "~a.~a" (text schema) (text table))))
(defrule dsn-table-name (or qualified-table-name maybe-quoted-namestring) (defrule dsn-table-name (or qualified-table-name namestring)
(:lambda (name) (:lambda (name)
;; we can't make a table instance yet here, because for that we need to
;; apply-identifier-case on it, and that requires to have initialized
;; the *pgsql-reserved-keywords*, and we can't do that before parsing
;; the target database connection string, can we?
(cons :table-name name))) (cons :table-name name)))
(defrule dsn-option-table-name (and (? (and "tablename" "=")) (defrule dsn-option-table-name (and (? (and "tablename" "="))
@ -167,39 +111,7 @@
(bind (((_ table-name) opt-tn)) (bind (((_ table-name) opt-tn))
table-name))) table-name)))
(defrule uri-param (+ (not "&")) (:text t)) (defrule dsn-option (or dsn-option-ssl dsn-option-table-name))
(defmacro make-dsn-option-rule (name param &optional (rule 'uri-param) fn)
`(defrule ,name (and ,param "=" ,rule)
(:lambda (x)
(let ((cons (first (quri:url-decode-params (text x)))))
(setf (car cons) (intern (string-upcase (car cons)) "KEYWORD"))
(when ,fn
(setf (cdr cons) (funcall ,fn (cdr cons))))
cons))))
(make-dsn-option-rule dsn-option-host "host" uri-param
(lambda (hostname)
(process-hostname
(parse 'hostname
;; special case Unix Domain Socket paths
(cond ((char= (aref hostname 0) #\/)
(format nil "unix:~a" hostname))
(t hostname))))))
(make-dsn-option-rule dsn-option-port "port"
(+ (digit-char-p character))
#'parse-integer)
(make-dsn-option-rule dsn-option-dbname "dbname")
(make-dsn-option-rule dsn-option-user "user")
(make-dsn-option-rule dsn-option-pass "password")
(defrule dsn-option (or dsn-option-ssl
dsn-option-host
dsn-option-port
dsn-option-dbname
dsn-option-user
dsn-option-pass
dsn-option-table-name))
(defrule another-dsn-option (and "&" dsn-option) (defrule another-dsn-option (and "&" dsn-option)
(:lambda (source) (:lambda (source)
@ -228,42 +140,36 @@
dbname dbname
table-name table-name
use-ssl) use-ssl)
;; we want the options to take precedence over the URI components, (apply #'append uri)
;; so we destructure the URI again and prepend options here.
(destructuring-bind (prefix user-pass host-port dbname options) uri
(apply #'append options prefix user-pass host-port (list dbname)))
;; Default to environment variables as described in ;; Default to environment variables as described in
;; http://www.postgresql.org/docs/9.3/static/app-psql.html ;; http://www.postgresql.org/docs/9.3/static/app-psql.html
(declare (ignore type)) (declare (ignore type))
(let ((pgconn (make-instance 'pgsql-connection
(make-instance 'pgsql-connection :user (or user
:user (or user (getenv-default "PGUSER"
(getenv-default "PGUSER" #+unix (getenv-default "USER")
#+unix #-unix (getenv-default "UserName")))
(getenv-default "USER") :pass (or password (getenv-default "PGPASSWORD"))
#-unix :host (or host (getenv-default "PGHOST"
(getenv-default "UserName"))) #+unix :unix
:host (or host (getenv-default "PGHOST" #-unix "localhost"))
#+unix :unix :port (or port (parse-integer
#-unix "localhost")) (getenv-default "PGPORT" "5432")))
:port (or port (parse-integer :name (or dbname (getenv-default "PGDATABASE" user))
(getenv-default "PGPORT" "5432")))
:name (or dbname (getenv-default "PGDATABASE" user))
:use-ssl (or use-ssl (get-pgsslmode "PGSSLMODE")) :use-ssl use-ssl
:table-name table-name))) :table-name table-name))))
;; Now set the password, maybe from ~/.pgpass
(setf (db-pass pgconn)
(or password
(getenv-default "PGPASSWORD")
(match-pgpass-file (db-host pgconn)
(princ-to-string (db-port pgconn))
(db-name pgconn)
(db-user pgconn))))
;; And return our pgconn instance
pgconn))))
(defrule target (and kw-into pgsql-uri) (defrule get-pgsql-uri-from-environment-variable (and kw-getenv name)
(:lambda (p-e-v)
(bind (((_ varname) p-e-v))
(let ((connstring (getenv-default varname)))
(unless connstring
(error "Environment variable ~s is unset." varname))
(parse 'pgsql-uri connstring)))))
(defrule target (and kw-into (or pgsql-uri
get-pgsql-uri-from-environment-variable))
(:destructure (into target) (:destructure (into target)
(declare (ignore into)) (declare (ignore into))
target)) target))
@ -271,7 +177,7 @@
(defun pgsql-connection-bindings (pg-db-uri gucs) (defun pgsql-connection-bindings (pg-db-uri gucs)
"Generate the code needed to set PostgreSQL connection bindings." "Generate the code needed to set PostgreSQL connection bindings."
`((*pg-settings* (pgloader.pgsql:sanitize-user-gucs ',gucs)) `((*pg-settings* ',gucs)
(*pgsql-reserved-keywords* (pgloader.pgsql::*pgsql-reserved-keywords*
(pgloader.pgsql:list-reserved-keywords ,pg-db-uri)))) (pgloader.pgsql:list-reserved-keywords ,pg-db-uri))))

View File

@ -18,25 +18,30 @@
(bind (((_ _ _ table-name) tn)) (bind (((_ _ _ table-name) tn))
(cons :table-name (text table-name))))) (cons :table-name (text table-name)))))
(defrule dbf-option (or option-on-error-stop (defrule dbf-option (or option-batch-rows
option-on-error-resume-next
option-workers
option-concurrency
option-batch-rows
option-batch-size option-batch-size
option-prefetch-rows option-batch-concurrency
option-truncate option-truncate
option-disable-triggers
option-data-only option-data-only
option-schema-only option-schema-only
option-include-drop option-include-drop
option-create-table option-create-table
option-create-tables option-create-tables
option-table-name option-table-name))
option-identifiers-case))
(defrule dbf-options (and kw-with (and dbf-option (* (and comma dbf-option)))) (defrule another-dbf-option (and comma dbf-option)
(:function flatten-option-list)) (:lambda (source)
(bind (((_ option) source)) option)))
(defrule dbf-option-list (and dbf-option (* another-dbf-option))
(:lambda (source)
(destructuring-bind (opt1 opts) source
(alexandria:alist-plist `(,opt1 ,@opts)))))
(defrule dbf-options (and kw-with dbf-option-list)
(:lambda (source)
(bind (((_ opts) source))
(cons :dbf-options opts))))
(defrule dbf-uri (and "dbf://" filename) (defrule dbf-uri (and "dbf://" filename)
(:lambda (source) (:lambda (source)
@ -57,89 +62,57 @@
(defrule load-dbf-optional-clauses (* (or dbf-options (defrule load-dbf-optional-clauses (* (or dbf-options
gucs gucs
casts
before-load before-load
after-schema
after-load)) after-load))
(:lambda (clauses-list) (:lambda (clauses-list)
(alexandria:alist-plist clauses-list))) (alexandria:alist-plist clauses-list)))
;;; dbf defaults to ascii rather than utf-8 (defrule load-dbf-command (and dbf-source target load-dbf-optional-clauses)
(defrule dbf-file-encoding (? (and kw-with kw-encoding encoding))
(:lambda (enc)
(when enc
(bind (((_ _ encoding) enc)) encoding))))
(defrule load-dbf-command (and dbf-source
(? dbf-file-encoding)
target
(? csv-target-table)
load-dbf-optional-clauses)
(:lambda (command) (:lambda (command)
(destructuring-bind (source encoding pguri table-name clauses) (destructuring-bind (source target clauses) command
command `(,source ,target ,@clauses))))
(list* source
encoding
pguri
(or table-name (pgconn-table-name pguri))
clauses))))
(defun lisp-code-for-dbf-dry-run (dbf-db-conn pg-db-conn)
`(lambda ()
(let ((source-db (expand (fetch-file ,dbf-db-conn))))
(check-connection source-db)
(check-connection ,pg-db-conn))))
(defun lisp-code-for-loading-from-dbf (dbf-db-conn pg-db-conn (defun lisp-code-for-loading-from-dbf (dbf-db-conn pg-db-conn
&key &key
target-table-name gucs before after
encoding ((:dbf-options options)))
gucs casts options
before after-schema after
&allow-other-keys)
`(lambda () `(lambda ()
(let* ((*default-cast-rules* ',*db3-default-cast-rules*) (let* ((state-before (pgloader.utils:make-pgstate))
(*cast-rules* ',casts) (summary (null *state*))
(*state* (or *state* (pgloader.utils:make-pgstate)))
(state-after ,(when after `(pgloader.utils:make-pgstate)))
,@(pgsql-connection-bindings pg-db-conn gucs) ,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options) ,@(batch-control-bindings options)
,@(identifier-case-binding options) ,@(identifier-case-binding options)
(on-error-stop (getf ',options :on-error-stop)) (table-name ,(pgconn-table-name pg-db-conn))
(source-db (with-stats-collection ("fetch" :section :pre) (source-db (with-stats-collection ("fetch" :state state-before)
(expand (fetch-file ,dbf-db-conn)))) (expand (fetch-file ,dbf-db-conn))))
(source (source
(make-instance 'copy-db3 (make-instance 'pgloader.db3:copy-db3
:target-db ,pg-db-conn :target-db ,pg-db-conn
:encoding ,encoding :source-db source-db
:source-db source-db :target table-name)))
:target ,(when target-table-name
(create-table target-table-name)))))
,(sql-code-block pg-db-conn :pre before "before load") ,(sql-code-block pg-db-conn 'state-before before "before load")
(copy-database source (pgloader.sources:copy-database source
,@(remove-batch-control-option options) :state-before state-before
:after-schema ',after-schema ,@(remove-batch-control-option options))
:on-error-stop on-error-stop
:create-indexes nil
:foreign-keys nil
:reset-sequences nil)
,(sql-code-block pg-db-conn :post after "after load")))) ,(sql-code-block pg-db-conn 'state-after after "after load")
;; reporting
(when summary
(report-full-summary "Total import time" *state*
:before state-before
:finally state-after)))))
(defrule load-dbf-file load-dbf-command (defrule load-dbf-file load-dbf-command
(:lambda (command) (:lambda (command)
(bind (((source encoding pg-db-uri table-name (bind (((source pg-db-uri
&key options gucs casts before after-schema after) &key ((:dbf-options options)) gucs before after) command))
command)) (lisp-code-for-loading-from-dbf source pg-db-uri
(cond (*dry-run* :gucs gucs
(lisp-code-for-dbf-dry-run source pg-db-uri)) :before before
(t :after after
(lisp-code-for-loading-from-dbf source pg-db-uri :dbf-options options))))
:target-table-name table-name
:encoding encoding
:gucs gucs
:casts casts
:before before
:after-schema after-schema
:after after
:options options))))))

View File

@ -1,73 +0,0 @@
#|
distribute billers using id
distribute bills using biller_id
distribute receivable_accounts using biller_id
distribute payments using biller_id
distribute splits using biller_id
from receivable_accounts
distribute ach_accounts as reference table
|#
(in-package :pgloader.parser)
(defun create-table-from-dsn-table-name (dsn-table-name
&optional (schema-name "public"))
(let ((table (create-table (cdr (second dsn-table-name)))))
(unless (table-schema table)
(setf (table-schema table)
(make-schema :catalog nil
:source-name schema-name
:name (apply-identifier-case schema-name))))
table))
(defrule distribute-reference (and kw-distribute dsn-table-name
kw-as kw-reference kw-table)
(:lambda (d-r)
(make-citus-reference-rule :table (create-table-from-dsn-table-name d-r))))
(defrule distribute-using (and kw-distribute dsn-table-name
kw-using maybe-quoted-namestring)
(:lambda (d-u)
(make-citus-distributed-rule :table (create-table-from-dsn-table-name d-u)
:using (make-column :name (fourth d-u)))))
;;;
;;; The namestring rule allows for commas and we use them as a separator
;;; here, so we need to have our own table name parsing. That's a bummer,
;;; maybe we should revisit the whole table names parsing code?
;;;
(defrule distribute-from-tablename
(or double-quoted-namestring
quoted-namestring
(and (or #\_ (alpha-char-p character))
(* (or (alpha-char-p character)
(digit-char-p character)))))
(:text t))
(defrule maybe-qualified-dist-from-table-name
(and distribute-from-tablename (? (and "." distribute-from-tablename)))
(:lambda (name)
(if (second name)
(cons (first name) (second (second name)))
(cons "public" (first name)))))
(defrule distribute-from-list (+ (and maybe-qualified-dist-from-table-name
(? (and "," ignore-whitespace))))
(:lambda (from-list)
(mapcar #'first from-list)))
(defrule distribute-using-from (and kw-distribute dsn-table-name
kw-using maybe-quoted-namestring
kw-from distribute-from-list)
(:lambda (d-u-f)
(make-citus-distributed-rule :table (create-table-from-dsn-table-name d-u-f)
:using (make-column :name (fourth d-u-f))
:from (mapcar #'create-table (sixth d-u-f)))))
(defrule distribute-commands (+ (or distribute-using-from
distribute-using
distribute-reference))
(:lambda (commands)
(cons :distribute commands)))

View File

@ -7,9 +7,6 @@
(in-package #:pgloader.parser) (in-package #:pgloader.parser)
(defrule option-fixed-header (and kw-fixed kw-header)
(:constant (cons :header t)))
(defrule hex-number (and "0x" (+ (hexdigit-char-p character))) (defrule hex-number (and "0x" (+ (hexdigit-char-p character)))
(:lambda (hex) (:lambda (hex)
(bind (((_ digits) hex)) (bind (((_ digits) hex))
@ -22,10 +19,10 @@
(defrule number (or hex-number dec-number)) (defrule number (or hex-number dec-number))
(defrule field-start-position (and (? kw-from) ignore-whitespace number) (defrule field-start-position (and (? kw-from) ignore-whitespace number)
(:function third)) (:destructure (from ws pos) (declare (ignore from ws)) pos))
(defrule fixed-field-length (and (? kw-for) ignore-whitespace number) (defrule fixed-field-length (and (? kw-for) ignore-whitespace number)
(:function third)) (:destructure (for ws len) (declare (ignore for ws)) len))
(defrule fixed-source-field (and csv-field-name (defrule fixed-source-field (and csv-field-name
field-start-position fixed-field-length field-start-position fixed-field-length
@ -46,29 +43,30 @@
(:lambda (source) (:lambda (source)
(bind (((_ field-defs _) source)) field-defs))) (bind (((_ field-defs _) source)) field-defs)))
(defrule fixed-option (or option-on-error-stop (defrule fixed-option (or option-batch-rows
option-on-error-resume-next
option-workers
option-concurrency
option-batch-rows
option-batch-size option-batch-size
option-prefetch-rows option-batch-concurrency
option-max-parallel-create-index
option-truncate option-truncate
option-drop-indexes option-skip-header))
option-disable-triggers
option-identifiers-case
option-skip-header
option-fixed-header))
(defrule fixed-options (and kw-with (defrule another-fixed-option (and comma fixed-option)
(and fixed-option (* (and comma fixed-option)))) (:lambda (source)
(:function flatten-option-list)) (bind (((_ option) source)) option)))
(defrule fixed-option-list (and fixed-option (* another-fixed-option))
(:lambda (source)
(destructuring-bind (opt1 opts) source
(alexandria:alist-plist `(,opt1 ,@opts)))))
(defrule fixed-options (and kw-with csv-option-list)
(:lambda (source)
(bind (((_ opts) source))
(cons :fixed-options opts))))
(defrule fixed-uri (and "fixed://" filename) (defrule fixed-uri (and "fixed://" filename)
(:lambda (source) (:lambda (source)
(bind (((_ filename) source)) (bind (((_ filename) source))
(make-instance 'fixed-connection :spec filename)))) (make-instance 'fixed-connection :specs filename))))
(defrule fixed-file-source (or stdin (defrule fixed-file-source (or stdin
inline inline
@ -80,13 +78,23 @@
(if (typep src 'fixed-connection) src (if (typep src 'fixed-connection) src
(destructuring-bind (type &rest specs) src (destructuring-bind (type &rest specs) src
(case type (case type
(:stdin (make-instance 'fixed-connection :spec src)) (:stdin (make-instance 'fixed-connection :specs src))
(:inline (make-instance 'fixed-connection :spec src)) (:inline (make-instance 'fixed-connection :specs src))
(:filename (make-instance 'fixed-connection :spec src)) (:filename (make-instance 'fixed-connection :specs src))
(:regex (make-instance 'fixed-connection :spec src)) (:regex (make-instance 'fixed-connection :specs src))
(:http (make-instance 'fixed-connection :uri (first specs)))))))) (:http (make-instance 'fixed-connection :uri (first specs))))))))
(defrule fixed-source (and kw-load kw-fixed kw-from fixed-file-source) (defrule get-fixed-file-source-from-environment-variable (and kw-getenv name)
(:lambda (p-e-v)
(bind (((_ varname) p-e-v)
(connstring (getenv-default varname)))
(unless connstring
(error "Environment variable ~s is unset." varname))
(parse 'fixed-file-source connstring))))
(defrule fixed-source (and kw-load kw-fixed kw-from
(or get-fixed-file-source-from-environment-variable
fixed-file-source))
(:lambda (src) (:lambda (src)
(bind (((_ _ _ source) src)) source))) (bind (((_ _ _ source) src)) source)))
@ -98,85 +106,61 @@
(alexandria:alist-plist clauses-list))) (alexandria:alist-plist clauses-list)))
(defrule load-fixed-cols-file-command (and fixed-source (? file-encoding) (defrule load-fixed-cols-file-command (and fixed-source (? file-encoding)
(? fixed-source-field-list) fixed-source-field-list
target target
(? csv-target-table)
(? csv-target-column-list) (? csv-target-column-list)
load-fixed-cols-file-optional-clauses) load-fixed-cols-file-optional-clauses)
(:lambda (command) (:lambda (command)
(destructuring-bind (source encoding fields pguri table-name columns clauses) (destructuring-bind (source encoding fields target columns clauses) command
command `(,source ,encoding ,fields ,target ,columns ,@clauses))))
(list* source
encoding
fields
pguri
(or table-name (pgconn-table-name pguri))
columns
clauses))))
(defun lisp-code-for-loading-from-fixed (fixed-conn pg-db-conn (defun lisp-code-for-loading-from-fixed (fixed-conn fields pg-db-conn
&key &key
(encoding :utf-8) (encoding :utf-8)
fields
target-table-name
columns columns
gucs before after options gucs before after
&allow-other-keys ((:fixed-options options)))
&aux
(worker-count (getf options :worker-count))
(concurrency (getf options :concurrency)))
`(lambda () `(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn gucs) (let* ((state-before (pgloader.utils:make-pgstate))
(summary (null *state*))
(*state* (or *state* (pgloader.utils:make-pgstate)))
(state-after ,(when after `(pgloader.utils:make-pgstate)))
,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options) ,@(batch-control-bindings options)
,@(identifier-case-binding options) (source-db (with-stats-collection ("fetch" :state state-before)
(source-db (with-stats-collection ("fetch" :section :pre) (expand (fetch-file ,fixed-conn)))))
(expand (fetch-file ,fixed-conn)))))
(progn (progn
,(sql-code-block pg-db-conn :pre before "before load") ,(sql-code-block pg-db-conn 'state-before before "before load")
(let ((on-error-stop ,(getf options :on-error-stop)) (let ((truncate ,(getf options :truncate))
(truncate ,(getf options :truncate))
(disable-triggers ,(getf options :disable-triggers))
(drop-indexes ,(getf options :drop-indexes))
(max-parallel-create-index ,(getf options :max-parallel-create-index))
(source (source
(make-instance 'copy-fixed (make-instance 'pgloader.fixed:copy-fixed
:target-db ,pg-db-conn :target-db ,pg-db-conn
:source source-db :source source-db
:target (create-table ',target-table-name) :target ,(pgconn-table-name pg-db-conn)
:encoding ,encoding :encoding ,encoding
:fields ',fields :fields ',fields
:columns ',columns :columns ',columns
:skip-lines ,(or (getf options :skip-lines) 0) :skip-lines ,(or (getf options :skip-line) 0))))
:header ,(getf options :header)))) (pgloader.sources:copy-from source :truncate truncate))
(copy-database source ,(sql-code-block pg-db-conn 'state-after after "after load")
,@ (when worker-count
(list :worker-count worker-count))
,@ (when concurrency
(list :concurrency concurrency))
:on-error-stop on-error-stop
:truncate truncate
:drop-indexes drop-indexes
:disable-triggers disable-triggers
:max-parallel-create-index max-parallel-create-index))
,(sql-code-block pg-db-conn :post after "after load"))))) ;; reporting
(when summary
(report-full-summary "Total import time" *state*
:before state-before
:finally state-after))))))
(defrule load-fixed-cols-file load-fixed-cols-file-command (defrule load-fixed-cols-file load-fixed-cols-file-command
(:lambda (command) (:lambda (command)
(bind (((source encoding fields pg-db-uri table-name columns (bind (((source encoding fields pg-db-uri columns
&key options gucs before after) command)) &key ((:fixed-options options)) gucs before after) command))
(cond (*dry-run* (lisp-code-for-loading-from-fixed source fields pg-db-uri
(lisp-code-for-csv-dry-run pg-db-uri)) :encoding encoding
(t :columns columns
(lisp-code-for-loading-from-fixed source pg-db-uri :gucs gucs
:encoding encoding :before before
:fields fields :after after
:target-table-name table-name :fixed-options options))))
:columns columns
:gucs gucs
:before before
:after after
:options options))))))

View File

@ -1,17 +0,0 @@
;;;
;;; MS SQL and SQLite style including/excluding rules, using LIKE
;;;
(in-package #:pgloader.parser)
(defrule like-expression (and "'" (+ (not "'")) "'")
(:lambda (le)
(bind (((_ like _) le)) (text like))))
(defrule another-like-expression (and comma like-expression)
(:lambda (source)
(bind (((_ like) source)) like)))
(defrule filter-list-like (and like-expression (* another-like-expression))
(:lambda (source)
(destructuring-bind (filter1 filters) source
(list* filter1 filters))))

View File

@ -6,38 +6,14 @@
(in-package #:pgloader.parser) (in-package #:pgloader.parser)
(defrule tz-utc (~ "UTC") (:constant local-time:+utc-zone+)) (defrule option-create-table (and kw-create kw-table)
(defrule tz-gmt (~ "GMT") (:constant local-time:+gmt-zone+)) (:constant (cons :create-tables t)))
(defrule tz-name (and #\' (+ (not #\')) #\')
(:lambda (tzn)
(bind (((_ chars _) tzn))
(local-time:reread-timezone-repository)
(local-time:find-timezone-by-location-name (text chars)))))
(defrule option-timezone (and kw-timezone (or tz-utc tz-gmt tz-name)) ;;; piggyback on DBF parsing
(:lambda (tzopt) (defrule ixf-options (and kw-with dbf-option-list)
(bind (((_ tz) tzopt)) (cons :timezone tz)))) (:lambda (source)
(bind (((_ opts) source))
(defrule ixf-option (or option-on-error-stop (cons :ixf-options opts))))
option-on-error-resume-next
option-workers
option-concurrency
option-batch-rows
option-batch-size
option-prefetch-rows
option-truncate
option-disable-triggers
option-identifiers-case
option-data-only
option-schema-only
option-include-drop
option-create-table
option-create-tables
option-table-name
option-timezone))
(defrule ixf-options (and kw-with (and ixf-option (* (and comma ixf-option))))
(:function flatten-option-list))
(defrule ixf-uri (and "ixf://" filename) (defrule ixf-uri (and "ixf://" filename)
(:lambda (source) (:lambda (source)
@ -59,67 +35,55 @@
(defrule load-ixf-optional-clauses (* (or ixf-options (defrule load-ixf-optional-clauses (* (or ixf-options
gucs gucs
before-load before-load
after-schema
after-load)) after-load))
(:lambda (clauses-list) (:lambda (clauses-list)
(alexandria:alist-plist clauses-list))) (alexandria:alist-plist clauses-list)))
(defrule load-ixf-command (and ixf-source (defrule load-ixf-command (and ixf-source target load-ixf-optional-clauses)
target
(? csv-target-table)
load-ixf-optional-clauses)
(:lambda (command) (:lambda (command)
(destructuring-bind (source pguri table-name clauses) command (destructuring-bind (source target clauses) command
(list* source `(,source ,target ,@clauses))))
pguri
(or table-name (pgconn-table-name pguri))
clauses))))
(defun lisp-code-for-loading-from-ixf (ixf-db-conn pg-db-conn (defun lisp-code-for-loading-from-ixf (ixf-db-conn pg-db-conn
&key &key
target-table-name gucs options gucs before after
before after-schema after ((:ixf-options options)))
&allow-other-keys)
`(lambda () `(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn gucs) (let* ((state-before (pgloader.utils:make-pgstate))
(summary (null *state*))
(*state* (or *state* (pgloader.utils:make-pgstate)))
(state-after ,(when after `(pgloader.utils:make-pgstate)))
,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options) ,@(batch-control-bindings options)
,@(identifier-case-binding options) ,@(identifier-case-binding options)
(timezone (getf ',options :timezone)) (table-name ,(pgconn-table-name pg-db-conn))
(on-error-stop(getf ',options :on-error-stop)) (source-db (with-stats-collection ("fetch" :state state-before)
(source-db (with-stats-collection ("fetch" :section :pre)
(expand (fetch-file ,ixf-db-conn)))) (expand (fetch-file ,ixf-db-conn))))
(source (source
(make-instance 'copy-ixf (make-instance 'pgloader.ixf:copy-ixf
:target-db ,pg-db-conn :target-db ,pg-db-conn
:source-db source-db :source-db source-db
:target (create-table ',target-table-name) :target table-name)))
:timezone timezone)))
,(sql-code-block pg-db-conn :pre before "before load") ,(sql-code-block pg-db-conn 'state-before before "before load")
(copy-database source (pgloader.sources:copy-database source
,@(remove-batch-control-option :state-before state-before
options ,@(remove-batch-control-option options))
:extras '(:timezone))
:on-error-stop on-error-stop
:after-schema ',after-schema
:foreign-keys nil
:reset-sequences nil)
,(sql-code-block pg-db-conn :post after "after load")))) ,(sql-code-block pg-db-conn 'state-after after "after load")
(when summary
(report-full-summary "Total import time" *state*
:before state-before
:finally state-after)))))
(defrule load-ixf-file load-ixf-command (defrule load-ixf-file load-ixf-command
(:lambda (command) (:lambda (command)
(bind (((source pg-db-uri table-name (bind (((source pg-db-uri
&key options gucs before after-schema after) &key ((:ixf-options options)) gucs before after) command))
command)) (lisp-code-for-loading-from-ixf source pg-db-uri
(cond (*dry-run* :gucs gucs
(lisp-code-for-csv-dry-run pg-db-uri)) :before before
(t :after after
(lisp-code-for-loading-from-ixf source pg-db-uri :ixf-options options))))
:target-table-name table-name
:gucs gucs
:before before
:after-schema after-schema
:after after
:options options))))))

Some files were not shown because too many files have changed in this diff Show More