mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-07 23:07:00 +02:00
New version of pgloader (2.0.2):
* new developers * python code replaces tcl code * one config file for many tables (one command line for loading them all) * manpage * debian package * supports multi-line input file (without quotes) * Informix large objects support (loading to TEXT or BYTEA) * configurable amount of rows per COPY instruction * optional trailing slash removal (support informix UNLOAD file format) * begin processing at any line in the file, by number or row id * dry-run option, to validate input reading without connecting to database * pedantic option, to stop processing on warning
This commit is contained in:
parent
fe3a0480cc
commit
8ed1e0ff2c
25
LICENSE
25
LICENSE
@ -1,25 +0,0 @@
|
|||||||
Copyright (c) 2005, Jan Wieck
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer in the documentation
|
|
||||||
and/or other materials provided with the distribution.
|
|
||||||
* Neither the name of the PostgreSQL Loader nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
|
||||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
||||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
23
Makefile
Normal file
23
Makefile
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
DOCS = pgloader.1.sgml
|
||||||
|
GARBAGE = manpage.links manpage.refs
|
||||||
|
|
||||||
|
# debian setting
|
||||||
|
DESTDIR =
|
||||||
|
|
||||||
|
libdir = $(DESTDIR)/usr/share/pgloader
|
||||||
|
exdir = $(DESTDIR)/usr/share/doc/pgloader
|
||||||
|
|
||||||
|
pgloader = pgloader.py
|
||||||
|
examples = examples
|
||||||
|
libs = $(wildcard pgloader/*.py)
|
||||||
|
|
||||||
|
install:
|
||||||
|
install -m 755 $(pgloader) $(libdir)
|
||||||
|
install -m 755 -d $(libdir)/pgloader
|
||||||
|
|
||||||
|
cp -a $(libs) $(libdir)/pgloader
|
||||||
|
cp -a $(examples) $(exdir)
|
||||||
|
|
||||||
|
man: $(DOCS)
|
||||||
|
docbook2man $(DOCS) 2>/dev/null
|
||||||
|
-rm -f $(GARBAGE)
|
23
debian/changelog
vendored
Normal file
23
debian/changelog
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
pgloader (2.0.2) unstable; urgency=low
|
||||||
|
|
||||||
|
* configurable null and empty_string representations
|
||||||
|
* bugfix on newline_escapes behavior when all lines are not escaped
|
||||||
|
* new global newline_escapes setting
|
||||||
|
* uses psycopg2 by default, falls back to psycopg1 if not available
|
||||||
|
* client_encoding can now be set on each table
|
||||||
|
* documentation (manpage) update
|
||||||
|
|
||||||
|
-- Dimitri Fontaine <dim@dalibo.com> Wed, 15 Nov 2006 22:26:46 +0100
|
||||||
|
|
||||||
|
pgloader (2.0.1-2) unstable; urgency=low
|
||||||
|
|
||||||
|
* package cleaning (lintian warnings and error)
|
||||||
|
|
||||||
|
-- Dimitri Fontaine <dim@dalibo.com> Tue, 14 Nov 2006 18:14:57 +0100
|
||||||
|
|
||||||
|
pgloader (2.0.1-1) unstable; urgency=low
|
||||||
|
|
||||||
|
* Initial release
|
||||||
|
|
||||||
|
-- Dimitri Fontaine <dim@dalibo.com> Mon, 13 Nov 2006 22:56:15 +0100
|
||||||
|
|
1
debian/compat
vendored
Normal file
1
debian/compat
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
5
|
21
debian/control
vendored
Normal file
21
debian/control
vendored
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
Source: pgloader
|
||||||
|
Section: misc
|
||||||
|
Priority: extra
|
||||||
|
Maintainer: Dimitri Fontaine <dim@dalibo.com>
|
||||||
|
Build-Depends: debhelper (>= 5), docbook-to-man (>= 2.0.0), python-support (>= 0.3)
|
||||||
|
Standards-Version: 3.7.2
|
||||||
|
|
||||||
|
Package: pgloader
|
||||||
|
Architecture: all
|
||||||
|
Depends: python (>=2.4.4), python-psycopg2 | python-psycopg (<< 1.1.21)
|
||||||
|
Description: loads flat data files into PostgreSQL
|
||||||
|
pgloader imports data from a flat file and inserts it into a database
|
||||||
|
table. It uses a flat file per database table, and you can configure as
|
||||||
|
many Sections as you want, each one associating a table name and a data
|
||||||
|
file.
|
||||||
|
.
|
||||||
|
Data are parsed and rewritten, then given to PostgreSQL COPY command.
|
||||||
|
Parsing is necessary for dealing with end of lines and eventual trailing
|
||||||
|
separator characters, and for column reordering: your flat data file may
|
||||||
|
not have the same column order as the database table has.
|
||||||
|
|
32
debian/copyright
vendored
Normal file
32
debian/copyright
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
This package was debianized by Dimitri Fontaine <dim@dalibo.com> on
|
||||||
|
Mon, 13 Nov 2006 22:56:15 +0100.
|
||||||
|
|
||||||
|
It was downloaded from http://pgloader.dalibo.org/
|
||||||
|
|
||||||
|
Upstream Author: Dimitri Fontaine <dim@dalibo.com>
|
||||||
|
|
||||||
|
Copyright: 2005, Jan Wieck
|
||||||
|
2006, Dimitri Fontaine
|
||||||
|
|
||||||
|
License:
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted under the terms of the BSD License.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
SUCH DAMAGE.
|
||||||
|
|
||||||
|
On Debian systems, the complete text of the BSD License can be
|
||||||
|
found in `/usr/share/common-licenses/BSD'.
|
||||||
|
|
||||||
|
|
||||||
|
The Debian packaging is (C) 2006, Dimitri Fontaine <dim@dalibo.com> and
|
||||||
|
is licensed under the GPL, see `/usr/share/common-licenses/GPL'.
|
4
debian/dirs
vendored
Normal file
4
debian/dirs
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
usr/bin
|
||||||
|
usr/sbin
|
||||||
|
usr/share/pgloader
|
||||||
|
usr/share/doc/pgloader
|
1
debian/docs
vendored
Normal file
1
debian/docs
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
pgloader.1
|
1
debian/files
vendored
Normal file
1
debian/files
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
pgloader_2.0.2_all.deb misc extra
|
2
debian/pgloader.links
vendored
Normal file
2
debian/pgloader.links
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
/usr/share/pgloader/pgloader.py /usr/bin/pgloader
|
||||||
|
|
1
debian/pgloader.manpages
vendored
Normal file
1
debian/pgloader.manpages
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
pgloader.1
|
1
debian/pycompat
vendored
Normal file
1
debian/pycompat
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
2
|
1
debian/pyversions
vendored
Normal file
1
debian/pyversions
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
2.3-
|
98
debian/rules
vendored
Normal file
98
debian/rules
vendored
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
#!/usr/bin/make -f
|
||||||
|
# -*- makefile -*-
|
||||||
|
# Sample debian/rules that uses debhelper.
|
||||||
|
# This file was originally written by Joey Hess and Craig Small.
|
||||||
|
# As a special exception, when this file is copied by dh-make into a
|
||||||
|
# dh-make output file, you may use that output file without restriction.
|
||||||
|
# This special exception was added by Craig Small in version 0.37 of dh-make.
|
||||||
|
|
||||||
|
# Uncomment this to turn on verbose mode.
|
||||||
|
#export DH_VERBOSE=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CFLAGS = -Wall -g
|
||||||
|
|
||||||
|
ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
|
||||||
|
CFLAGS += -O0
|
||||||
|
else
|
||||||
|
CFLAGS += -O2
|
||||||
|
endif
|
||||||
|
|
||||||
|
configure: configure-stamp
|
||||||
|
configure-stamp:
|
||||||
|
dh_testdir
|
||||||
|
# Add here commands to configure the package.
|
||||||
|
|
||||||
|
touch configure-stamp
|
||||||
|
|
||||||
|
|
||||||
|
build: build-stamp
|
||||||
|
|
||||||
|
build-stamp: configure-stamp
|
||||||
|
dh_testdir
|
||||||
|
|
||||||
|
# Add here commands to compile the package.
|
||||||
|
#$(MAKE)
|
||||||
|
docbook-to-man pgloader.1.sgml > pgloader.1
|
||||||
|
|
||||||
|
touch $@
|
||||||
|
|
||||||
|
clean:
|
||||||
|
dh_testdir
|
||||||
|
dh_testroot
|
||||||
|
rm -f build-stamp configure-stamp
|
||||||
|
|
||||||
|
# Add here commands to clean up after the build process.
|
||||||
|
-$(MAKE) clean
|
||||||
|
|
||||||
|
dh_clean
|
||||||
|
|
||||||
|
install: build
|
||||||
|
dh_testdir
|
||||||
|
dh_testroot
|
||||||
|
dh_clean -k
|
||||||
|
dh_installdirs
|
||||||
|
|
||||||
|
# Add here commands to install the package into debian/pgloader.
|
||||||
|
$(MAKE) DESTDIR=$(CURDIR)/debian/pgloader install
|
||||||
|
|
||||||
|
|
||||||
|
# Build architecture-independent files here.
|
||||||
|
binary-indep: build install
|
||||||
|
# We have nothing to do by default.
|
||||||
|
|
||||||
|
# Build architecture-dependent files here.
|
||||||
|
binary-arch: build install
|
||||||
|
dh_testdir
|
||||||
|
dh_testroot
|
||||||
|
dh_installchangelogs
|
||||||
|
dh_installdocs
|
||||||
|
dh_installexamples
|
||||||
|
# dh_install
|
||||||
|
# dh_installmenu
|
||||||
|
# dh_installdebconf
|
||||||
|
# dh_installlogrotate
|
||||||
|
# dh_installemacsen
|
||||||
|
# dh_installpam
|
||||||
|
# dh_installmime
|
||||||
|
dh_pysupport
|
||||||
|
# dh_installinit
|
||||||
|
# dh_installcron
|
||||||
|
# dh_installinfo
|
||||||
|
dh_installman
|
||||||
|
dh_link
|
||||||
|
dh_strip
|
||||||
|
dh_compress
|
||||||
|
dh_fixperms
|
||||||
|
# dh_perl
|
||||||
|
# dh_makeshlibs
|
||||||
|
dh_installdeb
|
||||||
|
dh_shlibdeps
|
||||||
|
dh_gencontrol
|
||||||
|
dh_md5sums
|
||||||
|
dh_builddeb
|
||||||
|
|
||||||
|
binary: binary-indep binary-arch
|
||||||
|
.PHONY: build clean binary-indep binary-arch binary install configure
|
43
doc/Makefile
43
doc/Makefile
@ -1,43 +0,0 @@
|
|||||||
# $Id: Makefile,v 1.1 2005-11-21 16:05:50 jpargudo Exp $
|
|
||||||
|
|
||||||
rest = $(wildcard *.rest)
|
|
||||||
html = $(addsuffix .html, $(basename $(rest)))
|
|
||||||
pdf = $(addsuffix .pdf, $(basename $(rest)))
|
|
||||||
|
|
||||||
pdf: $(pdf) clean
|
|
||||||
|
|
||||||
html: $(html)
|
|
||||||
|
|
||||||
dist-clean: clean
|
|
||||||
@rm -f $(pdf) $(html)
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f *.aux *.log *~ *.tex *.out *.toc *.dvi
|
|
||||||
|
|
||||||
%.html: %.rest
|
|
||||||
rest2html --stylesheet lib/stylesheet.sty \
|
|
||||||
--no-section-numbering \
|
|
||||||
--language=fr \
|
|
||||||
$< > $@
|
|
||||||
|
|
||||||
%.pdf: %.dvi
|
|
||||||
dvipdf $<
|
|
||||||
|
|
||||||
%.dvi: %.tex
|
|
||||||
latex $< >> /dev/null
|
|
||||||
latex $< >> /dev/null
|
|
||||||
|
|
||||||
%.tex: %.rest
|
|
||||||
rest2latex --use-latex-toc \
|
|
||||||
--stylesheet lib/stylesheet.sty \
|
|
||||||
--use-latex-footnotes \
|
|
||||||
--no-section-numbering \
|
|
||||||
--language=fr \
|
|
||||||
--input-encoding=iso-8859-15 \
|
|
||||||
--table-style=booktabs \
|
|
||||||
--output-encoding=iso-8859-15 \
|
|
||||||
$< > $@
|
|
||||||
help:
|
|
||||||
@echo " Programmes nécessaires: docbook, latex, dvipdf, kpdf"
|
|
||||||
|
|
||||||
.PHONY: pdf
|
|
34
doc/README
34
doc/README
@ -1,34 +0,0 @@
|
|||||||
======
|
|
||||||
README
|
|
||||||
======
|
|
||||||
|
|
||||||
How to compile documentation for pgloader
|
|
||||||
-----------------------------------------
|
|
||||||
|
|
||||||
**doc_pgloader.rest**
|
|
||||||
|
|
||||||
Some documentation for pgloader project, in english.
|
|
||||||
|
|
||||||
ReST format (see
|
|
||||||
http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html for more info
|
|
||||||
on this format and tools).
|
|
||||||
|
|
||||||
Since it's plain text, you don't need to compile anything to read it ;-)
|
|
||||||
|
|
||||||
But you can still get a PDF or an HTML file:
|
|
||||||
|
|
||||||
compile PDF version :
|
|
||||||
|
|
||||||
$ make doc_pgloader.pdf
|
|
||||||
|
|
||||||
compile HTML version :
|
|
||||||
|
|
||||||
$ make doc_pgloader.html
|
|
||||||
|
|
||||||
Contact the authors
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
Feel free to send me questions / comments / patches / whatever :
|
|
||||||
|
|
||||||
Jean-Paul Argudo <jean-paul.argudo@dalibo.com>
|
|
||||||
|
|
@ -1,189 +0,0 @@
|
|||||||
========
|
|
||||||
pgloader
|
|
||||||
========
|
|
||||||
|
|
||||||
:Author:
|
|
||||||
Jean-Paul Argudo <jean-paul.argudo@dalibo.com>
|
|
||||||
|
|
||||||
:Version:
|
|
||||||
$Id: doc_pgloader.rest,v 1.2 2006-03-28 21:39:25 jpargudo Exp $
|
|
||||||
:Type:
|
|
||||||
User manual
|
|
||||||
|
|
||||||
:Comment:
|
|
||||||
pgLoader v.1.x documentation (install, usage and example)
|
|
||||||
|
|
||||||
:Licence:
|
|
||||||
BSD
|
|
||||||
|
|
||||||
About
|
|
||||||
=====
|
|
||||||
|
|
||||||
pgloader (http://pgfoundry.org/projects/pgloader/) is a new project allowing
|
|
||||||
you to import data in a PostgreSQL database.
|
|
||||||
|
|
||||||
You have to launch pgloader as many times you have tables. pgloader handles
|
|
||||||
just one table at a time.
|
|
||||||
|
|
||||||
All bad records are put together in a file, with a logfile explaining origins
|
|
||||||
of errors.
|
|
||||||
|
|
||||||
|
|
||||||
Installation
|
|
||||||
============
|
|
||||||
|
|
||||||
Under Debian, the current installation is a bit tricky (as per 200510xx): ::
|
|
||||||
|
|
||||||
wget http://debian.wow-vision.com.sg/debian/pool/main/p/postgresql libpgtcl_7.4.7-6sarge1_i386.deb
|
|
||||||
dpkg -i libpgtcl_7.4.7-6sarge1_i386.deb
|
|
||||||
apt-get install tcllib
|
|
||||||
wget http://pgfoundry.org/frs/download.php/233/pgloader-1.0.tar.gz
|
|
||||||
tar zxvf pgloader-1.0.tar.gz
|
|
||||||
|
|
||||||
Then you can eventually put the binary into /usr/local/bin to facilitate
|
|
||||||
command lines: ::
|
|
||||||
|
|
||||||
$ cp pgloader-1.0/pgloader /usr/local/bin
|
|
||||||
|
|
||||||
Principle
|
|
||||||
=========
|
|
||||||
|
|
||||||
You must fill two files per table:
|
|
||||||
|
|
||||||
* a parameter file, let's call it <table>.conf
|
|
||||||
* a datafile, let's call it <table>.data
|
|
||||||
|
|
||||||
You need also all necessary parameters to the db connexion you want to use:
|
|
||||||
|
|
||||||
Common ones are the following:
|
|
||||||
|
|
||||||
* host : name of the server where your PostgreSQL db lives (localhost ?)
|
|
||||||
* user : username (you?)
|
|
||||||
* password : username's password (mybigsecret)
|
|
||||||
* dbname : name of the PostgreSQL db
|
|
||||||
|
|
||||||
These parameters are put together in a double-quoted string:
|
|
||||||
|
|
||||||
"host=localhost user=me password=mybigsecret dbname=mydatabase"
|
|
||||||
|
|
||||||
This string has the same format that PQconnectdb expects in libpq. Its
|
|
||||||
complete documentation can be read at:
|
|
||||||
http://www.postgresql.org/docs/current/static/libpq.html#LIBPQ-CONNECT
|
|
||||||
|
|
||||||
You can for sure add much more parameters, depending your db configuration.
|
|
||||||
|
|
||||||
Example
|
|
||||||
=======
|
|
||||||
|
|
||||||
We want to insert records in "foo" table: ::
|
|
||||||
|
|
||||||
test=> \d foo
|
|
||||||
Table «public.foo»
|
|
||||||
Colonne | Type | Modificateurs
|
|
||||||
---------+---------+---------------
|
|
||||||
a | integer | not null
|
|
||||||
b | date |
|
|
||||||
c | text |
|
|
||||||
Index :
|
|
||||||
«foo_pkey» PRIMARY KEY, btree (a)
|
|
||||||
|
|
||||||
The datafile
|
|
||||||
------------
|
|
||||||
|
|
||||||
Our datafile "foo.data" as following records: ::
|
|
||||||
|
|
||||||
1;1987-12-04;"This is a test of data file"
|
|
||||||
2;2005-03-02;"diziz'another test with som'o'lil'quotes"
|
|
||||||
42;;"No need to date this"
|
|
||||||
67;1999-01-02;Oops I didn't escape this string?!
|
|
||||||
|
|
||||||
Please note that:
|
|
||||||
|
|
||||||
* fields are separated with a semicolon
|
|
||||||
* you can handle presence of empty data: the empty field is represented with
|
|
||||||
two semicolons following
|
|
||||||
* we have a record per line
|
|
||||||
* there is no line separator other than \n
|
|
||||||
* dates are in ISO format: YYYY-MM-DD (a fix is coming to handle "set datestyle
|
|
||||||
to" in the conf file)
|
|
||||||
* you can optionally escape strings by double-quoting them
|
|
||||||
|
|
||||||
Configuration file
|
|
||||||
------------------
|
|
||||||
|
|
||||||
The corresponding file "foo.conf" for the above datafile is the following: ::
|
|
||||||
|
|
||||||
# ----
|
|
||||||
# Conversion parameter file for pgloader
|
|
||||||
#
|
|
||||||
# Possible file formats:
|
|
||||||
# COPY native PostgreSQL COPY format (default)
|
|
||||||
# CSV Comma separated variables
|
|
||||||
# MSCSV Comma separated variables alternate format
|
|
||||||
#
|
|
||||||
# The COPY command is constructed from the table_name, the
|
|
||||||
# table_columns and the eventual nulls string definition.
|
|
||||||
#
|
|
||||||
# The default column separator character is comma.
|
|
||||||
# ----
|
|
||||||
|
|
||||||
table_name = foo
|
|
||||||
table_columns = a,b,c
|
|
||||||
file_format = CSV
|
|
||||||
group_size = 1000
|
|
||||||
file_sepchar = ;
|
|
||||||
#nulls = NULL
|
|
||||||
quote = "
|
|
||||||
file_is_utf8 = 0
|
|
||||||
|
|
||||||
Note that the separator character is set to ";" and that quoting is specified
|
|
||||||
with the character double-quote: "
|
|
||||||
|
|
||||||
Inserts will be committed in blocks of 1000 rows at a time.
|
|
||||||
|
|
||||||
Neither the datafile nor the database is in utf-8, so the parameter *file_is_utf8* is
|
|
||||||
set to 0. Set it to 1 otherwise: when both database and datafile are in utf-8.
|
|
||||||
|
|
||||||
Since ``pgctl`` internals run in utf-8, the data must be converted *on the
|
|
||||||
fly* to utf-8 when reading the datafile, thats why pgloader needs to know how
|
|
||||||
is the datafile like, utf-8 or not.
|
|
||||||
|
|
||||||
|
|
||||||
pgloader execution
|
|
||||||
------------------
|
|
||||||
|
|
||||||
The execution is quite simple: ::
|
|
||||||
|
|
||||||
$ pgloader foo.conf foo.data "host=localhost user=me password=mybigsecret \
|
|
||||||
dbname=mydatabase"
|
|
||||||
|
|
||||||
4 row(s) loaded
|
|
||||||
0 row(s) rejected
|
|
||||||
|
|
||||||
A simple verification of what has been inserted: ::
|
|
||||||
|
|
||||||
test=> select * from foo ;
|
|
||||||
a | b | c
|
|
||||||
----+------------+------------------------------------------
|
|
||||||
1 | 1987-12-04 | This is a test of data file
|
|
||||||
2 | 2005-03-02 | diziz'another test with som'o'lil'quotes
|
|
||||||
42 | | No need to date this
|
|
||||||
67 | 1999-01-02 | Oops I didn't escape this string?!
|
|
||||||
(4 lines)
|
|
||||||
|
|
||||||
**Note**: You will find this example in the doc/example/ directory.
|
|
||||||
|
|
||||||
when errors occurs
|
|
||||||
------------------
|
|
||||||
|
|
||||||
Check the following:
|
|
||||||
|
|
||||||
* if your configuration file is not okay, pgloader will tell you whats wrong
|
|
||||||
|
|
||||||
* if you have a problem with the data you try to import, you'll find in the
|
|
||||||
.rej file data that have been rejected. In the .rejlog file given problems
|
|
||||||
will be explicited: a group of error messages per rejected row.
|
|
||||||
|
|
||||||
Then you'll have to correct errors in .rej file and import *that* file like all
|
|
||||||
the others: don't reimport anything else, all the good data is already in the
|
|
||||||
box :)
|
|
@ -1,2 +0,0 @@
|
|||||||
drop table foo;
|
|
||||||
create table foo (a integer primary key, b date, c text);
|
|
@ -1,21 +0,0 @@
|
|||||||
# ----
|
|
||||||
# Conversion parameter file for pgloader
|
|
||||||
#
|
|
||||||
# Possible file formats:
|
|
||||||
# COPY native PostgreSQL COPY format (default)
|
|
||||||
# CSV Comma separated variables
|
|
||||||
# MSCSV Comma separated variables alternate format
|
|
||||||
#
|
|
||||||
# The COPY command is constructed from the table_name, the
|
|
||||||
# table_columns and the eventual nulls string definition.
|
|
||||||
#
|
|
||||||
# The default column separator character is comma.
|
|
||||||
# ----
|
|
||||||
|
|
||||||
table_name = foo
|
|
||||||
table_columns = a,b,c
|
|
||||||
file_format = CSV
|
|
||||||
group_size = 1000
|
|
||||||
file_sepchar = ;
|
|
||||||
#nulls = NULL
|
|
||||||
quote = "
|
|
@ -1,4 +0,0 @@
|
|||||||
1;1987-12-04;"This is a test of data file"
|
|
||||||
2;2005-03-02;"diziz'another test with som'o'lil'quotes"
|
|
||||||
42;;"No need to date this"
|
|
||||||
67;1999-01-02;Oops I didn't escape this string?!
|
|
@ -1,52 +0,0 @@
|
|||||||
\let\oldAuthor\author
|
|
||||||
\renewcommand{\author}[1]{\newcommand{\myAuthor}{#1}\oldAuthor{#1}}
|
|
||||||
\let\oldTitle\title
|
|
||||||
\renewcommand{\title}[1]{\newcommand{\myTitle}{#1}\oldTitle{#1}}
|
|
||||||
|
|
||||||
\usepackage{eurosym}
|
|
||||||
\usepackage[latin9]{inputenc}
|
|
||||||
\let ¤ = \euro
|
|
||||||
|
|
||||||
\usepackage{fancyhdr}
|
|
||||||
\pagestyle{fancy}
|
|
||||||
|
|
||||||
\lhead{}
|
|
||||||
\chead{}
|
|
||||||
\rhead{\myTitle}
|
|
||||||
\lfoot{\textsf{pgFoundry}
|
|
||||||
}
|
|
||||||
\cfoot{\small{pgloader documentation \\
|
|
||||||
http://pgfoundry.org/projects/pgloader/}}
|
|
||||||
\rfoot{\thepage\ / \pageref*{LastPage}}
|
|
||||||
|
|
||||||
\renewcommand{\headrulewidth}{0.4pt}
|
|
||||||
\renewcommand{\footrulewidth}{0.4pt}
|
|
||||||
|
|
||||||
\usepackage{helvet}
|
|
||||||
|
|
||||||
\renewcommand{\familydefault}{phv}
|
|
||||||
|
|
||||||
%Parametrage pour une feuille A4 pleine (merci SBI)
|
|
||||||
\evensidemargin = 30mm
|
|
||||||
\oddsidemargin = 30mm
|
|
||||||
\voffset=-1in
|
|
||||||
\topmargin = 17mm
|
|
||||||
\headheight = 14.5mm
|
|
||||||
\headsep = 15mm
|
|
||||||
\hoffset=-1in
|
|
||||||
\marginparsep = 0pt
|
|
||||||
\marginparwidth = 0pt
|
|
||||||
\footskip = 20mm
|
|
||||||
\textwidth=162mm
|
|
||||||
\textheight=200mm
|
|
||||||
\paperwidth=210mm
|
|
||||||
\paperheight=297mm
|
|
||||||
\parindent=0pt
|
|
||||||
\parskip=5pt
|
|
||||||
%fin parametrage A4 plein
|
|
||||||
|
|
||||||
\usepackage{lastpage}
|
|
||||||
|
|
||||||
\hypersetup{colorlinks=true}
|
|
||||||
|
|
||||||
\usepackage{indentfirst}
|
|
55
examples/README
Normal file
55
examples/README
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
In this directory you'll find some pgloader usage examples.
|
||||||
|
|
||||||
|
To use them, please first create a pgloader database, then for each example
|
||||||
|
the tables it needs, then issue the pgloader command:
|
||||||
|
|
||||||
|
$ createdb --encoding=utf-8 pgloader
|
||||||
|
$ cd examples
|
||||||
|
$ psql pgloader < simple/simple.sql
|
||||||
|
$ ../pgloader.py -Tvc pgloader.conf simple
|
||||||
|
|
||||||
|
If you want to load data from all examples, create tables for all of them
|
||||||
|
first, then run pgloader without argument.
|
||||||
|
|
||||||
|
The provided examples are:
|
||||||
|
|
||||||
|
. simple
|
||||||
|
|
||||||
|
This dataset shows basic case, with trailing separator and data
|
||||||
|
reordering.
|
||||||
|
|
||||||
|
. errors
|
||||||
|
|
||||||
|
Same test, but with impossible dates. Should report some errors. If it does
|
||||||
|
not report errors, check you're not using psycopg 1.1.21.
|
||||||
|
|
||||||
|
. clob
|
||||||
|
|
||||||
|
This dataset shows some text large object importing to PostgreSQL text
|
||||||
|
datatype.
|
||||||
|
|
||||||
|
. cluttered
|
||||||
|
|
||||||
|
A dataset with newline escaped and multi-line input (without quoting)
|
||||||
|
Beware of data reordering, too.
|
||||||
|
|
||||||
|
|
||||||
|
You can launch all those pgloader tests in one run, provided you created the
|
||||||
|
necessary tables:
|
||||||
|
|
||||||
|
$ for test in simple clob cluttered; do psql pgloader < $test/$test.sql; done
|
||||||
|
$ ../pgloader.py -Tc pgloader.conf
|
||||||
|
|
||||||
|
[...]
|
||||||
|
|
||||||
|
Table name | duration | size | updates | errors
|
||||||
|
====================================================================
|
||||||
|
clob | 0.121s | 32 kB | 7 | 0
|
||||||
|
cluttered | 0.041s | 32 kB | 3 | 0
|
||||||
|
simple | 0.040s | 16 kB | 6 | 0
|
||||||
|
====================================================================
|
||||||
|
Total | 0.369s | 80 kB | 16 | 0
|
||||||
|
|
||||||
|
And you then have a nice summary.
|
||||||
|
|
||||||
|
|
7
examples/clob/clob.data
Normal file
7
examples/clob/clob.data
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
1|0,16,clob.out
|
||||||
|
2|16,20,clob.out
|
||||||
|
3|36,23,clob.out
|
||||||
|
4|59,15,clob.out
|
||||||
|
5|6e,13,clob.out
|
||||||
|
6|81,6,clob.out
|
||||||
|
7|87,d,clob.out
|
1
examples/clob/clob.out
Normal file
1
examples/clob/clob.out
Normal file
@ -0,0 +1 @@
|
|||||||
|
This is some clob dataTo be used on several data entryReferences to those are to be foundIn the clob.data fileThey refer to beginlengthand filename.
|
4
examples/clob/clob.sql
Normal file
4
examples/clob/clob.sql
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
CREATE TABLE clob (
|
||||||
|
a integer primary key,
|
||||||
|
b text
|
||||||
|
);
|
16
examples/cluttered/cluttered.data
Normal file
16
examples/cluttered/cluttered.data
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
1^some multi\
|
||||||
|
line text with\
|
||||||
|
newline escaping^and some other data following^
|
||||||
|
2^and another line^clean^
|
||||||
|
3^and\
|
||||||
|
a last multiline\
|
||||||
|
escaped line
|
||||||
|
with a missing\
|
||||||
|
escaping^just to test^
|
||||||
|
4^\ ^empty value^
|
||||||
|
5^^null value^
|
||||||
|
6^multi line\
|
||||||
|
escaped value\
|
||||||
|
\
|
||||||
|
with empty line\
|
||||||
|
embeded^last line^
|
5
examples/cluttered/cluttered.sql
Normal file
5
examples/cluttered/cluttered.sql
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
CREATE TABLE cluttered (
|
||||||
|
a integer primary key,
|
||||||
|
b text,
|
||||||
|
c text
|
||||||
|
);
|
7
examples/errors/errors.data
Normal file
7
examples/errors/errors.data
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
1|some first row text|2006-13-11|
|
||||||
|
2|some second row text|2006-11-11|
|
||||||
|
3|some third row text|2006-10-12|
|
||||||
|
4|\ |2006-16-4|
|
||||||
|
5|some fifth row text|2006-5-12|
|
||||||
|
6|some sixth row text|2006-13-10|
|
||||||
|
7|some null date to play with||
|
5
examples/errors/errors.sql
Normal file
5
examples/errors/errors.sql
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
CREATE TABLE errors (
|
||||||
|
a integer primary key,
|
||||||
|
b date,
|
||||||
|
c text
|
||||||
|
);
|
51
examples/pgloader.conf
Normal file
51
examples/pgloader.conf
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
[pgsql]
|
||||||
|
host = localhost
|
||||||
|
port = 5432
|
||||||
|
base = pgloader
|
||||||
|
user = dim
|
||||||
|
pass = None
|
||||||
|
|
||||||
|
client_encoding = 'utf-8'
|
||||||
|
copy_every = 5
|
||||||
|
commit_every = 5
|
||||||
|
#copy_delimiter = %
|
||||||
|
|
||||||
|
null = ""
|
||||||
|
empty_string = "\ "
|
||||||
|
|
||||||
|
newline_escapes = \
|
||||||
|
|
||||||
|
[simple]
|
||||||
|
table = simple
|
||||||
|
filename = simple/simple.data
|
||||||
|
field_sep = |
|
||||||
|
trailing_sep = True
|
||||||
|
columns = a:1, b:3, c:2
|
||||||
|
|
||||||
|
# these reject settings are the defaults
|
||||||
|
reject_log = /tmp/simple.rej.log
|
||||||
|
reject_data = /tmp/simple.rej
|
||||||
|
|
||||||
|
[errors]
|
||||||
|
table = errors
|
||||||
|
filename = errors/errors.data
|
||||||
|
field_sep = |
|
||||||
|
trailing_sep = True
|
||||||
|
columns = a:1, b:3, c:2
|
||||||
|
|
||||||
|
[clob]
|
||||||
|
table = clob
|
||||||
|
filename = clob/clob.data
|
||||||
|
field_sep = |
|
||||||
|
columns = a:1, b:2
|
||||||
|
index = a:1
|
||||||
|
blob_columns = b:2:ifx_clob
|
||||||
|
|
||||||
|
[cluttered]
|
||||||
|
table = cluttered
|
||||||
|
filename = cluttered/cluttered.data
|
||||||
|
field_sep = ^
|
||||||
|
trailing_sep = True
|
||||||
|
newline_escapes = c:\
|
||||||
|
field_count = 3
|
||||||
|
columns = a:1, b:3, c:2
|
7
examples/simple/simple.data
Normal file
7
examples/simple/simple.data
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
1|some first row text|2006-11-11|
|
||||||
|
2|some second row text|2006-11-11|
|
||||||
|
3|some third row text|2006-10-12|
|
||||||
|
4|\ |2006-10-4|
|
||||||
|
5|some fifth row text|2006-5-12|
|
||||||
|
6|some sixth row text|2006-7-10|
|
||||||
|
7|some null date to play with||
|
5
examples/simple/simple.sql
Normal file
5
examples/simple/simple.sql
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
CREATE TABLE simple (
|
||||||
|
a integer primary key,
|
||||||
|
b date,
|
||||||
|
c text
|
||||||
|
);
|
742
pgloader.1.sgml
Normal file
742
pgloader.1.sgml
Normal file
@ -0,0 +1,742 @@
|
|||||||
|
<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN">
|
||||||
|
<refentry>
|
||||||
|
<refentryinfo>
|
||||||
|
<address>
|
||||||
|
<email>dim@dalibo.com</email>
|
||||||
|
</address>
|
||||||
|
<author>
|
||||||
|
<firstname>Dimitri</firstname>
|
||||||
|
<surname>Fontaine</surname>
|
||||||
|
</author>
|
||||||
|
<date>August 2006</date>
|
||||||
|
<copyright>
|
||||||
|
<year>2006</year>
|
||||||
|
<holder>Dimitri Fontaine</holder>
|
||||||
|
</copyright>
|
||||||
|
</refentryinfo>
|
||||||
|
|
||||||
|
<refmeta>
|
||||||
|
<refentrytitle>pgloader</refentrytitle>
|
||||||
|
<manvolnum>1</manvolnum>
|
||||||
|
</refmeta>
|
||||||
|
|
||||||
|
<refnamediv>
|
||||||
|
<refname>pgloader</refname>
|
||||||
|
<refpurpose>
|
||||||
|
Import CSV data and Large Object to PostgreSQL
|
||||||
|
</refpurpose>
|
||||||
|
</refnamediv>
|
||||||
|
|
||||||
|
<refsynopsisdiv>
|
||||||
|
<cmdsynopsis>
|
||||||
|
<command>pgloader</command>
|
||||||
|
<arg><option>-c</option> configuration file</arg>
|
||||||
|
<arg><option>-p</option> pedantic</arg>
|
||||||
|
<arg><option>-d</option> debug</arg>
|
||||||
|
<arg><option>-v</option> verbose</arg>
|
||||||
|
<arg><option>-n</option> dry run</arg>
|
||||||
|
<arg><option>-Cn</option> count</arg>
|
||||||
|
<arg><option>-Fn</option> from</arg>
|
||||||
|
<arg><option>-In</option> from id</arg>
|
||||||
|
<arg><option>-E</option> input files encoding</arg>
|
||||||
|
<arg>Section1 Section2</arg>
|
||||||
|
</cmdsynopsis>
|
||||||
|
</refsynopsisdiv>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>description</title>
|
||||||
|
<para>
|
||||||
|
<command>pgloader</command> imports data from a flat file and
|
||||||
|
insert it into a database table. It uses a flat file per
|
||||||
|
database table, and you can configure as many Sections as you
|
||||||
|
want, each one associating a table name and a data file.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Data are parsed and rewritten, then given to PostgreSQL
|
||||||
|
<command>COPY</command> command. Parsing is necessary for
|
||||||
|
dealing with end of lines and eventual trailing separator
|
||||||
|
characters, and for column reordering: your flat data file may
|
||||||
|
not have the same column order as the database table has.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<command>pgloader</command> is also able to load some large
|
||||||
|
objects data into PostgreSQL, as of now only Informix
|
||||||
|
<command>UNLOAD</command> data files are supported. This command
|
||||||
|
gives large objects data location information into the main data
|
||||||
|
file. <command>pgloader</command> parses it and produces an SQL
|
||||||
|
UPDATE order per large object, and commit those orders once
|
||||||
|
every <command>commit_every</command> configuration parameter.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<command>pgloader</command> issues some timing statistics
|
||||||
|
every <command>commit_every</command> commits (see Configuration
|
||||||
|
for this setting). At the end of each section processing, a
|
||||||
|
summary of overall operations, numbers of updates and commits,
|
||||||
|
time it took in seconds, errors logged and database errors is
|
||||||
|
issued.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>options</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
In order for <command>pgloader</command> to run, you have to
|
||||||
|
edit a configuration file (see Configuration) consisting of
|
||||||
|
Section definitions. Each section refers to a PostgreSQL table
|
||||||
|
into which some data is to be loaded.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-c</option></term>
|
||||||
|
<term><option>--config</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
specifies the configuration file to use. The default file
|
||||||
|
name is <filename>pgloader.conf</filename>, searched into
|
||||||
|
current working directory.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-p</option></term>
|
||||||
|
<term><option>--pedantic</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
activates the <command>pedantic</command> mode, where any
|
||||||
|
warning is considered as a fatal error, thus stopping the
|
||||||
|
processing of the input file.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-d</option></term>
|
||||||
|
<term><option>--debug</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
makes <command>pgloader</command> say it all about what it
|
||||||
|
does. debug implies verbose.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-v</option></term>
|
||||||
|
<term><option>--verbose</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
makes <command>pgloader</command> very verbose about
|
||||||
|
what it does.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-n</option></term>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
makes <command>pgloader</command> simulate operations,
|
||||||
|
that implies no database connection and no data extraction
|
||||||
|
from blob files.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-T</option></term>
|
||||||
|
<term><option>--truncate</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
makes <command>pgloader</command> issue a truncate SQL
|
||||||
|
command before importing data.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-V</option></term>
|
||||||
|
<term><option>--vacuum</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
makes <command>pgloader</command> issue a vacuum full
|
||||||
|
verbose analyse SQL command before importing data.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This vacuum is run from shell command
|
||||||
|
<command>/usr/bin/vacuumdb</command> with connection
|
||||||
|
information taken from the configuration file (see
|
||||||
|
Configuration section of this manual page), but without
|
||||||
|
password prompting. If you use this option, please
|
||||||
|
configure your <filename>pg_hba.conf</filename> in a way
|
||||||
|
no password is prompted (trust).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-C</option></term>
|
||||||
|
<term><option>--count</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Number of input lines to process, default is to process
|
||||||
|
all the input lines.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-F</option></term>
|
||||||
|
<term><option>--from</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Input line number from which we begin to process (and
|
||||||
|
count). <command>pgloader</command> will skip all
|
||||||
|
preceding lines.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
You can't use both <option>-F</option> and
|
||||||
|
<option>-I</option> at the same time.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-I</option></term>
|
||||||
|
<term><option>--from-id</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
From which <command>id</command> do we begin to process
|
||||||
|
(and count) input lines.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
When a composite key is used, you have to give each column
|
||||||
|
of the key separated by comma, on the form col_name=value.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Please notice using the <command>--from-id</command>
|
||||||
|
option implies <command>pgloader</command> will try to get
|
||||||
|
row id of each row, it being on the interval processed or
|
||||||
|
not. This could have some performance impact, and you may
|
||||||
|
end up preferring to use <command>--from</command> instead.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Example: <command>pgloader -I col1:val1,col2:val2</command>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
You can't use both <option>-F</option> and
|
||||||
|
<option>-I</option> at the same time.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-E</option></term>
|
||||||
|
<term><option>--encoding</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Input data files encoding. Defaults to 'latin9'.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>Section</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
is the name of a configured Section describing some data
|
||||||
|
to load
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Section arguments are optional; if no section is given
|
||||||
|
all configured sections are processed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>configuration</title>
|
||||||
|
<para>
|
||||||
|
The configuration file has a .ini file syntax, its first section
|
||||||
|
has to be the <command>pgsql</command> one, defining how to
|
||||||
|
access to the PostgreSQL database server where to load
|
||||||
|
data. Then you may define any number of sections, each one
|
||||||
|
describing a data loading task to be performed by
|
||||||
|
<command>pgloader</command>.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The <command>[pgsql]</command> section has the following
|
||||||
|
options, which all must be set.
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>host</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
PostgreSQL database server name, for example
|
||||||
|
<filename>localhost</filename>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>port</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
PostgreSQL database server listening port, 5432. You have
|
||||||
|
to fill this entry.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>base</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The name of the database you want to load data into.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>user</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Connecting PostgreSQL user name.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>pass</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The password of the user. The best option is to grant a
|
||||||
|
<command>trust</command> access privilege in PostgreSQL
|
||||||
|
<filename>pg_hba.conf</filename>. Then you can set this
|
||||||
|
entry to whatever value you want to.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>client_encoding</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Set this parameter to have <command>pgloader</command>
|
||||||
|
connect to PostgreSQL using this encoding.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to 'latin9'.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>copy_every</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
When issuing <command>COPY</command> PostgreSQL commands,
|
||||||
|
<command>pgloader</command> will not make a single big
|
||||||
|
COPY attempt, but copy <command>copy_every</command> lines
|
||||||
|
at a time.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to 10000.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>commit_every</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
PostgreSQL <command>COMMIT</command> frequency, expressed
|
||||||
|
in <command>UPDATE</command> orders. A good value is 1000,
|
||||||
|
that means committing the SQL transaction every 1000 input
|
||||||
|
lines.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<command>pgloader</command> issues commit every
|
||||||
|
commit_every updates, on connection closing and when a SQL
|
||||||
|
error occurs.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to 1000.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>copy_delimiter</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The field separator to use in COPY FROM produced statements. If
|
||||||
|
you don't specify this, the same separator as the one given in
|
||||||
|
<command>field_sep</command> parameter will be used.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Please note <command>PostgreSQL</command> requires a single char
|
||||||
|
properly encoded (see your <command>client_encoding</command>
|
||||||
|
parameter), or it aborts with an error and may even crash.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to
|
||||||
|
<command>field_sep</command>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>newline_escapes</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
For parameter effect description, see below (same name, table
|
||||||
|
local setting).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
You can set up here a global escape character, to be considered on
|
||||||
|
each and every column of each and every table defined
|
||||||
|
thereafter.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>null</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
You can configure here how <command>null</command> value is
|
||||||
|
represented into your flat data file.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to
|
||||||
|
<command>''</command> (that is empty string).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>empty_string</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
You can configure here how empty values are represented into
|
||||||
|
your flat data file.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to <command>'\
|
||||||
|
'</command> (that is backslash followed by space).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
You then can define any number of data section, and give them an
|
||||||
|
arbitrary name. Some options are required, some are actually
|
||||||
|
optional, in which case it is said so thereafter.
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>table</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The table name of the database where to load data.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>filename</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The absolute path to the input data file. The large object
|
||||||
|
files are to be found into the same directory. Their name
|
||||||
|
can be in the form [bc]lob[0-9a-f]{4}.[0-9a-f]{3}, but
|
||||||
|
this information is not used by
|
||||||
|
<command>pgloader</command>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>reject_log</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
In case of errors processing input data, a human readable
|
||||||
|
log per rejected input data line is produced into the
|
||||||
|
reject_log file.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>reject_data</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
In case of errors processing input data, the rejected
|
||||||
|
input line is appended to the reject_data file.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>field_sep</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The field separator used into the data file. The same
|
||||||
|
separator will be used by the generated
|
||||||
|
<command>COPY</command> commands, thus
|
||||||
|
<command>pgloader</command> does not have to deal with
|
||||||
|
escaping the delimiter it uses (input data has to have
|
||||||
|
escaped it).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to pipe char '|'.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>field_count</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The <command>UNLOAD</command> command does not escape
|
||||||
|
newlines when they appear into table data. Hence, you may
|
||||||
|
obtain multi-line data files, where a single database row
|
||||||
|
(say tuple if you prefer to) can span multiple physical
|
||||||
|
lines into the unloaded file.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If this is your case, you may want to configure here the
|
||||||
|
number of columns per tuple. Then
|
||||||
|
<command>pgloader</command> will count columns and
|
||||||
|
buffer line input in order to re-assemble several physical
|
||||||
|
lines into one data row when needed.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>trailing_sep</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
If this option is set to <command>True</command>, the
|
||||||
|
input data file is known to append a
|
||||||
|
<command>field_sep</command> as the last character of each
|
||||||
|
of its lines. With this option set, this last character is
|
||||||
|
then not considered as a field separator.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to False.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>client_encoding</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Set this parameter to have <command>pgloader</command>
|
||||||
|
connect to PostgreSQL using this encoding.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to 'latin9'. If defined
|
||||||
|
on a table level, this local value will override the global
|
||||||
|
one.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>null</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
You can configure here how <command>null</command> value is
|
||||||
|
represented into your flat data file.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to
|
||||||
|
<command>''</command> (that is empty string). If defined on a
|
||||||
|
table level, this local value will override the global one.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>empty_string</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
You can configure here how empty values are represented into
|
||||||
|
your flat data file.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional and defaults to <command>'\
|
||||||
|
'</command> (that is backslash followed by space). If defined on
|
||||||
|
a table level, this local value will override the global one.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>newline_escapes</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Sometimes the input data file has field values containing
|
||||||
|
newlines, and the export program used (as Informix
|
||||||
|
<command>UNLOAD</command> command) escapes in-field
|
||||||
|
newlines. So you want <command>pgloader</command> to keep
|
||||||
|
those newlines, while at the same time preserving them.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This option does the described work on specified fields
|
||||||
|
and considering the escaping character you configure,
|
||||||
|
following this syntax:
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
newline_escapes = colname:\, other_colname:§
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter is optional, and the extra work is only
|
||||||
|
done when set. You can configure
|
||||||
|
<command>newline_escapes</command> for as many fields as
|
||||||
|
necessary, and you may configure a different escaping
|
||||||
|
character each time.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Please note that at the moment,
|
||||||
|
<command>pgloader</command> does only support one
|
||||||
|
character length <command>newline_escapes</command>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
When both a global (see <command>[pgsql]</command> section)
|
||||||
|
<command>newline_escapes</command> parameter and a table local
|
||||||
|
one are set, <command>pgloader</command> issues a warning and
|
||||||
|
only considers the global setting.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>index</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Table index definition, to be used in blob UPDATE'ing. You
|
||||||
|
define an index column by giving its name and its column
|
||||||
|
number (as found into your data file, and counting from 1)
|
||||||
|
separated by a colon. If your table has a composite key,
|
||||||
|
then you can define multiple columns here, separated by a
|
||||||
|
comma.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
index = colname:3, other_colname:5
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>columns</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
You can define here table columns, with the same
|
||||||
|
definition format as in previous <command>index</command>
|
||||||
|
parameter.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In case you have a lot of columns per table, you will want
|
||||||
|
to use multiple lines for this parameter value. Python
|
||||||
|
<command>ConfigParser</command> module knows how to read
|
||||||
|
multi-line parameters, you don't have to escape anything.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>blob_columns</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The definition of the columns where to find some blob or
|
||||||
|
clob reference. This definition is composed by a table
|
||||||
|
column name, a column number (counting from one) reference
|
||||||
|
into the Informix <command>UNLOAD</command> data file, and
|
||||||
|
a large object type, separated by a colon. You can have
|
||||||
|
several columns in this field, separated by a
|
||||||
|
comma.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Supported large objects type are Informix blob and clob,
|
||||||
|
the expected configuration strings are respectively
|
||||||
|
<command>ifx_blob</command> for binary (bytea) content
|
||||||
|
type and <command>ifx_clob</command> for text type values.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Here's an example:
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
blob_columns = clob_column:3:ifx_blob, other_clob_column:5:ifx_clob
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>configuration example</title>
|
||||||
|
<para>
|
||||||
|
Please see the given configuration example which should be distributed
|
||||||
|
in
|
||||||
|
<filename>/usr/share/doc/pgloader/examples/pgloader.conf</filename>.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>History</title>
|
||||||
|
<para>
|
||||||
|
<command>pgloader</command> was at first an Informix to
|
||||||
|
PostgreSQL migration helper which imported Informix large
|
||||||
|
objects directly into a PostgreSQL database.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Then as we got some data we couldn't find tools to care about,
|
||||||
|
we decided <command>ifx_blob</command> would become
|
||||||
|
<command>pgloader</command>, as it had to be able to import all
|
||||||
|
Informix UNLOAD data. Those data contains escaped separator into
|
||||||
|
unquoted data field and multi-lines fields (\r and \n are not
|
||||||
|
escaped).
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Bugs</title>
|
||||||
|
<para>
|
||||||
|
Please report bugs to Dimitri Fontaine <email>dim@dalibo.com</email>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
When last line is alone on a <command>COPY</command> command and its
|
||||||
|
parsing ends in error (not enough columns read for example), no
|
||||||
|
information is given back by <command>pgloader</command>.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Authors</title>
|
||||||
|
<para>
|
||||||
|
<command>pgloader</command> is written by <author>Dimitri
|
||||||
|
Fontaine</author> <email>dim@dalibo.com</email>.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
</refentry>
|
@ -1,36 +0,0 @@
|
|||||||
# $Id: pgloader.conf,v 1.4 2006-03-28 21:39:25 jpargudo Exp $
|
|
||||||
# ----
|
|
||||||
# Conversion parameter file for pgloader
|
|
||||||
#
|
|
||||||
# Possible file formats:
|
|
||||||
# COPY native PostgreSQL COPY format (default)
|
|
||||||
# CSV Comma separated variables
|
|
||||||
# MSCSV Comma separated variables alternate format
|
|
||||||
#
|
|
||||||
# The COPY command is constructed from the table_name, the
|
|
||||||
# table_columns and the eventual nulls string definition.
|
|
||||||
#
|
|
||||||
# The default column separator character is comma.
|
|
||||||
# ----
|
|
||||||
|
|
||||||
table_name = my_table
|
|
||||||
table_columns = column1, column2, ...
|
|
||||||
|
|
||||||
file_format = COPY
|
|
||||||
# COPY or CSV or MSCSV
|
|
||||||
|
|
||||||
group_size = 1000
|
|
||||||
|
|
||||||
# file_sepchar = ;
|
|
||||||
# , (default) or ; or other
|
|
||||||
|
|
||||||
# nulls = ''
|
|
||||||
# NULL or '' or other
|
|
||||||
|
|
||||||
# quote = "
|
|
||||||
# how your strings are quoted in the CSV file
|
|
||||||
|
|
||||||
file_is_utf8 = 0
|
|
||||||
# how the datafile and database are encoded: utf8/unicode or not?
|
|
||||||
# 0=NO # if utf8, both data file and
|
|
||||||
# 1=YES # database must be in utf8
|
|
362
pgloader.py
Normal file
362
pgloader.py
Normal file
@ -0,0 +1,362 @@
|
|||||||
|
#! /usr/bin/env python
|
||||||
|
# -*- coding: ISO-8859-15 -*-
|
||||||
|
# Author: Dimitri Fontaine <dimitri@dalibo.com>
|
||||||
|
|
||||||
|
"""
|
||||||
|
PostgreSQL data import tool, aimed to replace and extend pgloader.
|
||||||
|
|
||||||
|
Important features :
|
||||||
|
- CSV file format import using COPY
|
||||||
|
- multi-line input file
|
||||||
|
- configurable amount of rows per COPY instruction
|
||||||
|
- large object to TEXT or BYTEA field handling
|
||||||
|
(only informix blobs and clobs supported as of now)
|
||||||
|
- optional trailing slash removal (support informix UNLOAD file format)
|
||||||
|
- begin processing at any line in the file, by number or row id
|
||||||
|
- dry-run option, to validate input reading without connecting to database
|
||||||
|
- pedantic option, to stop processing on warning
|
||||||
|
- reject log and reject data files: you can reprocess refused data later
|
||||||
|
- COPY errors recovery via redoing COPY with half files until file is
|
||||||
|
one line long, then reject log this line
|
||||||
|
|
||||||
|
Please read the fine manual page pgloader(1) for command line usage
|
||||||
|
(options) and configuration file format.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os, sys, os.path, time, codecs
|
||||||
|
from cStringIO import StringIO
|
||||||
|
|
||||||
|
import pgloader.options
|
||||||
|
import pgloader.tools
|
||||||
|
|
||||||
|
def parse_options():
    """Parse the command line and publish the result in pgloader.options.

    Recognised options are -c/--config (configuration file name), the
    boolean flags -p, -d, -v, -n, -T and -V, the integer options
    -C/--count and -F/--from, and the string options -I/--from-id and
    -E/--encoding.  Remaining positional arguments are the names of the
    configuration sections to process, as announced by the usage string.

    Side effects:
        Stores the parsed values in the module level settings of
        pgloader.options (DRY_RUN, DEBUG, VERBOSE, PEDANTIC, TRUNCATE,
        VACUUM, COUNT, FROM_COUNT, FROM_ID and INPUT_ENCODING).

    Returns:
        A (config_filename, args) tuple, where args is the list of
        positional arguments left over by the option parser.

    Exits:
        Calls sys.exit(1) when the configuration file does not exist,
        when it cannot be read, or when -F and -I are both given.
    """
    from optparse import OptionParser

    usage = "%prog [-c <config_filename>] Section [Section ...]"
    parser = OptionParser(usage=usage)

    parser.add_option("-c", "--config", dest="config",
                      default="pgloader.conf",
                      help="configuration file, defauts to pgloader.conf")

    # All on/off switches share the same shape; registering them from a
    # table keeps their order (and so the --help output) unchanged.
    flags = (
        ("-p", "--pedantic", "pedantic",
         "pedantic mode, stop processing on warning"),
        ("-d", "--debug", "debug",
         "add some debug information (a lot of)"),
        ("-v", "--verbose", "verbose",
         "be verbose and about processing progress"),
        ("-n", "--dry-run", "dryrun",
         "simulate operations, don't connect to the db"),
        ("-T", "--truncate", "truncate",
         "truncate tables before importing data"),
        ("-V", "--vacuum", "vacuum",
         "vacuum database after having imported data"),
    )
    for short_name, long_name, dest, help_text in flags:
        parser.add_option(short_name, long_name, action="store_true",
                          dest=dest, default=False, help=help_text)

    parser.add_option("-C", "--count", dest="count",
                      default=None, type="int",
                      help="number of input lines to process")

    parser.add_option("-F", "--from", dest="fromcount",
                      default=0, type="int",
                      help="number of input lines to skip")

    parser.add_option("-I", "--from-id", dest="fromid",
                      default=None,
                      help="wait for given id on input to begin")

    parser.add_option("-E", "--encoding", dest="encoding",
                      default=None,
                      help="input files encoding")

    (opts, args) = parser.parse_args()

    # NOTE: print(...) with one parenthesised argument gives the same
    # output as a Python 2 print statement and also parses on Python 3.

    # check existence and readability of the configuration file
    if not os.path.exists(opts.config):
        print("Error: Configuration file %s does not exists" % opts.config)
        print(parser.format_help())
        sys.exit(1)

    if not os.access(opts.config, os.R_OK):
        print("Error: Can't read configuration file %s" % opts.config)
        print(parser.format_help())
        sys.exit(1)

    if opts.verbose:
        print('Using %s configuration file' % opts.config)

    # -F (start at a line number) and -I (start at a row id) are two
    # ways to pick the starting point and are mutually exclusive
    if opts.fromcount != 0 and opts.fromid is not None:
        print("Error: Can't set both options fromcount (-F) AND fromid (-I)")
        sys.exit(1)

    pgloader.options.DRY_RUN = opts.dryrun
    pgloader.options.DEBUG = opts.debug
    # if debug, then verbose
    pgloader.options.VERBOSE = opts.verbose or opts.debug
    pgloader.options.PEDANTIC = opts.pedantic

    pgloader.options.TRUNCATE = opts.truncate
    pgloader.options.VACUUM = opts.vacuum

    pgloader.options.COUNT = opts.count
    pgloader.options.FROM_COUNT = opts.fromcount
    pgloader.options.FROM_ID = opts.fromid

    pgloader.options.INPUT_ENCODING = opts.encoding

    return opts.config, args
|
||||||
|
|
||||||
|
def parse_config(conffile):
|
||||||
|
""" Parse the configuration file """
|
||||||
|
section = 'pgsql'
|
||||||
|
|
||||||
|
# Now read pgsql configuration section
|
||||||
|
import ConfigParser
|
||||||
|
config = ConfigParser.ConfigParser()
|
||||||
|
|
||||||
|
try:
|
||||||
|
config.read(conffile)
|
||||||
|
except:
|
||||||
|
print "Error: Given file is not a configuration file"
|
||||||
|
sys.exit(4)
|
||||||
|
|
||||||
|
if not config.has_section(section):
|
||||||
|
print "Error: Please provide a [%s] section" % section
|
||||||
|
sys.exit(5)
|
||||||
|
|
||||||
|
# load some options
|
||||||
|
# this has to be done after command line parsing
|
||||||
|
from pgloader.options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC
|
||||||
|
from pgloader.options import NULL, EMPTY_STRING
|
||||||
|
|
||||||
|
if DRY_RUN:
|
||||||
|
if VERBOSE:
|
||||||
|
print "Notice: dry run mode, not connecting to database"
|
||||||
|
return config, None
|
||||||
|
|
||||||
|
try:
|
||||||
|
from pgloader.db import db
|
||||||
|
|
||||||
|
dbconn = db(config.get(section, 'host'),
|
||||||
|
config.getint(section, 'port'),
|
||||||
|
config.get(section, 'base'),
|
||||||
|
config.get(section, 'user'),
|
||||||
|
config.get(section, 'pass'),
|
||||||
|
connect = False)
|
||||||
|
|
||||||
|
if config.has_option(section, 'client_encoding'):
|
||||||
|
dbconn.client_encoding = config.get(section, 'client_encoding')
|
||||||
|
|
||||||
|
if config.has_option(section, 'copy_every'):
|
||||||
|
dbconn.copy_every = config.getint(section, 'copy_every')
|
||||||
|
|
||||||
|
if config.has_option(section, 'commit_every'):
|
||||||
|
dbconn.commit_every = config.getint(section, 'commit_every')
|
||||||
|
|
||||||
|
if config.has_option(section, 'copy_delimiter'):
|
||||||
|
dbconn.copy_sep = config.get(section, 'copy_delimiter')
|
||||||
|
|
||||||
|
# Then there are null and empty_string optionnal parameters
|
||||||
|
# They canbe overriden in specific table configuration
|
||||||
|
if config.has_option(section, 'null'):
|
||||||
|
pgloader.options.NULL = pgloader.tools.parse_config_string(
|
||||||
|
config.get(section, 'null'))
|
||||||
|
|
||||||
|
if config.has_option(section, 'empty_string'):
|
||||||
|
pgloader.options.EMPTY_STRING = pgloader.tools.parse_config_string(
|
||||||
|
config.get(section, 'empty_string'))
|
||||||
|
|
||||||
|
# optionnal global newline_escapes
|
||||||
|
if config.has_option(section, 'newline_escapes'):
|
||||||
|
setting = pgloader.tools.parse_config_string(
|
||||||
|
config.get(section, 'newline_escapes'))
|
||||||
|
pgloader.options.NEWLINE_ESCAPES = setting
|
||||||
|
|
||||||
|
except Exception, error:
|
||||||
|
print "Error: Could not initialize PostgreSQL connection:"
|
||||||
|
print error
|
||||||
|
sys.exit(6)
|
||||||
|
|
||||||
|
return config, dbconn
|
||||||
|
|
||||||
|
def myprint(l, line_prefix = " ", cols = 78):
    """ pretty print list l elements

    Elements of l (strings) are printed separated by a single space,
    each output line beginning with line_prefix.  A new line is started
    whenever adding the next element would make the current line longer
    than cols characters.

    An element too long to fit on a line of its own is still printed,
    alone on its line.  An empty list prints line_prefix only. """
    tmp = line_prefix

    for e in l:
        if tmp == line_prefix:
            # nothing on this line yet: always accept the element, or we
            # would print a prefix-only line before an over-long element
            tmp += e

        elif len(tmp) + 1 + len(e) > cols:
            # the "+ 1" accounts for the separating space, so that a
            # line never grows to cols + 1 characters
            print(tmp)
            tmp = line_prefix + e

        else:
            tmp += " " + e

    # parenthesised form is a plain print statement here, and also
    # parses where print is a function
    print(tmp)
|
||||||
|
|
||||||
|
def duration_pprint(duration):
    """ pretty print duration (human readable information)

    duration is a number of seconds.  The result is always a 10
    characters wide string (for durations under 100 hours), in one of
    three forms depending on the magnitude:

      one hour or more     ' 1h01m05.0'
      one minute or more   ' 02m05.250'
      less than a minute   '     1.500'
    """
    if duration >= 3600:
        h = int(duration / 3600)
        m = int((duration - 3600 * h) / 60)
        # no rounding offset here: the %.1f conversion already rounds
        s = duration - 3600 * h - 60 * m
        # %04.1f zero-pads the seconds to 4 characters, which keeps this
        # form as wide as the other two
        return '%2dh%02dm%04.1f' % (h, m, s)

    elif duration >= 60:
        m = int(duration / 60)
        s = duration - 60 * m
        return ' %02dm%06.3f' % (m, s)

    else:
        return '%10.3f' % duration
|
||||||
|
|
||||||
|
def load_data():
|
||||||
|
""" read option line and configuration file, then process data
|
||||||
|
import of given section, or all sections if no section is given on
|
||||||
|
command line """
|
||||||
|
|
||||||
|
# first parse command line options, and set pgloader.options values
|
||||||
|
# accordingly
|
||||||
|
conffile, args = parse_options()
|
||||||
|
|
||||||
|
# now init db connection
|
||||||
|
config, dbconn = parse_config(conffile)
|
||||||
|
|
||||||
|
# load some pgloader package modules
|
||||||
|
from pgloader.options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC, VACUUM
|
||||||
|
from pgloader.pgloader import PGLoader
|
||||||
|
from pgloader.tools import PGLoader_Error
|
||||||
|
|
||||||
|
sections = []
|
||||||
|
summary = {}
|
||||||
|
|
||||||
|
# args are meant to be configuration sections
|
||||||
|
if len(args) > 0:
|
||||||
|
for s in args:
|
||||||
|
if config.has_section(s):
|
||||||
|
sections.append(s)
|
||||||
|
|
||||||
|
else:
|
||||||
|
for s in config.sections():
|
||||||
|
if s != 'pgsql':
|
||||||
|
sections.append(s)
|
||||||
|
|
||||||
|
if VERBOSE:
|
||||||
|
print 'Will consider following sections:'
|
||||||
|
myprint(sections)
|
||||||
|
|
||||||
|
# we count time passed from now on
|
||||||
|
begin = time.time()
|
||||||
|
|
||||||
|
# we run through sorted section list
|
||||||
|
sections.sort()
|
||||||
|
for s in sections:
|
||||||
|
try:
|
||||||
|
pgloader = PGLoader(s, config, dbconn)
|
||||||
|
pgloader.run()
|
||||||
|
|
||||||
|
summary[s] = (pgloader.name,) + pgloader.summary()
|
||||||
|
except PGLoader_Error, e:
|
||||||
|
if e == '':
|
||||||
|
print '[%s] Please correct previous errors' % s
|
||||||
|
else:
|
||||||
|
print
|
||||||
|
print 'Error: %s' % e
|
||||||
|
|
||||||
|
if PEDANTIC:
|
||||||
|
pgloader.print_stats()
|
||||||
|
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
print "Aborting on user demand (Interrupt)"
|
||||||
|
|
||||||
|
# total duration
|
||||||
|
td = time.time() - begin
|
||||||
|
|
||||||
|
retcode = 0
|
||||||
|
|
||||||
|
# print a pretty summary
|
||||||
|
t= 'Table name | duration | size | updates | errors '
|
||||||
|
_= '===================================================================='
|
||||||
|
|
||||||
|
tu = te = ts = 0 # total updates, errors, size
|
||||||
|
if not DRY_RUN:
|
||||||
|
dbconn.reset()
|
||||||
|
cursor = dbconn.dbconn.cursor()
|
||||||
|
|
||||||
|
s_ok = 0
|
||||||
|
for s in sections:
|
||||||
|
if s not in summary:
|
||||||
|
continue
|
||||||
|
|
||||||
|
s_ok += 1
|
||||||
|
if s_ok == 1:
|
||||||
|
# print pretty sumary header now
|
||||||
|
print
|
||||||
|
print t
|
||||||
|
print _
|
||||||
|
|
||||||
|
t, d, u, e = summary[s]
|
||||||
|
d = duration_pprint(d)
|
||||||
|
|
||||||
|
if not DRY_RUN:
|
||||||
|
sql = "select pg_total_relation_size(%s), " + \
|
||||||
|
"pg_size_pretty(pg_total_relation_size(%s));"
|
||||||
|
cursor.execute(sql, [t, t])
|
||||||
|
octets, s = cursor.fetchone()
|
||||||
|
ts += octets
|
||||||
|
|
||||||
|
if s[5:] == 'bytes': s = s[:-5] + ' B'
|
||||||
|
else:
|
||||||
|
s = '-'
|
||||||
|
|
||||||
|
print '%-18s| %ss | %7s | %10d | %10d' % (t, d, s, u, e)
|
||||||
|
|
||||||
|
tu += u
|
||||||
|
te += e
|
||||||
|
|
||||||
|
if e > 0:
|
||||||
|
retcode += 1
|
||||||
|
|
||||||
|
if s_ok > 1:
|
||||||
|
td = duration_pprint(td)
|
||||||
|
|
||||||
|
# pretty size
|
||||||
|
cursor.execute("select pg_size_pretty(%s);", [ts])
|
||||||
|
[ts] = cursor.fetchone()
|
||||||
|
if ts[5:] == 'bytes': ts = ts[:-5] + ' B'
|
||||||
|
|
||||||
|
print _
|
||||||
|
print 'Total | %ss | %7s | %10d | %10d' % (td, ts, tu, te)
|
||||||
|
|
||||||
|
if not DRY_RUN:
|
||||||
|
cursor.close()
|
||||||
|
|
||||||
|
print
|
||||||
|
if VACUUM and not DRY_RUN:
|
||||||
|
print 'vacuumdb... '
|
||||||
|
try:
|
||||||
|
dbconn.vacuum()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return retcode
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # script entry point: load_data() return value is the exit status
    retcode = load_data()
    sys.exit(retcode)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user