From 8ed1e0ff2ca08fea6629fc1f4e010eccb85d02ad Mon Sep 17 00:00:00 2001
From: dim <dim>
Date: Sun, 19 Nov 2006 21:18:42 +0000
Subject: [PATCH] New version of pgloader (2.0.2):

* new developers
* python code replaces tcl code
* one config file for many tables (one command line for loading them all)
* manpage
* debian package
* supports multi-line input file (without quotes)
* Informix large objects support (loading to TEXT or BYTEA)
* configurable amount of rows per COPY instruction
* optional trailing slash removal (supports Informix UNLOAD file format)
* begin processing at any line in the file, by number or row id
* dry-run option, to validate input reading without connecting to database
* pedantic option, to stop processing on warning
---
 LICENSE                           |  25 -
 Makefile                          |  23 +
 debian/changelog                  |  23 +
 debian/compat                     |   1 +
 debian/control                    |  21 +
 debian/copyright                  |  32 ++
 debian/dirs                       |   4 +
 debian/docs                       |   1 +
 debian/files                      |   1 +
 debian/pgloader.links             |   2 +
 debian/pgloader.manpages          |   1 +
 debian/pycompat                   |   1 +
 debian/pyversions                 |   1 +
 debian/rules                      |  98 ++++
 doc/Makefile                      |  43 --
 doc/README                        |  34 --
 doc/doc_pgloader.rest             | 189 --------
 doc/example/create_table.sql      |   2 -
 doc/example/foo.conf              |  21 -
 doc/example/foo.data              |   4 -
 doc/lib/stylesheet.sty            |  52 ---
 examples/README                   |  55 +++
 examples/clob/clob.data           |   7 +
 examples/clob/clob.out            |   1 +
 examples/clob/clob.sql            |   4 +
 examples/cluttered/cluttered.data |  16 +
 examples/cluttered/cluttered.sql  |   5 +
 examples/errors/errors.data       |   7 +
 examples/errors/errors.sql        |   5 +
 examples/pgloader.conf            |  51 ++
 examples/simple/simple.data       |   7 +
 examples/simple/simple.sql        |   5 +
 pgloader.1.sgml                   | 742 ++++++++++++++++++++++++++++++
 pgloader.conf                     |  36 --
 pgloader.py                       | 362 +++++++++++++++
 35 files changed, 1476 insertions(+), 406 deletions(-)
 delete mode 100644 LICENSE
 create mode 100644 Makefile
 create mode 100644 debian/changelog
 create mode 100644 debian/compat
 create mode 100644 debian/control
 create mode 100644 debian/copyright
 create mode 100644 debian/dirs
 create mode 100644 debian/docs
 create mode 100644 debian/files
 create mode 100644 debian/pgloader.links
 create mode 100644 debian/pgloader.manpages
 create mode 100644 debian/pycompat
 create mode 100644 debian/pyversions
 create mode 100644 debian/rules
 delete mode 100644 doc/Makefile
 delete mode 100644 doc/README
 delete mode 100644 doc/doc_pgloader.rest
 delete mode 100644 doc/example/create_table.sql
 delete mode 100644 doc/example/foo.conf
 delete mode 100644 doc/example/foo.data
 delete mode 100644 doc/lib/stylesheet.sty
 create mode 100644 examples/README
 create mode 100644 examples/clob/clob.data
 create mode 100644 examples/clob/clob.out
 create mode 100644 examples/clob/clob.sql
 create mode 100644 examples/cluttered/cluttered.data
 create mode 100644 examples/cluttered/cluttered.sql
 create mode 100644 examples/errors/errors.data
 create mode 100644 examples/errors/errors.sql
 create mode 100644 examples/pgloader.conf
 create mode 100644 examples/simple/simple.data
 create mode 100644 examples/simple/simple.sql
 create mode 100644 pgloader.1.sgml
 delete mode 100644 pgloader.conf
 create mode 100644 pgloader.py
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 88db6ed..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,25 +0,0 @@
-Copyright (c) 2005, Jan Wieck
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice, 
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of the PostgreSQL Loader nor the names of its 
-      contributors may be used to endorse or promote products derived from this
-      software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2c26c20
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,23 @@
+DOCS = pgloader.1.sgml
+GARBAGE = manpage.links manpage.refs
+
+# Staging root: empty for a direct install, set by debian/rules when packaging.
+DESTDIR =
+libdir   = $(DESTDIR)/usr/share/pgloader
+exdir    = $(DESTDIR)/usr/share/doc/pgloader
+
+pgloader = pgloader.py
+examples = examples
+libs    := $(wildcard pgloader/*.py)
+
+.PHONY: install man
+# Create the library directories first so the script is copied INTO $(libdir).
+install:
+	install -m 755 -d $(libdir)/pgloader
+	install -m 755 $(pgloader) $(libdir)
+	cp -a $(libs) $(libdir)/pgloader
+	cp -a $(examples) $(exdir)
+
+man: $(DOCS)
+	docbook2man $(DOCS) 2>/dev/null
+	-rm -f $(GARBAGE)
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..f3c39db
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,23 @@
+pgloader (2.0.2) unstable; urgency=low
+
+  * configurable null and empty_string representations
+  * bugfix on newline_escapes behavior when all lines are not escaped
+  * new global newline_escapes setting
+  * uses psycopg2 by default, falls back to psycopg1 if not available
+  * client_encoding can now be set on each table
+  * documentation (manpage) update
+
+ -- Dimitri Fontaine <dim@dalibo.com>  Wed, 15 Nov 2006 22:26:46 +0100
+
+pgloader (2.0.1-2) unstable; urgency=low
+
+  * package cleaning (lintian warnings and error)
+
+ -- Dimitri Fontaine <dim@dalibo.com>  Tue, 14 Nov 2006 18:14:57 +0100
+
+pgloader (2.0.1-1) unstable; urgency=low
+
+  * Initial release
+
+ -- Dimitri Fontaine <dim@dalibo.com>  Mon, 13 Nov 2006 22:56:15 +0100
+
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..7ed6ff8
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+5
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..f330bbb
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,21 @@
+Source: pgloader
+Section: misc
+Priority: extra
+Maintainer: Dimitri Fontaine <dim@dalibo.com>
+Build-Depends: debhelper (>= 5), docbook-to-man (>= 2.0.0), python-support (>= 0.3)
+Standards-Version: 3.7.2
+
+Package: pgloader
+Architecture: all
+Depends: python (>=2.4.4), python-psycopg2 | python-psycopg (<< 1.1.21)
+Description: loads flat data files into PostgreSQL
+ pgloader imports data from a flat file and inserts it into a database
+ table. It uses a flat file per database table, and you can configure as
+ many Sections as you want, each one associating a table name and a data
+ file.
+ .
+ Data are parsed and rewritten, then given to PostgreSQL COPY command.
+ Parsing is necessary for dealing with end of lines and eventual trailing
+ separator characters, and for column reordering: your flat data file may
+ not have the same column order as the database table has.
+
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..def199e
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,32 @@
+This package was debianized by Dimitri Fontaine <dim@dalibo.com> on
+Mon, 13 Nov 2006 22:56:15 +0100.
+
+It was downloaded from http://pgloader.dalibo.org/
+
+Upstream Author: Dimitri Fontaine <dim@dalibo.com>
+
+Copyright: 2005, Jan Wieck
+           2006, Dimitri Fontaine
+
+License:
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted under the terms of the BSD License.
+
+    THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+    ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+    SUCH DAMAGE.
+
+On Debian systems, the complete text of the BSD License can be 
+found in `/usr/share/common-licenses/BSD'.
+
+
+The Debian packaging is (C) 2006, Dimitri Fontaine <dim@dalibo.com> and
+is licensed under the GPL, see `/usr/share/common-licenses/GPL'.
diff --git a/debian/dirs b/debian/dirs
new file mode 100644
index 0000000..275a1b3
--- /dev/null
+++ b/debian/dirs
@@ -0,0 +1,4 @@
+usr/bin
+usr/sbin
+usr/share/pgloader
+usr/share/doc/pgloader
diff --git a/debian/docs b/debian/docs
new file mode 100644
index 0000000..2312955
--- /dev/null
+++ b/debian/docs
@@ -0,0 +1 @@
+pgloader.1
\ No newline at end of file
diff --git a/debian/files b/debian/files
new file mode 100644
index 0000000..841a98a
--- /dev/null
+++ b/debian/files
@@ -0,0 +1 @@
+pgloader_2.0.2_all.deb misc extra
diff --git a/debian/pgloader.links b/debian/pgloader.links
new file mode 100644
index 0000000..202fef5
--- /dev/null
+++ b/debian/pgloader.links
@@ -0,0 +1,2 @@
+/usr/share/pgloader/pgloader.py /usr/bin/pgloader
+
diff --git a/debian/pgloader.manpages b/debian/pgloader.manpages
new file mode 100644
index 0000000..2312955
--- /dev/null
+++ b/debian/pgloader.manpages
@@ -0,0 +1 @@
+pgloader.1
\ No newline at end of file
diff --git a/debian/pycompat b/debian/pycompat
new file mode 100644
index 0000000..d8263ee
--- /dev/null
+++ b/debian/pycompat
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/debian/pyversions b/debian/pyversions
new file mode 100644
index 0000000..cd06769
--- /dev/null
+++ b/debian/pyversions
@@ -0,0 +1 @@
+2.3-
\ No newline at end of file
diff --git a/debian/rules b/debian/rules
new file mode 100644
index 0000000..781b540
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,98 @@
+#!/usr/bin/make -f
+# -*- makefile -*-
+# Sample debian/rules that uses debhelper.
+# This file was originally written by Joey Hess and Craig Small.
+# As a special exception, when this file is copied by dh-make into a
+# dh-make output file, you may use that output file without restriction.
+# This special exception was added by Craig Small in version 0.37 of dh-make.
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+
+
+
+CFLAGS = -Wall -g
+
+ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
+	CFLAGS += -O0
+else
+	CFLAGS += -O2
+endif
+
+configure: configure-stamp
+configure-stamp:
+	dh_testdir
+	# Add here commands to configure the package.
+
+	touch configure-stamp
+
+
+build: build-stamp
+
+build-stamp: configure-stamp 
+	dh_testdir
+
+	# Add here commands to compile the package.
+	#$(MAKE)
+	docbook-to-man pgloader.1.sgml > pgloader.1
+
+	touch $@
+
+clean:
+	dh_testdir
+	dh_testroot
+	rm -f build-stamp configure-stamp
+
+	# Remove the generated manpage (the top-level Makefile has no clean target).
+	rm -f pgloader.1
+
+	dh_clean
+
+install: build
+	dh_testdir
+	dh_testroot
+	dh_clean -k 
+	dh_installdirs
+
+	# Add here commands to install the package into debian/pgloader.
+	$(MAKE) DESTDIR=$(CURDIR)/debian/pgloader install
+
+
+# Build architecture-dependent files here.
+binary-arch: build install
+# Nothing to do: pgloader is an "Architecture: all" package.
+
+# Build architecture-independent files here.
+binary-indep: build install
+	dh_testdir
+	dh_testroot
+	dh_installchangelogs
+	dh_installdocs
+	dh_installexamples
+#	dh_install
+#	dh_installmenu
+#	dh_installdebconf
+#	dh_installlogrotate
+#	dh_installemacsen
+#	dh_installpam
+#	dh_installmime
+	dh_pysupport
+#	dh_installinit
+#	dh_installcron
+#	dh_installinfo
+	dh_installman
+	dh_link
+	dh_strip
+	dh_compress
+	dh_fixperms
+#	dh_perl
+#	dh_makeshlibs
+	dh_installdeb
+	dh_shlibdeps
+	dh_gencontrol
+	dh_md5sums
+	dh_builddeb
+
+binary: binary-indep binary-arch
+.PHONY: build clean binary-indep binary-arch binary install configure
diff --git a/doc/Makefile b/doc/Makefile
deleted file mode 100644
index 79af31e..0000000
--- a/doc/Makefile
+++ /dev/null
@@ -1,43 +0,0 @@
-# $Id: Makefile,v 1.1 2005-11-21 16:05:50 jpargudo Exp $
-
-rest = $(wildcard *.rest)
-html = $(addsuffix .html, $(basename $(rest)))
-pdf  = $(addsuffix .pdf, $(basename $(rest)))
-
-pdf: $(pdf) clean
-
-html: $(html)
-
-dist-clean: clean
-	@rm -f $(pdf) $(html)
-
-clean:
-	@rm -f *.aux *.log *~ *.tex *.out *.toc *.dvi
-
-%.html: %.rest
-	rest2html --stylesheet lib/stylesheet.sty \
-		      --no-section-numbering \
-		      --language=fr \
-		      $< > $@
-
-%.pdf: %.dvi
-	dvipdf $<
-
-%.dvi: %.tex
-	latex $< >> /dev/null
-	latex $< >> /dev/null
-
-%.tex: %.rest 
-	rest2latex --use-latex-toc \
-	           --stylesheet lib/stylesheet.sty \
-	           --use-latex-footnotes \
-		   --no-section-numbering \
-		   --language=fr \
-                   --input-encoding=iso-8859-15 \
-		   --table-style=booktabs \
-                   --output-encoding=iso-8859-15 \
-		       $< > $@
-help:
-	@echo " Programmes nécessaires: docbook, latex, dvipdf, kpdf"
-
-.PHONY: pdf
diff --git a/doc/README b/doc/README
deleted file mode 100644
index 3c8ca86..0000000
--- a/doc/README
+++ /dev/null
@@ -1,34 +0,0 @@
-======
-README
-======
-
-How to compile documentation for pgloader
------------------------------------------
-
-**doc_pgloader.rest**
-
-Some documentation for pgloader project, in english. 
-
-ReST format (see
-http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html for more info
-on this format and tools).
-
-Since its plain text, you don't need to compile anything to read it ;-)
-
-But you can still get a PDF or an HTML file:
-
-compile PDF version : 
-
- $ make doc_pgloader.pdf
-
-compile HTML version : 
-
- $ make doc_pgloader.html
-
-Contact the authors
--------------------
-
-Feel free to send me questions / comments / patches / whatever :
-
-Jean-Paul Argudo <jean-paul.argudo@dalibo.com>
-
diff --git a/doc/doc_pgloader.rest b/doc/doc_pgloader.rest
deleted file mode 100644
index 92578d6..0000000
--- a/doc/doc_pgloader.rest
+++ /dev/null
@@ -1,189 +0,0 @@
-========
-pgloader
-========
-
-:Author:
-	Jean-Paul Argudo <jean-paul.argudo@dalibo.com>
-
-:Version:
-	$Id: doc_pgloader.rest,v 1.2 2006-03-28 21:39:25 jpargudo Exp $
-:Type:
-    User manual
-
-:Comment:
-	pgLoader v.1.x documentation (install, usage and example)
-
-:Licence:
-	BSD
-
-About
-=====
-
-pgloader (http://pgfoundry.org/projects/pgloader/) is a new project allowing
-you to import data in a PostgreSQL database.
-
-You have to launch pgloader as many times you have tables. pgloader handles
-just one table at a time.
-
-All bad records are put together in a file, with a logfile explaining origins
-of errors.
-
-
-Installation
-============
-
-Under Debian, the current installation is a bit tricky (as per 200510xx): ::
-
-  wget http://debian.wow-vision.com.sg/debian/pool/main/p/postgresql libpgtcl_7.4.7-6sarge1_i386.deb
-  dpgk -i libpgtcl_7.4.7-6sarge1_i386.deb
-  apt-get install tcllib
-  wget http://pgfoundry.org/frs/download.php/233/pgloader-1.0.tar.gz
-  tar zxvf pgloader-1.0.tar.gz
-
-Then you can eventually put the binary into /usr/local/bin to facilitate
-comandlines: ::
-
-  $ cp pgloader-1.0/pgloader /usr/local/bin
-
-Principle
-=========
-
-You must fill two files per table:
-
- * a parameter file, let's call it <table>.conf
- * a datafile, let's call it <table>.data
-
-You need also all necessary parameters to the db connexion you want to use:
-
-Common ones are the following:
-
- * host : name of the server where your PostgreSQL db lives (localhost ?)
- * user : username (you?)
- * password : username's password (mybigsecret)
- * dbname : name of the PostgreSQL db
-
-This parameters are put together in a double-quoted string:
-
-  "host=localhost user=me password=mybigsecret dbname=mydatabase"
-
-This string as the same type that PQconnectdb awaits for in the libpq. Its
-complete documentation can be read at:
-http://www.postgresql.org/docs/current/static/libpq.html#LIBPQ-CONNECT
-
-You can for sure add much more parameters, depending your db configuration.
-
-Example
-=======
-
-We want to insert records in "foo" table: ::
-
-  test=> \d foo
-          Table «public.foo»
-   Colonne |  Type   | Modificateurs
-  ---------+---------+---------------
-   a       | integer | not null
-   b       | date    |
-   c       | text    |
-  Index :
-      «foo_pkey» PRIMARY KEY, btree (a)
-
-The datafile
-------------
-
-Our datafile "foo.data" as following records: ::
-
-  1;1987-12-04;"This is a test of data file"
-  2;2005-03-02;"diziz'another test with som'o'lil'quotes"
-  42;;"No need to date this"
-  67;1999-01-02;Oops I didn't escape this string?!
-
-Please note that:
-
- * fields are separated with a semicolon
- * you can handle presence of empty data: the empty field is represented with 
-   two semicolons following
- * we have a record per line
- * theres is no other line separator excepted \n
- * dates are in ISO format: YYYY-MM-DD (a fix is coming to handle "set datestyle
-   to" in the conf file)
- * you can escape strings, optionnaly, double quoting them
-
-Configuration file
-------------------
-
-The corresponding file "foo.conf" for the above datafile is the following: ::
-
- # ----
- # Conversion parameter file for pgloader
- #
- #	Possible file formats:
- #		COPY		native PostgreSQL COPY format (default)
- #		CSV			Comma separated variables
- #		MSCSV		Comma separated variables alternate format
- #
- #	The COPY command is constructed from the table_name, the
- #	table_columns and the eventual nulls string definition.
- #
- #	The default column separator character is comma.
- # ----
- 
- table_name	= foo
- table_columns	= a,b,c
- file_format	= CSV
- group_size	= 1000
- file_sepchar	= ;
- #nulls		= NULL
- quote 		= "
- file_is_utf8   = 0
-
-Note that separation character is set to ";" and that quoting is specifyied 
-with the character double-quote: "
-
-Inserts will be commited each 1000, per blocks of 1000 rows at a time.
-
-The datafile nor the database is in utf-8, so the parameter *file_is_utf8* is
-set to 0. Set it to 1 otherwise: when both database and datafile are in utf-8.
-
-Since ``pgctl`` internals run in utf-8, the data must be converted *on the
-fly* to utf-8 when reading the datafile, thats why pgloader needs to know how
-is the datafile like, utf-8 or not.
-
-
-pgloader execution
-------------------
-
-The execution is quite simple: ::
-
-  $ pgloader foo.conf foo.data "host=localhost user=me password=mybigsecret \
-    dbname=mydatabase"
-
-  4 row(s) loaded
-  0 row(s) rejected
-
-A simple verification of what has been inserted: ::
-
-  test=> select * from foo ;
-   a  |     b      |                    c
-  ----+------------+------------------------------------------
-    1 | 1987-12-04 | This is a test of data file
-    2 | 2005-03-02 | diziz'another test with som'o'lil'quotes
-   42 |            | No need to date this
-   67 | 1999-01-02 | Oops I didn't escape this string?!
-  (4 lines)
-
-**Note**: You will find this example in the doc/example/ directory.
-
-when errors occurs
-------------------
-
-Check the following:
-
- * if your configuration file is not okay, pgloader will tell you whats wrong
-
- * if you have a problem with the data you try to import, you'll find in the
-   .rej file data that have bee rejected. In the .rejlog file given problems 
-   will be explicited: a group of error messages per rejected row.
-
-Then you'll have to correct errors in .rej file and import *that* file like all
-the others: don't reimport anything else, all the good data is already in the
-box :)
diff --git a/doc/example/create_table.sql b/doc/example/create_table.sql
deleted file mode 100644
index 15cb8ad..0000000
--- a/doc/example/create_table.sql
+++ /dev/null
@@ -1,2 +0,0 @@
-drop table foo;
-create table foo (a integer primary key, b date, c text);
diff --git a/doc/example/foo.conf b/doc/example/foo.conf
deleted file mode 100644
index 33d3e25..0000000
--- a/doc/example/foo.conf
+++ /dev/null
@@ -1,21 +0,0 @@
-# ----
-# Conversion parameter file for pgloader
-#
-#      Possible file formats:
-#              COPY            native PostgreSQL COPY format (default)
-#              CSV                     Comma separated variables
-#              MSCSV           Comma separated variables alternate format
-#
-#      The COPY command is constructed from the table_name, the
-#      table_columns and the eventual nulls string definition.
-#
-#      The default column separator character is comma.
-# ----
-
-table_name     = foo
-table_columns  = a,b,c
-file_format    = CSV
-group_size     = 1000
-file_sepchar   = ;
-#nulls         = NULL
-quote          = "
diff --git a/doc/example/foo.data b/doc/example/foo.data
deleted file mode 100644
index 6ee9e96..0000000
--- a/doc/example/foo.data
+++ /dev/null
@@ -1,4 +0,0 @@
-1;1987-12-04;"This is a test of data file"
-2;2005-03-02;"diziz'another test with som'o'lil'quotes"
-42;;"No need to date this"
-67;1999-01-02;Oops I didn't escape this string?!
diff --git a/doc/lib/stylesheet.sty b/doc/lib/stylesheet.sty
deleted file mode 100644
index 8147887..0000000
--- a/doc/lib/stylesheet.sty
+++ /dev/null
@@ -1,52 +0,0 @@
-\let\oldAuthor\author
-\renewcommand{\author}[1]{\newcommand{\myAuthor}{#1}\oldAuthor{#1}} 
-\let\oldTitle\title
-\renewcommand{\title}[1]{\newcommand{\myTitle}{#1}\oldTitle{#1}} 
-
-\usepackage{eurosym} 
-\usepackage[latin9]{inputenc} 
-\let ¤ = \euro 
-
-\usepackage{fancyhdr}
-\pagestyle{fancy}
-
-\lhead{}
-\chead{}
-\rhead{\myTitle}
-\lfoot{\textsf{pgFoundry}
-}
-\cfoot{\small{pgloader documentation \\
-http://pgfoundry.org/projects/pgloader/}}
-\rfoot{\thepage\ / \pageref*{LastPage}}
-
-\renewcommand{\headrulewidth}{0.4pt}
-\renewcommand{\footrulewidth}{0.4pt}
-
-\usepackage{helvet}
-
-\renewcommand{\familydefault}{phv}
-
-%Parametrage pour une feuille A4 pleine (merci SBI)
-\evensidemargin = 30mm
-\oddsidemargin = 30mm
-\voffset=-1in
-\topmargin = 17mm
-\headheight = 14.5mm
-\headsep = 15mm
-\hoffset=-1in
-\marginparsep = 0pt
-\marginparwidth = 0pt
-\footskip = 20mm
-\textwidth=162mm
-\textheight=200mm
-\paperwidth=210mm
-\paperheight=297mm
-\parindent=0pt
-\parskip=5pt
-%fin parametrage A4 plein
-
-\usepackage{lastpage}
-
-\hypersetup{colorlinks=true}
-
-\usepackage{indentfirst}
diff --git a/examples/README b/examples/README
new file mode 100644
index 0000000..5889c20
--- /dev/null
+++ b/examples/README
@@ -0,0 +1,55 @@
+In this directory you'll find some pgloader usage examples.
+
+To use them, please first create a pgloader database, then for each example
+the tables it needs, then issue the pgloader command:
+
+ $ createdb --encoding=utf-8 pgloader
+ $ cd examples
+ $ psql pgloader < simple/simple.sql
+ $ ../pgloader.py -Tvc pgloader.conf simple
+
+If you want to load data from all examples, create tables for all of them
+first, then run pgloader without argument.
+
+The provided examples are:
+
+. simple
+
+  This dataset shows basic case, with trailing separator and data
+  reordering.
+
+. errors
+
+  Same test, but with impossible dates. Should report some errors. If it
+  does not report errors, check you're not using psycopg 1.1.21.
+
+. clob
+
+  This dataset shows some text large object importing to PostgreSQL text
+  datatype.
+
+. cluttered
+
+  A dataset with newline escaped and multi-line input (without quoting)
+  Beware of data reordering, too.
+
+
+You can launch all those pgloader tests in one run, provided you created the
+necessary tables:
+
+ $ for test in simple clob cluttered; do psql pgloader < $test/$test.sql; done
+ $ ../pgloader.py -Tc pgloader.conf
+
+ [...]
+
+ Table name        |    duration |    size |    updates |     errors
+ ====================================================================
+ clob              |      0.121s |   32 kB |          7 |          0
+ cluttered         |      0.041s |   32 kB |          3 |          0
+ simple            |      0.040s |   16 kB |          6 |          0
+ ====================================================================
+ Total             |      0.369s |   80 kB |         16 |          0
+
+And you then have a nice summary.
+
+
diff --git a/examples/clob/clob.data b/examples/clob/clob.data
new file mode 100644
index 0000000..ce2a1e8
--- /dev/null
+++ b/examples/clob/clob.data
@@ -0,0 +1,7 @@
+1|0,16,clob.out
+2|16,20,clob.out
+3|36,23,clob.out
+4|59,15,clob.out
+5|6e,13,clob.out
+6|81,6,clob.out
+7|87,d,clob.out
\ No newline at end of file
diff --git a/examples/clob/clob.out b/examples/clob/clob.out
new file mode 100644
index 0000000..d9a15de
--- /dev/null
+++ b/examples/clob/clob.out
@@ -0,0 +1 @@
+This is some clob dataTo be used on several data entryReferences to those are to be foundIn the clob.data fileThey refer to beginlengthand filename.
\ No newline at end of file
diff --git a/examples/clob/clob.sql b/examples/clob/clob.sql
new file mode 100644
index 0000000..d59fb26
--- /dev/null
+++ b/examples/clob/clob.sql
@@ -0,0 +1,4 @@
+CREATE TABLE clob (
+ a integer primary key,
+ b text
+);
\ No newline at end of file
diff --git a/examples/cluttered/cluttered.data b/examples/cluttered/cluttered.data
new file mode 100644
index 0000000..484374c
--- /dev/null
+++ b/examples/cluttered/cluttered.data
@@ -0,0 +1,16 @@
+1^some multi\
+line text with\
+newline escaping^and some other data following^
+2^and another line^clean^
+3^and\
+a last multiline\
+escaped line
+with a missing\
+escaping^just to test^
+4^\ ^empty value^
+5^^null value^
+6^multi line\
+escaped value\
+\
+with empty line\
+embeded^last line^
\ No newline at end of file
diff --git a/examples/cluttered/cluttered.sql b/examples/cluttered/cluttered.sql
new file mode 100644
index 0000000..d327cf6
--- /dev/null
+++ b/examples/cluttered/cluttered.sql
@@ -0,0 +1,5 @@
+CREATE TABLE cluttered (
+ a integer primary key,
+ b text,
+ c text
+);
\ No newline at end of file
diff --git a/examples/errors/errors.data b/examples/errors/errors.data
new file mode 100644
index 0000000..33ca814
--- /dev/null
+++ b/examples/errors/errors.data
@@ -0,0 +1,7 @@
+1|some first row text|2006-13-11|
+2|some second row text|2006-11-11|
+3|some third row text|2006-10-12|
+4|\ |2006-16-4|
+5|some fifth row text|2006-5-12|
+6|some sixth row text|2006-13-10|
+7|some null date to play with||
\ No newline at end of file
diff --git a/examples/errors/errors.sql b/examples/errors/errors.sql
new file mode 100644
index 0000000..8daead7
--- /dev/null
+++ b/examples/errors/errors.sql
@@ -0,0 +1,5 @@
+CREATE TABLE errors (
+ a integer primary key,
+ b date,
+ c text
+);
\ No newline at end of file
diff --git a/examples/pgloader.conf b/examples/pgloader.conf
new file mode 100644
index 0000000..78473da
--- /dev/null
+++ b/examples/pgloader.conf
@@ -0,0 +1,51 @@
+[pgsql]
+host = localhost
+port = 5432
+base = pgloader
+user = dim
+pass = None
+
+client_encoding = 'utf-8'
+copy_every      = 5
+commit_every    = 5
+#copy_delimiter  = %
+
+null         = ""
+empty_string = "\ "
+
+newline_escapes = \
+
+[simple]
+table        = simple
+filename     = simple/simple.data
+field_sep    = |
+trailing_sep = True
+columns      = a:1, b:3, c:2
+
+# these reject settings are the default ones
+reject_log   = /tmp/simple.rej.log
+reject_data  = /tmp/simple.rej
+
+[errors]
+table        = errors
+filename     = errors/errors.data
+field_sep    = |
+trailing_sep = True
+columns      = a:1, b:3, c:2
+
+[clob]
+table        = clob
+filename     = clob/clob.data
+field_sep    = |
+columns      = a:1, b:2
+index        = a:1
+blob_columns = b:2:ifx_clob
+
+[cluttered]
+table           = cluttered
+filename        = cluttered/cluttered.data
+field_sep       = ^
+trailing_sep    = True
+newline_escapes = c:\
+field_count     = 3
+columns         = a:1, b:3, c:2
diff --git a/examples/simple/simple.data b/examples/simple/simple.data
new file mode 100644
index 0000000..6ef3d1f
--- /dev/null
+++ b/examples/simple/simple.data
@@ -0,0 +1,7 @@
+1|some first row text|2006-11-11|
+2|some second row text|2006-11-11|
+3|some third row text|2006-10-12|
+4|\ |2006-10-4|
+5|some fifth row text|2006-5-12|
+6|some sixth row text|2006-7-10|
+7|some null date to play with||
\ No newline at end of file
diff --git a/examples/simple/simple.sql b/examples/simple/simple.sql
new file mode 100644
index 0000000..981bd68
--- /dev/null
+++ b/examples/simple/simple.sql
@@ -0,0 +1,5 @@
+CREATE TABLE simple (
+ a integer primary key,
+ b date,
+ c text
+);
\ No newline at end of file
diff --git a/pgloader.1.sgml b/pgloader.1.sgml
new file mode 100644
index 0000000..046280a
--- /dev/null
+++ b/pgloader.1.sgml
@@ -0,0 +1,742 @@
+<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN">
+<refentry>
+  <refentryinfo>
+    <address>
+      <email>dim@dalibo.com</email>
+    </address>
+    <author>
+      <firstname>Dimitri</firstname>
+      <surname>Fontaine</surname>
+    </author>
+    <date>August 2006</date>
+    <copyright>
+      <year>2006</year>
+      <holder>Dimitri Fontaine</holder>
+    </copyright>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>pgloader</refentrytitle>
+    <manvolnum>1</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>pgloader</refname>
+    <refpurpose>
+Import CSV data and Large Object to PostgreSQL
+    </refpurpose>
+  </refnamediv>
+
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>pgloader</command>
+      <arg><option>-c</option> configuration file</arg>
+      <arg><option>-p</option> pedantic</arg>
+      <arg><option>-d</option> debug</arg>
+      <arg><option>-v</option> verbose</arg>
+      <arg><option>-n</option> dry run</arg>
+      <arg><option>-Cn</option> count</arg>
+      <arg><option>-Fn</option> from</arg>
+      <arg><option>-In</option> from id</arg>
+      <arg><option>-E</option> input files encoding</arg>
+      <arg>Section1 Section2</arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>description</title>
+    <para>
+      <command>pgloader</command> imports data from a flat file and
+      inserts it into a database table. It uses a flat file per
+      database table, and you can configure as many Sections as you
+      want, each one associating a table name and a data file.
+    </para>
+
+    <para>
+      Data are parsed and rewritten, then given to PostgreSQL
+      <command>COPY</command> command. Parsing is necessary for
+      dealing with end of lines and possible trailing separator
+      characters, and for column reordering: your flat data file may
+      not have the same column order as the database table has.
+    </para>
+
+    <para>
+      <command>pgloader</command> is also able to load some large
+      objects data into PostgreSQL, as of now only Informix
+      <command>UNLOAD</command> data files are supported. This command
+      gives large objects data location information into the main data
+      file. <command>pgloader</command> parses it and produces an SQL
+      UPDATE order per large object, and commits those orders once
+      every <command>commit_every</command> updates (see Configuration).
+    </para>
+
+    <para>
+      <command>pgloader</command> issues some timing statistics
+      every <command>commit_every</command> commits (see Configuration
+      for this setting). At the end of each section processing, a
+      summary of overall operations, numbers of updates and commits,
+      time it took in seconds, errors logged and database errors is
+      issued.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>options</title>
+
+    <para>
+      In order for <command>pgloader</command> to run, you have to
+      edit a configuration file (see Configuration) consisting of
+      Section definitions. Each section refers to a PostgreSQL table
+      into which some data is to be loaded.
+    </para>
+
+    <variablelist>
+      <varlistentry>
+        <term><option>-c</option></term>
+        <term><option>--config</option></term>
+        <listitem>
+          <para>
+	    specifies the configuration file to use. The default file
+	    name is <filename>pgloader.conf</filename>, searched for in
+	    the current working directory.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-p</option></term>
+        <term><option>--pedantic</option></term>
+        <listitem>
+          <para>
+	    activates the <command>pedantic</command> mode, where any
+	    warning is considered as a fatal error, thus stopping the
+	    processing of the input file.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-d</option></term>
+        <term><option>--debug</option></term>
+        <listitem>
+          <para>
+	    makes <command>pgloader</command> say it all about what it
+	    does. debug implies verbose.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-v</option></term>
+        <term><option>--verbose</option></term>
+        <listitem>
+          <para>
+	    makes <command>pgloader</command> very verbose about
+	    what it does.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-n</option></term>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+	    makes <command>pgloader</command> simulate operations,
+	    that implies no database connection and no data extraction
+	    from blob files.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-T</option></term>
+        <term><option>--truncate</option></term>
+        <listitem>
+          <para>
+	    makes <command>pgloader</command> issue a truncate SQL
+	    command before importing data.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-V</option></term>
+        <term><option>--vacuum</option></term>
+        <listitem>
+          <para>
+	    makes <command>pgloader</command> issue a vacuum full
+	    verbose analyse SQL command before importing data.
+	  </para>
+	  <para>
+	    This vacuum is run from shell command
+	    <command>/usr/bin/vacuumdb</command> with connection
+	    information taken from configuration file (see
+	    Configuration section of this manual page), but without
+	    password prompting. If you use this option, please
+	    configure your <filename>pg_hba.conf</filename> in a way
+	    no password is prompted (trust).
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-C</option></term>
+        <term><option>--count</option></term>
+        <listitem>
+          <para>
+	    Number of input lines to process, default is to process
+	    all the input lines.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-F</option></term>
+        <term><option>--from</option></term>
+        <listitem>
+          <para>
+	    Input line number from which we begin to process (and
+	    count). <command>pgloader</command> will skip all
+	    preceding lines.
+	  </para>
+	  <para>
+	    You can't use both <option>-F</option> and
+	    <option>-I</option> at the same time.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-I</option></term>
+        <term><option>--from-id</option></term>
+        <listitem>
+          <para>
+	    From which <command>id</command> do we begin to process
+	    (and count) input lines.
+	  </para>
+	  <para>
+	    When a composite key is used, you have to give each column
+	    of the key separated by comma, on the form col_name=value.
+	  </para>
+	  <para>
+	    Please note that using the <command>--from-id</command>
+	    option implies <command>pgloader</command> will try to get
+	    the row id of each row, whether it is in the processed
+	    interval or not. This could have some performance impact, and
+	    you may end up preferring to use <command>--from</command> instead.
+	  </para>
+	  <para>
+	    Example: <command>pgloader -I col1:val1,col2:val2</command>
+	  </para>
+	  <para>
+	    You can't use both <option>-F</option> and
+	    <option>-I</option> at the same time.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-E</option></term>
+        <term><option>--encoding</option></term>
+        <listitem>
+          <para>
+	    Input data files encoding. Defaults to 'latin9'.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>Section</option></term>
+        <listitem>
+          <para>
+	    is the name of a configured Section describing some data
+	    to load
+	  </para>
+	  <para>
+	    Section arguments are optional; if no section is given
+	    all configured sections are processed.
+	  </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>configuration</title>
+    <para>
+      The configuration file has a .ini file syntax, its first section
+      has to be the <command>pgsql</command> one, defining how to
+      access to the PostgreSQL database server where to load
+      data. Then you may define any number of sections, each one
+      describing a data loading task to be performed by
+      <command>pgloader</command>.
+    </para>
+
+    <para>
+      The <command>[pgsql]</command> section has the following
+      options, which all must be set.
+    </para>
+    <variablelist>
+      <varlistentry>
+        <term><option>host</option></term>
+        <listitem>
+          <para>
+	    PostgreSQL database server name, for example
+	    <filename>localhost</filename>.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>port</option></term>
+        <listitem>
+          <para>
+	    PostgreSQL database server listening port, 5432. You have
+	    to fill this entry.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>base</option></term>
+        <listitem>
+          <para>
+	    The name of the database you want to load data into.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>user</option></term>
+        <listitem>
+          <para>
+	    Connecting PostgreSQL user name.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>pass</option></term>
+        <listitem>
+          <para>
+	    The password of the user. The best option is to grant a
+	    <command>trust</command> access privilege in PostgreSQL
+	    <filename>pg_hba.conf</filename>. Then you can set this
+	    entry to whatever value you want to.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>client_encoding</option></term>
+        <listitem>
+          <para>
+	    Set this parameter to have <command>pgloader</command>
+	    connect to PostgreSQL using this encoding.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to 'latin9'.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>copy_every</option></term>
+        <listitem>
+          <para>
+	    When issuing <command>COPY</command> PostgreSQL commands,
+	    <command>pgloader</command> will not make a single big
+	    COPY attempt, but copy <command>copy_every</command> lines
+	    at a time.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to 10000.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>commit_every</option></term>
+        <listitem>
+          <para>
+	    PostgreSQL <command>COMMIT</command> frequency, expressed
+	    in <command>UPDATE</command> orders. A good value is 1000,
+	    that means committing the SQL transaction every 1000 input
+	    lines.
+	  </para>
+	  <para>
+	    <command>pgloader</command> issues commit every
+	    commit_every updates, on connection closing and when a SQL
+	    error occurs.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to 1000.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>copy_delimiter</option></term>
+        <listitem>
+          <para>
+	    The field separator to use in COPY FROM produced statements. If
+	    you don't specify this, the same separator as the one given in
+	    <command>field_sep</command> parameter will be used.
+	  </para>
+	  <para>
+	    Please note <command>PostgreSQL</command> requires a single char
+	    properly encoded (see your <command>client_encoding</command>
+	    parameter), or it aborts with an error and may even crash.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to
+	    <command>field_sep</command>.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>newline_escapes</option></term>
+        <listitem>
+          <para>
+	    For parameter effect description, see below (same name, table
+	    local setting).
+	  </para>
+          <para>
+	    You can set up here a global escape character, to be considered on
+	    each and every column of each and every table defined
+	    thereafter.
+	  </para>
+	</listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>null</option></term>
+        <listitem>
+          <para>
+	    You can configure here how <command>null</command> value is
+	    represented into your flat data file.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to
+	    <command>''</command> (that is empty string).
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>empty_string</option></term>
+        <listitem>
+          <para>
+	    You can configure here how empty values are represented into
+	    your flat data file.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to <command>'\
+	    '</command> (that is backslash followed by space).
+	  </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      You can then define any number of data sections, and give them an
+      arbitrary name. Some options are required, some are actually
+      optional, in which case it is said so thereafter.
+    </para>
+    <variablelist>
+      <varlistentry>
+        <term><option>table</option></term>
+        <listitem>
+          <para>
+	    The table name of the database where to load data.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>filename</option></term>
+        <listitem>
+          <para>
+	    The absolute path to the input data file. The large object
+	    files are to be found into the same directory. Their name
+	    can be in the form [bc]lob[0-9a-f]{4}.[0-9a-f]{3}, but
+	    this information is not used by
+	    <command>pgloader</command>.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>reject_log</option></term>
+        <listitem>
+          <para>
+	    In case of errors processing input data, a human readable
+	    log per rejected input data line is produced into the
+	    reject_log file.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>reject_data</option></term>
+        <listitem>
+          <para>
+	    In case of errors processing input data, the rejected
+	    input line is appended to the reject_data file.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>field_sep</option></term>
+        <listitem>
+          <para>
+	    The field separator used into the data file. The same
+	    separator will be used by the generated
+	    <command>COPY</command> commands, thus
+	    <command>pgloader</command> does not have to deal with
+	    escaping the delimiter it uses (input data has to have
+	    escaped it).
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to pipe char '|'.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>field_count</option></term>
+        <listitem>
+          <para>
+	    The <command>UNLOAD</command> command does not escape
+	    newlines when they appear into table data. Hence, you may
+	    obtain multi-line data files, where a single database row
+	    (say tuple if you prefer to) can span multiple physical
+	    lines into the unloaded file.
+	  </para>
+	  <para>
+	    If this is your case, you may want to configure here the
+	    number of columns per tuple. Then
+	    <command>pgloader</command> will count columns and
+	    buffer line input in order to re-assemble several physical
+	    lines into one data row when needed.
+	  </para>
+	  <para>
+	    This parameter is optional.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>trailing_sep</option></term>
+        <listitem>
+          <para>
+	    If this option is set to <command>True</command>, the
+	    input data file is known to append a
+	    <command>field_sep</command> as the last character of each
+	    of its lines. With this option set, this last character is
+	    then not considered as a field separator.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to False.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>client_encoding</option></term>
+        <listitem>
+          <para>
+	    Set this parameter to have <command>pgloader</command>
+	    connect to PostgreSQL using this encoding.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to 'latin9'. If defined
+	    on a table level, this local value will overwrite the global
+	    one.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>null</option></term>
+        <listitem>
+          <para>
+	    You can configure here how <command>null</command> value is
+	    represented into your flat data file.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to
+	    <command>''</command> (that is empty string). If defined on a
+	    table level, this local value will overwrite the global one.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>empty_string</option></term>
+        <listitem>
+          <para>
+	    You can configure here how empty values are represented into
+	    your flat data file.
+	  </para>
+	  <para>
+	    This parameter is optional and defaults to <command>'\
+	    '</command> (that is backslash followed by space). If defined on
+	    a table level, this local value will overwrite the global one.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>newline_escapes</option></term>
+        <listitem>
+          <para>
+	    Sometimes the input data file has field values containing
+	    newlines, and the export program used (such as the Informix
+	    <command>UNLOAD</command> command) escapes in-field
+	    newlines. So you want <command>pgloader</command> to keep
+	    those newlines as part of the field value.
+	  </para>
+	  <para>
+	    This option does the described work on specified fields
+	    and considering the escaping character you configure,
+	    following this syntax:
+	  </para>
+	  <para>
+	    newline_escapes = colname:\, other_colname:§
+	  </para>
+	  <para>
+	    This parameter is optional, and the extra work is only
+	    done when set. You can configure
+	    <command>newline_escapes</command> for as many fields as
+	    necessary, and you may configure a different escaping
+	    character each time.
+	  </para>
+	  <para>
+	    Please note that at the moment,
+	    <command>pgloader</command> only supports
+	    single-character <command>newline_escapes</command>.
+	  </para>
+	  <para>
+	    When both a global (see <command>[pgsql]</command> section)
+	    <command>newline_escapes</command> parameter and a table local
+	    one are set, <command>pgloader</command> issues a warning and
+	    only considers the global setting.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>index</option></term>
+        <listitem>
+          <para>
+	    Table index definition, to be used in blob UPDATE'ing. You
+	    define an index column by giving its name and its column
+	    number (as found into your data file, and counting from 1)
+	    separated by a colon.  If your table has a composite key,
+	    then you can define multiple columns here, separated by a
+	    comma.
+	  </para>
+	  <para>
+	    index = colname:3, other_colname:5
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>columns</option></term>
+        <listitem>
+          <para>
+	    You can define here table columns, with the same
+	    definition format as in previous <command>index</command>
+	    parameter.
+	  </para>
+	  <para>
+	    In case you have a lot of columns per table, you will want
+	    to use multiple lines for this parameter value. Python
+	    <command>ConfigParser</command> module knows how to read
+	    multi-line parameters, you don't have to escape anything.
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>blob_columns</option></term>
+        <listitem>
+          <para>
+	    The definition of the columns where to find some blob or
+	    clob reference. This definition is composed by a table
+	    column name, a column number (counting from one) reference
+	    into the Informix <command>UNLOAD</command> data file, and
+	    a large object type, separated by a colon. You can have
+	    several columns in this field, separated by a
+	    comma.
+	  </para>
+	  <para>
+	    Supported large object types are Informix blob and clob,
+	    the expected configuration strings are respectively
+	    <command>ifx_blob</command> for binary (bytea) content
+	    type and <command>ifx_clob</command> for text type values.
+	  </para>
+	  <para>
+	    Here's an example:
+	  </para>
+	  <para>
+	    blob_columns = clob_column:3:ifx_blob, other_clob_column:5:ifx_clob
+	  </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>configuration example</title>
+    <para>
+      Please see the given configuration example which should be distributed
+      in
+      <filename>/usr/share/doc/pgloader/examples/pgloader.conf</filename>.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>History</title>
+    <para>
+      <command>pgloader</command> was at first an Informix to
+      PostgreSQL migration helper which imported Informix large
+      objects directly into a PostgreSQL database.
+    </para>
+
+    <para>
+      Then as we got some data we couldn't find tools to take care of,
+      we decided <command>ifx_blob</command> would become
+      <command>pgloader</command>, as it had to be able to import all
+      Informix UNLOAD data. Those data contain escaped separators in
+      unquoted data fields and multi-line fields (\r and \n are not
+      escaped).
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Bugs</title>
+    <para>
+      Please report bugs to Dimitri Fontaine &lt;dim@dalibo.com&gt;.
+    </para>
+    <para>
+      When last line is alone on a <command>COPY</command> command and its
+      parsing ends in error (not enough columns read for example), no
+      information is given back by <command>pgloader</command>.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Authors</title>
+    <para>
+      <command>pgloader</command> is written by <author>Dimitri
+      Fontaine</author> <email>dim@dalibo.com</email>.
+    </para>
+  </refsect1>
+  
+</refentry>
diff --git a/pgloader.conf b/pgloader.conf
deleted file mode 100644
index a432739..0000000
--- a/pgloader.conf
+++ /dev/null
@@ -1,36 +0,0 @@
-# $Id: pgloader.conf,v 1.4 2006-03-28 21:39:25 jpargudo Exp $
-# ----
-# Conversion parameter file for pgloader
-#
-#	Possible file formats:
-#		COPY		native PostgreSQL COPY format (default)
-#		CSV			Comma separated variables
-#		MSCSV		Comma separated variables alternate format
-#
-#	The COPY command is constructed from the table_name, the
-#	table_columns and the eventual nulls string definition.
-#
-#	The default column separator character is comma.
-# ----
-
-table_name		= my_table
-table_columns		= column1, column2, ...
-
-file_format		= COPY 
-# COPY or CSV or MSCSV
-
-group_size		= 1000
-
-# file_sepchar		= ;  
-# , (default) or ; or other
-
-# nulls			= ''
-# NULL or '' or other
-
-# quote 		= "  
-# how your strings are quoted in the CSV file
-
-file_is_utf8            = 0
-# how the datafile and database are encoded: utf8/unicode or not?
-# 0=NO   # if utf8, both data file and
-# 1=YES  # database must be in utf8
diff --git a/pgloader.py b/pgloader.py
new file mode 100644
index 0000000..dabcec6
--- /dev/null
+++ b/pgloader.py
@@ -0,0 +1,362 @@
+#! /usr/bin/env python
+# -*- coding: ISO-8859-15 -*-
+# Author: Dimitri Fontaine <dimitri@dalibo.com>
+
+"""
+PostgreSQL data import tool, aimed at replacing and extending the Tcl pgloader.
+
+Important features :
+ - CSV file format import using COPY
+ - multi-line input file
+ - configurable amount of rows per COPY instruction
+ - large object to TEXT or BYTEA field handling
+   (only informix blobs and clobs supported as of now)
+ - optional trailing slash removal (support informix UNLOAD file format)
+ - begin processing at any line in the file, by number or row id
+ - dry-run option, to validate input reading without connecting to database
+ - pedantic option, to stop processing on warning
+ - reject log and reject data files: you can reprocess refused data later
+ - COPY errors recovery via redoing COPY with half files until file is
+   one line long, then reject log this line
+
+Please read the fine manual page pgloader(1) for command line usage
+(options) and configuration file format.
+"""
+
+import os, sys, os.path, time, codecs
+from cStringIO import StringIO
+
+import pgloader.options
+import pgloader.tools
+
+def parse_options():
+    """ Parse command line, set pgloader.options, return (conffile, args) """
+    import ConfigParser  # NOTE(review): not used in this function
+    from optparse import OptionParser
+
+    usage  = "%prog [-c <config_filename>] Section [Section ...]"
+    parser = OptionParser(usage = usage)
+    
+    parser.add_option("-c", "--config", dest = "config",
+                      default = "pgloader.conf",
+                      help    = "configuration file, defauts to pgloader.conf")
+
+    parser.add_option("-p", "--pedantic", action = "store_true",
+                      dest    = "pedantic",
+                      default = False,
+                      help    = "pedantic mode, stop processing on warning")
+
+    parser.add_option("-d", "--debug", action = "store_true",
+                      dest    = "debug",
+                      default = False,
+                      help    = "add some debug information (a lot of)")
+
+    parser.add_option("-v", "--verbose", action = "store_true",
+                      dest    = "verbose",
+                      default = False,
+                      help    = "be verbose and about processing progress")
+
+    parser.add_option("-n", "--dry-run", action = "store_true",
+                      dest    = "dryrun",
+                      default = False,
+                      help    = "simulate operations, don't connect to the db")
+
+    parser.add_option("-T", "--truncate", action = "store_true",
+                      dest = "truncate",
+                      default = False,
+                      help    = "truncate tables before importing data")
+
+    parser.add_option("-V", "--vacuum", action = "store_true",
+                      dest = "vacuum",
+                      default = False,
+                      help    = "vacuum database after having imported data")
+
+    parser.add_option("-C", "--count", dest = "count",
+                      default = None, type = "int",
+                      help    = "number of input lines to process")
+    
+    parser.add_option("-F", "--from", dest = "fromcount",
+                      default = 0, type = "int",
+                      help    = "number of input lines to skip")
+
+    parser.add_option("-I", "--from-id", dest = "fromid",
+                      default = None,
+                      help    = "wait for given id on input to begin")
+
+    parser.add_option("-E", "--encoding", dest = "encoding",
+                      default = None,
+                      help    = "input files encoding")
+
+    (opts, args) = parser.parse_args()
+
+    # check existence and readability of config file, exit(1) otherwise
+    if not os.path.exists(opts.config):
+        print "Error: Configuration file %s does not exists" % opts.config
+        print parser.format_help()
+        sys.exit(1)
+
+    if not os.access(opts.config, os.R_OK):
+        print "Error: Can't read configuration file %s" % opts.config
+        print parser.format_help()
+        sys.exit(1)
+
+    if opts.verbose:
+        print 'Using %s configuration file' % opts.config
+
+    if opts.fromcount != 0 and opts.fromid is not None:
+        print "Error: Can't set both options fromcount (-F) AND fromid (-I)"
+        sys.exit(1)
+
+    pgloader.options.DRY_RUN    = opts.dryrun
+    pgloader.options.DEBUG      = opts.debug
+    # if debug, then verbose
+    pgloader.options.VERBOSE    = opts.verbose or opts.debug
+    pgloader.options.PEDANTIC   = opts.pedantic
+
+    pgloader.options.TRUNCATE   = opts.truncate
+    pgloader.options.VACUUM     = opts.vacuum
+    
+    pgloader.options.COUNT      = opts.count
+    pgloader.options.FROM_COUNT = opts.fromcount
+    pgloader.options.FROM_ID    = opts.fromid
+
+    pgloader.options.INPUT_ENCODING = opts.encoding
+
+    return opts.config, args
+
+def parse_config(conffile):
+    """ Parse the configuration file, return (config, dbconn); dbconn is
+    None in dry run mode, else a db object created with connect = False """
+    section = 'pgsql'
+
+    # Now read pgsql configuration section
+    import ConfigParser
+    config = ConfigParser.ConfigParser()
+
+    try:
+        config.read(conffile)
+    except ConfigParser.Error, error:
+        # only parsing errors: a bare except would also trap interrupts
+        print "Error: Given file is not a configuration file"
+        print error
+        sys.exit(4)
+
+    if not config.has_section(section):
+        print "Error: Please provide a [%s] section" % section
+        sys.exit(5)
+
+    # load options now: this has to be done after command line parsing
+    from pgloader.options  import DRY_RUN, VERBOSE
+
+    if DRY_RUN:
+        if VERBOSE:
+            print "Notice: dry run mode, not connecting to database"
+        return config, None
+
+    try:
+        from pgloader.db import db
+        dbconn = db(config.get(section, 'host'),
+                    config.getint(section, 'port'),
+                    config.get(section, 'base'),
+                    config.get(section, 'user'),
+                    config.get(section, 'pass'),
+                    connect = False)
+
+        if config.has_option(section, 'client_encoding'):
+            dbconn.client_encoding = config.get(section, 'client_encoding')
+
+        if config.has_option(section, 'copy_every'):
+            dbconn.copy_every = config.getint(section, 'copy_every')
+
+        if config.has_option(section, 'commit_every'):
+            dbconn.commit_every = config.getint(section, 'commit_every')
+
+        if config.has_option(section, 'copy_delimiter'):
+            dbconn.copy_sep = config.get(section, 'copy_delimiter')
+
+        # Then there are null and empty_string optional parameters
+        # They can be overridden in specific table configuration
+        if config.has_option(section, 'null'):
+            pgloader.options.NULL = pgloader.tools.parse_config_string(
+                config.get(section, 'null'))
+
+        if config.has_option(section, 'empty_string'):
+            pgloader.options.EMPTY_STRING = pgloader.tools.parse_config_string(
+                config.get(section, 'empty_string'))
+
+        # optional global newline_escapes
+        if config.has_option(section, 'newline_escapes'):
+            setting = pgloader.tools.parse_config_string(
+                config.get(section, 'newline_escapes'))
+            pgloader.options.NEWLINE_ESCAPES = setting
+
+    except Exception, error:
+        print "Error: Could not initialize PostgreSQL connection:"
+        print error
+        sys.exit(6)
+
+    return config, dbconn
+
+def myprint(l, line_prefix = "  ", cols = 78):
+    """ pretty print the strings of list l, space separated, cols wide """
+    # a line is only flushed when it holds something: an element wider than
+    # cols is printed on its own line, not after an empty prefix-only line
+    tmp = line_prefix
+    for e in l:
+        # the + 1 counts the separating space, so that lines fit in cols
+        if tmp != line_prefix and len(tmp) + 1 + len(e) > cols:
+            print tmp
+            tmp = line_prefix
+        if tmp != line_prefix: tmp += " "
+        tmp += e
+    print tmp
+
+def duration_pprint(duration):
+    """ pretty print duration (seconds), padded to 10 characters """
+    if duration >= 3600:
+        h  = int(duration / 3600)
+        m  = int((duration - 3600 * h) / 60)
+        # remaining seconds as they are: no half second gets added to them
+        s  = duration - 3600 * h - 60 * m
+        return '%2dh%02dm%04.1f' % (h, m, s)
+
+    elif duration >= 60:
+        m  = int(duration / 60)
+        s  = duration - 60 * m
+        return ' %02dm%06.3f' % (m, s)
+    else:
+        return '%10.3f' % duration
+
+def load_data():
+    """ read command line and configuration file, then import data of
+    the sections given on command line, or of all sections if none is
+    given; return the number of sections which failed or had errors """
+
+    # first parse command line options, and set pgloader.options values
+    conffile, args = parse_options()
+
+    # now init db connection
+    config, dbconn = parse_config(conffile)
+
+    # load some pgloader package modules
+    from pgloader.options  import DRY_RUN, VERBOSE, DEBUG, PEDANTIC, VACUUM
+    from pgloader.pgloader import PGLoader
+    from pgloader.tools    import PGLoader_Error
+
+    sections = []
+    summary  = {}
+    retcode  = 0
+
+    # args are meant to be configuration sections
+    if len(args) > 0:
+        for s in args:
+            if config.has_section(s):
+                sections.append(s)
+
+    else:
+        for s in config.sections():
+            if s != 'pgsql':
+                sections.append(s)
+
+    if VERBOSE:
+        print 'Will consider following sections:'
+        myprint(sections)
+
+    # we count time passed from now on
+    begin = time.time()
+
+    # we run through sorted section list
+    sections.sort()
+    for s in sections:
+        loader = None
+        try:
+            loader = PGLoader(s, config, dbconn)
+            loader.run()
+            summary[s] = (loader.name,) + loader.summary()
+        except PGLoader_Error, e:
+            # a failed section gets no summary line, count its failure here
+            retcode += 1
+            if str(e) == '':
+                print '[%s] Please correct previous errors' % s
+            else:
+                print
+                print 'Error: %s' % e
+            # loader is still None when PGLoader() itself raised
+            if PEDANTIC and loader is not None:
+                loader.print_stats()
+
+        except KeyboardInterrupt:
+            print "Aborting on user demand (Interrupt)"
+
+    # total duration
+    td = time.time() - begin
+
+    # print a pretty summary
+    t= 'Table name        |    duration |    size |    updates |     errors '
+    _= '===================================================================='
+
+    tu = te = ts = 0 # total updates, errors, size
+    if not DRY_RUN:
+        dbconn.reset()
+        cursor = dbconn.dbconn.cursor()
+
+    s_ok = 0
+    for s in sections:
+        if s not in summary:
+            continue
+
+        s_ok += 1
+        if s_ok == 1:
+            # print pretty summary header now
+            print
+            print t
+            print _
+
+        t, d, u, e = summary[s]
+        d = duration_pprint(d)
+
+        if not DRY_RUN:
+            sql = "select pg_total_relation_size(%s), " + \
+                  "pg_size_pretty(pg_total_relation_size(%s));"
+            cursor.execute(sql, [t, t])
+            octets, s = cursor.fetchone()
+            ts += octets
+            # test the end of the string, whatever the number of digits
+            if s.endswith('bytes'): s = s[:-5] + ' B'
+        else:
+            s = '-'
+        print '%-18s| %ss | %7s | %10d | %10d' % (t, d, s, u, e)
+
+        tu += u
+        te += e
+
+        if e > 0:
+            retcode += 1
+
+    if s_ok > 1:
+        td = duration_pprint(td)
+        if not DRY_RUN:
+            # pretty size, there is no cursor to ask in dry run mode
+            cursor.execute("select pg_size_pretty(%s);", [ts])
+            [ts] = cursor.fetchone()
+            if ts.endswith('bytes'): ts = ts[:-5] + ' B'
+        else:
+            ts = '-'
+        print _
+        print 'Total             | %ss | %7s | %10d | %10d' % (td, ts, tu, te)
+
+    if not DRY_RUN:
+        cursor.close()
+
+    print
+    if VACUUM and not DRY_RUN:
+        print 'vacuumdb... '
+        try:
+            dbconn.vacuum()
+        except KeyboardInterrupt:
+            pass
+
+    return retcode
+
+if __name__ == "__main__":
+    sys.exit(load_data())  # process exit status is load_data() return value
+