* Support for datestyle setting
* Support for omitting column numbering
* Change documentation source format from SGML to asciidoc
* New version 2.2.1
dim 2007-08-23 10:38:09 +00:00
parent 8ed8219e37
commit 921db51d65
16 changed files with 559 additions and 882 deletions

View File

@ -1,5 +1,4 @@
DOCS = pgloader.1.sgml
GARBAGE = manpage.links manpage.refs
DOCS = pgloader.1.txt
# debian setting
DESTDIR =
@ -18,6 +17,11 @@ install:
cp -a $(libs) $(libdir)/pgloader
cp -a $(examples) $(exdir)
man: $(DOCS)
docbook2man $(DOCS) 2>/dev/null
-rm -f $(GARBAGE)
html: $(DOCS)
asciidoc -a toc $<
pgloader.1.xml: $(DOCS)
asciidoc -d manpage -b docbook $<
man: pgloader.1.xml
xmlto man $<

8
debian/changelog vendored
View File

@ -1,3 +1,11 @@
pgloader (2.2.1) unstable; urgency=low
* Support for datestyle setting
* Support for omitting column numbering
* Change documentation source format from SGML to asciidoc
-- Dimitri Fontaine <dim@tapoueh.org> Thu, 23 Aug 2007 12:35:34 +0200
pgloader (2.2.0) unstable; urgency=low
* Support for partial loading of data (subrange(s) of columns)

2
debian/files vendored
View File

@ -1 +1 @@
pgloader_2.0.2_all.deb misc extra
pgloader_2.2.0_all.deb misc extra

View File

@ -18,6 +18,7 @@ newline_escapes = \
[simple]
table = simple
format = text
datestyle = dmy
filename = simple/simple.data
field_sep = |
trailing_sep = True
@ -75,6 +76,6 @@ format = csv
filename = csv/csv.data
field_sep = ,
quotechar = "
columns = x:1, y:2, a:3, b:4, c:5, d:6
columns = x, y, a, b, d:6, c:5
only_cols = 3-6

View File

@ -1,7 +1,7 @@
1|some first row text|2006-11-11|
2|some second row text|2006-11-11|
3|some third row text|2006-10-12|
2|some second row text|13/11/2006|
3|some third row text|12-10-2006|
4|\ |2006-10-4|
5|some fifth row text|2006-5-12|
6|some sixth row text|2006-7-10|
6|some sixth row text|10/7/6|
7|some null date to play with||

View File

@ -1,853 +0,0 @@
<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN">
<refentry>
<refentryinfo>
<address>
<email>dim@tapoueh.org</email>
</address>
<author>
<firstname>Dimitri</firstname>
<surname>Fontaine</surname>
</author>
<date>August 2006</date>
<copyright>
<year>2006</year>
<holder>Dimitri Fontaine</holder>
</copyright>
</refentryinfo>
<refmeta>
<refentrytitle>pgloader</refentrytitle>
<manvolnum>1</manvolnum>
</refmeta>
<refnamediv>
<refname>pgloader</refname>
<refpurpose>
Import CSV data and Large Object to PostgreSQL
</refpurpose>
</refnamediv>
<refsynopsisdiv>
<cmdsynopsis>
<command>pgloader</command>
<arg><option>-c</option> configuration file</arg>
<arg><option>-p</option> pedantic</arg>
<arg><option>-d</option> debug</arg>
<arg><option>-v</option> verbose</arg>
<arg><option>-n</option> dry run</arg>
<arg><option>-Cn</option> count</arg>
<arg><option>-Fn</option> from</arg>
<arg><option>-In</option> from id</arg>
<arg><option>-E</option> input files encoding</arg>
<arg>Section1 Section2</arg>
</cmdsynopsis>
</refsynopsisdiv>
<refsect1>
<title>DESCRIPTION</title>
<para>
<command>pgloader</command> imports data from a flat file and
insert it into a database table. It uses a flat file per
database table, and you can configure as many Sections as you
want, each one associating a table name and a data file.
</para>
<para>
Data are parsed and rewritten, then given to PostgreSQL
<command>COPY</command> command. Parsing is necessary for
dealing with end of lines and eventual trailing separator
characters, and for column reordering: your flat data file may
not have the same column order as the databse table has.
</para>
<para>
<command>pgloader</command> is also able to load some large
objects data into PostgreSQL, as of now only Informix
<command>UNLOAD</command> data files are supported. This command
gives large objects data location information into the main data
file. <command>pgloader</command> parse it and produces and SQL
UPDATE order per large object, and commit those orders once
every <command>commit_every</command> configuration parameter.
</para>
<para>
<command>pgloader</command> issue some timing statistics
every <command>commit_every</command> commits (see Configuration
for this setting). At the end of each section processing, a
summary of overall operations, numbers of updates and commits,
time it took in seconds, errors logged and database errors is
issued.
</para>
</refsect1>
<refsect1>
<title>OPTIONS</title>
<para>
In order for <command>pgloader</command> to run, you have to
edit a configuration file (see Configuration) consisting of
Section definitions. Each section refers to a PostgreSQL table
into which some data is to be loaded.
</para>
<variablelist>
<varlistentry>
<term><option>-c</option></term>
<term><option>--config</option></term>
<listitem>
<para>
specifies the configuration file to use. The default file
name is <filename>pgloader.conf</filename>, searched into
current working directory.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-p</option></term>
<term><option>--pedantic</option></term>
<listitem>
<para>
activates the <command>pedantic</command> mode, where any
warning is considered as a fatal error, thus stopping the
processing of the input file.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-d</option></term>
<term><option>--debug</option></term>
<listitem>
<para>
makes <command>pgloader</command> say it all about what it
does. debug implies verbose.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-v</option></term>
<term><option>--verbose</option></term>
<listitem>
<para>
makes <command>pgloader</command> very verbose about
what it does.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-n</option></term>
<term><option>--dry-run</option></term>
<listitem>
<para>
makes <command>pgloader</command> simulate operations,
that implies no database connection and no data extraction
from blob files.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-T</option></term>
<term><option>--truncate</option></term>
<listitem>
<para>
makes <command>pgloader</command> issue a truncate SQL
command before importing data.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-V</option></term>
<term><option>--vacuum</option></term>
<listitem>
<para>
makes <command>pgloader</command> issue a vacuum full
verbose analyse SQL command before importing data.
</para>
<para>
This vacuum is run from shell command
<command>/usr/bin/vacuumdb</command> with connection
informations taken from configuration file (see
Configuration section of this manual page), but without
password prompting. If you use this option, please
configure your <filename>pg_hba.conf</filename> in a way
no password is prompted (trust).
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-C</option></term>
<term><option>--count</option></term>
<listitem>
<para>
Number of input lines to process, default is to process
all the input lines.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-F</option></term>
<term><option>--from</option></term>
<listitem>
<para>
Input line number from which we begin to process (and
count). <command>pgloader</command> will skip all
preceding lines.
</para>
<para>
You can't use both <option>-F</option> and
<option>-I</option> at the same time.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-I</option></term>
<term><option>--from-id</option></term>
<listitem>
<para>
From which <command>id</command> do we begin to process
(and count) input lines.
</para>
<para>
When a composite key is used, you have to give each column
of the key separated by comma, on the form col_name=value.
</para>
<para>
Please notice using the <command>--from-id</command>
option implies <command>pgloader</command> will try to get
row id of each row, it being on the interval processed or
not. This could have some performance impact, and you may
end up prefering to use <command>--from</command> instead.
</para>
<para>
Example: <command>pgloader -I col1:val1,col2:val2</command>
</para>
<para>
You can't use both <option>-F</option> and
<option>-I</option> at the same time.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-E</option></term>
<term><option>--encoding</option></term>
<listitem>
<para>
Input data files encoding. Defaults to 'latin9'.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>Section</option></term>
<listitem>
<para>
is the name of a configured Section describing some data
to load
</para>
<para>
Section arguments are optionnal, if no section is given
all configured sections are processed.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>GLOBAL CONFIGURATION SECTION</title>
<para>
The configuration file has a .ini file syntax, its first section
has to be the <command>pgsql</command> one, defining how to
access to the PostgreSQL database server where to load
data. Then you may define any number of sections, each one
describing a data loading task to be performed by
<command>pgloader</command>.
</para>
<para>
The <command>[pgsql]</command> section has the following
options, which all must be set.
</para>
<variablelist>
<varlistentry>
<term><option>host</option></term>
<listitem>
<para>
PostgreSQL database server name, for example
<filename>localhost</filename>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>port</option></term>
<listitem>
<para>
PostgreSQL database server listening port, 5432. You have
to fill this entry.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>base</option></term>
<listitem>
<para>
The name of the database you want to load data into.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>user</option></term>
<listitem>
<para>
Connecting PostgreSQL user name.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>pass</option></term>
<listitem>
<para>
The password of the user. The better is to grant a
<command>trust</command> access privilege in PostgreSQL
<filename>pg_hba.conf</filename>. Then you can set this
entry to whatever value you want to.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>client_encoding</option></term>
<listitem>
<para>
Set this parameter to have <command>pgloader</command>
connects to PostgreSQL using this encoding.
</para>
<para>
This parameter is optionnal and defaults to 'latin9'.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>copy_every</option></term>
<listitem>
<para>
When issuing <command>COPY</command> PostgreSQL commands,
<command>pgloader</command> will not make a single big
COPY attempt, but copy <command>copy_every</command> lines
at a time.
</para>
<para>
This parameter is optionnal and defaults to 10000.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>commit_every</option></term>
<listitem>
<para>
PostgreSQL <command>COMMIT</command> frequency, exprimed
in <command>UPDATE</command> orders. A good value is 1000,
that means commiting the SQL transaction every 1000 input
lines.
</para>
<para>
<command>pgloader</command> issues commit every
commit_every updates, on connection closing and when a SQL
error occurs.
</para>
<para>
This parameter is optionnal and defaults to 1000.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>copy_delimiter</option></term>
<listitem>
<para>
The field separator to use in COPY FROM produced statements. If
you don't specify this, the same separator as the one given in
<command>field_sep</command> parameter will be used.
</para>
<para>
Please note <command>PostgreSQL</command> requires a single char
properly encoded (see your <command>client_encoding</command>
parameter), or it abort in error and even may crash.
</para>
<para>
This parameter is optionnal and defaults to
<command>field_sep</command>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>newline_escapes</option></term>
<listitem>
<para>
For parameter effect description, see below (same name, table
local setting).
</para>
<para>
You can setup here a global escape caracter, to be
considered on each and every column of each and every
text-format table defined thereafter.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>null</option></term>
<listitem>
<para>
You can configure here how <command>null</command> value is
represented into your flat data file.
</para>
<para>
This parameter is optionnal and defaults to
<command>''</command> (that is empty string).
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>empty_string</option></term>
<listitem>
<para>
You can configure here how empty values are represented into
your flat data file.
</para>
<para>
This parameter is optionnal and defaults to <command>'\
'</command> (that is backslash followed by space).
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>COMMON FORMAT CONFIGURATION PARAMETERS</title>
<para>
You then can define any number of data section, and give them an
arbitrary name. Some options are required, some are actually
optionnals, in which case it is said so thereafter.
</para>
<para>
First, we'll go through common parameters, applicable whichever
format of data you're refering to. Then text-format only
parameters will be presented, followed by csv-only parameters.
</para>
<variablelist>
<varlistentry>
<term><option>table</option></term>
<listitem>
<para>
The table name of the database where to load data.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>format</option></term>
<listitem>
<para>
The format data are to be found, either
<command>text</command> or <command>csv</command>.
</para>
<para>
See next sections for format specific options.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>filename</option></term>
<listitem>
<para>
The absolute path to the input data file. The large object
files are to be found into the same directory. Their name
can be in the form [bc]lob[0-9a-f]{4}.[0-9a-f]{3}, but
this information is not used by
<command>pgloader</command>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>reject_log</option></term>
<listitem>
<para>
In case of errors processing input data, a human readable
log per rejected input data line is produced into the
reject_log file.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>reject_data</option></term>
<listitem>
<para>
In case of errors processing input data, the rejected
input line is appended to the reject_data file.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>field_sep</option></term>
<listitem>
<para>
The field separator used into the data file. The same
separator will be used by the generated
<command>COPY</command> commands, thus
<command>pgloader</command> does not have to deal with
escaping the delimiter it uses (input data has to have
escaped it).
</para>
<para>
This parameter is optionnal and defaults to pipe char '|'.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>client_encoding</option></term>
<listitem>
<para>
Set this parameter to have <command>pgloader</command>
connects to PostgreSQL using this encoding.
</para>
<para>
This parameter is optionnal and defaults to 'latin9'. If defined
on a table level, this local value will overwritte the global
one.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>null</option></term>
<listitem>
<para>
You can configure here how <command>null</command> value is
represented into your flat data file.
</para>
<para>
This parameter is optionnal and defaults to
<command>''</command> (that is empty string). If defined on a
table level, this local value will overwritte the global one.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>empty_string</option></term>
<listitem>
<para>
You can configure here how empty values are represented into
your flat data file.
</para>
<para>
This parameter is optionnal and defaults to <command>'\
'</command> (that is backslash followed by space). If defined on
a table level, this local value will overwritte the global one.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>index</option></term>
<listitem>
<para>
Table index definition, to be used in blob UPDATE'ing. You
define an index column by giving its name and its column
number (as found into your data file, and counting from 1)
separated by a colon. If your table has a composite key,
then you can define multiple columns here, separated by a
comma.
</para>
<para>
index = colname:3, other_colname:5
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>columns</option></term>
<listitem>
<para>
You can define here table columns, with the same
definition format as in previous <command>index</command>
parameter.
</para>
<para>
Note you'll have to define here all the columns to be
found in data file, whether you want to use them all or
not. When not using them all, use the
<command>only_cols</command> parameter to restrict.
</para>
<para>
As of <command>pgloader 2.2</command> the column list used
might not be the same as the table columns definition.
</para>
<para>
In case you have a lot a columns per table, you will want
to use multiple lines for this parameter value. Python
<command>ConfigParser</command> module knows how to read
multi-line parameters, you don't have to escape anything.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>only_cols</option></term>
<listitem>
<para>
If you want to only load a part of the columns you have
into the data file, this option let you define which
columns you're interrested in. <command>only_col</command>
is a comma separated list of ranges or values, as in
following example.
</para>
<para>
only_cols = 1-3, 5
</para>
<para>
This parameter is optionnal and defaults to the list of
all columns given on the <command>columns</command>
parameter list, in the colname order.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>blob_columns</option></term>
<listitem>
<para>
The definition of the colums where to find some blob or
clob reference. This definition is composed by a table
column name, a column number (couting from one) reference
into the Informix <command>UNLOAD</command> data file, and
a large object type, separated by a colon. You can have
several columns in this field, separated by a
comma.
</para>
<para>
Supported large objects type are Informix blob and clob,
the awaited configuration string are respectively
<command>ifx_blob</command> for binary (bytea) content
type and <command>ifx_clob</command> for text type values.
</para>
<para>
Here's an example:
</para>
<para>
blob_type = clob_column:3:ifx_blob, other_clob_column:5:ifx_clob
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>TEXT FORMAT CONFIGURATION PARAMETERS</title>
<variablelist>
<varlistentry>
<term><option>field_count</option></term>
<listitem>
<para>
The <command>UNLOAD</command> command does not escape
newlines when they appear into table data. Hence, you may
obtain multi-line data files, where a single database row
(say tuple if you prefer to) can span multiple physical
lines into the unloaded file.
</para>
<para>
If this is your case, you may want to configure here the
number of columns per tuple. Then
<command>pgloader</command> will count columns and
buffer line input in order to re-assemble several physical
lines into one data row when needed.
</para>
<para>
This parameter is optionnal.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>trailing_sep</option></term>
<listitem>
<para>
If this option is set to <command>True</command>, the
input data file is known to append a
<command>field_sep</command> as the last character of each
of its lines. With this option set, this last character is
then not considered as a field separator.
</para>
<para>
This parameter is optionnal and defaults to False.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>newline_escapes</option></term>
<listitem>
<para>
Sometimes the input data file has field values containing
newlines, and the export program used (as Informix
<command>UNLOAD</command> command) escape in-field
newlines. So you want <command>pgloader</command> to keep
those newlines, while at the same time preserving them.
</para>
<para>
This option does the described work on specified fields
and considering the escaping character you configure,
following this syntax:
</para>
<para>
newline_escapes = colname:\, other_colname:§
</para>
<para>
This parameter is optionnal, and the extra work is only
done when set. You can configure
<command>newline_escapes</command> for as many fields as
necessary, and you may configure a different escaping
character each time.
</para>
<para>
Please note that at the moment,
<command>pgloader</command> does only support one
character length <command>newline_escapes</command>.
</para>
<para>
When both a global (see <command>[pgsql]</command> section)
<command>newline_escapes</command> parameter and a table local
one are set, <command>pgloader</command> issues a warning and
only consider the global setting.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>CSV FORMAT CONFIGURATION PARAMETERS</title>
<variablelist>
<varlistentry>
<term><option>doublequote</option></term>
<listitem>
<para>
Controls how instances of quotechar appearing inside a
field should be themselves be quoted. When True, the
character is doubled. When False, the escapechar is used
as a prefix to the quotechar. It defaults to True.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>escapechar</option></term>
<listitem>
<para>
A one-character string used by the writer to escape the
delimiter if quoting is set to QUOTE_NONE and the
quotechar if doublequote is False. On reading, the
escapechar removes any special meaning from the following
character. It defaults to None, which disables escaping.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>quotechar</option></term>
<listitem>
<para>
A one-character string used to quote fields containing
special characters, such as the delimiter or quotechar, or
which contain new-line characters. It defaults to '"'.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>skipinitialspace</option></term>
<listitem>
<para>
When True, whitespace immediately following the delimiter
is ignored. The default is False.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>CONFIGURATION EXAMPLE</title>
<para>
Please see the given configuration example which should be distributed
in
<filename>/usr/share/doc/pgloader/examples/pgloader.conf</filename>.
</para>
</refsect1>
<refsect1>
<title>HISTORY</title>
<para>
<command>pgloader</command> was at first an Informix to
PostgreSQL migration helper which imported Informix large
objects directly into a PostgreSQL database.
</para>
<para>
Then as we got some data we couldn't file tools to care about,
we decided <command>ifx_blob</command> would become
<command>pgloader</command>, as it had to be able to import all
Informix UNLOAD data. Those data contains escaped separator into
unquoted data field and multi-lines fields (\r and \n are not
escaped).
</para>
</refsect1>
<refsect1>
<title>BUGS</title>
<para>
Please report bugs to Dimitri Fontaine &lt;dim@tapoueh.org&gt;.
</para>
<para>
When last line is alone on a <command>COPY</command> command and its
parsing ends in error (not enough columns read for example), no
information is given back by <command>pgloader</command>.
</para>
</refsect1>
<refsect1>
<title>AUTHORS</title>
<para>
<command>pgloader</command> is written by <author>Dimitri
Fontaine</author> <email>dim@tapoueh.org</email>.
</para>
</refsect1>
</refentry>

486
pgloader.1.txt Normal file
View File

@ -0,0 +1,486 @@
= pgloader(1) =
== NAME ==
pgloader - Import CSV data and Large Object to PostgreSQL
== SYNOPSIS ==
pgloader [-c configuration file] [-p pedantic] [-d debug] [-v verbose]
[-n dryrun] [-Cn count] [-Fn from] [-In from id] [-E input files encoding]
[Section1 Section2]
== DESCRIPTION ==
+pgloader+ imports data from a flat file and inserts it into a database
table. It uses a flat file per database table, and you can configure
as many Sections as you want, each one associating a table name and a
data file.
Data are parsed and rewritten, then given to the PostgreSQL +COPY+
command. Parsing is necessary for dealing with end-of-line and
possible trailing separator characters, and for column reordering:
your flat data file may not have the same column order as the database
table.
+pgloader+ is also able to load some large object data into
PostgreSQL; as of now only Informix +UNLOAD+ data files are
supported. This command puts large object location information
into the main data file. +pgloader+ parses it and adds the +text+ or
+bytea+ content, properly escaped, to the +COPY+ data.
+pgloader+ issues timing statistics every +commit_every+ commits
(see Configuration for this setting). At the end of each section
processing, a summary of overall operations, numbers of updates and
commits, time it took in seconds, errors logged and database errors is
issued.
+pgloader+ is available from +pgfoundry+ at
http://pgfoundry.org/projects/pgloader/[], where you'll find a debian
package, a source package and an anonymous CVS.
== OPTIONS ==
In order for pgloader to run, you have to edit a configuration file
(see Configuration) consisting of Section definitions. Each section
refers to a PostgreSQL table into which some data is to be loaded.
-c, --config::
specifies the configuration file to use. The default file name is
pgloader.conf, searched for in the current working directory.
-p, --pedantic::
activates the pedantic mode, where any warning is considered as a fatal
error, thus stopping the processing of the input file.
-d, --debug::
makes pgloader say it all about what it does. debug implies verbose.
-v, --verbose::
makes pgloader very verbose about what it does.
-n, --dry-run::
makes pgloader simulate operations, which implies no database connection and
no data extraction from blob files.
-T, --truncate::
makes pgloader issue a truncate SQL command before importing data.
-V, --vacuum::
+
makes pgloader issue a vacuum full verbose analyse SQL command before
importing data.
+
This vacuum is run from the shell command /usr/bin/vacuumdb with
connection information taken from the configuration file (see the
Configuration section of this manual page), but without password
prompting. If you use this option, please configure your pg_hba.conf
in such a way that no password is prompted (trust).
-C, --count::
Number of input lines to process; the default is to process all the input
lines.
-F, --from::
+
Input line number from which we begin to process (and count). pgloader
will skip all preceding lines.
+
You can't use both -F and -I at the same time.
-I, --from-id::
+
From which id do we begin to process (and count) input lines.
+
When a composite key is used, you have to give each column of the key
separated by a comma, in the form col_name=value.
+
Please note that using the --from-id option implies pgloader will try
to get the row id of each row, whether it is in the processed interval
or not. This could have some performance impact, and you may end up
preferring to use --from instead.
+
Example: pgloader -I col1:val1,col2:val2
+
You can't use both -F and -I at the same time.
-E, --encoding::
Input data files encoding. Defaults to 'latin9'.
Section::
+
is the name of a configured Section describing some data to load
+
Section arguments are optional; if no section is given, all configured
sections are processed.
== GLOBAL CONFIGURATION SECTION ==
The configuration file uses a .ini file syntax; its first section has
to be the pgsql one, defining how to access the PostgreSQL database
server where to load data. Then you may define any number of sections,
each one describing a data loading task to be performed by pgloader.
The [pgsql] section has the following options, which all must be set.
host::
PostgreSQL database server name, for example localhost.
port::
PostgreSQL database server listening port, usually 5432. You have to
fill in this entry.
base::
The name of the database you want to load data into.
user::
Connecting PostgreSQL user name.
pass::
The password of the user. The best approach is to grant a trust access
privilege in PostgreSQL pg_hba.conf; then you can set this entry to
whatever value you want.
client_encoding::
+
Set this parameter to have pgloader connect to PostgreSQL using this
encoding.
+
This parameter is optional and defaults to 'latin9'.
datestyle::
+
Set this parameter to have pgloader connect to PostgreSQL using this
datestyle setting.
+
This parameter is optional and has no default value, so pgloader will
use whatever default your PostgreSQL server is configured with.
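For illustration, here is a minimal Python sketch of what this setting amounts to, adapted from the +pgloader/db.py+ change in this commit: when a datestyle is configured, pgloader issues a +SET SESSION datestyle+ on each (re)connection. The function name and the bare DB-API connection argument are illustrative, not pgloader's exact interface.
----
# Sketch adapted from this commit's db.set_datestyle(); names are illustrative.
def set_datestyle(dbconn, datestyle):
    """Apply the configured datestyle to the current session, if any."""
    if datestyle is None:
        # no datestyle configured: keep the server's default setting
        return
    cursor = dbconn.cursor()
    # run once per (re)connection, before any COPY is attempted
    cursor.execute('SET SESSION datestyle TO %s', [datestyle])
    cursor.close()
----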
copy_every::
+
When issuing PostgreSQL +COPY+ commands, pgloader will not make a
single big +COPY+ attempt, but will copy copy_every lines at a time.
+
This parameter is optional and defaults to 10000.
commit_every::
+
PostgreSQL +COMMIT+ frequency, expressed in +UPDATE+ orders. A good
value is 1000, which means committing the SQL transaction every 1000
input lines.
+
+pgloader+ issues a commit every +commit_every+ updates, on connection
closing and when a SQL error occurs.
+
This parameter is optional and defaults to 1000.
copy_delimiter::
+
The field separator to use in +COPY FROM+ produced statements. If you
don't specify this, the same separator as the one given in +field_sep+
parameter will be used.
+
Please note PostgreSQL requires a single, properly encoded character (see
your +client_encoding+ parameter), or it will abort with an error and may
even crash.
+
This parameter is optional and defaults to +field_sep+.
newline_escapes::
+
For parameter effect description, see below (same name, table local
setting).
+
You can set up a global escape character here, to be considered on each
and every column of each and every text-format table defined
thereafter.
null::
+
You can configure here how the null value is represented in your flat
data file.
+
This parameter is optional and defaults to '' (that is, the +empty string+).
empty_string::
+
You can configure here how empty values are represented in your flat
data file.
+
This parameter is optional and defaults to '\ ' (that is, a backslash
followed by a space).
== COMMON FORMAT CONFIGURATION PARAMETERS ==
You can then define any number of data sections and give them arbitrary
names. Some options are required, some are actually optional, in which case
it is said so thereafter.
First, we'll go through common parameters, applicable whichever format of data
you're referring to. Then text-format-only parameters will be presented,
followed by csv-only parameters.
table::
The name of the database table into which to load data.
format::
+
The format data are to be found, either text or csv.
+
See next sections for format specific options.
filename::
The absolute path to the input data file. The large object files
are to be found in the same directory. Their name can be in the
form +[bc]lob[0-9a-f]{4}.[0-9a-f]{3}+, but this information is not
used by +pgloader+.
reject_log::
In case of errors processing input data, a human readable log per rejected
input data line is produced into the +reject_log+ file.
reject_data::
In case of errors processing input data, the rejected input line is
appended to the +reject_data+ file.
field_sep::
+
The field separator used in the data file. The same separator will
be used by the generated +COPY+ commands, thus +pgloader+ does not
have to deal with escaping the delimiter it uses (input data has to
have escaped it).
+
This parameter is optional and defaults to the pipe char '|'.
client_encoding::
+
Set this parameter to have pgloader connect to PostgreSQL using this
encoding.
+
This parameter is optional and defaults to 'latin9'. If defined at the
table level, this local value will override the global one.
datestyle::
+
Set this parameter to have pgloader connect to PostgreSQL using this
+datestyle+ setting.
+
This parameter is optional and has no default. If defined at the table
level, this local value will override the global one.
null::
+
You can configure here how the null value is represented in your flat
data file.
+
This parameter is optional and defaults to +''+ (that is, the empty
string). If defined at the table level, this local value will override
the global one.
empty_string::
+
You can configure here how empty values are represented in your flat
data file.
+
This parameter is optional and defaults to '\ ' (that is, a backslash
followed by a space). If defined at the table level, this local value
will override the global one.
index::
+
Table index definition, to be used in blob +UPDATE+'ing. You define an
index column by giving its name and its column number (as found in
your data file, and counting from 1) separated by a colon. If your
table has a composite key, then you can define multiple columns here,
separated by a comma.
+
index = colname:3, other_colname:5
columns::
+
You can define here table columns, with the same definition format as
in previous index parameter.
+
Note you'll have to define here all the columns to be found in the data
file, whether you want to use them all or not. When not using them
all, use the +only_cols+ parameter to restrict.
+
As of +pgloader 2.2+ the column list used might not be the same as the
table columns definition.
+
As of +pgloader 2.2.1+ you can omit column numbering if you want to: a
counter is then maintained for you, starting from 1 and set to +last
value + 1+ on each column, where +last value+ was either computed or
given in the config. So you can even omit numbering for only 'some' of
the columns (see the sketch following this parameter description).
+
columns = x, y, a, b, d:6, c:5
+
In case you have a lot of columns per table, you will want to use
multiple lines for this parameter value. Python's ConfigParser module
knows how to read multi-line parameters; you don't have to escape
anything.
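The automatic numbering rule described above can be sketched as follows; this is a simplified, illustrative rendering of the column-list parsing added in this commit's +pgloader/pgloader.py+, not the verbatim code.
----
# Minimal sketch of the automatic column numbering (illustrative only).
def parse_columns(defstring):
    """Parse 'x, y, a, b, d:6, c:5' into a list of (name, position) pairs."""
    columns = []
    serial = 1
    for field_def in defstring.split(','):
        if ':' not in field_def:
            # no explicit number given: use the running counter
            name, pos = field_def.strip(), serial
        else:
            name, pos = [x.strip() for x in field_def.split(':')]
            pos = int(pos)
        columns.append((name, pos))
        # next implicit position is last value + 1
        serial = pos + 1
    return columns

# parse_columns('x, y, a, b, d:6, c:5')
#   -> [('x', 1), ('y', 2), ('a', 3), ('b', 4), ('d', 6), ('c', 5)]
----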
only_cols::
+
If you want to load only a part of the columns you have in the data
file, this option lets you define which columns you're interested
in. +only_cols+ is a comma-separated list of ranges or values, as in
the following example.
+
only_cols = 1-3, 5
+
This parameter is optional and defaults to the list of all columns
given in the columns parameter list, in colname order. A sketch of how
such a range list expands follows this description.
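As an illustration of the range syntax, a value such as '1-3, 5' expands to the column positions 1, 2, 3 and 5. The following sketch is illustrative only and is not pgloader's actual code.
----
# Illustrative sketch: expand an only_cols value like '1-3, 5' into positions.
def expand_only_cols(value):
    positions = []
    for item in value.split(','):
        item = item.strip()
        if '-' in item:
            low, high = [int(x) for x in item.split('-')]
            positions.extend(range(low, high + 1))
        else:
            positions.append(int(item))
    return positions

# expand_only_cols('1-3, 5') -> [1, 2, 3, 5]
----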
blob_columns::
+
The definition of the columns where to find a blob or clob
reference. This definition is composed of a table column name, a
column number (counting from one) referencing the Informix +UNLOAD+
data file, and a large object type, separated by colons. You can have
several columns in this field, separated by a comma.
+
Supported large object types are Informix blob and clob; the expected
configuration strings are respectively +ifx_blob+ for binary (bytea)
content and +ifx_clob+ for text values.
+
Here's an example:
+
blob_type = clob_column:3:ifx_blob, other_clob_column:5:ifx_clob
== TEXT FORMAT CONFIGURATION PARAMETERS ==
field_count::
+
The +UNLOAD+ command does not escape newlines when they appear in
table data. Hence, you may obtain multi-line data files, where a
single database row (say tuple if you prefer to) can span multiple
physical lines into the unloaded file.
+
If this is your case, you may want to configure here the number of
columns per tuple. Then pgloader will count columns and buffer line
input in order to re-assemble several physical lines into one data row
when needed.
+
This parameter is optional.
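The buffering behaviour can be sketched like this; it illustrates the rule described above (a logical row is complete once +field_count+ fields have been seen). It is not pgloader's actual reader, and for simplicity it ignores +trailing_sep+ and escaped separators inside fields.
----
# Illustrative sketch only: reassemble physical lines into logical rows.
def reassemble(lines, field_count, field_sep='|'):
    buffer = ''
    for line in lines:
        buffer = buffer + '\n' + line if buffer else line
        # field_count fields means field_count - 1 separators per row
        if buffer.count(field_sep) >= field_count - 1:
            yield buffer
            buffer = ''
----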
trailing_sep::
+
If this option is set to True, the input data file is known to append
a +field_sep+ as the last character of each of its lines. With this
option set, this last character is then not considered as a field
separator.
+
This parameter is optional and defaults to +False+.
newline_escapes::
+
Sometimes the input data file has field values containing newlines,
and the export program used (such as the Informix +UNLOAD+ command)
escapes those in-field newlines. In that case you want +pgloader+ to
recognize the escape sequences while preserving the embedded newlines.
+
This option does the described work on specified fields and
considering the escaping character you configure, following this
syntax:
+
newline_escapes = colname:\, other_colname:§
+
This parameter is optional, and the extra work is only done when
set. You can configure +newline_escapes+ for as many fields as
necessary, and you may configure a different escaping character each
time.
+
Please note that at the moment, +pgloader+ only supports
single-character +newline_escapes+.
+
When both a global (see +[pgsql]+ section) +newline_escapes+ parameter
and a table-local one are set, +pgloader+ issues a warning and only
considers the global setting.
== CSV FORMAT CONFIGURATION PARAMETERS ==
doublequote::
Controls how instances of +quotechar+ appearing inside a field
should themselves be quoted. When +True+, the character is
doubled. When +False+, the +escapechar+ is used as a prefix to the
+quotechar+. It defaults to +True+.
escapechar::
A one-character string used by the writer to escape the delimiter
if quoting is set to +QUOTE_NONE+ and the +quotechar+ if
+doublequote+ is +False+. On reading, the +escapechar+ removes any
special meaning from the following character. It defaults to
+None+, which disables escaping.
quotechar::
A one-character string used to quote fields containing special
characters, such as the +delimiter+ or +quotechar+, or which
contain new-line characters. It defaults to '"'.
skipinitialspace::
When +True+, whitespace immediately following the +delimiter+ is
ignored. The default is +False+.
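These four options mirror the dialect attributes of Python's +csv+ module, which their names and descriptions suggest is what reads csv-format sections. Under that assumption, a configured file could be read as sketched below; the function and its parameter defaults are illustrative, not pgloader's API.
----
import csv

# Illustrative sketch only: read a csv-format data file with the options above.
def read_csv_rows(filename, field_sep=',', quotechar='"', doublequote=True,
                  escapechar=None, skipinitialspace=False):
    f = open(filename)
    reader = csv.reader(f, delimiter=field_sep, quotechar=quotechar,
                        doublequote=doublequote, escapechar=escapechar,
                        skipinitialspace=skipinitialspace)
    rows = [row for row in reader]
    f.close()
    return rows
----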
== CONFIGURATION EXAMPLE ==
Please see the given configuration example which should be distributed in
+/usr/share/doc/pgloader/examples/pgloader.conf+.
The example configuration file comes with example data and can be used
as a unit test of +pgloader+.
== HISTORY ==
+pgloader+ was first a +tcl+ tool written by Christopher
Kings-Lynne and Jan Wieck, and then maintained by Jean-Paul
Argudo. When it became clear it would be easier to rewrite it in
another language than to properly learn +tcl+ and add to the project
missing options, +pgloader+ was rewritten in python by Dimitri
Fontaine.
+pgloader+ was rewritten to act as an Informix to PostgreSQL migration
helper which imported Informix large objects directly into a
PostgreSQL database.
Then, as we got some data we couldn't find tools to care about, we
decided ifx_blob would become +pgloader+, as it had to be able to
import all Informix +UNLOAD+ data. Those data contain escaped
separators in unquoted data fields and multi-line fields (+\r+ and
+\n+ are not escaped).
== BUGS ==
Please report bugs to Dimitri Fontaine <dim@tapoueh.org>.
When the last line is alone in a +COPY+ command and its parsing ends in
error (not enough columns read, for example), no information is given
back by +pgloader+.
== AUTHORS ==
+pgloader+ is written by Dimitri Fontaine <dim@tapoueh.org>.

View File

@ -1,5 +1,4 @@
#! /usr/bin/env python
# -*- coding: ISO-8859-15 -*-
# Author: Dimitri Fontaine <dimitri@dalibo.com>
"""
@ -165,6 +164,9 @@ def parse_config(conffile):
if config.has_option(section, 'client_encoding'):
dbconn.client_encoding = config.get(section, 'client_encoding')
if config.has_option(section, 'datestyle'):
dbconn.datestyle = config.get(section, 'datestyle')
if config.has_option(section, 'copy_every'):
dbconn.copy_every = config.getint(section, 'copy_every')
@ -174,6 +176,12 @@ def parse_config(conffile):
if config.has_option(section, 'copy_delimiter'):
dbconn.copy_sep = config.get(section, 'copy_delimiter')
# optionnal global newline_escapes
if config.has_option(section, 'newline_escapes'):
setting = pgloader.tools.parse_config_string(
config.get(section, 'newline_escapes'))
pgloader.options.NEWLINE_ESCAPES = setting
# Then there are null and empty_string optionnal parameters
# They canbe overriden in specific table configuration
if config.has_option(section, 'null'):
@ -184,12 +192,6 @@ def parse_config(conffile):
pgloader.options.EMPTY_STRING = pgloader.tools.parse_config_string(
config.get(section, 'empty_string'))
# optionnal global newline_escapes
if config.has_option(section, 'newline_escapes'):
setting = pgloader.tools.parse_config_string(
config.get(section, 'newline_escapes'))
pgloader.options.NEWLINE_ESCAPES = setting
except Exception, error:
print "Error: Could not initialize PostgreSQL connection:"
print error

View File

@ -1,4 +1,3 @@
# -*- coding: ISO-8859-15 -*-
# Author: Dimitri Fontaine <dimitri@dalibo.com>
#
# pgloader text format reader

View File

@ -1,4 +1,3 @@
# -*- coding: ISO-8859-15 -*-
# Author: Dimitri Fontaine <dimitri@dalibo.com>
#
# pgloader database connection handling
@ -9,7 +8,7 @@ from cStringIO import StringIO
from options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC
from options import TRUNCATE, VACUUM
from options import INPUT_ENCODING, PG_CLIENT_ENCODING
from options import INPUT_ENCODING, PG_CLIENT_ENCODING, DATESTYLE
from options import COPY_SEP, FIELD_SEP, CLOB_SEP, NULL, EMPTY_STRING
from tools import PGLoader_Error
@ -40,6 +39,7 @@ class db:
self.copy_every = copy_every
self.commit_every = commit_every
self.client_encoding = client_encoding
self.datestyle = DATESTYLE
self.null = NULL
self.empty_string = EMPTY_STRING
@ -72,6 +72,22 @@ class db:
cursor.execute(sql, [self.client_encoding])
cursor.close()
def set_datestyle(self):
""" set session datestyle to self.datestyle """
if self.datestyle is None:
return
if DEBUG:
# debug only cause reconnecting happens on every
# configured section
print 'Setting datestyle to %s' % self.datestyle
sql = 'set session datestyle to %s'
cursor = self.dbconn.cursor()
cursor.execute(sql, [self.datestyle])
cursor.close()
def reset(self):
""" reset internal counters and open a new database connection """
self.buffer = None
@ -94,6 +110,7 @@ class db:
self.dbconn = psycopg.connect(self.dsn)
self.set_encoding()
self.set_datestyle()
def print_stats(self):
""" output some stats about recent activity """
@ -339,8 +356,8 @@ class db:
except psycopg.DatabaseError, error:
# non recoverable error
mesg = "\n".join(["Please check PostgreSQL logs",
"HINT: double check your client_encoding" +
" and copy_delimiter settings"])
"HINT: double check your client_encoding,"+
" datestyle and copy_delimiter settings"])
raise PGLoader_Error, mesg
# prepare next run

View File

@ -1,4 +1,3 @@
# -*- coding: ISO-8859-15 -*-
# Author: Dimitri Fontaine <dimitri@dalibo.com>
#
# pgloader Large Object support

View File

@ -1,10 +1,10 @@
# -*- coding: ISO-8859-15 -*-
# Author: Dimitri Fontaine <dimitri@dalibo.com>
#
# Some common options, for each module to get them
INPUT_ENCODING = None
PG_CLIENT_ENCODING = 'latin9'
DATESTYLE = None
COPY_SEP = None
FIELD_SEP = '|'

View File

@ -1,4 +1,3 @@
# -*- coding: ISO-8859-15 -*-
# Author: Dimitri Fontaine <dimitri@dalibo.com>
#
# pgloader main class
@ -77,6 +76,14 @@ class PGLoader:
print "client_encoding: '%s'" % self.db.client_encoding
# optionnal local option datestyle
if config.has_option(name, 'datestyle'):
self.db.datestyle = config.get(name, 'datestyle')
if DEBUG:
print "datestyle: '%s'" % self.db.datestyle
##
# data filename
for opt in ('table', 'filename'):
@ -252,8 +259,14 @@ class PGLoader:
f = self.__dict__[attr] = []
try:
serial = 1
for field_def in str.split(','):
properties = [x.strip() for x in field_def.split(':')]
if argtype == 'int' and field_def.find(':') == -1:
# support for automatic ordering
properties = [field_def.strip(), serial]
else:
properties = [x.strip() for x in field_def.split(':')]
if not btype:
# normal column definition, for COPY usage
@ -265,6 +278,10 @@ class PGLoader:
# UPDATE usage
colname, arg, btype = properties
f.append((colname, __getarg(arg, argtype), btype))
# update serial
if argtype == 'int':
serial = int(arg) + 1
except Exception, error:
# FIXME: make some errors and write some error messages

View File

@ -1,4 +1,3 @@
# -*- coding: ISO-8859-15 -*-
# Author: Dimitri Fontaine <dim@tapoueh.org>
#
# pgloader data reader interface and defaults

View File

@ -1,4 +1,3 @@
# -*- coding: ISO-8859-15 -*-
# Author: Dimitri Fontaine <dimitri@dalibo.com>
#
# pgloader text format reader

View File

@ -1,4 +1,3 @@
# -*- coding: ISO-8859-15 -*-
# Author: Dimitri Fontaine <dimitri@dalibo.com>
#
# pgloader librairies