Support partial columns definition for COPY

This commit is contained in:
dim 2007-01-19 11:30:09 +00:00
parent 7eed610ecb
commit 29934d7112
6 changed files with 73 additions and 10 deletions

View File

@ -49,3 +49,10 @@ trailing_sep = True
newline_escapes = c:\
field_count = 3
columns = a:1, b:3, c:2
[serial]
table = serial
filename = serial/serial.data
field_sep = ;
partial_copy = True
columns = b:2, c:1

View File

@ -0,0 +1,7 @@
some first row text;2006-11-11
some second row text;2006-11-11
some third row text;2006-10-12
\ ;2006-10-4
some fifth row text;2006-5-12
some sixth row text;2006-7-10
some null date to play with;

View File

@ -0,0 +1,5 @@
CREATE TABLE serial (
a serial primary key,
b date,
c text
);

View File

@ -44,7 +44,7 @@ Import CSV data and Large Object to PostgreSQL
</refsynopsisdiv>
<refsect1>
<title>description</title>
<title>DESCRIPTION</title>
<para>
<command>pgloader</command> imports data from a flat file and
inserts it into a database table. It uses a flat file per
@ -81,7 +81,7 @@ Import CSV data and Large Object to PostgreSQL
</refsect1>
<refsect1>
<title>options</title>
<title>OPTIONS</title>
<para>
In order for <command>pgloader</command> to run, you have to
@ -263,7 +263,7 @@ Import CSV data and Large Object to PostgreSQL
</refsect1>
<refsect1>
<title>configuration</title>
<title>CONFIGURATION</title>
<para>
The configuration file has a .ini file syntax, its first section
has to be the <command>pgsql</command> one, defining how to
@ -590,6 +590,21 @@ Import CSV data and Large Object to PostgreSQL
</listitem>
</varlistentry>
<varlistentry>
<term><option>partial_copy</option></term>
<listitem>
<para>
If your columns definition does not list every column of the
PostgreSQL table (for example, when a serial column is left
to its default value), set this parameter to
<command>True</command>.
</para>
<para>
This parameter is optional and defaults to
<command>False</command>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>newline_escapes</option></term>
<listitem>
@ -693,7 +708,7 @@ Import CSV data and Large Object to PostgreSQL
</refsect1>
<refsect1>
<title>confifuration example</title>
<title>CONFIGURATION EXAMPLE</title>
<para>
Please see the given configuration example which should be distributed
in
@ -702,7 +717,7 @@ Import CSV data and Large Object to PostgreSQL
</refsect1>
<refsect1>
<title>History</title>
<title>HISTORY</title>
<para>
<command>pgloader</command> was at first an Informix to
PostgreSQL migration helper which imported Informix large
@ -720,7 +735,7 @@ Import CSV data and Large Object to PostgreSQL
</refsect1>
<refsect1>
<title>Bugs</title>
<title>BUGS</title>
<para>
Please report bugs to Dimitri Fontaine &lt;dim@dalibo.com&gt;.
</para>
@ -732,7 +747,7 @@ Import CSV data and Large Object to PostgreSQL
</refsect1>
<refsect1>
<title>Authors</title>
<title>AUTHORS</title>
<para>
<command>pgloader</command> is written by <author>Dimitri
Fontaine</author> <email>dim@dalibo.com</email>.

View File

@ -82,6 +82,7 @@ class db:
self.running_commands = 0
self.last_commit_time = time.time()
self.first_commit_time = self.last_commit_time
self.partial_coldef = None
if DEBUG:
if self.dbconn is not None:
@ -256,11 +257,17 @@ class db:
print "--- COPY data buffer saved in %s ---" % n
return n
def copy_from(self, table, columns, input_line, reject, EOF = False):
def copy_from(self, table, partial_coldef, columns, input_line,
reject, EOF = False):
""" Generate some COPY SQL for PostgreSQL """
ok = True
if not self.copy: self.copy = True
if partial_coldef is not None:
# we prefer not having to mess table param on the caller side
# as it's an implementation detail concerning db class
table = "%s (%s) " % (table, partial_coldef)
if EOF or self.running_commands == self.copy_every \
and self.buffer is not None:
# time to copy data to PostgreSQL table

View File

@ -136,6 +136,17 @@ class PGLoader:
print 'columns', self.columns
print 'blob_columns', self.blob_cols
# Optional partial loading option (sequences case): when partial_copy
# is True, only the configured columns are named in the COPY statement.
self.partial_copy = False
self.partial_coldef = None
if config.has_option(name, 'partial_copy'):
    self.partial_copy = config.get(name, 'partial_copy') == 'True'
if self.partial_copy:
    # Do not reuse 'name' as the loop variable here: Python 2 list
    # comprehensions leak their loop variable into the enclosing scope,
    # which would overwrite the section name that the config lookups
    # following this block still rely on.
    self.partial_coldef = [colname for (colname, pos) in self.columns]
# optional newline escapes option
self.newline_escapes = []
if config.has_option(name, 'newline_escapes'):
@ -352,6 +363,14 @@ class PGLoader:
def csv_import(self):
""" import CSV data, using COPY """
##
# Inform the database about the optional partial columns definition
# to use for COPY (e.g. the sequences case)
if self.partial_coldef is not None:
partial_copy_coldef = ", ".join(self.partial_coldef)
else:
partial_copy_coldef = None
for line, columns in self.read_data():
if self.blob_cols is not None:
columns, rowids = self.read_blob(line, columns)
@ -374,14 +393,17 @@ class PGLoader:
print line
print c_ordered
print len(c_ordered)
print self.db.partial_coldef
print
if not DRY_RUN:
self.db.copy_from(self.table, c_ordered, line, self.reject)
self.db.copy_from(self.table, partial_copy_coldef,
c_ordered, line, self.reject)
if not DRY_RUN:
# we may need a last COPY for the rest of data
self.db.copy_from(self.table, None, None, self.reject, EOF = True)
self.db.copy_from(self.table, partial_copy_coldef,
None, None, self.reject, EOF = True)
return