mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-04 10:31:02 +02:00
Support partial columns definition for COPY
This commit is contained in:
parent
7eed610ecb
commit
29934d7112
@ -49,3 +49,10 @@ trailing_sep = True
|
||||
newline_escapes = c:\
|
||||
field_count = 3
|
||||
columns = a:1, b:3, c:2
|
||||
|
||||
[serial]
|
||||
table = serial
|
||||
filename = serial/serial.data
|
||||
field_sep = ;
|
||||
partial_copy = True
|
||||
columns = b:2, c:1
|
||||
|
||||
7
examples/serial/serial.data
Normal file
7
examples/serial/serial.data
Normal file
@ -0,0 +1,7 @@
|
||||
some first row text;2006-11-11
|
||||
some second row text;2006-11-11
|
||||
some third row text;2006-10-12
|
||||
\ ;2006-10-4
|
||||
some fifth row text;2006-5-12
|
||||
some sixth row text;2006-7-10
|
||||
some null date to play with;
|
||||
5
examples/serial/serial.sql
Normal file
5
examples/serial/serial.sql
Normal file
@ -0,0 +1,5 @@
|
||||
CREATE TABLE serial (
|
||||
a serial primary key,
|
||||
b date,
|
||||
c text
|
||||
);
|
||||
@ -44,7 +44,7 @@ Import CSV data and Large Object to PostgreSQL
|
||||
</refsynopsisdiv>
|
||||
|
||||
<refsect1>
|
||||
<title>description</title>
|
||||
<title>DESCRIPTION</title>
|
||||
<para>
|
||||
<command>pgloader</command> imports data from a flat file and
|
||||
inserts it into a database table. It uses a flat file per
|
||||
@ -81,7 +81,7 @@ Import CSV data and Large Object to PostgreSQL
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>options</title>
|
||||
<title>OPTIONS</title>
|
||||
|
||||
<para>
|
||||
In order for <command>pgloader</command> to run, you have to
|
||||
@ -263,7 +263,7 @@ Import CSV data and Large Object to PostgreSQL
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>configuration</title>
|
||||
<title>CONFIGURATION</title>
|
||||
<para>
|
||||
The configuration file has a .ini file syntax, its first section
|
||||
has to be the <command>pgsql</command> one, defining how to
|
||||
@ -590,6 +590,21 @@ Import CSV data and Large Object to PostgreSQL
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>partial_copy</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
If your columns definition does not list every column of the
|
||||
PostgreSQL table, set this parameter to
|
||||
<command>True</command>.
|
||||
</para>
|
||||
<para>
|
||||
This parameter is optional and defaults to
|
||||
<command>False</command>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>newline_escapes</option></term>
|
||||
<listitem>
|
||||
@ -693,7 +708,7 @@ Import CSV data and Large Object to PostgreSQL
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>confifuration example</title>
|
||||
<title>CONFIGURATION EXAMPLE</title>
|
||||
<para>
|
||||
Please see the given configuration example which should be distributed
|
||||
in
|
||||
@ -702,7 +717,7 @@ Import CSV data and Large Object to PostgreSQL
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>History</title>
|
||||
<title>HISTORY</title>
|
||||
<para>
|
||||
<command>pgloader</command> was at first an Informix to
|
||||
PostgreSQL migration helper which imported Informix large
|
||||
@ -720,7 +735,7 @@ Import CSV data and Large Object to PostgreSQL
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Bugs</title>
|
||||
<title>BUGS</title>
|
||||
<para>
|
||||
Please report bugs to Dimitri Fontaine <dim@dalibo.com>.
|
||||
</para>
|
||||
@ -732,7 +747,7 @@ Import CSV data and Large Object to PostgreSQL
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Authors</title>
|
||||
<title>AUTHORS</title>
|
||||
<para>
|
||||
<command>pgloader</command> is written by <author>Dimitri
|
||||
Fontaine</author> <email>dim@dalibo.com</email>.
|
||||
|
||||
@ -82,6 +82,7 @@ class db:
|
||||
self.running_commands = 0
|
||||
self.last_commit_time = time.time()
|
||||
self.first_commit_time = self.last_commit_time
|
||||
self.partial_coldef = None
|
||||
|
||||
if DEBUG:
|
||||
if self.dbconn is not None:
|
||||
@ -256,11 +257,17 @@ class db:
|
||||
print "--- COPY data buffer saved in %s ---" % n
|
||||
return n
|
||||
|
||||
def copy_from(self, table, columns, input_line, reject, EOF = False):
|
||||
def copy_from(self, table, partial_coldef, columns, input_line,
|
||||
reject, EOF = False):
|
||||
""" Generate some COPY SQL for PostgreSQL """
|
||||
ok = True
|
||||
if not self.copy: self.copy = True
|
||||
|
||||
if partial_coldef is not None:
|
||||
# we prefer not having to mess with the table param on the caller side
|
||||
# as it's an implementation detail concerning db class
|
||||
table = "%s (%s) " % (table, partial_coldef)
|
||||
|
||||
if EOF or self.running_commands == self.copy_every \
|
||||
and self.buffer is not None:
|
||||
# time to copy data to PostgreSQL table
|
||||
|
||||
@ -136,6 +136,17 @@ class PGLoader:
|
||||
print 'columns', self.columns
|
||||
print 'blob_columns', self.blob_cols
|
||||
|
||||
|
||||
# optional partial loading option (sequences case)
|
||||
self.partial_copy = False
|
||||
self.partial_coldef = None
|
||||
|
||||
if config.has_option(name, 'partial_copy'):
|
||||
self.partial_copy = config.get(name, 'partial_copy') == 'True'
|
||||
|
||||
if self.partial_copy:
|
||||
self.partial_coldef = [name for (name, pos) in self.columns]
|
||||
|
||||
# optional newline escaped option
|
||||
self.newline_escapes = []
|
||||
if config.has_option(name, 'newline_escapes'):
|
||||
@ -352,6 +363,14 @@ class PGLoader:
|
||||
def csv_import(self):
|
||||
""" import CSV data, using COPY """
|
||||
|
||||
##
|
||||
# Inform database about optional partial columns definition
|
||||
# usage for COPY (sequences case, e.g.)
|
||||
if self.partial_coldef is not None:
|
||||
partial_copy_coldef = ", ".join(self.partial_coldef)
|
||||
else:
|
||||
partial_copy_coldef = None
|
||||
|
||||
for line, columns in self.read_data():
|
||||
if self.blob_cols is not None:
|
||||
columns, rowids = self.read_blob(line, columns)
|
||||
@ -374,14 +393,17 @@ class PGLoader:
|
||||
print line
|
||||
print c_ordered
|
||||
print len(c_ordered)
|
||||
print self.db.partial_coldef
|
||||
print
|
||||
|
||||
if not DRY_RUN:
|
||||
self.db.copy_from(self.table, c_ordered, line, self.reject)
|
||||
self.db.copy_from(self.table, partial_copy_coldef,
|
||||
c_ordered, line, self.reject)
|
||||
|
||||
if not DRY_RUN:
|
||||
# we may need a last COPY for the rest of data
|
||||
self.db.copy_from(self.table, None, None, self.reject, EOF = True)
|
||||
self.db.copy_from(self.table, partial_copy_coldef,
|
||||
None, None, self.reject, EOF = True)
|
||||
|
||||
return
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user