From 29934d7112e140615b1dd6c4451722ee75008d1f Mon Sep 17 00:00:00 2001 From: dim Date: Fri, 19 Jan 2007 11:30:09 +0000 Subject: [PATCH] Support partial columns definition for COPY --- examples/pgloader.conf | 7 +++++++ examples/serial/serial.data | 7 +++++++ examples/serial/serial.sql | 5 +++++ pgloader.1.sgml | 29 ++++++++++++++++++++++------- pgloader/db.py | 9 ++++++++- pgloader/pgloader.py | 26 ++++++++++++++++++++++++-- 6 files changed, 73 insertions(+), 10 deletions(-) create mode 100644 examples/serial/serial.data create mode 100644 examples/serial/serial.sql diff --git a/examples/pgloader.conf b/examples/pgloader.conf index 78473da..f4e398b 100644 --- a/examples/pgloader.conf +++ b/examples/pgloader.conf @@ -49,3 +49,10 @@ trailing_sep = True newline_escapes = c:\ field_count = 3 columns = a:1, b:3, c:2 + +[serial] +table = serial +filename = serial/serial.data +field_sep = ; +partial_copy = True +columns = b:2, c:1 diff --git a/examples/serial/serial.data b/examples/serial/serial.data new file mode 100644 index 0000000..43e3d5d --- /dev/null +++ b/examples/serial/serial.data @@ -0,0 +1,7 @@ +some first row text;2006-11-11 +some second row text;2006-11-11 +some third row text;2006-10-12 +\ ;2006-10-4 +some fifth row text;2006-5-12 +some sixth row text;2006-7-10 +some null date to play with; \ No newline at end of file diff --git a/examples/serial/serial.sql b/examples/serial/serial.sql new file mode 100644 index 0000000..916e00e --- /dev/null +++ b/examples/serial/serial.sql @@ -0,0 +1,5 @@ +CREATE TABLE serial ( + a serial primary key, + b date, + c text +); \ No newline at end of file diff --git a/pgloader.1.sgml b/pgloader.1.sgml index 046280a..159d67c 100644 --- a/pgloader.1.sgml +++ b/pgloader.1.sgml @@ -44,7 +44,7 @@ Import CSV data and Large Object to PostgreSQL - description + DESCRIPTION pgloader imports data from a flat file and insert it into a database table. 
It uses a flat file per @@ -81,7 +81,7 @@ Import CSV data and Large Object to PostgreSQL - options + OPTIONS In order for pgloader to run, you have to @@ -263,7 +263,7 @@ Import CSV data and Large Object to PostgreSQL - configuration + CONFIGURATION The configuration file has a .ini file syntax, its first section has to be the pgsql one, defining how to @@ -590,6 +590,21 @@ Import CSV data and Large Object to PostgreSQL + + + + + If your columns definition does not contain all of the + PostgreSQL table definition, set this parameter to + True. + + + This parameter is optional and defaults to + False. + + + + @@ -693,7 +708,7 @@ Import CSV data and Large Object to PostgreSQL - confifuration example + CONFIGURATION EXAMPLE Please see the given configuration example which should be distributed in @@ -702,7 +717,7 @@ Import CSV data and Large Object to PostgreSQL - History + HISTORY pgloader was at first an Informix to PostgreSQL migration helper which imported Informix large @@ -720,7 +735,7 @@ Import CSV data and Large Object to PostgreSQL - Bugs + BUGS Please report bugs to Dimitri Fontaine <dim@dalibo.com>. @@ -732,7 +747,7 @@ Import CSV data and Large Object to PostgreSQL - Authors + AUTHORS pgloader is written by Dimitri Fontaine dim@dalibo.com. 
diff --git a/pgloader/db.py b/pgloader/db.py index c62dc46..909cb28 100644 --- a/pgloader/db.py +++ b/pgloader/db.py @@ -82,6 +82,7 @@ class db: self.running_commands = 0 self.last_commit_time = time.time() self.first_commit_time = self.last_commit_time + self.partial_coldef = None if DEBUG: if self.dbconn is not None: @@ -256,11 +257,17 @@ class db: print "--- COPY data buffer saved in %s ---" % n return n - def copy_from(self, table, columns, input_line, reject, EOF = False): + def copy_from(self, table, partial_coldef, columns, input_line, + reject, EOF = False): """ Generate some COPY SQL for PostgreSQL """ ok = True if not self.copy: self.copy = True + if partial_coldef is not None: + # we prefer not having to mess table param on the caller side + # as it's an implementation detail concerning db class + table = "%s (%s) " % (table, partial_coldef) + if EOF or self.running_commands == self.copy_every \ and self.buffer is not None: # time to copy data to PostgreSQL table diff --git a/pgloader/pgloader.py b/pgloader/pgloader.py index d729574..61c5a20 100644 --- a/pgloader/pgloader.py +++ b/pgloader/pgloader.py @@ -136,6 +136,17 @@ class PGLoader: print 'columns', self.columns print 'blob_columns', self.blob_cols + + # optionnal partial loading option (sequences case) + self.partial_copy = False + self.partial_coldef = None + + if config.has_option(name, 'partial_copy'): + self.partial_copy = config.get(name, 'partial_copy') == 'True' + + if self.partial_copy: + self.partial_coldef = [name for (name, pos) in self.columns] + # optionnal newline escaped option self.newline_escapes = [] if config.has_option(name, 'newline_escapes'): @@ -352,6 +363,14 @@ class PGLoader: def csv_import(self): """ import CSV data, using COPY """ + ## + # Inform database about optionnal partial columns definition + # usage for COPY (sequences case, e.g.) 
+ if self.partial_coldef is not None: + partial_copy_coldef = ", ".join(self.partial_coldef) + else: + partial_copy_coldef = None + for line, columns in self.read_data(): if self.blob_cols is not None: columns, rowids = self.read_blob(line, columns) @@ -374,14 +393,17 @@ class PGLoader: print line print c_ordered print len(c_ordered) + print self.db.partial_coldef print if not DRY_RUN: - self.db.copy_from(self.table, c_ordered, line, self.reject) + self.db.copy_from(self.table, partial_copy_coldef, + c_ordered, line, self.reject) if not DRY_RUN: # we may need a last COPY for the rest of data - self.db.copy_from(self.table, None, None, self.reject, EOF = True) + self.db.copy_from(self.table, partial_copy_coldef, + None, None, self.reject, EOF = True) return