mirror of
https://github.com/dimitri/pgloader.git
synced 2026-02-05 14:31:41 +01:00
pgloader version 2.2.3, with User Defined Columns
* User Defined Columns * Temporary files with copy data content now suffixed .pgloader * New option --version * Fix TextReader newline_escapes configuration option reading * Fix Reader reject initialisation * Skip database related settings when in DRY_RUN mode (-n) * List all command line options from man page synopsis
This commit is contained in:
parent
aacfc9d000
commit
dbaeef0ad2
16
debian/changelog
vendored
16
debian/changelog
vendored
@ -1,10 +1,20 @@
|
||||
pgloader (2.2.3) unstable; urgency=low
|
||||
|
||||
* User Defined Columns
|
||||
* Temporary files with copy data content now suffixed .pgloader
|
||||
* New option --version
|
||||
* Fix TextReader newline_escapes configuration option reading
|
||||
* Fix Reader reject initialisation
|
||||
* Skip database related settings when in DRY_RUN mode (-n)
|
||||
* List all command line options from man page synopsis
|
||||
|
||||
-- Dimitri Fontaine <dim@tapoueh.org> Wed, 14 Nov 2007 21:57:39 +0100
|
||||
|
||||
pgloader (2.2.2) unstable; urgency=low
|
||||
|
||||
* New command line options --quiet and --summary (-qs for short)
|
||||
* Bugfix: temp filename no more contains columns (per user report)
|
||||
* debian package now properly include and install the man page
|
||||
|
||||
-- Dimitri Fontaine <dim@tapoueh.org> Sun, 11 Nov 2007 20:44:23 +0100
|
||||
-- Dimitri Fontaine <dim@tapoueh.org> Sat, 20 Oct 2007 16:20:18 +0200
|
||||
|
||||
pgloader (2.2.1) unstable; urgency=low
|
||||
|
||||
|
||||
5
examples/udc/udc.data
Normal file
5
examples/udc/udc.data
Normal file
@ -0,0 +1,5 @@
|
||||
1§5§foo§bar
|
||||
2§10§bar§toto
|
||||
3§4§toto§titi
|
||||
4§18§titi§baz
|
||||
5§2§baz§foo
|
||||
5
examples/udc/udc.sql
Normal file
5
examples/udc/udc.sql
Normal file
@ -0,0 +1,5 @@
|
||||
CREATE TABLE udc (
|
||||
b integer primary key,
|
||||
c text,
|
||||
d integer
|
||||
);
|
||||
@ -6,9 +6,11 @@ pgloader - Import CSV data and Large Object to PostgreSQL
|
||||
|
||||
== SYNOPSIS ==
|
||||
|
||||
pgloader [-c configuration file] [-p pedantic] [-d debug] [-v verbose]
|
||||
[-n dryrun] [-Cn count] [-Fn from] [-In from id] [-E input files encoding]
|
||||
[Section1 Section2]
|
||||
pgloader [--version] [-c configuration file]
|
||||
[-p pedantic] [-d debug] [-v verbose] [-q quiet] [-s summary]
|
||||
[-n dryrun] [-Cn count] [-Fn from] [-In from id]
|
||||
[-E input files encoding]
|
||||
[Section1 Section2]
|
||||
|
||||
== DESCRIPTION ==
|
||||
|
||||
@ -31,9 +33,9 @@ content properly escaped to the +COPY+ data.
|
||||
|
||||
+pgloader+ issues some timing statistics every +commit_every+ commits
|
||||
(see Configuration for this setting). At the end of each section
|
||||
processing, a summary of overall operations, numbers of updates and
|
||||
commits, time it took in seconds, errors logged and database errors is
|
||||
issued.
|
||||
processing, a summary of overall operations, numbers of rows copied
|
||||
and commits, time it took in seconds, errors logged and database
|
||||
errors is issued.
|
||||
|
||||
+pgloader+ is available from +pgfoundry+ at
|
||||
http://pgfoundry.org/projects/pgloader/[], where you'll find a debian
|
||||
@ -45,6 +47,10 @@ In order for pgloader to run, you have to edit a configuration file
|
||||
(see Configuration) consisting of Section definitions. Each section
|
||||
refers to a PostgreSQL table into which some data is to be loaded.
|
||||
|
||||
--version::
|
||||
|
||||
print out pgloader version, then quit.
|
||||
|
||||
-c, --config::
|
||||
|
||||
specifies the configuration file to use. The default file name is
|
||||
@ -184,6 +190,7 @@ single big +COPY+ attempt, but copy copy_every lines at a time.
|
||||
+
|
||||
This parameter is optional and defaults to 10000.
|
||||
|
||||
//////////////////////////////////////////
|
||||
commit_every::
|
||||
+
|
||||
PostgreSQL +COMMIT+ frequency, expressed in +UPDATE+ orders. A good
|
||||
@ -194,6 +201,7 @@ input lines.
|
||||
closing and when a SQL error occurs.
|
||||
+
|
||||
This parameter is optional and defaults to 1000.
|
||||
//////////////////////////////////////////
|
||||
|
||||
copy_delimiter::
|
||||
+
|
||||
@ -312,6 +320,7 @@ This parameter is optionnal and defaults to '\ ' (that is backslash
|
||||
followed by space). If defined on a table level, this local value will
|
||||
overwrite the global one.
|
||||
|
||||
//////////////////////////////////////////
|
||||
index::
|
||||
+
|
||||
Table index definition, to be used in blob +UPDATE+'ing. You define an
|
||||
@ -321,11 +330,15 @@ table has a composite key, then you can define multiple columns here,
|
||||
separated by a comma.
|
||||
+
|
||||
index = colname:3, other_colname:5
|
||||
//////////////////////////////////////////
|
||||
|
||||
columns::
|
||||
+
|
||||
You can define here table columns, with the same definition format as
|
||||
in previous index parameter.
|
||||
You can define here table columns, by giving their names and
|
||||
optionally column number (as found in your data file, and counting
|
||||
from 1) separated by a colon.
|
||||
+
|
||||
columns = x, y, a, b, d:6, c:5
|
||||
+
|
||||
Note you'll have to define here all the columns to be found in data
|
||||
file, whether you want to use them all or not. When not using them
|
||||
@ -335,18 +348,50 @@ As of +pgloader 2.2+ the column list used might not be the same as the
|
||||
table columns definition.
|
||||
+
|
||||
As of +pgloader 2.2.1+ you can omit column numbering if you want to, a
|
||||
counter is then maintained for you, starting from 1 and set to +last
|
||||
value + 1+ on each column, where +last value+ was either computed or
|
||||
counter is then maintained for you, starting from 1 and set to +$$last
|
||||
value + 1$$+ on each column, where +last value+ was either computed or
|
||||
given in the config. So you can even omit only 'some' columns in
|
||||
there.
|
||||
+
|
||||
columns = x, y, a, b, d:6, c:5
|
||||
+
|
||||
In case you have a lot of columns per table, you will want to use
|
||||
multiple lines for this parameter value. Python ConfigParser module
|
||||
knows how to read multi-line parameters, you don't have to escape
|
||||
anything.
|
||||
|
||||
user_defined_columns::
|
||||
+
|
||||
Those are special columns not found in the data file but which you
|
||||
want to load into the database. The configuration options beginning
|
||||
with +udc_+ are taken as column names with constant values. The
|
||||
following example defines the column +c+ as having the value +constant
|
||||
value+ for each and every row of the input data file.
|
||||
+
|
||||
udc_c = constant value
|
||||
+
|
||||
The option +copy_columns+ is used to define the exact +columnsList+
|
||||
given to +COPY+.
|
||||
+
|
||||
A simple use case is the loading into the same database table of data
|
||||
coming from more than one file. If you need to keep track of the data
|
||||
origin, add a column to the table model and define a 'udc_' for
|
||||
+pgloader+ to add a constant value in the database.
|
||||
+
|
||||
Using user-defined columns requires defining +copy_columns+ and is not
|
||||
compatible with +only_cols+ usage.
|
||||
+
|
||||
|
||||
copy_columns::
|
||||
+
|
||||
This option defines the columns to load from the input data file and
|
||||
the user defined columns, and in which order to do this. Place here
|
||||
the column names separated by commas.
|
||||
+
|
||||
copy_columns = b, c, d
|
||||
+
|
||||
This option is required if any user column is defined, and conflicts
|
||||
with the +only_cols+ option. It won't have any effect when used in a
|
||||
section where no user column is defined.
|
||||
|
||||
only_cols::
|
||||
+
|
||||
If you want to only load a part of the columns you have in the data
|
||||
@ -358,6 +403,9 @@ following example.
|
||||
+
|
||||
This parameter is optional and defaults to the list of all columns
|
||||
given on the columns parameter list, in the colname order.
|
||||
+
|
||||
This option conflicts with user defined columns and +copy_columns+
|
||||
option.
|
||||
|
||||
blob_columns::
|
||||
+
|
||||
|
||||
@ -36,6 +36,11 @@ def parse_options():
|
||||
usage = "%prog [-c <config_filename>] Section [Section ...]"
|
||||
parser = OptionParser(usage = usage)
|
||||
|
||||
parser.add_option("--version", action = "store_true",
|
||||
dest = "version",
|
||||
default = False,
|
||||
help = "show pgloader version")
|
||||
|
||||
parser.add_option("-c", "--config", dest = "config",
|
||||
default = "pgloader.conf",
|
||||
help = "configuration file, defauts to pgloader.conf")
|
||||
@ -98,6 +103,10 @@ def parse_options():
|
||||
|
||||
(opts, args) = parser.parse_args()
|
||||
|
||||
if opts.version:
|
||||
print "PgLoader version %s" % pgloader.options.PGLOADER_VERSION
|
||||
sys.exit(0)
|
||||
|
||||
# check existence en read ability of config file
|
||||
if not os.path.exists(opts.config):
|
||||
print "Error: Configuration file %s does not exists" % opts.config
|
||||
|
||||
@ -267,7 +267,7 @@ class db:
|
||||
""" save copy buffer to a temporary file for further inspection """
|
||||
import tempfile
|
||||
(f, n) = tempfile.mkstemp(prefix='%s.' % tablename,
|
||||
suffix='.pgimport', dir='/tmp')
|
||||
suffix='.pgloader', dir='/tmp')
|
||||
os.write(f, self.buffer.getvalue())
|
||||
os.close(f)
|
||||
|
||||
@ -276,7 +276,7 @@ class db:
|
||||
print " -- COPY data buffer saved in %s --" % n
|
||||
return n
|
||||
|
||||
def copy_from(self, table, table_colspec, columns, input_line,
|
||||
def copy_from(self, table, columnlist, columns, input_line,
|
||||
reject, EOF = False):
|
||||
""" Generate some COPY SQL for PostgreSQL """
|
||||
ok = True
|
||||
@ -286,7 +286,7 @@ class db:
|
||||
# build the table colomns specs from parameters
|
||||
# ie. we always issue COPY table (col1, col2, ..., coln) commands
|
||||
tablename = table
|
||||
table = "%s (%s) " % (table, ", ".join(table_colspec))
|
||||
table = "%s (%s) " % (table, ", ".join(columnlist))
|
||||
if DEBUG:
|
||||
print 'COPY %s' % table
|
||||
|
||||
|
||||
@ -2,6 +2,8 @@
|
||||
#
|
||||
# Some common options, for each module to get them
|
||||
|
||||
PGLOADER_VERSION = '2.2.3'
|
||||
|
||||
INPUT_ENCODING = None
|
||||
PG_CLIENT_ENCODING = 'latin9'
|
||||
DATESTYLE = None
|
||||
@ -28,5 +30,4 @@ COUNT = None
|
||||
FROM_COUNT = None
|
||||
FROM_ID = None
|
||||
|
||||
|
||||
|
||||
UDC_PREFIX = 'udc_'
|
||||
|
||||
@ -18,6 +18,7 @@ from options import COUNT, FROM_COUNT, FROM_ID
|
||||
from options import INPUT_ENCODING, PG_CLIENT_ENCODING
|
||||
from options import COPY_SEP, FIELD_SEP, CLOB_SEP, NULL, EMPTY_STRING
|
||||
from options import NEWLINE_ESCAPES
|
||||
from options import UDC_PREFIX
|
||||
|
||||
class PGLoader:
|
||||
"""
|
||||
@ -72,7 +73,7 @@ class PGLoader:
|
||||
self.db.client_encoding = parse_config_string(
|
||||
config.get(name, 'client_encoding'))
|
||||
|
||||
if DEBUG:
|
||||
if DEBUG and not DRY_RUN:
|
||||
print "client_encoding: '%s'" % self.db.client_encoding
|
||||
|
||||
|
||||
@ -80,7 +81,7 @@ class PGLoader:
|
||||
if config.has_option(name, 'datestyle'):
|
||||
self.db.datestyle = config.get(name, 'datestyle')
|
||||
|
||||
if DEBUG:
|
||||
if DEBUG and not DRY_RUN:
|
||||
print "datestyle: '%s'" % self.db.datestyle
|
||||
|
||||
|
||||
@ -112,22 +113,110 @@ class PGLoader:
|
||||
print 'blob_columns', self.blob_cols
|
||||
|
||||
|
||||
##
|
||||
# The config section can also provide user-defined colums
|
||||
# which are option beginning with options.UDC_PREFIX
|
||||
udcs = [o
|
||||
for o in config.options(name)
|
||||
if o[:len(UDC_PREFIX)] == UDC_PREFIX]
|
||||
|
||||
if len(udcs) > 0:
|
||||
self.udcs = []
|
||||
for udc in udcs:
|
||||
udc_name = udc[:]
|
||||
udc_name = udc_name[udc_name.find('_')+1:]
|
||||
udc_value = config.get(name, udc)
|
||||
|
||||
self.udcs.append((udc_name, udc_value))
|
||||
else:
|
||||
self.udcs = None
|
||||
|
||||
if DEBUG:
|
||||
print 'udcs:', self.udcs
|
||||
|
||||
# better check there's no user defined column overriding file
|
||||
# columns
|
||||
if self.udcs:
|
||||
errs = []
|
||||
cols = [c for (c, cn) in self.columns]
|
||||
for (udc_name, udc_value) in self.udcs:
|
||||
if udc_name in cols:
|
||||
errs.append(udc_name)
|
||||
|
||||
if errs:
|
||||
for c in errs:
|
||||
print 'Error: %s is configured both as a ' % c +\
|
||||
'%s.columns entry and as a user-defined column' \
|
||||
% name
|
||||
|
||||
self.config_errors += 1
|
||||
|
||||
# we need the copy_columns parameter if user-defined columns
|
||||
# are used
|
||||
if self.udcs:
|
||||
if config.has_option(name, 'copy_columns'):
|
||||
namelist = [n for (n, c) in self.columns] + \
|
||||
[n for (n, v) in self.udcs]
|
||||
|
||||
copy_columns = config.get(name, 'copy_columns').split(',')
|
||||
self.copy_columns = [x.strip()
|
||||
for x in copy_columns
|
||||
if x.strip() in namelist]
|
||||
|
||||
if len(self.copy_columns) != len(copy_columns):
|
||||
print 'Error: %s.copy_columns refers to ' % name +\
|
||||
'unconfigured columns '
|
||||
|
||||
self.config_errors += 1
|
||||
|
||||
else:
|
||||
print 'Error: section %s does not define ' % name +\
|
||||
'copy_columns but uses user-defined columns'
|
||||
|
||||
self.config_errors += 1
|
||||
|
||||
# in the copy_columns case, columnlist is that simple:
|
||||
self.columnlist = None
|
||||
if self.udcs:
|
||||
if self.copy_columns:
|
||||
self.columnlist = self.copy_columns
|
||||
|
||||
if DEBUG:
|
||||
print 'udcs', self.udcs
|
||||
if self.udcs:
|
||||
print 'copy_columns', self.copy_columns
|
||||
|
||||
##
|
||||
# We have for example columns = col1:2, col2:1
|
||||
# this means the order of input columns is not the same as the
|
||||
# awaited order of COPY, so we want a mapping index, here [2, 1]
|
||||
if self.columns is not None:
|
||||
self.col_mapping = [i for (c, i) in self.columns]
|
||||
#
|
||||
# The column mapping is to be done on all_columns, which
|
||||
# allows user to have their user-defined columns talken into
|
||||
# account in the COPY ordering.
|
||||
|
||||
self.col_mapping = [i for (c, i) in self.columns]
|
||||
|
||||
if self.col_mapping == range(1, len(self.columns)+1):
|
||||
# no mapping to do
|
||||
self.col_mapping = None
|
||||
|
||||
##
|
||||
# optionnal partial loading option (sequences case)
|
||||
# self.table_colspec is the column list to give to
|
||||
#
|
||||
# self.columnlist is the column list to give to
|
||||
# COPY table(...) command, either the cols given in
|
||||
# the only_cols config, or the columns directly
|
||||
|
||||
self.only_cols = None
|
||||
self.table_colspec = [n for (n, pos) in self.columns]
|
||||
|
||||
if config.has_option(name, 'only_cols'):
|
||||
if self.udcs:
|
||||
print 'Error: section %s defines both ' % name +\
|
||||
'user-defined columns and only_cols'
|
||||
|
||||
self.config_errors += 1
|
||||
|
||||
self.only_cols = config.get(name, 'only_cols')
|
||||
|
||||
##
|
||||
@ -147,17 +236,50 @@ class PGLoader:
|
||||
else:
|
||||
expanded.append(int(oc))
|
||||
|
||||
self.only_cols = expanded
|
||||
self.table_colspec = [self.columns[x-1][0] for x in expanded]
|
||||
# we have to find colspec based on self.columns
|
||||
self.only_cols = expanded
|
||||
self.columnlist = [self.columns[x-1][0] for x in expanded]
|
||||
|
||||
except Exception, e:
|
||||
print 'Error: section %s, only_cols: configured range is invalid' % name
|
||||
print 'Error: section %s, only_cols: ' % name +\
|
||||
'configured range is invalid'
|
||||
raise PGLoader_Error, e
|
||||
|
||||
if DEBUG:
|
||||
print "only_cols", self.only_cols
|
||||
print "table_colspec", self.table_colspec
|
||||
if self.only_cols is None:
|
||||
if self.columnlist is None:
|
||||
# default case, no user-defined cols, no restriction
|
||||
self.columnlist = [n for (n, pos) in self.columns]
|
||||
|
||||
if DEBUG:
|
||||
#print "columns", self.columns
|
||||
print "only_cols", self.only_cols
|
||||
#print "udcs", self.udcs
|
||||
print "columnlist", self.columnlist
|
||||
|
||||
##
|
||||
# This option is textreader specific, but being lazy and
|
||||
# short-timed, I don't make self._parse_fields() callable from
|
||||
# outside this class. Hence the code here.
|
||||
#
|
||||
# optionnal newline escaped option
|
||||
self.newline_escapes = []
|
||||
if config.has_option(name, 'newline_escapes'):
|
||||
if NEWLINE_ESCAPES is not None:
|
||||
# this parameter is globally set, will ignore local
|
||||
# definition
|
||||
if not QUIET:
|
||||
print "Warning: ignoring %s newline_escapes option" % name
|
||||
print " option is set to '%s' globally" \
|
||||
% NEWLINE_ESCAPES
|
||||
else:
|
||||
self._parse_fields('newline_escapes',
|
||||
config.get(name, 'newline_escapes'),
|
||||
argtype = 'char')
|
||||
|
||||
if NEWLINE_ESCAPES is not None:
|
||||
# set NEWLINE_ESCAPES for each table column
|
||||
self.newline_escapes = [(a, NEWLINE_ESCAPES)
|
||||
for (a, x) in self.columns]
|
||||
|
||||
##
|
||||
# data format, from which depend data reader
|
||||
@ -167,11 +289,16 @@ class PGLoader:
|
||||
|
||||
if self.format.lower() == 'csv':
|
||||
from csvreader import CSVReader
|
||||
self.reader = CSVReader(self.db, self.filename, self.table, self.columns)
|
||||
self.reader = CSVReader(self.db, self.reject,
|
||||
self.filename,
|
||||
self.table, self.columns)
|
||||
|
||||
elif self.format.lower() == 'text':
|
||||
from textreader import TextReader
|
||||
self.reader = TextReader(self.db, self.filename, self.table, self.columns)
|
||||
self.reader = TextReader(self.db, self.reject,
|
||||
self.filename,
|
||||
self.table, self.columns,
|
||||
self.newline_escapes)
|
||||
|
||||
if self.format is None:
|
||||
print 'Error: %s: format parameter needed' % name
|
||||
@ -342,32 +469,53 @@ class PGLoader:
|
||||
if self.blob_cols is not None:
|
||||
columns, rowids = self.read_blob(line, columns)
|
||||
|
||||
if DEBUG:
|
||||
print self.col_mapping
|
||||
print len(columns), len(self.col_mapping)
|
||||
data = columns
|
||||
|
||||
if self.udcs:
|
||||
dudcs = dict(self.udcs)
|
||||
ddict = dict(self.columns)
|
||||
data = []
|
||||
for c in self.copy_columns:
|
||||
if c in ddict:
|
||||
data.append(columns[ddict[c]-1])
|
||||
else:
|
||||
data.append(dudcs[c])
|
||||
|
||||
if DEBUG:
|
||||
print 'columns', columns
|
||||
print 'data ', data
|
||||
|
||||
##
|
||||
# Now we have to reorder the columns to match schema, and only
|
||||
# consider data matched by self.only_cols
|
||||
if self.only_cols is not None:
|
||||
c_ordered = [columns[self.col_mapping[i-1]-1] for i in self.only_cols]
|
||||
else:
|
||||
c_ordered = [columns[i-1] for i in self.col_mapping]
|
||||
if self.col_mapping:
|
||||
if DEBUG:
|
||||
print 'col_mapping', self.col_mapping
|
||||
|
||||
data = [columns[i-1] for i in self.col_mapping]
|
||||
|
||||
if DEBUG:
|
||||
print 'columns', columns
|
||||
print 'data ', data
|
||||
|
||||
if self.only_cols:
|
||||
# only consider data matched by self.only_cols
|
||||
if self.col_mapping:
|
||||
data = [columns[self.col_mapping[i-1]-1]
|
||||
for i in self.only_cols]
|
||||
else:
|
||||
data = [columns[i-1] for i in self.only_cols]
|
||||
|
||||
if DRY_RUN or DEBUG:
|
||||
print line
|
||||
print c_ordered
|
||||
print len(c_ordered)
|
||||
print self.table_colspec
|
||||
print self.columnlist, data
|
||||
print
|
||||
|
||||
if not DRY_RUN:
|
||||
self.db.copy_from(self.table, self.table_colspec,
|
||||
c_ordered, line, self.reject)
|
||||
self.db.copy_from(self.table, self.columnlist,
|
||||
data, line, self.reject)
|
||||
|
||||
if not DRY_RUN:
|
||||
# we may need a last COPY for the rest of data
|
||||
self.db.copy_from(self.table, self.table_colspec,
|
||||
self.db.copy_from(self.table, self.columnlist,
|
||||
None, None, self.reject, EOF = True)
|
||||
|
||||
return
|
||||
|
||||
@ -20,12 +20,13 @@ class DataReader:
|
||||
- multi-line support is explicit (via
|
||||
"""
|
||||
|
||||
def __init__(self, db, filename, table, columns):
|
||||
def __init__(self, db, reject, filename, table, columns):
|
||||
""" init internal variables """
|
||||
self.db = db
|
||||
self.filename = filename
|
||||
self.table = table
|
||||
self.columns = columns
|
||||
self.reject = reject
|
||||
|
||||
def readconfig(self, name, config):
|
||||
""" read configuration section for common options
|
||||
@ -37,18 +38,19 @@ class DataReader:
|
||||
|
||||
see textreader.py and csvreader.py
|
||||
"""
|
||||
# optionnal null and empty_string per table parameters
|
||||
if config.has_option(name, 'null'):
|
||||
self.db.null = parse_config_string(config.get(name, 'null'))
|
||||
else:
|
||||
self.db.null = NULL
|
||||
|
||||
if config.has_option(name, 'empty_string'):
|
||||
self.db.empty_string = parse_config_string(
|
||||
config.get(name, 'empty_string'))
|
||||
else:
|
||||
self.db.empty_string = EMPTY_STRING
|
||||
if not DRY_RUN:
|
||||
# optionnal null and empty_string per table parameters
|
||||
if config.has_option(name, 'null'):
|
||||
self.db.null = parse_config_string(config.get(name, 'null'))
|
||||
else:
|
||||
self.db.null = NULL
|
||||
|
||||
if config.has_option(name, 'empty_string'):
|
||||
self.db.empty_string = parse_config_string(
|
||||
config.get(name, 'empty_string'))
|
||||
else:
|
||||
self.db.empty_string = EMPTY_STRING
|
||||
|
||||
# optionnal field separator
|
||||
self.field_sep = FIELD_SEP
|
||||
@ -59,7 +61,7 @@ class DataReader:
|
||||
if self.db.copy_sep is None:
|
||||
self.db.copy_sep = self.field_sep
|
||||
|
||||
if DEBUG:
|
||||
if DEBUG and not DRY_RUN:
|
||||
print "null: '%s'" % self.db.null
|
||||
print "empty_string: '%s'" % self.db.empty_string
|
||||
|
||||
|
||||
@ -30,10 +30,17 @@ class TextReader(DataReader):
|
||||
- ...
|
||||
"""
|
||||
|
||||
def __init__(self, db, reject, filename, table, columns, newline_escapes):
|
||||
""" init textreader with a newline_escapes parameter """
|
||||
DataReader.__init__(self, db, reject, filename, table, columns)
|
||||
|
||||
self.newline_escapes = newline_escapes
|
||||
|
||||
|
||||
def readconfig(self, name, config):
|
||||
""" get this reader module configuration from config file """
|
||||
DataReader.readconfig(self, name, config)
|
||||
|
||||
|
||||
# optionnal number of columns per line
|
||||
self.field_count = None
|
||||
if config.has_option(name, 'field_count'):
|
||||
@ -44,27 +51,6 @@ class TextReader(DataReader):
|
||||
if config.has_option(name, 'trailing_sep'):
|
||||
self.trailing_sep = config.get(name, 'trailing_sep') == 'True'
|
||||
|
||||
# optionnal newline escaped option
|
||||
self.newline_escapes = []
|
||||
if config.has_option(name, 'newline_escapes'):
|
||||
if NEWLINE_ESCAPES is not None:
|
||||
# this parameter is globally set, will ignore local
|
||||
# definition
|
||||
if not QUIET:
|
||||
print "Warning: ignoring %s newline_escapes option" % name
|
||||
print " option is set to '%s' globally" \
|
||||
% NEWLINE_ESCAPES
|
||||
else:
|
||||
self._parse_fields('newline_escapes',
|
||||
config.get(name, 'newline_escapes'),
|
||||
argtype = 'char')
|
||||
|
||||
if NEWLINE_ESCAPES is not None:
|
||||
# set NEWLINE_ESCAPES for each table column
|
||||
self.newline_escapes = [(a, NEWLINE_ESCAPES)
|
||||
for (a, x) in self.columns]
|
||||
|
||||
|
||||
|
||||
def readlines(self):
|
||||
""" read data from configured file, and generate (yields) for
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user