mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-05 02:46:10 +02:00
Implement support for the CSV field_size_limit, new in Python 2.5.
This commit is contained in:
parent
411a9a53d7
commit
1b6c0f4735
2
.gitignore
vendored
2
.gitignore
vendored
@ -15,3 +15,5 @@
|
||||
/reformat/pgtime.pyc
|
||||
/pgloader.1
|
||||
/pgloader.1.html
|
||||
/BUGS.html
|
||||
/TODO.html
|
||||
|
||||
3
debian/changelog
vendored
3
debian/changelog
vendored
@ -1,4 +1,4 @@
|
||||
pgloader (2.3.3-1) unstable; urgency=low
|
||||
pgloader (2.3.3~dev1-1) unstable; urgency=low
|
||||
|
||||
* Implement -f --field-sep to overwrite the default from command line
|
||||
* Add support for filename arguments, which use defaults
|
||||
@ -7,6 +7,7 @@ pgloader (2.3.3-1) unstable; urgency=low
|
||||
* Support setting any PG option (-o and config file)
|
||||
* Have --debug show a traceback
|
||||
* Fix a bug where pgloader would freeze on early error (no such file)
|
||||
* Implement an option to set csv field size limit
|
||||
|
||||
-- Dimitri Fontaine <dim@tapoueh.org> Sun, 4 Apr 2010 19:34:39 +0200
|
||||
|
||||
|
||||
@ -121,12 +121,13 @@ fixed_specs = a:0:10, b:10:8, c:18:8, d:26:17
|
||||
reformat = c:pgtime:time
|
||||
|
||||
[csv]
|
||||
table = csv
|
||||
format = csv
|
||||
filename = csv/csv.data
|
||||
field_sep = ,
|
||||
quotechar = "
|
||||
columns = x, y, a, b, d:6, c:5
|
||||
only_cols = 3-6
|
||||
skip_head_lines = 1
|
||||
table = csv
|
||||
format = csv
|
||||
filename = csv/csv.data
|
||||
field_size_limit = 512kB
|
||||
field_sep = ,
|
||||
quotechar = "
|
||||
columns = x, y, a, b, d:6, c:5
|
||||
only_cols = 3-6
|
||||
skip_head_lines = 1
|
||||
|
||||
|
||||
@ -777,6 +777,13 @@ skipinitialspace::
|
||||
When +True+, whitespace immediately following the +delimiter+ is
|
||||
ignored. The default is +False+.
|
||||
|
||||
field_size_limit::
|
||||
|
||||
Sets the maximum field size allowed by the python +CSV+ parser. Accepts
|
||||
a number of bytes (integer), or a string containing a number then one
|
||||
of these units (case sensitive): +kB+, +MB+, +GB+, +TB+. Requires at
|
||||
least python 2.5.
|
||||
|
||||
== FIXED FORMAT CONFIGURATION PARAMETERS ==
|
||||
|
||||
fixed_specs::
|
||||
|
||||
@ -44,8 +44,10 @@ class CSVReader(DataReader):
|
||||
if self.skipinitialspace is not False:
|
||||
self.skipinitialspace = self.skipinitialspace == 'True'
|
||||
|
||||
self._getopt('field_size_limit', config, name, template, -1, "mem")
|
||||
|
||||
for opt in ['doublequote', 'escapechar',
|
||||
'quotechar', 'skipinitialspace']:
|
||||
'quotechar', 'skipinitialspace', 'field_size_limit']:
|
||||
|
||||
self.log.debug("reader.readconfig %s: '%s'" \
|
||||
% (opt, self.__dict__[opt]))
|
||||
@ -78,6 +80,17 @@ class CSVReader(DataReader):
|
||||
begin_linenb = None
|
||||
last_line_nb = 1
|
||||
|
||||
# set the field_size_limit, from python 2.5
|
||||
if self.field_size_limit != -1:
|
||||
try:
|
||||
csv.field_size_limit(self.field_size_limit)
|
||||
self.log.debug("csv.field_size_limit(%d)" \
|
||||
% self.field_size_limit)
|
||||
except AttributeError:
|
||||
#'module' object has no attribute 'field_size_limit'
|
||||
self.log.warning("field_size_limit is new in python version 2.5")
|
||||
pass
|
||||
|
||||
# now read the lines
|
||||
for columns in csv.reader(self.fd, dialect = 'pgloader'):
|
||||
# we count logical lines
|
||||
|
||||
@ -32,6 +32,10 @@ class DataReader:
|
||||
self.table = table
|
||||
self.columns = columns
|
||||
self.reject = reject
|
||||
self.mem_units = {'kB': 1024,
|
||||
'MB': 1024*1024,
|
||||
'GB': 1024*1024*1024,
|
||||
'TB': 1024*1024*1024*1024}
|
||||
|
||||
if self.input_encoding is None:
|
||||
if INPUT_ENCODING is not None:
|
||||
@ -117,6 +121,21 @@ class DataReader:
|
||||
% (section, option, self.__dict__[option]))
|
||||
raise PGLoader_Error, e
|
||||
|
||||
elif opt_type == 'mem' and self.__dict__[option] is not None:
|
||||
try:
|
||||
opt = self.__dict__[option]
|
||||
if type(opt) == type("string") \
|
||||
and len(opt) > 2 and opt [-2:] in self.mem_units:
|
||||
unit = opt[-2:]
|
||||
size = int(opt[:-2]) * self.mem_units[unit]
|
||||
self.__dict__[option] = int(size)
|
||||
else:
|
||||
self.__dict__[option] = int(self.__dict__[option])
|
||||
except ValueError, e:
|
||||
self.log.error('Configuration option %s.%s is not a memsize: %s' \
|
||||
% (section, option, self.__dict__[option]))
|
||||
raise PGLoader_Error, e
|
||||
|
||||
return self.__dict__[option]
|
||||
|
||||
def readlines(self):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user