Implement --boundaries to load a part of a file only (given in bytes).

This commit is contained in:
Dimitri Fontaine 2010-04-15 16:59:06 +02:00
parent 1738aaa6aa
commit f154d9bf83
6 changed files with 18 additions and 4 deletions

1
debian/changelog vendored
View File

@ -9,6 +9,7 @@ pgloader (2.3.3~dev2-1) unstable; urgency=low
* Fix a bug where pgloader would freeze on early error (no such file) * Fix a bug where pgloader would freeze on early error (no such file)
* Implement an option to set csv field size limit * Implement an option to set csv field size limit
* Implement --load-from-stdin * Implement --load-from-stdin
* Implement --boundaries
-- Dimitri Fontaine <dim@tapoueh.org> Sun, 4 Apr 2010 19:34:39 +0200 -- Dimitri Fontaine <dim@tapoueh.org> Sun, 4 Apr 2010 19:34:39 +0200

View File

@ -12,7 +12,7 @@ client_min_messages = WARNING
;client_encoding = 'utf-8' ;client_encoding = 'utf-8'
client_encoding = 'latin1' client_encoding = 'latin1'
lc_messages = C lc_messages = C
pg_option_client_encoding = 'utf-8' ;pg_option_client_encoding = 'utf-8'
pg_option_standard_conforming_strings = on pg_option_standard_conforming_strings = on
; This setting has no effect other than allowing to check option precedence ; This setting has no effect other than allowing to check option precedence
pg_option_work_mem = 12MB pg_option_work_mem = 12MB

View File

@ -234,6 +234,11 @@ for them.
loading the data to, it's useful when you want to load from +stdin+ and loading the data to, it's useful when you want to load from +stdin+ and
avoid editing a full configuration section. avoid editing a full configuration section.
--boundaries::
Limit the range of bytes to read and process. The range must be given
in the X..Y format, where X and Y are integers (byte positions).
== GLOBAL CONFIGURATION SECTION == == GLOBAL CONFIGURATION SECTION ==
The configuration file has a +.ini+ file syntax, its first section has The configuration file has a +.ini+ file syntax, its first section has

View File

@ -56,3 +56,5 @@ REJECT_DATA_FILE = '%s.rej'
LOAD_FROM_STDIN = None LOAD_FROM_STDIN = None
LOAD_TO_TABLE = None LOAD_TO_TABLE = None
FILE_BOUNDARIES = None # (start, end) --- file positions in bytes

View File

@ -289,9 +289,14 @@ class PGLoader(threading.Thread):
self.log.debug('%s.%s: %s', name, opt, config.get(name, opt)) self.log.debug('%s.%s: %s', name, opt, config.get(name, opt))
self.__dict__[opt] = config.get(name, opt) self.__dict__[opt] = config.get(name, opt)
else: else:
if not self.template and not self.__dict__[opt]: if not self.template and opt not in self.__dict__:
msg = "Error: Please configure %s.%s" % (name, opt)
raise PGLoader_Error, msg
elif not self.template and not self.__dict__[opt]:
self.log.error('Error: please configure %s.%s', name, opt) self.log.error('Error: please configure %s.%s', name, opt)
self.config_errors += 1 self.config_errors += 1
else: else:
# Reading Configuration Template section # Reading Configuration Template section
# we want the attribute to exist for further usage # we want the attribute to exist for further usage

View File

@ -198,7 +198,6 @@ class UnbufferedFileReader:
else: else:
f = open(self.filename, self.mode, self.bufsize) f = open(self.filename, self.mode, self.bufsize)
self.log.warning('PHOQUE "%s"', f)
self.fd = codecs.getreader(self.encoding)(f) self.fd = codecs.getreader(self.encoding)(f)
self.log.info("Opened '%s' with encoding '%s'" \ self.log.info("Opened '%s' with encoding '%s'" \
% (self.filename, self.encoding)) % (self.filename, self.encoding))
@ -222,7 +221,7 @@ class UnbufferedFileReader:
self.fd.seek(self.start) self.fd.seek(self.start)
self.position = self.fd.tell() self.position = self.fd.tell()
self.log.debug("Opened '%s' in %s (fileno %s), ftell %d" \ self.log.info("Opened '%s' in %s (fileno %s), ftell %d" \
% (self.filename, self.fd, % (self.filename, self.fd,
self.fd.fileno(), self.position)) self.fd.fileno(), self.position))
return return
@ -285,6 +284,8 @@ class UnbufferedFileReader:
if self.end is not None and self.fd.tell() >= self.end: if self.end is not None and self.fd.tell() >= self.end:
# we want to process current line and stop at next # we want to process current line and stop at next
# iteration # iteration
self.log.info("Reached position %d, reading last line" \
% self.fd.tell())
last_line_read = True last_line_read = True
yield line yield line