new logging options documentation

This commit is contained in:
dim 2007-12-07 14:37:28 +00:00
parent f0d6a8a37e
commit efecde9f5b
10 changed files with 248 additions and 255 deletions

7
debian/changelog vendored
View File

@ -1,3 +1,10 @@
pgloader (2.2.5~dev.1-2) unstable; urgency=low
* Now using proper python logging module
* New client_min_messages and log_min_messages options
-- Dimitri Fontaine <dim@tapoueh.org> Fri, 07 Dec 2007 14:48:08 +0100
pgloader (2.2.5~dev-1) unstable; urgency=low
* Configuration now supports templates

View File

@ -73,6 +73,10 @@ refers to a PostgreSQL table into which some data is to be loaded.
makes pgloader very quiet about what it does: it only outputs errors.
-l, --loglevel::
log level to use when reporting to the console, see +client_min_messages+.
-s, --summary::
makes pgloader print a 'nice' summary at the end of operations.
@ -264,6 +268,18 @@ modules.
If the +-R+ or +--reformat_path+ command line option is used, it will
have precedence over configuration file setting.
client_min_messages::
Minimum level of messages to print to the console while
running. Defined levels are +DEBUG+, +INFO+, +WARNING+, +ERROR+,
+CRITICAL+, from min to max. +
log_min_messages::
Minimum level of messages to print out to the log file, which
defaults to +/tmp/pgloader.log+. See +client_min_messages+ for
available levels.
== COMMON FORMAT CONFIGURATION PARAMETERS ==
You then can define any number of data sections, and give them an arbitrary

View File

@ -49,6 +49,10 @@ def parse_options():
default = False,
help = "be quiet, only print out errors")
parser.add_option("-l", "--level", dest = "loglevel",
default = None,
help = "loglevel to use: ERROR, WARNING, INFO, DEBUG")
parser.add_option("-s", "--summary", action = "store_true",
dest = "summary",
default = False,
@ -142,7 +146,10 @@ def parse_options():
pgloader.options.REFORMAT_PATH = opts.reformat_path
import logging
if opts.debug:
if opts.loglevel:
loglevel = pgloader.logger.level(opts.loglevel)
pgloader.options.CLIENT_MIN_MESSAGES = loglevel
elif opts.debug:
pgloader.options.CLIENT_MIN_MESSAGES = logging.DEBUG
elif opts.verbose:
pgloader.options.CLIENT_MIN_MESSAGES = logging.INFO
@ -190,16 +197,16 @@ def parse_config(conffile):
else:
pgloader.options.LOG_MIN_MESSAGES = NOTICE
log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES,
pgloader.options.LOG_MIN_MESSAGES,
'/tmp/pgloader.log')
pgloader.logger.log = log
pgloader.log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES,
pgloader.options.LOG_MIN_MESSAGES,
'/tmp/pgloader.log')
pgloader.log.info("Logger initialized")
pgloader.log.debug("PHOQUE")
log.info("Logger initialized")
log.debug("PHOQUE")
if DRY_RUN:
pgloader.log.info("dry run mode, not connecting to database")
log.info("dry run mode, not connecting to database")
return config, None
try:
@ -259,7 +266,7 @@ def parse_config(conffile):
pgloader.options.REFORMAT_PATH = rpath
except Exception, error:
pgloader.log.error("Could not initialize PostgreSQL connection:")
log.error("Could not initialize PostgreSQL connection:")
print error
sys.exit(6)
@ -383,6 +390,7 @@ def load_data():
# now init db connection
config, dbconn = parse_config(conffile)
from pgloader.logger import log
from pgloader.tools import read_path, check_path
from pgloader.options import VERBOSE
import pgloader.options
@ -405,7 +413,7 @@ def load_data():
else:
pgloader.options.REFORMAT_PATH = rpath
pgloader.log.info('Reformat path is %s', pgloader.options.REFORMAT_PATH)
log.info('Reformat path is %s', pgloader.options.REFORMAT_PATH)
# load some pgloader package modules
from pgloader.options import VERBOSE, DEBUG, QUIET, SUMMARY
@ -427,9 +435,9 @@ def load_data():
if s != 'pgsql':
sections.append(s)
pgloader.log.info('Will consider following sections:')
log.info('Will consider following sections:')
for line in myprint(sections):
pgloader.log.info(line)
log.info(line)
# we count time passed from now on
begin = time.time()
@ -444,24 +452,24 @@ def load_data():
loader.run()
summary[s] = (loader.table,) + loader.summary()
else:
pgloader.log.info("Skipping section %s, which is a template" \
log.info("Skipping section %s, which is a template" \
% s)
except PGLoader_Error, e:
if e == '':
pgloader.log.error('[%s] Please correct previous errors' % s)
log.error('[%s] Please correct previous errors' % s)
else:
pgloader.log.error('%s' % e)
log.error('%s' % e)
if PEDANTIC:
pgloader.print_stats()
except UnicodeDecodeError, e:
pgloader.log.error("can't open '%s' with given input encoding '%s'" \
log.error("can't open '%s' with given input encoding '%s'" \
% (loader.filename, loader.input_encoding))
except KeyboardInterrupt:
pgloader.log.warning("Aborting on user demand (Interrupt)")
log.warning("Aborting on user demand (Interrupt)")
# total duration
td = time.time() - begin
@ -472,10 +480,10 @@ def load_data():
retcode = print_summary(dbconn, sections, summary, td)
print
except PGLoader_Error, e:
pgloader.log.error("Can't print summary: %s" % e)
log.error("Can't print summary: %s" % e)
if VACUUM and not DRY_RUN:
pgloader.log.info('vacuumdb... ')
log.info('vacuumdb... ')
try:
dbconn.vacuum()
except KeyboardInterrupt:

View File

@ -12,12 +12,12 @@ from options import INPUT_ENCODING, PG_CLIENT_ENCODING, DATESTYLE
from options import COPY_SEP, FIELD_SEP, CLOB_SEP, NULL, EMPTY_STRING
from tools import PGLoader_Error
from logger import log
try:
import psycopg2.psycopg1 as psycopg
except ImportError:
if VERBOSE:
print 'No psycopg2 module found, trying psycopg1'
log.info('No psycopg2 module found, trying psycopg1')
import psycopg
class db:
@ -27,6 +27,7 @@ class db:
client_encoding = PG_CLIENT_ENCODING,
copy_every = 10000, commit_every = 1000, connect = True):
""" Connects to the specified database """
self.log = log
self.dbconn = None
self.dsn = "host=%s port=%d user=%s dbname=%s password=%s" \
% (host, port, user, base, passwd)
@ -61,11 +62,9 @@ class db:
def set_encoding(self):
""" set connection encoding to self.client_encoding """
if DEBUG:
# debug only cause reconnecting happens on every
# configured section
print 'Setting client encoding to %s' % self.client_encoding
# debug only cause reconnecting happens on every
# configured section
self.log.debug('Setting client encoding to %s', self.client_encoding)
sql = 'set session client_encoding to %s'
cursor = self.dbconn.cursor()
@ -81,10 +80,9 @@ class db:
if self.datestyle is None:
return
if DEBUG:
# debug only cause reconnecting happens on every
# configured section
print 'Setting datestyle to %s' % self.datestyle
# debug only cause reconnecting happens on every
# configured section
self.log.debug('Setting datestyle to %s', self.datestyle)
sql = 'set session datestyle to %s'
cursor = self.dbconn.cursor()
@ -103,13 +101,11 @@ class db:
self.first_commit_time = self.last_commit_time
self.partial_coldef = None
if DEBUG:
if self.dbconn is not None:
print 'Debug: closing current connection'
self.dbconn.close()
if self.dbconn is not None:
self.log.debug('Debug: closing current connection')
self.dbconn.close()
if DEBUG:
print 'Debug: connecting to dns %s' % self.dsn
self.log.debug('Debug: connecting to dns %s', self.dsn)
self.dbconn = psycopg.connect(self.dsn)
self.set_encoding()
@ -120,15 +116,15 @@ class db:
d = time.time() - self.first_commit_time
u = self.commited_rows
c = self.commits
print " %d updates in %d commits took %5.3f seconds" % (u, c, d)
self.log.info(" %d updates in %d commits took %5.3f seconds", u, c, d)
if self.errors > 0:
print " %d database errors occured" % self.errors
self.log.error("%d database errors occured", self.errors)
if self.copy and not VACUUM:
print " Please do VACUUM your database to recover space"
self.log.info("Please VACUUM your database to recover space")
else:
if u > 0:
print " No database error occured"
self.log.info("No database error occured")
return
def is_null(self, value):
@ -142,43 +138,36 @@ class db:
def truncate(self, table):
""" issue an SQL TRUNCATE TABLE on given table """
if DRY_RUN:
if VERBOSE:
print "Notice: won't truncate tables on dry-run mode"
self.log.info("Won't truncate tables on dry-run mode")
return
sql = "TRUNCATE TABLE %s;" % table
if VERBOSE:
print 'Notice: %s' % sql
self.log.info('%s' % sql)
try:
cursor = self.dbconn.cursor()
cursor.execute(sql)
self.dbconn.commit()
except Exception, error:
if VERBOSE:
print error
self.log.error(error)
raise PGLoader_Error, "Couldn't truncate table %s" % table
def vacuum(self):
""" issue an vacuumdb -fvz database """
if DRY_RUN:
if VERBOSE:
print
print 'Notice: no vacuum in dry-run mode'
self.log.info('no vacuum in dry-run mode')
return -1
command = "/usr/bin/vacuumdb %s -fvz %s 2>&1" \
% (self.connect, self.base)
if VERBOSE:
print command
self.log.info(command)
out = os.popen(command)
for line in out.readlines():
if DEBUG:
# don't print \n
print line[:-1]
# don't print \n
self.log.debug(line[:-1])
return out.close()
@ -213,8 +202,7 @@ class db:
values.append(rowids[name])
sql += ";"
if DEBUG:
print 'Debug: %s' % sql
self.log.debug('%s' % sql)
try:
cursor = self.dbconn.cursor()
@ -229,8 +217,8 @@ class db:
for i,v in rowids.items():
if str_rowids != "": str_rowids += ", "
str_rowids += "%s:%s" % (i, v)
print '%s %s %s %6do' \
% (table, str_rowids, blob_cname, len(data))
self.log.debug('%s %s %s %6do',
table, str_rowids, blob_cname, len(data))
if self.running_commands == self.commit_every:
now = time.time()
@ -240,9 +228,8 @@ class db:
duration = now - self.last_commit_time
self.last_commit_time = now
if not QUIET:
print "-- commit %d: %d updates in %5.3fs --" \
% (self.commits, self.running_commands, duration)
self.log.info("commit %d: %d updates in %5.3fs",
self.commits, self.running_commands, duration)
self.commited_rows += self.running_commands
self.running_commands = 1
@ -256,9 +243,9 @@ class db:
self.dbconn.commit()
# don't use self.commited_rows here, it's only updated
# after a commit
print "Error: update %d rejected: commiting (read log file %s)" \
% (self.commits * self.commit_every + self.running_commands,
reject.reject_log)
self.log.error("update %d rejected: commiting (read log file %s)",
self.commits * self.commit_every + self.running_commands,
reject.reject_log)
reject.log(str(e), input_line)
self.errors += 1
@ -273,10 +260,7 @@ class db:
suffix='.pgloader', dir='/tmp')
os.write(f, self.buffer.getvalue())
os.close(f)
# systematicaly write about this
if not QUIET:
print " -- COPY data buffer saved in %s --" % n
self.log.warning("COPY data buffer saved in %s" % n)
return n
def copy_from(self, table, columnlist, columns, input_line,
@ -296,8 +280,7 @@ class db:
# time to copy data to PostgreSQL table
if self.buffer is None:
if VERBOSE:
print "Error: no data to COPY"
self.log.warning("no data to COPY")
return False
if DEBUG:
@ -315,9 +298,8 @@ class db:
duration = now - self.last_commit_time
self.last_commit_time = now
if not QUIET:
print " - COPY %d: %d rows copied in %5.3fs --" \
% (self.commits, self.running_commands, duration)
self.log.info("COPY %d: %d rows copied in %5.3fs",
self.commits, self.running_commands, duration)
# prepare next run
self.buffer.close()
@ -329,12 +311,11 @@ class db:
# rollback current transaction
self.dbconn.rollback()
if VERBOSE:
print 'Notice: COPY error, trying to find on which line'
if not DEBUG:
# in DEBUG mode, copy buffer has already been saved
# to file
self.save_copy_buffer(tablename)
self.log.warning('COPY error, trying to find on which line')
if not DEBUG:
# in DEBUG mode, copy buffer has already been saved
# to file
self.save_copy_buffer(tablename)
# copy recovery process
now = time.time()
@ -347,9 +328,8 @@ class db:
self.commited_rows += ok
self.errors += ko
if VERBOSE:
print 'Notice: COPY error recovery done (%d/%d) in %5.3fs'\
% (ko, ok, duration)
self.log.warning('COPY error recovery done (%d/%d) in %5.3fs',
ko, ok, duration)
# commit this transaction
self.dbconn.commit()
@ -379,8 +359,7 @@ class db:
if count == 1:
reject.log('COPY error on this line', buff.getvalue())
buff.close()
if DEBUG:
print '--- Notice: found one more line in error'
self.log.debug('found one more line in error')
# returns commits, ok, ko
return 0, 0, 1
@ -407,9 +386,8 @@ class db:
# we don't need no more orgininal buff
buff.close()
if DEBUG:
print '--- Trying to find errors, dividing %d lines in %d and %d' \
% (count, m, n-m)
self.log.debug('Trying to find errors, dividing %d lines in %d and %d',
count, m, n-m)
# now we have two buffers to copy to PostgreSQL database
cursor = self.dbconn.cursor()
@ -420,9 +398,8 @@ class db:
self.dbconn.commit()
x.close()
if DEBUG:
print " -- COPY ERROR handling progress: %d rows copied"\
% (xcount)
self.log.debug("COPY ERROR handling progress: %d rows copied",
xcount)
x.close()
commits += 1
@ -435,9 +412,8 @@ class db:
if xcount == 1:
ko += 1
reject.log('COPY error: %s' % error, x.getvalue())
if DEBUG:
print '--- Notice: found one more line in error'
print x.getvalue()
self.log.debug('Notice: found one more line in error')
self.log.debug(x.getvalue())
else:
_c, _o, _k = self.copy_from_buff(table, x, xcount, reject)
@ -464,8 +440,7 @@ class db:
elif self.is_empty(c):
# empty string has been read
if DEBUG:
print "empty string read: '%s'" % c
self.log.debug("empty string read: '%s'" % c)
self.buffer.write('')
else:

View File

@ -38,8 +38,9 @@ class ifx_lo:
class ifx_clob(ifx_lo):
""" Informix Text Large Object file """
def __init__(self, filename, input_encoding):
def __init__(self, log, filename, input_encoding):
""" init a clob object """
self.log = log
self.file = None
self.filename = filename
@ -51,14 +52,14 @@ class ifx_clob(ifx_lo):
else:
self.file = open(self.filename, 'r')
if VERBOSE:
print "Notice: Opening informix clob file:", self.filename
self.log.info("Opening informix clob file: %s", self.filename)
class ifx_blob(ifx_lo):
""" Informix Binary Large Object file """
def __init__(self, filename, field_sep):
def __init__(self, log, filename, field_sep):
""" init a clob object """
self.log = log
self.file = None
self.filename = filename
self.field_sep = field_sep # used by bytea_escape
@ -69,8 +70,7 @@ class ifx_blob(ifx_lo):
if self.file is None:
self.file = open(self.filename, 'rb')
if VERBOSE:
print "Notice: Opening informix blob file:", self.filename
self.log.info("Opening informix blob file: %s", self.filename)
def bytea_escape(self, bitstring):
""" escape chars from bitstring for PostgreSQL bytea input

View File

@ -22,12 +22,16 @@ def init(client_min_messages = logging.INFO,
console = logging.StreamHandler()
console.setLevel(client_min_messages)
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
formatter = logging.Formatter('%(name)-12s %(levelname)-8s %(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)
return logging.getLogger('pgloader')
def getLogger(name):
    """Return a logger instance registered under the given name.

    Thin convenience wrapper around logging.getLogger so that callers
    of this module do not have to import logging themselves; loggers
    obtained here inherit the handlers configured by init().
    """
    logger = logging.getLogger(name)
    return logger
def level(name):
""" return a logging level from user string """
@ -38,13 +42,13 @@ def level(name):
return logging.INFO
elif name.upper() == 'WARNING':
return logging.INFO
return logging.WARNING
elif name.upper() == 'ERROR':
return logging.INFO
return logging.ERROR
elif name.upper() == 'CRITICAL':
return logging.INFO
return logging.CRITICAL
else:
return logging.NOTSET

View File

@ -8,6 +8,7 @@
import os, sys, os.path, time, codecs
from cStringIO import StringIO
from logger import log, getLogger
from tools import PGLoader_Error, Reject, parse_config_string
from db import db
from lo import ifx_clob, ifx_blob
@ -30,8 +31,9 @@ class PGLoader:
def __init__(self, name, config, db):
""" Init with a configuration section """
# Some settings
self.name = name
self.db = db
self.name = name
self.db = db
self.log = getLogger(name)
self.template = None
self.use_template = None
@ -51,13 +53,10 @@ class PGLoader:
if config.has_option(name, 'template'):
self.template = True
# just skip it here
if VERBOSE:
print "[%s] is a template" % self.name
self.log.info("[%s] is a template", self.name)
if not self.template and VERBOSE:
print "[%s] parse configuration" % self.name
if not self.template:
self.log.info("[%s] parse configuration", self.name)
if not self.template:
# check if the section wants to use a template
@ -71,43 +70,43 @@ class PGLoader:
raise PGLoader_Error, m
# first load template configuration
if VERBOSE:
print "Reading configuration from template section [%s]" \
% self.template
self.log.info("Reading configuration from template " +\
"section [%s]", self.template)
self.real_log = self.log
self.log = getLogger(self.template)
try:
self.__read_conf__(self.template, config, db,
want_template = True)
except PGLoader_Error, e:
print
print e
self.log.error(e)
m = "%s.use_template does not refer to a template section"\
% name
raise PGLoader_Error, m
# reinit self.template now its relative config section is read
self.template = None
self.log = self.real_log
# now load specific configuration
if VERBOSE:
print "Reading configuration from section [%s]" % name
self.log.info("Reading configuration from section [%s]", name)
self.__read_conf__(name, config, db)
# force reinit of self.reader, which depends on template and
# specific options
if 'reader' in self.__dict__:
self.reader.__init__(self.db, self.reject,
self.reader.__init__(self.log, self.db, self.reject,
self.filename, self.input_encoding,
self.table, self.columns)
# Now reset database connection
if not DRY_RUN:
self.db.log = self.log
self.db.reset()
if DEBUG:
print '%s init done' % name
print
self.log.debug('%s init done' % name)
def __read_conf__(self, name, config, db, want_template = False):
""" init self from config section name """
@ -132,11 +131,10 @@ class PGLoader:
# reject logging
if not self.template:
self.reject = Reject(self.reject_log, self.reject_data)
self.reject = Reject(self.log, self.reject_log, self.reject_data)
if VERBOSE:
print 'Notice: reject log in %s' % self.reject.reject_log
print 'Notice: rejected data in %s' % self.reject.reject_data
self.log.info('reject log in %s', self.reject.reject_log)
self.log.info('rejected data in %s', self.reject.reject_data)
else:
# needed to instanciate self.reader while in template section
@ -147,8 +145,8 @@ class PGLoader:
self.db.client_encoding = parse_config_string(
config.get(name, 'client_encoding'))
if DEBUG and not DRY_RUN:
print "client_encoding: '%s'" % self.db.client_encoding
if not DRY_RUN:
self.log.debug("client_encoding: '%s'", self.db.client_encoding)
# optionnal local option input_encoding
self.input_encoding = INPUT_ENCODING
@ -156,28 +154,25 @@ class PGLoader:
self.input_encoding = parse_config_string(
config.get(name, 'input_encoding'))
if DEBUG:
print "input_encoding: '%s'" % self.input_encoding
self.log.debug("input_encoding: '%s'", self.input_encoding)
# optionnal local option datestyle
if not DRY_RUN and config.has_option(name, 'datestyle'):
self.db.datestyle = parse_config_string(
config.get(name, 'datestyle'))
if DEBUG and not DRY_RUN:
print "datestyle: '%s'" % self.db.datestyle
if not DRY_RUN:
self.log.debug("datestyle: '%s'", self.db.datestyle)
##
# data filename
for opt in ('table', 'filename'):
if config.has_option(name, opt):
if DEBUG:
print '%s.%s: %s' % (name, opt, config.get(name, opt))
self.log.debug('%s.%s: %s', name, opt, config.get(name, opt))
self.__dict__[opt] = config.get(name, opt)
else:
if not self.template and opt not in self.__dict__:
print 'Error: please configure %s.%s' % (name, opt)
if not self.template and not self.__dict__[opt]:
self.log.error('Error: please configure %s.%s', name, opt)
self.config_errors += 1
else:
# Reading Configuration Template section
@ -198,14 +193,13 @@ class PGLoader:
config.get(name, 'blob_columns'),
btype = True)
if DEBUG:
print 'index', self.index
print 'columns', self.columns
print 'blob_columns', self.blob_cols
self.log.debug('index %s', str(self.index))
self.log.debug('columns %s', str(self.columns))
self.log.debug('blob_columns %s', str(self.blob_cols))
if self.columns is None:
if not self.template:
print 'Error: %s has no columns defined' % name
self.log.error('%s has no columns defined', name)
self.config_errors += 1
else:
@ -231,8 +225,7 @@ class PGLoader:
else:
self.udcs = None
if DEBUG:
print 'udcs:', self.udcs
self.log.debug('udcs: %s', str(self.udcs))
# better check there's no user defined column overriding file
# columns
@ -245,9 +238,9 @@ class PGLoader:
if errs:
for c in errs:
print 'Error: %s is configured both as a ' % c +\
'%s.columns entry and as a user-defined column' \
% name
self.log.error('%s is configured both as a %s.columns ' +\
'entry and as a user-defined column',
c, name)
self.config_errors += 1
@ -271,9 +264,9 @@ class PGLoader:
# just add all the given columns in this pass
self.copy_columns.append(x)
if DEBUG:
print 'config copy_columns', self.config_copy_columns
print 'copy_columns', self.copy_columns
self.log.debug('config copy_columns %s',
str(self.config_copy_columns))
self.log.debug('copy_columns %s', str(self.copy_columns))
if not self.template:
# check for errors time!
@ -283,20 +276,20 @@ class PGLoader:
for x in self.copy_columns:
if x not in namelist:
print 'Error: "%s" not in %s column list, ' \
% (x, name) +\
'including user defined columns'
self.log.error('"%s" not in %s column list, ' + \
'including user defined columns',
x, name)
self.config_errors += 1
if len(self.copy_columns) != len(self.config_copy_columns):
print 'Error: %s.copy_columns refers to ' % name +\
'unconfigured columns '
self.log.error('%s.copy_columns refers to ' +\
'unconfigured columns', name)
self.config_errors += 1
elif self.udcs:
print 'Error: section %s does not define ' % name +\
'copy_columns but uses user-defined columns'
self.log.error('section %s does not define copy_columns ' +\
'but uses user-defined columns', name)
self.config_errors += 1
@ -306,10 +299,9 @@ class PGLoader:
if 'copy_columns' in self.__dict__ and self.copy_columns:
self.columnlist = self.copy_columns
if DEBUG:
print 'udcs:', self.udcs
if self.udcs and 'copy_columns' in self.__dict__:
print 'copy_columns', self.copy_columns
self.log.debug('udcs: %s', str(self.udcs))
if self.udcs and 'copy_columns' in self.__dict__:
self.log.debug('copy_columns %s', str(self.copy_columns))
##
# We have for example columns = col1:2, col2:1
@ -336,8 +328,8 @@ class PGLoader:
if config.has_option(name, 'only_cols'):
if self.udcs:
print 'Error: section %s defines both ' % name +\
'user-defined columns and only_cols'
self.log.error('section %s defines both ' +\
'user-defined columns and only_cols', name)
self.config_errors += 1
@ -365,8 +357,8 @@ class PGLoader:
self.columnlist = [self.columns[x-1][0] for x in expanded]
except Exception, e:
print 'Error: section %s, only_cols: ' % name +\
'configured range is invalid'
self.log.error('section %s, only_cols: ' +\
'configured range is invalid', name)
raise PGLoader_Error, e
if self.only_cols is None:
@ -374,9 +366,8 @@ class PGLoader:
# default case, no user-defined cols, no restriction
self.columnlist = [n for (n, pos) in self.columns]
if DEBUG:
print "only_cols", self.only_cols
print "columnlist", self.columnlist
self.log.debug("only_cols %s", str(self.only_cols))
self.log.debug("columnlist %s", str(self.columnlist))
##
# This option is textreader specific, but being lazy and
@ -389,10 +380,9 @@ class PGLoader:
if NEWLINE_ESCAPES is not None:
# this parameter is globally set, will ignore local
# definition
if not QUIET:
print "Warning: ignoring %s newline_escapes option" % name
print " option is set to '%s' globally" \
% NEWLINE_ESCAPES
self.log.warning("ignoring %s newline_escapes option" +\
"option is set to '%s' globally",
name, NEWLINE_ESCAPES)
else:
self._parse_fields('newline_escapes',
config.get(name, 'newline_escapes'),
@ -408,26 +398,25 @@ class PGLoader:
if self.format.lower() == 'csv':
from csvreader import CSVReader
self.reader = CSVReader(self.db, self.reject,
self.reader = CSVReader(self.log, self.db, self.reject,
self.filename, self.input_encoding,
self.table, self.columns)
elif self.format.lower() == 'text':
from textreader import TextReader
self.reader = TextReader(self.db, self.reject,
self.reader = TextReader(self.log, self.db, self.reject,
self.filename, self.input_encoding,
self.table, self.columns,
self.newline_escapes)
if 'reader' in self.__dict__:
if DEBUG:
print 'reader.readconfig()'
self.log.debug('reader.readconfig()')
self.reader.readconfig(name, config)
if not self.template and \
('format' not in self.__dict__ or self.format is None):
# error only when not loading the Template part
print 'Error: %s: format parameter needed' % name
self.log.Error('%s: format parameter needed', name)
raise PGLoader_Error
##
@ -440,8 +429,7 @@ class PGLoader:
if 'c_reformat' not in self.__dict__:
self.c_reformat = self.reformat = None
if DEBUG:
print 'reformat', self.c_reformat
self.log.debug('reformat %s', str(self.c_reformat))
# check the configure reformating is available
if not self.template and self.c_reformat:
@ -450,8 +438,8 @@ class PGLoader:
for r_colname, r_module, r_function in self.c_reformat:
if r_colname not in self.columnlist:
print 'Error: %s.reformat refers to unknown column %s' \
% ( name, r_colname )
self.log.error('%s.reformat refers to unknown column %s',
name, r_colname)
self.config_errors += 1
# load the given module name and function
@ -460,17 +448,16 @@ class PGLoader:
fp, pathname, description = \
imp.find_module(r_module, REFORMAT_PATH)
if DEBUG:
print 'Found %s at %s' % (r_module, pathname)
self.log.debug('Found %s at %s', r_module, str(pathname))
module = imp.load_module(r_module,
fp, pathname, description)
except ImportError, e:
print 'Error: %s failed to import reformat module "%s"' \
% (name, r_module)
print ' from %s' % str(REFORMAT_PATH)
print ' %s' % e
self.log.error('%s failed to import reformat module "%s"',
name, r_module)
self.log.error('from %s', str(REFORMAT_PATH))
self.log.error(e)
self.config_errors += 1
@ -479,12 +466,12 @@ class PGLoader:
self.reformat.append((r_colname,
module.__dict__[r_function]))
else:
print 'Error: reformat module %s has no %s function'%\
(r_module, r_function)
self.log.error('reformat module %s has no %s function',
r_module, r_function)
self.config_errors += 1
if DEBUG and not self.template:
print 'reformat', self.reformat
if not self.template:
self.log.debug('reformat %s', str(self.reformat))
##
# How can we mix those columns definitions ?
@ -530,7 +517,7 @@ class PGLoader:
'Error: unable to parse given key %s' % FROM_ID)
raise PGLoader_Error
print 'Notice: composite key found, -I evaluated to %s' % FROM_ID
self.log.info('composite key found, -I evaluated to %s', FROM_ID)
if self.config_errors > 0:
mesg = 'Configuration errors for section %s' % self.name
@ -620,22 +607,18 @@ class PGLoader:
""" depending on configuration, do given job """
# Announce the beginning of the work
if not QUIET:
print
print "[%s]" % self.name
self.log.info("[%s]" % self.name)
if TRUNCATE and not DRY_RUN:
self.db.truncate(self.table)
if self.columns is not None:
if not QUIET:
print "Notice: COPY csv data"
self.log.info("COPY csv data")
self.data_import()
elif self.blob_cols is not None:
# elif: COPY process also blob data
if not QUIET:
print "Notice: UPDATE blob data"
self.log.info("UPDATE blob data")
# then show up some stats
self.print_stats()
@ -666,10 +649,9 @@ class PGLoader:
else:
data.append(columns[cpos-1])
if DEBUG:
print 'reformat'
print 'columns', columns
print 'data ', data
self.log.debug('reformat')
self.log.debug('columns %s', str(columns))
self.log.debug('data %s', str(data))
# we want next steps to take reformated data as input
columns = data
@ -683,23 +665,20 @@ class PGLoader:
else:
data.append(dudcs[c])
if DEBUG:
print 'udcs'
print 'columns', columns
print 'data ', data
self.log.debug('udcs')
self.log.debug('columns %s', str(columns))
self.log.debug('data %s', str(data))
columns = data
else:
if self.col_mapping:
if DEBUG:
print 'col_mapping', self.col_mapping
self.log.debug('col_mapping %s', str(self.col_mapping))
data = [columns[i-1] for i in self.col_mapping]
if DEBUG:
print 'columns', columns
print 'data ', data
self.log.debug('columns %s', str(columns))
self.log.debug('data %s', str(data))
columns = data
@ -712,11 +691,14 @@ class PGLoader:
and not self.only_cols:
data = columns
if DRY_RUN or DEBUG:
print '<', line
print ' ', self.columnlist
print '>', data
print
if DRY_RUN:
self.log.info('< %s', line)
self.log.info(' %s', str(self.columnlist))
self.log.info('> %s', str(data))
else:
self.log.debug('< %s', line)
self.log.debug(' %s', str(self.columnlist))
self.log.debug('> %s', str(data))
if not DRY_RUN:
self.db.copy_from(self.table, self.columnlist,
@ -742,10 +724,8 @@ class PGLoader:
columns, rowids = self.read_blob(line, columns)
# and insert it into database
if DRY_RUN or DEBUG:
print line
print columns
print
self.log.debug(line)
self.log.debug(str(line))
if not DRY_RUN:
self.db.insert_blob(self.table, self.index,
@ -810,12 +790,14 @@ class PGLoader:
raise PGLoader_Error, msg
elif btype == 'ifx_blob':
self.blobs[abs_blobname] = ifx_blob(abs_blobname,
self.blobs[abs_blobname] = ifx_blob(self.log,
abs_blobname,
self.field_sep)
elif btype == 'ifx_clob':
self.blobs[abs_blobname] = \
ifx_clob(abs_blobname,
ifx_clob(self.log,
abs_blobname,
self.input_encoding)
blob = self.blobs[abs_blobname]
@ -828,11 +810,9 @@ class PGLoader:
data = blob.extract(rowids, c, begin, length)
columns[blob_col] = data
if DEBUG:
print 'Debug: read blob data'
print rowids
print data
print
self.log.debug('read blob data')
self.log.debug(str(rowids))
self.log.debug(str(data))
# no we have read all defined blob data, returns new columns
return columns, rowids

View File

@ -20,11 +20,12 @@ class DataReader:
- multi-line support is explicit (via
"""
def __init__(self, db, reject, filename, input_encoding, table, columns):
def __init__(self, log, db, reject,
filename, input_encoding, table, columns):
""" init internal variables """
if DEBUG:
print 'reader __init__', filename, table, columns
log.debug('reader __init__ %s %s %s', filename, table, columns)
self.log = log
self.db = db
self.filename = filename
self.input_encoding = input_encoding
@ -52,13 +53,15 @@ class DataReader:
if config.has_option(name, 'null'):
self.db.null = parse_config_string(config.get(name, 'null'))
else:
self.db.null = NULL
if 'null' not in self.__dict__:
self.db.null = NULL
if config.has_option(name, 'empty_string'):
self.db.empty_string = parse_config_string(
config.get(name, 'empty_string'))
else:
self.db.empty_string = EMPTY_STRING
if 'empty_string' not in self.__dict__:
self.db.empty_string = EMPTY_STRING
# optionnal field separator, could be defined from template
if 'field_sep' not in self.__dict__:
@ -71,11 +74,11 @@ class DataReader:
if self.db.copy_sep is None:
self.db.copy_sep = self.field_sep
if DEBUG and not DRY_RUN:
print "reader.readconfig null: '%s'" % self.db.null
print "reader.readconfig empty_string: '%s'" \
% self.db.empty_string
print "reader.readconfig field_sep: '%s'" % self.field_sep
if not DRY_RUN:
self.log.debug("reader.readconfig null: '%s'" % self.db.null)
self.log.debug("reader.readconfig empty_string: '%s'",
self.db.empty_string)
self.log.debug("reader.readconfig field_sep: '%s'", self.field_sep)
def readlines(self):
""" read data from configured file, and generate (yields) for

View File

@ -30,10 +30,10 @@ class TextReader(DataReader):
- ...
"""
def __init__(self, db, reject, filename, input_encoding,
def __init__(self, log, db, reject, filename, input_encoding,
table, columns, newline_escapes = None):
""" init textreader with a newline_escapes parameter """
DataReader.__init__(self, db, reject,
DataReader.__init__(self, log, db, reject,
filename, input_encoding, table, columns)
if 'newline_escapes' not in self.__dict__:
@ -61,9 +61,8 @@ class TextReader(DataReader):
if config.has_option(name, 'trailing_sep'):
self.trailing_sep = config.get(name, 'trailing_sep') == 'True'
if DEBUG:
print 'reader.readconfig: field_count', self.field_count
print 'reader.readconfig: trailing_sep', self.trailing_sep
self.log.debug('reader.readconfig: field_count %s', self.field_count)
self.log.debug('reader.readconfig: trailing_sep %s', self.trailing_sep)
def readlines(self):
""" read data from configured file, and generate (yields) for
@ -79,8 +78,7 @@ class TextReader(DataReader):
##
# if neither -I nor -F was used, we can state that begin = 0
if FROM_ID is None and FROM_COUNT == 0:
if VERBOSE:
print 'Notice: beginning on first line'
self.log.info('beginning on first line')
begin_linenb = 1
if self.input_encoding is not None:

View File

@ -18,8 +18,9 @@ class Reject:
reject_data contains input lines which this tool couldn't manage
"""
def __init__(self, reject_log, reject_data):
def __init__(self, log, reject_log, reject_data):
""" Constructor, with file names """
self._log = log
self.reject_log = reject_log
self.reject_data = reject_data
@ -32,12 +33,13 @@ class Reject:
return
if self.errors == 0:
if not quiet:
print " No data were rejected"
self._log.info("No data were rejected")
else:
print " %d errors found into [%s] data" % (self.errors, name)
print " please read %s for errors log" % self.reject_log
print " and %s for data still to process" % self.reject_data
self._log.error("%d errors found into [%s] data",
self.errors, name)
self._log.error("please read %s for errors log", self.reject_log)
self._log.error("and %s for data still to process",
self.reject_data)
def log(self, messages, data = None):
""" log the messages into reject_log, and the data into reject_data