diff --git a/debian/changelog b/debian/changelog index aa0f80c..612b500 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +pgloader (2.2.5~dev.1-2) unstable; urgency=low + + * Now using proper python logging module + * New client_min_messages and log_min_messages options + + -- Dimitri Fontaine Fri, 07 Dec 2007 14:48:08 +0100 + pgloader (2.2.5~dev-1) unstable; urgency=low * Configuration now supports templates diff --git a/pgloader.1.txt b/pgloader.1.txt index 63f59aa..aade895 100644 --- a/pgloader.1.txt +++ b/pgloader.1.txt @@ -73,6 +73,10 @@ refers to a PostgreSQL table into which some data is to be loaded. makes pgloader very quiet about what it does: only output errors. +-l, --loglevel:: + + log level to use when reporting to the console, see +client_min_messages+. + -s, --summary:: makes pgloader print a 'nice' summary at the end of operations. @@ -264,6 +268,18 @@ modules. If the +-R+ or +--reformat_path+ command line option is used, it will have precedence over configuration file setting. +client_min_messages:: + + Minimum level of messages to print to the console while + running. Defined levels are +DEBUG+, +INFO+, +WARNING+, +ERROR+, + +CRITICAL+, from min to max. + + +log_min_messages:: + + Minimum level of messages to print out to the log file, which + defaults to +/tmp/pgloader.log+. See +client_min_messages+ for + available levels. 
+ == COMMON FORMAT CONFIGURATION PARAMETERS == You then can define any number of data section, and give them an arbitrary diff --git a/pgloader.py b/pgloader.py index 41ae7fb..95bba24 100644 --- a/pgloader.py +++ b/pgloader.py @@ -49,6 +49,10 @@ def parse_options(): default = False, help = "be quiet, only print out errors") + parser.add_option("-l", "--level", dest = "loglevel", + default = None, + help = "loglevel to use: ERROR, WARNING, INFO, DEBUG") + parser.add_option("-s", "--summary", action = "store_true", dest = "summary", default = False, @@ -142,7 +146,10 @@ def parse_options(): pgloader.options.REFORMAT_PATH = opts.reformat_path import logging - if opts.debug: + if opts.loglevel: + loglevel = pgloader.logger.level(opts.loglevel) + pgloader.options.CLIENT_MIN_MESSAGES = loglevel + elif opts.debug: pgloader.options.CLIENT_MIN_MESSAGES = logging.DEBUG elif opts.verbose: pgloader.options.CLIENT_MIN_MESSAGES = logging.INFO @@ -190,16 +197,16 @@ def parse_config(conffile): else: pgloader.options.LOG_MIN_MESSAGES = NOTICE + log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES, + pgloader.options.LOG_MIN_MESSAGES, + '/tmp/pgloader.log') + pgloader.logger.log = log - pgloader.log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES, - pgloader.options.LOG_MIN_MESSAGES, - '/tmp/pgloader.log') - - pgloader.log.info("Logger initialized") - pgloader.log.debug("PHOQUE") + log.info("Logger initialized") + log.debug("PHOQUE") if DRY_RUN: - pgloader.log.info("dry run mode, not connecting to database") + log.info("dry run mode, not connecting to database") return config, None try: @@ -259,7 +266,7 @@ def parse_config(conffile): pgloader.options.REFORMAT_PATH = rpath except Exception, error: - pgloader.log.error("Could not initialize PostgreSQL connection:") + log.error("Could not initialize PostgreSQL connection:") print error sys.exit(6) @@ -383,6 +390,7 @@ def load_data(): # now init db connection config, dbconn = parse_config(conffile) + from 
pgloader.logger import log from pgloader.tools import read_path, check_path from pgloader.options import VERBOSE import pgloader.options @@ -405,7 +413,7 @@ def load_data(): else: pgloader.options.REFORMAT_PATH = rpath - pgloader.log.info('Reformat path is %s', pgloader.options.REFORMAT_PATH) + log.info('Reformat path is %s', pgloader.options.REFORMAT_PATH) # load some pgloader package modules from pgloader.options import VERBOSE, DEBUG, QUIET, SUMMARY @@ -427,9 +435,9 @@ def load_data(): if s != 'pgsql': sections.append(s) - pgloader.log.info('Will consider following sections:') + log.info('Will consider following sections:') for line in myprint(sections): - pgloader.log.info(line) + log.info(line) # we count time passed from now on begin = time.time() @@ -444,24 +452,24 @@ def load_data(): loader.run() summary[s] = (loader.table,) + loader.summary() else: - pgloader.log.info("Skipping section %s, which is a template" \ + log.info("Skipping section %s, which is a template" \ % s) except PGLoader_Error, e: if e == '': - pgloader.log.error('[%s] Please correct previous errors' % s) + log.error('[%s] Please correct previous errors' % s) else: - pgloader.log.error('%s' % e) + log.error('%s' % e) if PEDANTIC: pgloader.print_stats() except UnicodeDecodeError, e: - pgloader.log.error("can't open '%s' with given input encoding '%s'" \ + log.error("can't open '%s' with given input encoding '%s'" \ % (loader.filename, loader.input_encoding)) except KeyboardInterrupt: - pgloader.log.warning("Aborting on user demand (Interrupt)") + log.warning("Aborting on user demand (Interrupt)") # total duration td = time.time() - begin @@ -472,10 +480,10 @@ def load_data(): retcode = print_summary(dbconn, sections, summary, td) print except PGLoader_Error, e: - pgloader.log.error("Can't print summary: %s" % e) + log.error("Can't print summary: %s" % e) if VACUUM and not DRY_RUN: - pgloader.log.info('vacuumdb... ') + log.info('vacuumdb... 
') try: dbconn.vacuum() except KeyboardInterrupt: diff --git a/pgloader/db.py b/pgloader/db.py index da01fa4..94f624d 100644 --- a/pgloader/db.py +++ b/pgloader/db.py @@ -12,12 +12,12 @@ from options import INPUT_ENCODING, PG_CLIENT_ENCODING, DATESTYLE from options import COPY_SEP, FIELD_SEP, CLOB_SEP, NULL, EMPTY_STRING from tools import PGLoader_Error +from logger import log try: import psycopg2.psycopg1 as psycopg except ImportError: - if VERBOSE: - print 'No psycopg2 module found, trying psycopg1' + log.info('No psycopg2 module found, trying psycopg1') import psycopg class db: @@ -27,6 +27,7 @@ class db: client_encoding = PG_CLIENT_ENCODING, copy_every = 10000, commit_every = 1000, connect = True): """ Connects to the specified database """ + self.log = log self.dbconn = None self.dsn = "host=%s port=%d user=%s dbname=%s password=%s" \ % (host, port, user, base, passwd) @@ -61,11 +62,9 @@ class db: def set_encoding(self): """ set connection encoding to self.client_encoding """ - - if DEBUG: - # debug only cause reconnecting happens on every - # configured section - print 'Setting client encoding to %s' % self.client_encoding + # debug only cause reconnecting happens on every + # configured section + self.log.debug('Setting client encoding to %s', self.client_encoding) sql = 'set session client_encoding to %s' cursor = self.dbconn.cursor() @@ -81,10 +80,9 @@ class db: if self.datestyle is None: return - if DEBUG: - # debug only cause reconnecting happens on every - # configured section - print 'Setting datestyle to %s' % self.datestyle + # debug only cause reconnecting happens on every + # configured section + self.log.debug('Setting datestyle to %s', self.datestyle) sql = 'set session datestyle to %s' cursor = self.dbconn.cursor() @@ -103,13 +101,11 @@ class db: self.first_commit_time = self.last_commit_time self.partial_coldef = None - if DEBUG: - if self.dbconn is not None: - print 'Debug: closing current connection' - self.dbconn.close() + if self.dbconn is 
not None: + self.log.debug('Debug: closing current connection') + self.dbconn.close() - if DEBUG: - print 'Debug: connecting to dns %s' % self.dsn + self.log.debug('Debug: connecting to dns %s', self.dsn) self.dbconn = psycopg.connect(self.dsn) self.set_encoding() @@ -120,15 +116,15 @@ class db: d = time.time() - self.first_commit_time u = self.commited_rows c = self.commits - print " %d updates in %d commits took %5.3f seconds" % (u, c, d) + self.log.info(" %d updates in %d commits took %5.3f seconds", u, c, d) if self.errors > 0: - print " %d database errors occured" % self.errors + self.log.error("%d database errors occured", self.errors) if self.copy and not VACUUM: - print " Please do VACUUM your database to recover space" + self.log.info("Please VACUUM your database to recover space") else: if u > 0: - print " No database error occured" + self.log.info("No database error occured") return def is_null(self, value): @@ -142,43 +138,36 @@ class db: def truncate(self, table): """ issue an SQL TRUNCATE TABLE on given table """ if DRY_RUN: - if VERBOSE: - print "Notice: won't truncate tables on dry-run mode" + self.log.info("Won't truncate tables on dry-run mode") return sql = "TRUNCATE TABLE %s;" % table - if VERBOSE: - print 'Notice: %s' % sql + self.log.info('%s' % sql) try: cursor = self.dbconn.cursor() cursor.execute(sql) self.dbconn.commit() except Exception, error: - if VERBOSE: - print error + self.log.error(error) raise PGLoader_Error, "Couldn't truncate table %s" % table def vacuum(self): """ issue an vacuumdb -fvz database """ if DRY_RUN: - if VERBOSE: - print - print 'Notice: no vacuum in dry-run mode' + self.log.info('no vacuum in dry-run mode') return -1 command = "/usr/bin/vacuumdb %s -fvz %s 2>&1" \ % (self.connect, self.base) - if VERBOSE: - print command + self.log.info(command) out = os.popen(command) for line in out.readlines(): - if DEBUG: - # don't print \n - print line[:-1] + # don't print \n + self.log.debug(line[:-1]) return out.close() @@ 
-213,8 +202,7 @@ class db: values.append(rowids[name]) sql += ";" - if DEBUG: - print 'Debug: %s' % sql + self.log.debug('%s' % sql) try: cursor = self.dbconn.cursor() @@ -229,8 +217,8 @@ class db: for i,v in rowids.items(): if str_rowids != "": str_rowids += ", " str_rowids += "%s:%s" % (i, v) - print '%s %s %s %6do' \ - % (table, str_rowids, blob_cname, len(data)) + self.log.debug('%s %s %s %6do', + table, str_rowids, blob_cname, len(data)) if self.running_commands == self.commit_every: now = time.time() @@ -240,9 +228,8 @@ class db: duration = now - self.last_commit_time self.last_commit_time = now - if not QUIET: - print "-- commit %d: %d updates in %5.3fs --" \ - % (self.commits, self.running_commands, duration) + self.log.info("commit %d: %d updates in %5.3fs", + self.commits, self.running_commands, duration) self.commited_rows += self.running_commands self.running_commands = 1 @@ -256,9 +243,9 @@ class db: self.dbconn.commit() # don't use self.commited_rows here, it's only updated # after a commit - print "Error: update %d rejected: commiting (read log file %s)" \ - % (self.commits * self.commit_every + self.running_commands, - reject.reject_log) + self.log.error("update %d rejected: commiting (read log file %s)", + self.commits * self.commit_every + self.running_commands, + reject.reject_log) reject.log(str(e), input_line) self.errors += 1 @@ -273,10 +260,7 @@ class db: suffix='.pgloader', dir='/tmp') os.write(f, self.buffer.getvalue()) os.close(f) - - # systematicaly write about this - if not QUIET: - print " -- COPY data buffer saved in %s --" % n + self.log.warning("COPY data buffer saved in %s" % n) return n def copy_from(self, table, columnlist, columns, input_line, @@ -296,8 +280,7 @@ class db: # time to copy data to PostgreSQL table if self.buffer is None: - if VERBOSE: - print "Error: no data to COPY" + self.log.warning("no data to COPY") return False if DEBUG: @@ -315,9 +298,8 @@ class db: duration = now - self.last_commit_time 
self.last_commit_time = now - if not QUIET: - print " - COPY %d: %d rows copied in %5.3fs --" \ - % (self.commits, self.running_commands, duration) + self.log.info("COPY %d: %d rows copied in %5.3fs", + self.commits, self.running_commands, duration) # prepare next run self.buffer.close() @@ -329,12 +311,11 @@ class db: # rollback current transaction self.dbconn.rollback() - if VERBOSE: - print 'Notice: COPY error, trying to find on which line' - if not DEBUG: - # in DEBUG mode, copy buffer has already been saved - # to file - self.save_copy_buffer(tablename) + self.log.warning('COPY error, trying to find on which line') + if not DEBUG: + # in DEBUG mode, copy buffer has already been saved + # to file + self.save_copy_buffer(tablename) # copy recovery process now = time.time() @@ -347,9 +328,8 @@ class db: self.commited_rows += ok self.errors += ko - if VERBOSE: - print 'Notice: COPY error recovery done (%d/%d) in %5.3fs'\ - % (ko, ok, duration) + self.log.warning('COPY error recovery done (%d/%d) in %5.3fs', + ko, ok, duration) # commit this transaction self.dbconn.commit() @@ -379,8 +359,7 @@ class db: if count == 1: reject.log('COPY error on this line', buff.getvalue()) buff.close() - if DEBUG: - print '--- Notice: found one more line in error' + self.log.debug('found one more line in error') # returns commits, ok, ko return 0, 0, 1 @@ -407,9 +386,8 @@ class db: # we don't need no more orgininal buff buff.close() - if DEBUG: - print '--- Trying to find errors, dividing %d lines in %d and %d' \ - % (count, m, n-m) + self.log.debug('Trying to find errors, dividing %d lines in %d and %d', + count, m, n-m) # now we have two buffers to copy to PostgreSQL database cursor = self.dbconn.cursor() @@ -420,9 +398,8 @@ class db: self.dbconn.commit() x.close() - if DEBUG: - print " -- COPY ERROR handling progress: %d rows copied"\ - % (xcount) + self.log.debug("COPY ERROR handling progress: %d rows copied", + xcount) x.close() commits += 1 @@ -435,9 +412,8 @@ class db: if 
xcount == 1: ko += 1 reject.log('COPY error: %s' % error, x.getvalue()) - if DEBUG: - print '--- Notice: found one more line in error' - print x.getvalue() + self.log.debug('Notice: found one more line in error') + self.log.debug(x.getvalue()) else: _c, _o, _k = self.copy_from_buff(table, x, xcount, reject) @@ -464,8 +440,7 @@ class db: elif self.is_empty(c): # empty string has been read - if DEBUG: - print "empty string read: '%s'" % c + self.log.debug("empty string read: '%s'" % c) self.buffer.write('') else: diff --git a/pgloader/lo.py b/pgloader/lo.py index 7422278..5ea140d 100644 --- a/pgloader/lo.py +++ b/pgloader/lo.py @@ -38,8 +38,9 @@ class ifx_lo: class ifx_clob(ifx_lo): """ Informix Text Large Object file """ - def __init__(self, filename, input_encoding): + def __init__(self, log, filename, input_encoding): """ init a clob object """ + self.log = log self.file = None self.filename = filename @@ -51,14 +52,14 @@ class ifx_clob(ifx_lo): else: self.file = open(self.filename, 'r') - if VERBOSE: - print "Notice: Opening informix clob file:", self.filename + self.log.info("Opening informix clob file: %s", self.filename) class ifx_blob(ifx_lo): """ Informix Binary Large Object file """ - def __init__(self, filename, field_sep): + def __init__(self, log, filename, field_sep): """ init a clob object """ + self.log = log self.file = None self.filename = filename self.field_sep = field_sep # used by bytea_escape @@ -69,8 +70,7 @@ class ifx_blob(ifx_lo): if self.file is None: self.file = open(self.filename, 'rb') - if VERBOSE: - print "Notice: Opening informix blob file:", self.filename + self.log.info("Opening informix blob file: %s", self.filename) def bytea_escape(self, bitstring): """ escape chars from bitstring for PostgreSQL bytea input diff --git a/pgloader/logger.py b/pgloader/logger.py index b7207fc..6f5eea7 100644 --- a/pgloader/logger.py +++ b/pgloader/logger.py @@ -22,12 +22,16 @@ def init(client_min_messages = logging.INFO, console = 
logging.StreamHandler() console.setLevel(client_min_messages) - formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') + formatter = logging.Formatter('%(name)-12s %(levelname)-8s %(message)s') console.setFormatter(formatter) logging.getLogger('').addHandler(console) return logging.getLogger('pgloader') +def getLogger(name): + """ return a new logger instance named name, properly configured """ + return logging.getLogger(name) + def level(name): """ return a logging level from user string """ @@ -38,13 +42,13 @@ def level(name): return logging.INFO elif name.upper() == 'WARNING': - return logging.INFO + return logging.WARNING elif name.upper() == 'ERROR': - return logging.INFO + return logging.ERROR elif name.upper() == 'CRITICAL': - return logging.INFO + return logging.CRITICAL else: return logging.NOTSET diff --git a/pgloader/pgloader.py b/pgloader/pgloader.py index 465da89..f498ddf 100644 --- a/pgloader/pgloader.py +++ b/pgloader/pgloader.py @@ -8,6 +8,7 @@ import os, sys, os.path, time, codecs from cStringIO import StringIO +from logger import log, getLogger from tools import PGLoader_Error, Reject, parse_config_string from db import db from lo import ifx_clob, ifx_blob @@ -30,8 +31,9 @@ class PGLoader: def __init__(self, name, config, db): """ Init with a configuration section """ # Some settings - self.name = name - self.db = db + self.name = name + self.db = db + self.log = getLogger(name) self.template = None self.use_template = None @@ -51,13 +53,10 @@ class PGLoader: if config.has_option(name, 'template'): self.template = True - - # just skip it here - if VERBOSE: - print "[%s] is a template" % self.name + self.log.info("[%s] is a template", self.name) - if not self.template and VERBOSE: - print "[%s] parse configuration" % self.name + if not self.template: + self.log.info("[%s] parse configuration", self.name) if not self.template: # check if the section wants to use a template @@ -71,43 +70,43 @@ class PGLoader: raise PGLoader_Error, 
m # first load template configuration - if VERBOSE: - print "Reading configuration from template section [%s]" \ - % self.template + self.log.info("Reading configuration from template " +\ + "section [%s]", self.template) + + self.real_log = self.log + self.log = getLogger(self.template) try: self.__read_conf__(self.template, config, db, want_template = True) except PGLoader_Error, e: - print - print e + self.log.error(e) m = "%s.use_template does not refer to a template section"\ % name raise PGLoader_Error, m # reinit self.template now its relative config section is read self.template = None + self.log = self.real_log # now load specific configuration - if VERBOSE: - print "Reading configuration from section [%s]" % name + self.log.info("Reading configuration from section [%s]", name) self.__read_conf__(name, config, db) # force reinit of self.reader, which depends on template and # specific options if 'reader' in self.__dict__: - self.reader.__init__(self.db, self.reject, + self.reader.__init__(self.log, self.db, self.reject, self.filename, self.input_encoding, self.table, self.columns) # Now reset database connection if not DRY_RUN: + self.db.log = self.log self.db.reset() - if DEBUG: - print '%s init done' % name - print + self.log.debug('%s init done' % name) def __read_conf__(self, name, config, db, want_template = False): """ init self from config section name """ @@ -132,11 +131,10 @@ class PGLoader: # reject logging if not self.template: - self.reject = Reject(self.reject_log, self.reject_data) + self.reject = Reject(self.log, self.reject_log, self.reject_data) - if VERBOSE: - print 'Notice: reject log in %s' % self.reject.reject_log - print 'Notice: rejected data in %s' % self.reject.reject_data + self.log.info('reject log in %s', self.reject.reject_log) + self.log.info('rejected data in %s', self.reject.reject_data) else: # needed to instanciate self.reader while in template section @@ -147,8 +145,8 @@ class PGLoader: self.db.client_encoding = 
parse_config_string( config.get(name, 'client_encoding')) - if DEBUG and not DRY_RUN: - print "client_encoding: '%s'" % self.db.client_encoding + if not DRY_RUN: + self.log.debug("client_encoding: '%s'", self.db.client_encoding) # optionnal local option input_encoding self.input_encoding = INPUT_ENCODING @@ -156,28 +154,25 @@ class PGLoader: self.input_encoding = parse_config_string( config.get(name, 'input_encoding')) - if DEBUG: - print "input_encoding: '%s'" % self.input_encoding + self.log.debug("input_encoding: '%s'", self.input_encoding) # optionnal local option datestyle if not DRY_RUN and config.has_option(name, 'datestyle'): self.db.datestyle = parse_config_string( config.get(name, 'datestyle')) - if DEBUG and not DRY_RUN: - print "datestyle: '%s'" % self.db.datestyle - + if not DRY_RUN: + self.log.debug("datestyle: '%s'", self.db.datestyle) ## # data filename for opt in ('table', 'filename'): if config.has_option(name, opt): - if DEBUG: - print '%s.%s: %s' % (name, opt, config.get(name, opt)) + self.log.debug('%s.%s: %s', name, opt, config.get(name, opt)) self.__dict__[opt] = config.get(name, opt) else: - if not self.template and opt not in self.__dict__: - print 'Error: please configure %s.%s' % (name, opt) + if not self.template and not self.__dict__[opt]: + self.log.error('Error: please configure %s.%s', name, opt) self.config_errors += 1 else: # Reading Configuration Template section @@ -198,14 +193,13 @@ class PGLoader: config.get(name, 'blob_columns'), btype = True) - if DEBUG: - print 'index', self.index - print 'columns', self.columns - print 'blob_columns', self.blob_cols + self.log.debug('index %s', str(self.index)) + self.log.debug('columns %s', str(self.columns)) + self.log.debug('blob_columns %s', str(self.blob_cols)) if self.columns is None: if not self.template: - print 'Error: %s has no columns defined' % name + self.log.error('%s has no columns defined', name) self.config_errors += 1 else: @@ -231,8 +225,7 @@ class PGLoader: else: 
self.udcs = None - if DEBUG: - print 'udcs:', self.udcs + self.log.debug('udcs: %s', str(self.udcs)) # better check there's no user defined column overriding file # columns @@ -245,9 +238,9 @@ class PGLoader: if errs: for c in errs: - print 'Error: %s is configured both as a ' % c +\ - '%s.columns entry and as a user-defined column' \ - % name + self.log.error('%s is configured both as a %s.columns ' +\ + 'entry and as a user-defined column', + c, name) self.config_errors += 1 @@ -271,9 +264,9 @@ class PGLoader: # just add all the given columns in this pass self.copy_columns.append(x) - if DEBUG: - print 'config copy_columns', self.config_copy_columns - print 'copy_columns', self.copy_columns + self.log.debug('config copy_columns %s', + str(self.config_copy_columns)) + self.log.debug('copy_columns %s', str(self.copy_columns)) if not self.template: # check for errors time! @@ -283,20 +276,20 @@ class PGLoader: for x in self.copy_columns: if x not in namelist: - print 'Error: "%s" not in %s column list, ' \ - % (x, name) +\ - 'including user defined columns' + self.log.error('"%s" not in %s column list, ' + \ + 'including user defined columns', + x, name) self.config_errors += 1 if len(self.copy_columns) != len(self.config_copy_columns): - print 'Error: %s.copy_columns refers to ' % name +\ - 'unconfigured columns ' + self.log.error('%s.copy_columns refers to ' +\ + 'unconfigured columns', name) self.config_errors += 1 elif self.udcs: - print 'Error: section %s does not define ' % name +\ - 'copy_columns but uses user-defined columns' + self.log.error('section %s does not define copy_columns ' +\ + 'but uses user-defined columns', name) self.config_errors += 1 @@ -306,10 +299,9 @@ class PGLoader: if 'copy_columns' in self.__dict__ and self.copy_columns: self.columnlist = self.copy_columns - if DEBUG: - print 'udcs:', self.udcs - if self.udcs and 'copy_columns' in self.__dict__: - print 'copy_columns', self.copy_columns + self.log.debug('udcs: %s', str(self.udcs)) + 
if self.udcs and 'copy_columns' in self.__dict__: + self.log.debug('copy_columns %s', str(self.copy_columns)) ## # We have for example columns = col1:2, col2:1 @@ -336,8 +328,8 @@ class PGLoader: if config.has_option(name, 'only_cols'): if self.udcs: - print 'Error: section %s defines both ' % name +\ - 'user-defined columns and only_cols' + self.log.error('section %s defines both ' +\ + 'user-defined columns and only_cols', name) self.config_errors += 1 @@ -365,8 +357,8 @@ class PGLoader: self.columnlist = [self.columns[x-1][0] for x in expanded] except Exception, e: - print 'Error: section %s, only_cols: ' % name +\ - 'configured range is invalid' + self.log.error('section %s, only_cols: ' +\ + 'configured range is invalid', name) raise PGLoader_Error, e if self.only_cols is None: @@ -374,9 +366,8 @@ class PGLoader: # default case, no user-defined cols, no restriction self.columnlist = [n for (n, pos) in self.columns] - if DEBUG: - print "only_cols", self.only_cols - print "columnlist", self.columnlist + self.log.debug("only_cols %s", str(self.only_cols)) + self.log.debug("columnlist %s", str(self.columnlist)) ## # This option is textreader specific, but being lazy and @@ -389,10 +380,9 @@ class PGLoader: if NEWLINE_ESCAPES is not None: # this parameter is globally set, will ignore local # definition - if not QUIET: - print "Warning: ignoring %s newline_escapes option" % name - print " option is set to '%s' globally" \ - % NEWLINE_ESCAPES + self.log.warning("ignoring %s newline_escapes option" +\ + "option is set to '%s' globally", + name, NEWLINE_ESCAPES) else: self._parse_fields('newline_escapes', config.get(name, 'newline_escapes'), @@ -408,26 +398,25 @@ class PGLoader: if self.format.lower() == 'csv': from csvreader import CSVReader - self.reader = CSVReader(self.db, self.reject, + self.reader = CSVReader(self.log, self.db, self.reject, self.filename, self.input_encoding, self.table, self.columns) elif self.format.lower() == 'text': from textreader import 
TextReader - self.reader = TextReader(self.db, self.reject, + self.reader = TextReader(self.log, self.db, self.reject, self.filename, self.input_encoding, self.table, self.columns, self.newline_escapes) if 'reader' in self.__dict__: - if DEBUG: - print 'reader.readconfig()' + self.log.debug('reader.readconfig()') self.reader.readconfig(name, config) if not self.template and \ ('format' not in self.__dict__ or self.format is None): # error only when not loading the Template part - print 'Error: %s: format parameter needed' % name + self.log.error('%s: format parameter needed', name) raise PGLoader_Error ## @@ -440,8 +429,7 @@ class PGLoader: if 'c_reformat' not in self.__dict__: self.c_reformat = self.reformat = None - if DEBUG: - print 'reformat', self.c_reformat + self.log.debug('reformat %s', str(self.c_reformat)) # check the configure reformating is available if not self.template and self.c_reformat: @@ -450,8 +438,8 @@ class PGLoader: for r_colname, r_module, r_function in self.c_reformat: if r_colname not in self.columnlist: - print 'Error: %s.reformat refers to unknown column %s' \ - % ( name, r_colname ) + self.log.error('%s.reformat refers to unknown column %s', name, r_colname) self.config_errors += 1 # load the given module name and function @@ -460,17 +448,16 @@ class PGLoader: fp, pathname, description = \ imp.find_module(r_module, REFORMAT_PATH) - if DEBUG: - print 'Found %s at %s' % (r_module, pathname) + self.log.debug('Found %s at %s', r_module, str(pathname)) module = imp.load_module(r_module, fp, pathname, description) except ImportError, e: - print 'Error: %s failed to import reformat module "%s"' \ - % (name, r_module) - print ' from %s' % str(REFORMAT_PATH) - print ' %s' % e + self.log.error('%s failed to import reformat module "%s"', name, r_module) + self.log.error('from %s', str(REFORMAT_PATH)) + self.log.error(e) self.config_errors += 1 @@ -479,12 +466,12 @@ class PGLoader: self.reformat.append((r_colname, module.__dict__[r_function]))
else: - print 'Error: reformat module %s has no %s function'%\ - (r_module, r_function) + self.log.error('reformat module %s has no %s function', + r_module, r_function) self.config_errors += 1 - if DEBUG and not self.template: - print 'reformat', self.reformat + if not self.template: + self.log.debug('reformat %s', str(self.reformat)) ## # How can we mix those columns definitions ? @@ -530,7 +517,7 @@ class PGLoader: 'Error: unable to parse given key %s' % FROM_ID) raise PGLoader_Error - print 'Notice: composite key found, -I evaluated to %s' % FROM_ID + self.log.info('composite key found, -I evaluated to %s', FROM_ID) if self.config_errors > 0: mesg = 'Configuration errors for section %s' % self.name @@ -620,22 +607,18 @@ class PGLoader: """ depending on configuration, do given job """ # Announce the beginning of the work - if not QUIET: - print - print "[%s]" % self.name + self.log.info("[%s]" % self.name) if TRUNCATE and not DRY_RUN: self.db.truncate(self.table) if self.columns is not None: - if not QUIET: - print "Notice: COPY csv data" + self.log.info("COPY csv data") self.data_import() elif self.blob_cols is not None: # elif: COPY process also blob data - if not QUIET: - print "Notice: UPDATE blob data" + self.log.info("UPDATE blob data") # then show up some stats self.print_stats() @@ -666,10 +649,9 @@ class PGLoader: else: data.append(columns[cpos-1]) - if DEBUG: - print 'reformat' - print 'columns', columns - print 'data ', data + self.log.debug('reformat') + self.log.debug('columns %s', str(columns)) + self.log.debug('data %s', str(data)) # we want next steps to take reformated data as input columns = data @@ -683,23 +665,20 @@ class PGLoader: else: data.append(dudcs[c]) - if DEBUG: - print 'udcs' - print 'columns', columns - print 'data ', data - + self.log.debug('udcs') + self.log.debug('columns %s', str(columns)) + self.log.debug('data %s', str(data)) + columns = data else: if self.col_mapping: - if DEBUG: - print 'col_mapping', self.col_mapping + 
self.log.debug('col_mapping %s', str(self.col_mapping)) data = [columns[i-1] for i in self.col_mapping] - if DEBUG: - print 'columns', columns - print 'data ', data + self.log.debug('columns %s', str(columns)) + self.log.debug('data %s', str(data)) columns = data @@ -712,11 +691,14 @@ class PGLoader: and not self.only_cols: data = columns - if DRY_RUN or DEBUG: - print '<', line - print ' ', self.columnlist - print '>', data - print + if DRY_RUN: + self.log.info('< %s', line) + self.log.info(' %s', str(self.columnlist)) + self.log.info('> %s', str(data)) + else: + self.log.debug('< %s', line) + self.log.debug(' %s', str(self.columnlist)) + self.log.debug('> %s', str(data)) if not DRY_RUN: self.db.copy_from(self.table, self.columnlist, @@ -742,10 +724,8 @@ class PGLoader: columns, rowids = self.read_blob(line, columns) # and insert it into database - if DRY_RUN or DEBUG: - print line - print columns - print + self.log.debug(line) + self.log.debug(str(line)) if not DRY_RUN: self.db.insert_blob(self.table, self.index, @@ -810,12 +790,14 @@ class PGLoader: raise PGLoader_Error, msg elif btype == 'ifx_blob': - self.blobs[abs_blobname] = ifx_blob(abs_blobname, + self.blobs[abs_blobname] = ifx_blob(self.log, + abs_blobname, self.field_sep) elif btype == 'ifx_clob': self.blobs[abs_blobname] = \ - ifx_clob(abs_blobname, + ifx_clob(self.log, + abs_blobname, self.input_encoding) blob = self.blobs[abs_blobname] @@ -828,11 +810,9 @@ class PGLoader: data = blob.extract(rowids, c, begin, length) columns[blob_col] = data - if DEBUG: - print 'Debug: read blob data' - print rowids - print data - print + self.log.debug('read blob data') + self.log.debug(str(rowids)) + self.log.debug(str(data)) # no we have read all defined blob data, returns new columns return columns, rowids diff --git a/pgloader/reader.py b/pgloader/reader.py index 3f4d255..1c70d2f 100644 --- a/pgloader/reader.py +++ b/pgloader/reader.py @@ -20,11 +20,12 @@ class DataReader: - multi-line support is explicit (via 
""" - def __init__(self, db, reject, filename, input_encoding, table, columns): + def __init__(self, log, db, reject, + filename, input_encoding, table, columns): """ init internal variables """ - if DEBUG: - print 'reader __init__', filename, table, columns - + log.debug('reader __init__ %s %s %s', filename, table, columns) + + self.log = log self.db = db self.filename = filename self.input_encoding = input_encoding @@ -52,13 +53,15 @@ class DataReader: if config.has_option(name, 'null'): self.db.null = parse_config_string(config.get(name, 'null')) else: - self.db.null = NULL + if 'null' not in self.__dict__: + self.db.null = NULL if config.has_option(name, 'empty_string'): self.db.empty_string = parse_config_string( config.get(name, 'empty_string')) else: - self.db.empty_string = EMPTY_STRING + if 'empty_string' not in self.__dict__: + self.db.empty_string = EMPTY_STRING # optionnal field separator, could be defined from template if 'field_sep' not in self.__dict__: @@ -71,11 +74,11 @@ class DataReader: if self.db.copy_sep is None: self.db.copy_sep = self.field_sep - if DEBUG and not DRY_RUN: - print "reader.readconfig null: '%s'" % self.db.null - print "reader.readconfig empty_string: '%s'" \ - % self.db.empty_string - print "reader.readconfig field_sep: '%s'" % self.field_sep + if not DRY_RUN: + self.log.debug("reader.readconfig null: '%s'" % self.db.null) + self.log.debug("reader.readconfig empty_string: '%s'", + self.db.empty_string) + self.log.debug("reader.readconfig field_sep: '%s'", self.field_sep) def readlines(self): """ read data from configured file, and generate (yields) for diff --git a/pgloader/textreader.py b/pgloader/textreader.py index bc585ad..830ecf8 100644 --- a/pgloader/textreader.py +++ b/pgloader/textreader.py @@ -30,10 +30,10 @@ class TextReader(DataReader): - ... 
""" - def __init__(self, db, reject, filename, input_encoding, + def __init__(self, log, db, reject, filename, input_encoding, table, columns, newline_escapes = None): """ init textreader with a newline_escapes parameter """ - DataReader.__init__(self, db, reject, + DataReader.__init__(self, log, db, reject, filename, input_encoding, table, columns) if 'newline_escapes' not in self.__dict__: @@ -61,9 +61,8 @@ class TextReader(DataReader): if config.has_option(name, 'trailing_sep'): self.trailing_sep = config.get(name, 'trailing_sep') == 'True' - if DEBUG: - print 'reader.readconfig: field_count', self.field_count - print 'reader.readconfig: trailing_sep', self.trailing_sep + self.log.debug('reader.readconfig: field_count %s', self.field_count) + self.log.debug('reader.readconfig: trailing_sep %s', self.trailing_sep) def readlines(self): """ read data from configured file, and generate (yields) for @@ -79,8 +78,7 @@ class TextReader(DataReader): ## # if neither -I nor -F was used, we can state that begin = 0 if FROM_ID is None and FROM_COUNT == 0: - if VERBOSE: - print 'Notice: beginning on first line' + self.log.info('beginning on first line') begin_linenb = 1 if self.input_encoding is not None: diff --git a/pgloader/tools.py b/pgloader/tools.py index fc7140e..8f18b41 100644 --- a/pgloader/tools.py +++ b/pgloader/tools.py @@ -18,8 +18,9 @@ class Reject: reject_data contains input lines which this tool couldn't manage """ - def __init__(self, reject_log, reject_data): + def __init__(self, log, reject_log, reject_data): """ Constructor, with file names """ + self._log = log self.reject_log = reject_log self.reject_data = reject_data @@ -32,12 +33,13 @@ class Reject: return if self.errors == 0: - if not quiet: - print " No data were rejected" + self._log.info("No data were rejected") else: - print " %d errors found into [%s] data" % (self.errors, name) - print " please read %s for errors log" % self.reject_log - print " and %s for data still to process" % 
self.reject_data + self._log.error("%d errors found into [%s] data", + self.errors, name) + self._log.error("please read %s for errors log", self.reject_log) + self._log.error("and %s for data still to process", + self.reject_data) def log(self, messages, data = None): """ log the messages into reject_log, and the data into reject_data