From a530dedd66cfa02e4bc5b98654e926b925fe77c7 Mon Sep 17 00:00:00 2001 From: dim Date: Fri, 7 Dec 2007 21:13:35 +0000 Subject: [PATCH] * Rework DatabaseError and consider dichotomy in this case too. * New options lc_messages and log_file --- debian/changelog | 6 +++-- examples/pgloader.conf | 2 ++ pgloader.1.txt | 32 +++++++++++++++++++----- pgloader.py | 57 ++++++++++++++++++++++++++++++++++++------ pgloader/db.py | 29 +++++++++++++++------ pgloader/logger.py | 14 +++++++---- pgloader/options.py | 3 +++ pgloader/tools.py | 31 +++++++++++++++++------ 8 files changed, 137 insertions(+), 37 deletions(-) diff --git a/debian/changelog b/debian/changelog index 612b500..21d2208 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,9 @@ -pgloader (2.2.5~dev.1-2) unstable; urgency=low +pgloader (2.2.5-1) unstable; urgency=low * Now using proper python logging module - * New client_min_messages and log_min_messages options + * New client_min_messages, log_min_messages, log_file and lc_messages options + * Better reporting of DatabaseError (pkey violation, e.g.) 
+ * Have logging of error data work again in all cases (DatabaseError) -- Dimitri Fontaine Fri, 07 Dec 2007 14:48:08 +0100 diff --git a/examples/pgloader.conf b/examples/pgloader.conf index a4e936c..c8d50f5 100644 --- a/examples/pgloader.conf +++ b/examples/pgloader.conf @@ -5,8 +5,10 @@ base = pgloader user = dim pass = None +log_file = /tmp/pgloader.log log_min_messages = DEBUG client_min_messages = WARNING +lc_messages = C ;client_encoding = 'utf-8' client_encoding = 'latin1' diff --git a/pgloader.1.txt b/pgloader.1.txt index aade895..8b6047a 100644 --- a/pgloader.1.txt +++ b/pgloader.1.txt @@ -8,6 +8,7 @@ pgloader - Import CSV data and Large Object to PostgreSQL pgloader [--version] [-c configuration file] [-p pedantic] [-d debug] [-v verbose] [-q quiet] [-s summary] + [-l loglevel] [-L logfile] [-n dryrun] [-Cn count] [-Fn from] [-In from id] [-E input files encoding] [-R reformat:path] [Section1 Section2] @@ -77,6 +78,10 @@ refers to a PostgreSQL table into which some data is to be loaded. log level to use when reporting to the console, see +client_min_messages+. +-L, --logfile:: + + file where to log messages, see +log_min_messages+. + -s, --summary:: makes pgloader print a 'nice' summary at the end of operations. @@ -280,15 +285,30 @@ log_min_messages:: defaults to +/tmp/pgloader.log+. See +client_min_messages+ for available levels. +log_file:: + + Relative or absolute path to the +log_file+ where to log messages + of at least +log_min_messages+ level. The 'dirname' of + the given +log_file+, if it doesn't exist, will be created by + +pgloader+. If any error prevents +pgloader+ from using the + +log_file+, it will default to using +/tmp/pgloader.log+ and say + so. + +lc_messages:: + + The PostgreSQL session will use this +LC_MESSAGES+ setting if + given; it defaults to the server configuration by not issuing anything + with respect to this setting when not set.
+ == COMMON FORMAT CONFIGURATION PARAMETERS == -You then can define any number of data section, and give them an arbitrary -name. Some options are required, some are actually optionnals, in which case it -is said so thereafter. +You then can define any number of data section, and give them an +arbitrary name. Some options are required, some are actually optional, +in which case it is said so thereafter. -First, we'll go through common parameters, applicable whichever format of data -you're refering to. Then text-format only parameters will be presented, -followed by csv-only parameters. +First, we'll go through common parameters, applicable whichever format +of data you're referring to. Then text-format only parameters will be +presented, followed by csv-only parameters. template:: + diff --git a/pgloader.py b/pgloader.py index 95bba24..4268adb 100644 --- a/pgloader.py +++ b/pgloader.py @@ -11,6 +11,7 @@ from cStringIO import StringIO import pgloader.options import pgloader.tools import pgloader.logger +from pgloader.tools import PGLoader_Error def parse_options(): """ Parse given options """ @@ -53,6 +54,10 @@ def parse_options(): default = None, help = "loglevel to use: ERROR, WARNING, INFO, DEBUG") + parser.add_option("-L", "--logfile", dest = "logfile", + default = "/tmp/pgloader.log", + help = "log file, defauts to /tmp/pgloader.log") + parser.add_option("-s", "--summary", action = "store_true", dest = "summary", default = False, @@ -99,7 +104,7 @@ def parse_options(): print "PGLoader version %s" % pgloader.options.PGLOADER_VERSION sys.exit(0) - # check existence en read ability of config file + # check existence and read ability of config file if not os.path.exists(opts.config): print >>sys.stderr, \ "Error: Configuration file %s does not exists" % opts.config @@ -145,6 +150,8 @@ def parse_options(): if opts.reformat_path: pgloader.options.REFORMAT_PATH = opts.reformat_path + pgloader.options.LOG_FILE = opts.logfile + import logging if opts.loglevel: loglevel 
= pgloader.logger.level(opts.loglevel) @@ -180,7 +187,8 @@ def parse_config(conffile): # this has to be done after command line parsing from pgloader.options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC from pgloader.options import NULL, EMPTY_STRING - from pgloader.options import CLIENT_MIN_MESSAGES + from pgloader.options import CLIENT_MIN_MESSAGES, LOG_FILE + from pgloader.tools import check_dirname # first read the logging configuration if not CLIENT_MIN_MESSAGES: @@ -196,14 +204,42 @@ def parse_config(conffile): pgloader.options.LOG_MIN_MESSAGES = pgloader.logger.level(lmm) else: pgloader.options.LOG_MIN_MESSAGES = NOTICE - - log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES, - pgloader.options.LOG_MIN_MESSAGES, - '/tmp/pgloader.log') + + if config.has_option(section, 'log_file'): + # don't overload the command line -L option if given + if not pgloader.options.LOG_FILE: + pgloader.options.LOG_FILE = config.get(section, 'log_file') + + if pgloader.options.LOG_FILE: + ok, logdir_mesg = check_dirname(pgloader.options.LOG_FILE) + if not ok: + # force default setting + pgloader.options.LOG_FILE = pgloader.options.DEFAULT_LOG_FILE + + try: + log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES, + pgloader.options.LOG_MIN_MESSAGES, + pgloader.options.LOG_FILE) + except PGLoader_Error, e: + try: + log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES, + pgloader.options.LOG_MIN_MESSAGES, + pgloader.options.DEFAULT_LOG_FILE) + + log.warning(e) + log.warning("Using default logfile %s", + pgloader.options.DEFAULT_LOG_FILE) + except PGLoader_Error, e: + print e + sys.exit(8) + pgloader.logger.log = log log.info("Logger initialized") - log.debug("PHOQUE") + if logdir_mesg: + log.error(logdir_mesg) + log.error("Default logfile %s has been used instead", + pgloader.options.LOG_FILE) if DRY_RUN: log.info("dry run mode, not connecting to database") @@ -224,6 +260,11 @@ def parse_config(conffile): config.get(section, 'client_encoding')) 
dbconn.client_encoding = client_encoding + if config.has_option(section, 'lc_messages'): + lc_messages = pgloader.tools.parse_config_string( + config.get(section, 'lc_messages')) + dbconn.lc_messages = lc_messages + if config.has_option(section, 'input_encoding'): input_encoding = pgloader.tools.parse_config_string( config.get(section, 'input_encoding')) @@ -311,7 +352,6 @@ def print_summary(dbconn, sections, summary, td): from pgloader.options import VERBOSE, DEBUG, QUIET, SUMMARY from pgloader.options import DRY_RUN, PEDANTIC, VACUUM from pgloader.pgloader import PGLoader - from pgloader.tools import PGLoader_Error retcode = 0 @@ -393,6 +433,7 @@ def load_data(): from pgloader.logger import log from pgloader.tools import read_path, check_path from pgloader.options import VERBOSE + import pgloader.options if pgloader.options.REFORMAT_PATH: rpath = read_path(pgloader.options.REFORMAT_PATH, check = False) diff --git a/pgloader/db.py b/pgloader/db.py index c7111ae..ae9ff61 100644 --- a/pgloader/db.py +++ b/pgloader/db.py @@ -43,6 +43,7 @@ class db: self.datestyle = DATESTYLE self.null = NULL self.empty_string = EMPTY_STRING + self.lc_messages = None if connect: self.reset() @@ -89,6 +90,23 @@ class db: cursor.execute(sql, [self.datestyle]) cursor.close() + def set_lc_messages(self): + """ set lc_messages to self.lc_messages """ + if self.lc_messages is None: + return + + # debug only cause reconnecting happens on every + # configured section + self.log.debug('Setting lc_messages to %s', self.lc_messages) + + sql = 'set session lc_messages to %s' + cursor = self.dbconn.cursor() + try: + cursor.execute(sql, [self.lc_messages]) + except psycopg.ProgrammingError, e: + raise PGLoader_Error, e + cursor.close() + def reset(self): """ reset internal counters and open a new database connection """ self.buffer = None @@ -110,6 +128,7 @@ class db: self.dbconn = psycopg.connect(self.dsn) self.set_encoding() self.set_datestyle() + self.set_lc_messages() def print_stats(self): """ 
output some stats about recent activity """ @@ -307,7 +326,8 @@ class db: self.commited_rows += self.running_commands self.running_commands = 0 - except psycopg.ProgrammingError, error: + except (psycopg.ProgrammingError, + psycopg.DatabaseError), error: # rollback current transaction self.dbconn.rollback() @@ -338,13 +358,6 @@ class db: self.buffer = None self.running_commands = 0 - except psycopg.DatabaseError, error: - # non recoverable error - mesg = "\n".join(["Please check PostgreSQL logs", - "HINT: double check your client_encoding,"+ - " datestyle and copy_delimiter settings"]) - raise PGLoader_Error, mesg - # prepare next run if self.buffer is None: self.buffer = StringIO() diff --git a/pgloader/logger.py b/pgloader/logger.py index 6f5eea7..535e027 100644 --- a/pgloader/logger.py +++ b/pgloader/logger.py @@ -5,6 +5,7 @@ # standard error levels are used for code and configuration error messages # data error logging is managed by tools.Reject class +from tools import PGLoader_Error import logging def init(client_min_messages = logging.INFO, @@ -13,11 +14,14 @@ def init(client_min_messages = logging.INFO, fmt = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' - logging.basicConfig(level = log_min_messages, - format = fmt, - datefmt = '%d-%m-%Y %H:%M:%S', - filename = filename, - filemode = 'w') + try: + logging.basicConfig(level = log_min_messages, + format = fmt, + datefmt = '%d-%m-%Y %H:%M:%S', + filename = filename, + filemode = 'w') + except IOError, e: + raise PGLoader_Error, e console = logging.StreamHandler() console.setLevel(client_min_messages) diff --git a/pgloader/options.py b/pgloader/options.py index 4aa7288..8256e5a 100644 --- a/pgloader/options.py +++ b/pgloader/options.py @@ -37,3 +37,6 @@ DEFAULT_REFORMAT_PATH = ['/usr/share/python-support/pgloader/reformat'] CLIENT_MIN_MESSAGES = None LOG_MIN_MESSAGES = DEBUG +DEFAULT_LOG_FILE = "/tmp/pgloader.log" +LOG_FILE = None + diff --git a/pgloader/tools.py b/pgloader/tools.py index 
304dba5..f13d06c 100644 --- a/pgloader/tools.py +++ b/pgloader/tools.py @@ -121,7 +121,7 @@ def parse_config_string(str): -def read_path(strpath, verbose = False, path = [], check = True): +def read_path(strpath, log, path = [], check = True): """ read a path configuration element, discarding non-existing entries """ import os.path @@ -129,11 +129,11 @@ def read_path(strpath, verbose = False, path = [], check = True): path.append(p) if check: - return check_path(path, verbose) + return check_path(path, log) else: return path -def check_path(path, verbose = False): +def check_path(path, log): """ removes non existant and non {directories, symlink} entries from path """ path_ok = [] @@ -144,11 +144,26 @@ def check_path(path, verbose = False): (os.path.islink(p) and os.path.isdir(os.path.realpath(p))): path_ok.append(p) else: - if verbose: - print "Warning: path entry '%s' " % p + \ - "is not a directory or does not link to a directory" + log.warning("path entry '%s' is not a directory " + \ + "or does not link to a directory", p) else: - if verbose: - print "Warning: path entry '%s' does not exists, ignored" % p + log.warning("path entry '%s' does not exists, ignored" % p) return path_ok + + +def check_dirname(path): + """ check if given path dirname exists, try to create it if it doesn't """ + + # try to create the directory where the log file lives + logdir = os.path.dirname(path) + if logdir and not os.path.exists(logdir): + # logdir is not empty (not CWD) and does not exist + try: + os.makedirs(logdir) + except (IOError, OSError), e: + return False, e + + return True, None + +