* Rework DatabaseError and consider dichotomy in this case too.

* New options lc_messages and log_file
This commit is contained in:
dim 2007-12-07 21:13:35 +00:00
parent aed566bb0d
commit a530dedd66
8 changed files with 137 additions and 37 deletions

6
debian/changelog vendored
View File

@ -1,7 +1,9 @@
pgloader (2.2.5~dev.1-2) unstable; urgency=low pgloader (2.2.5-1) unstable; urgency=low
* Now using proper python logging module * Now using proper python logging module
* New client_min_messages and log_min_messages options * New client_min_messages, log_min_messages, log_file and lc_messages options
* Better reporting of DatabaseError (pkey violation, e.g.)
* Have logging of error data work again in all cases (DatabaseError)
-- Dimitri Fontaine <dim@tapoueh.org> Fri, 07 Dec 2007 14:48:08 +0100 -- Dimitri Fontaine <dim@tapoueh.org> Fri, 07 Dec 2007 14:48:08 +0100

View File

@ -5,8 +5,10 @@ base = pgloader
user = dim user = dim
pass = None pass = None
log_file = /tmp/pgloader.log
log_min_messages = DEBUG log_min_messages = DEBUG
client_min_messages = WARNING client_min_messages = WARNING
lc_messages = C
;client_encoding = 'utf-8' ;client_encoding = 'utf-8'
client_encoding = 'latin1' client_encoding = 'latin1'

View File

@ -8,6 +8,7 @@ pgloader - Import CSV data and Large Object to PostgreSQL
pgloader [--version] [-c configuration file] pgloader [--version] [-c configuration file]
[-p pedantic] [-d debug] [-v verbose] [-q quiet] [-s summary] [-p pedantic] [-d debug] [-v verbose] [-q quiet] [-s summary]
[-l loglevel] [-L logfile]
[-n dryrun] [-Cn count] [-Fn from] [-In from id] [-n dryrun] [-Cn count] [-Fn from] [-In from id]
[-E input files encoding] [-R reformat:path] [-E input files encoding] [-R reformat:path]
[Section1 Section2] [Section1 Section2]
@ -77,6 +78,10 @@ refers to a PostgreSQL table into which some data is to be loaded.
log level to use when reporting to the console, see +client_min_messages+. log level to use when reporting to the console, see +client_min_messages+.
-L, --logfile::
file where to log messages, see +log_file+ and +log_min_messages+.
-s, --summary:: -s, --summary::
makes pgloader print a 'nice' summary at the end of operations. makes pgloader print a 'nice' summary at the end of operations.
@ -280,15 +285,30 @@ log_min_messages::
defaults to +/tmp/pgloader.log+. See +client_min_messages+ for defaults to +/tmp/pgloader.log+. See +client_min_messages+ for
available levels. available levels.
log_file::
Relative or absolute path to the +log_file+ where to log messages
of at least +log_min_messages+ level. The 'dirname' of
the given +log_file+, if it doesn't exist, will be created by
+pgloader+. If any error prevents +pgloader+ from using the
+log_file+, it will default to using +/tmp/pgloader.log+ and say
so.
lc_messages::
The PostgreSQL session will use this +LC_MESSAGES+ setting if
given. When not set, +pgloader+ issues no command with respect to
this setting, so the server configuration applies.
== COMMON FORMAT CONFIGURATION PARAMETERS == == COMMON FORMAT CONFIGURATION PARAMETERS ==
You then can define any number of data section, and give them an arbitrary You then can define any number of data section, and give them an
name. Some options are required, some are actually optionnals, in which case it arbitrary name. Some options are required, some are actually optional,
is said so thereafter. in which case it is said so thereafter.
First, we'll go through common parameters, applicable whichever format of data First, we'll go through common parameters, applicable whichever format
you're refering to. Then text-format only parameters will be presented, of data you're referring to. Then text-format only parameters will be
followed by csv-only parameters. presented, followed by csv-only parameters.
template:: template::
+ +

View File

@ -11,6 +11,7 @@ from cStringIO import StringIO
import pgloader.options import pgloader.options
import pgloader.tools import pgloader.tools
import pgloader.logger import pgloader.logger
from pgloader.tools import PGLoader_Error
def parse_options(): def parse_options():
""" Parse given options """ """ Parse given options """
@ -53,6 +54,10 @@ def parse_options():
default = None, default = None,
help = "loglevel to use: ERROR, WARNING, INFO, DEBUG") help = "loglevel to use: ERROR, WARNING, INFO, DEBUG")
parser.add_option("-L", "--logfile", dest = "logfile",
default = "/tmp/pgloader.log",
help = "log file, defauts to /tmp/pgloader.log")
parser.add_option("-s", "--summary", action = "store_true", parser.add_option("-s", "--summary", action = "store_true",
dest = "summary", dest = "summary",
default = False, default = False,
@ -99,7 +104,7 @@ def parse_options():
print "PGLoader version %s" % pgloader.options.PGLOADER_VERSION print "PGLoader version %s" % pgloader.options.PGLOADER_VERSION
sys.exit(0) sys.exit(0)
# check existence en read ability of config file # check existence and read ability of config file
if not os.path.exists(opts.config): if not os.path.exists(opts.config):
print >>sys.stderr, \ print >>sys.stderr, \
"Error: Configuration file %s does not exists" % opts.config "Error: Configuration file %s does not exists" % opts.config
@ -145,6 +150,8 @@ def parse_options():
if opts.reformat_path: if opts.reformat_path:
pgloader.options.REFORMAT_PATH = opts.reformat_path pgloader.options.REFORMAT_PATH = opts.reformat_path
pgloader.options.LOG_FILE = opts.logfile
import logging import logging
if opts.loglevel: if opts.loglevel:
loglevel = pgloader.logger.level(opts.loglevel) loglevel = pgloader.logger.level(opts.loglevel)
@ -180,7 +187,8 @@ def parse_config(conffile):
# this has to be done after command line parsing # this has to be done after command line parsing
from pgloader.options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC from pgloader.options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC
from pgloader.options import NULL, EMPTY_STRING from pgloader.options import NULL, EMPTY_STRING
from pgloader.options import CLIENT_MIN_MESSAGES from pgloader.options import CLIENT_MIN_MESSAGES, LOG_FILE
from pgloader.tools import check_dirname
# first read the logging configuration # first read the logging configuration
if not CLIENT_MIN_MESSAGES: if not CLIENT_MIN_MESSAGES:
@ -197,13 +205,41 @@ def parse_config(conffile):
else: else:
pgloader.options.LOG_MIN_MESSAGES = NOTICE pgloader.options.LOG_MIN_MESSAGES = NOTICE
log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES, if config.has_option(section, 'log_file'):
pgloader.options.LOG_MIN_MESSAGES, # don't overload the command line -L option if given
'/tmp/pgloader.log') if not pgloader.options.LOG_FILE:
pgloader.options.LOG_FILE = config.get(section, 'log_file')
if pgloader.options.LOG_FILE:
ok, logdir_mesg = check_dirname(pgloader.options.LOG_FILE)
if not ok:
# force default setting
pgloader.options.LOG_FILE = pgloader.options.DEFAULT_LOG_FILE
try:
log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES,
pgloader.options.LOG_MIN_MESSAGES,
pgloader.options.LOG_FILE)
except PGLoader_Error, e:
try:
log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES,
pgloader.options.LOG_MIN_MESSAGES,
pgloader.options.DEFAULT_LOG_FILE)
log.warning(e)
log.warning("Using default logfile %s",
pgloader.options.DEFAULT_LOG_FILE)
except PGLoader_Error, e:
print e
sys.exit(8)
pgloader.logger.log = log pgloader.logger.log = log
log.info("Logger initialized") log.info("Logger initialized")
log.debug("PHOQUE") if logdir_mesg:
log.error(logdir_mesg)
log.error("Default logfile %s has been used instead",
pgloader.options.LOG_FILE)
if DRY_RUN: if DRY_RUN:
log.info("dry run mode, not connecting to database") log.info("dry run mode, not connecting to database")
@ -224,6 +260,11 @@ def parse_config(conffile):
config.get(section, 'client_encoding')) config.get(section, 'client_encoding'))
dbconn.client_encoding = client_encoding dbconn.client_encoding = client_encoding
if config.has_option(section, 'lc_messages'):
lc_messages = pgloader.tools.parse_config_string(
config.get(section, 'lc_messages'))
dbconn.lc_messages = lc_messages
if config.has_option(section, 'input_encoding'): if config.has_option(section, 'input_encoding'):
input_encoding = pgloader.tools.parse_config_string( input_encoding = pgloader.tools.parse_config_string(
config.get(section, 'input_encoding')) config.get(section, 'input_encoding'))
@ -311,7 +352,6 @@ def print_summary(dbconn, sections, summary, td):
from pgloader.options import VERBOSE, DEBUG, QUIET, SUMMARY from pgloader.options import VERBOSE, DEBUG, QUIET, SUMMARY
from pgloader.options import DRY_RUN, PEDANTIC, VACUUM from pgloader.options import DRY_RUN, PEDANTIC, VACUUM
from pgloader.pgloader import PGLoader from pgloader.pgloader import PGLoader
from pgloader.tools import PGLoader_Error
retcode = 0 retcode = 0
@ -393,6 +433,7 @@ def load_data():
from pgloader.logger import log from pgloader.logger import log
from pgloader.tools import read_path, check_path from pgloader.tools import read_path, check_path
from pgloader.options import VERBOSE from pgloader.options import VERBOSE
import pgloader.options import pgloader.options
if pgloader.options.REFORMAT_PATH: if pgloader.options.REFORMAT_PATH:
rpath = read_path(pgloader.options.REFORMAT_PATH, check = False) rpath = read_path(pgloader.options.REFORMAT_PATH, check = False)

View File

@ -43,6 +43,7 @@ class db:
self.datestyle = DATESTYLE self.datestyle = DATESTYLE
self.null = NULL self.null = NULL
self.empty_string = EMPTY_STRING self.empty_string = EMPTY_STRING
self.lc_messages = None
if connect: if connect:
self.reset() self.reset()
@ -89,6 +90,23 @@ class db:
cursor.execute(sql, [self.datestyle]) cursor.execute(sql, [self.datestyle])
cursor.close() cursor.close()
def set_lc_messages(self):
""" set lc_messages to self.lc_messages """
if self.lc_messages is None:
return
# debug only cause reconnecting happens on every
# configured section
self.log.debug('Setting lc_messages to %s', self.lc_messages)
sql = 'set session lc_messages to %s'
cursor = self.dbconn.cursor()
try:
cursor.execute(sql, [self.lc_messages])
except psycopg.ProgrammingError, e:
raise PGLoader_Error, e
cursor.close()
def reset(self): def reset(self):
""" reset internal counters and open a new database connection """ """ reset internal counters and open a new database connection """
self.buffer = None self.buffer = None
@ -110,6 +128,7 @@ class db:
self.dbconn = psycopg.connect(self.dsn) self.dbconn = psycopg.connect(self.dsn)
self.set_encoding() self.set_encoding()
self.set_datestyle() self.set_datestyle()
self.set_lc_messages()
def print_stats(self): def print_stats(self):
""" output some stats about recent activity """ """ output some stats about recent activity """
@ -307,7 +326,8 @@ class db:
self.commited_rows += self.running_commands self.commited_rows += self.running_commands
self.running_commands = 0 self.running_commands = 0
except psycopg.ProgrammingError, error: except (psycopg.ProgrammingError,
psycopg.DatabaseError), error:
# rollback current transaction # rollback current transaction
self.dbconn.rollback() self.dbconn.rollback()
@ -338,13 +358,6 @@ class db:
self.buffer = None self.buffer = None
self.running_commands = 0 self.running_commands = 0
except psycopg.DatabaseError, error:
# non recoverable error
mesg = "\n".join(["Please check PostgreSQL logs",
"HINT: double check your client_encoding,"+
" datestyle and copy_delimiter settings"])
raise PGLoader_Error, mesg
# prepare next run # prepare next run
if self.buffer is None: if self.buffer is None:
self.buffer = StringIO() self.buffer = StringIO()

View File

@ -5,6 +5,7 @@
# standard error levels are used for code and configuration error messages # standard error levels are used for code and configuration error messages
# data error logging is managed by tools.Reject class # data error logging is managed by tools.Reject class
from tools import PGLoader_Error
import logging import logging
def init(client_min_messages = logging.INFO, def init(client_min_messages = logging.INFO,
@ -13,11 +14,14 @@ def init(client_min_messages = logging.INFO,
fmt = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' fmt = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
logging.basicConfig(level = log_min_messages, try:
format = fmt, logging.basicConfig(level = log_min_messages,
datefmt = '%d-%m-%Y %H:%M:%S', format = fmt,
filename = filename, datefmt = '%d-%m-%Y %H:%M:%S',
filemode = 'w') filename = filename,
filemode = 'w')
except IOError, e:
raise PGLoader_Error, e
console = logging.StreamHandler() console = logging.StreamHandler()
console.setLevel(client_min_messages) console.setLevel(client_min_messages)

View File

@ -37,3 +37,6 @@ DEFAULT_REFORMAT_PATH = ['/usr/share/python-support/pgloader/reformat']
CLIENT_MIN_MESSAGES = None CLIENT_MIN_MESSAGES = None
LOG_MIN_MESSAGES = DEBUG LOG_MIN_MESSAGES = DEBUG
DEFAULT_LOG_FILE = "/tmp/pgloader.log"
LOG_FILE = None

View File

@ -121,7 +121,7 @@ def parse_config_string(str):
def read_path(strpath, verbose = False, path = [], check = True): def read_path(strpath, log, path = [], check = True):
""" read a path configuration element, discarding non-existing entries """ """ read a path configuration element, discarding non-existing entries """
import os.path import os.path
@ -129,11 +129,11 @@ def read_path(strpath, verbose = False, path = [], check = True):
path.append(p) path.append(p)
if check: if check:
return check_path(path, verbose) return check_path(path, log)
else: else:
return path return path
def check_path(path, verbose = False): def check_path(path, log):
""" removes non existant and non {directories, symlink} entries from path """ removes non existant and non {directories, symlink} entries from path
""" """
path_ok = [] path_ok = []
@ -144,11 +144,26 @@ def check_path(path, verbose = False):
(os.path.islink(p) and os.path.isdir(os.path.realpath(p))): (os.path.islink(p) and os.path.isdir(os.path.realpath(p))):
path_ok.append(p) path_ok.append(p)
else: else:
if verbose: log.warning("path entry '%s' is not a directory " + \
print "Warning: path entry '%s' " % p + \ "or does not link to a directory", p)
"is not a directory or does not link to a directory"
else: else:
if verbose: log.warning("path entry '%s' does not exists, ignored" % p)
print "Warning: path entry '%s' does not exists, ignored" % p
return path_ok return path_ok
def check_dirname(path):
    """ check if given path dirname exists, try to create it if it doesn't

    Returns an (ok, error) tuple:
     - (True, None) when the dirname of path is empty (current working
       directory), already exists, or has been created
     - (False, exception) when the directory could not be created
    """
    # local imports, the same way read_path() does it, so that this
    # function does not depend on what the module imports elsewhere
    import os
    import sys

    logdir = os.path.dirname(path)

    if not logdir or os.path.exists(logdir):
        # empty logdir means CWD, nothing to create in either case
        return True, None

    try:
        os.makedirs(logdir)
    except (IOError, OSError):
        # grab the error first, before any other call is made.
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so that the code does not depend on the binding
        # syntax of the running python version
        error = sys.exc_info()[1]

        if os.path.isdir(logdir):
            # the directory appeared between our existence check and
            # makedirs(), someone else created it: that's fine
            return True, None

        return False, error

    return True, None