From 5da357c262dea9490f0bd9b45e784d7251d39c65 Mon Sep 17 00:00:00 2001 From: dim Date: Tue, 4 Dec 2007 23:08:30 +0000 Subject: [PATCH] Beginning of real logging implementation for pgloader messages --- examples/pgloader.conf | 3 + pgloader.py | 131 +++++++++++++++++++++++------------------ pgloader/logger.py | 50 ++++++++++++++++ pgloader/options.py | 3 + pgloader/pgloader.py | 4 +- pgloader/textreader.py | 3 +- 6 files changed, 133 insertions(+), 61 deletions(-) create mode 100644 pgloader/logger.py diff --git a/examples/pgloader.conf b/examples/pgloader.conf index da571df..38a1dff 100644 --- a/examples/pgloader.conf +++ b/examples/pgloader.conf @@ -5,6 +5,9 @@ base = pgloader user = dim pass = None +log_min_messages = DEBUG +client_min_messages = INFO + ;client_encoding = 'utf-8' client_encoding = 'latin1' copy_every = 5 diff --git a/pgloader.py b/pgloader.py index 0f7f535..41ae7fb 100644 --- a/pgloader.py +++ b/pgloader.py @@ -1,25 +1,8 @@ #! /usr/bin/env python -# Author: Dimitri Fontaine +# Author: Dimitri Fontaine """ -PostgreSQL data import tool, aimed to replace and extands pgloader. - -Important features : - - CSV file format import using COPY - - multi-line input file - - configurable amount of rows per COPY instruction - - large object to TEXT or BYTEA field handling - (only informix blobs and clobs supported as of now) - - trailing slash optionnal removal (support informix UNLOAD file format) - - begin processing at any line in the file, by number or row id - - dry-run option, to validate input reading without connecting to database - - pedantic option, to stop processing on warning - - reject log and reject data files: you can reprocess refused data later - - COPY errors recovery via redoing COPY with half files until file is - one line long, then reject log this line - -Please read the fine manual page pg_import(1) for command line usage -(options) and configuration file format. +PostgreSQL data import tool, see included man page. 
""" import os, sys, os.path, time, codecs @@ -27,6 +10,7 @@ from cStringIO import StringIO import pgloader.options import pgloader.tools +import pgloader.logger def parse_options(): """ Parse given options """ @@ -113,24 +97,25 @@ def parse_options(): # check existence en read ability of config file if not os.path.exists(opts.config): - print "Error: Configuration file %s does not exists" % opts.config - print parser.format_help() + print >>sys.stderr, \ + "Error: Configuration file %s does not exists" % opts.config + print >>sys.stderr, parser.format_help() sys.exit(1) if not os.access(opts.config, os.R_OK): - print "Error: Can't read configuration file %s" % opts.config - print parser.format_help() + print >>sys.stderr, \ + "Error: Can't read configuration file %s" % opts.config + print >>sys.stderr, parser.format_help() sys.exit(1) - if opts.verbose: - print 'Using %s configuration file' % opts.config - if opts.fromcount != 0 and opts.fromid is not None: - print "Error: Can't set both options fromcount (-F) AND fromid (-I)" + print >>sys.stderr, \ + "Error: Can't set both options fromcount (-F) AND fromid (-I)" sys.exit(1) if opts.quiet and (opts.verbose or opts.debug): - print "Error: Can't be verbose and quiet at the same time!" + print >>sys.stderr, \ + "Error: Can't be verbose and quiet at the same time!" 
sys.exit(1) # if debug, then verbose @@ -156,6 +141,14 @@ def parse_options(): if opts.reformat_path: pgloader.options.REFORMAT_PATH = opts.reformat_path + import logging + if opts.debug: + pgloader.options.CLIENT_MIN_MESSAGES = logging.DEBUG + elif opts.verbose: + pgloader.options.CLIENT_MIN_MESSAGES = logging.INFO + elif opts.quiet: + pgloader.options.CLIENT_MIN_MESSAGES = logging.ERROR + return opts.config, args def parse_config(conffile): @@ -169,21 +162,44 @@ def parse_config(conffile): try: config.read(conffile) except: - print "Error: Given file is not a configuration file" + print >>sys.stderr, "Error: Given file is not a configuration file" sys.exit(4) if not config.has_section(section): - print "Error: Please provide a [%s] section" % section + print >>sys.stderr, "Error: Please provide a [%s] section" % section sys.exit(5) # load some options # this has to be done after command line parsing from pgloader.options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC from pgloader.options import NULL, EMPTY_STRING + from pgloader.options import CLIENT_MIN_MESSAGES + + # first read the logging configuration + if not CLIENT_MIN_MESSAGES: + if config.has_option(section, 'client_min_messages'): + cmm = config.get(section, 'client_min_messages') + pgloader.options.CLIENT_MIN_MESSAGES = pgloader.logger.level(cmm) + else: + # CLIENT_MIN_MESSAGES has not been set at all + pgloader.options.CLIENT_MIN_MESSAGES = NOTICE + + if config.has_option(section, 'log_min_messages'): + lmm = config.get(section, 'log_min_messages') + pgloader.options.LOG_MIN_MESSAGES = pgloader.logger.level(lmm) + else: + pgloader.options.LOG_MIN_MESSAGES = NOTICE + + + pgloader.log = pgloader.logger.init(pgloader.options.CLIENT_MIN_MESSAGES, + pgloader.options.LOG_MIN_MESSAGES, + '/tmp/pgloader.log') + + pgloader.log.info("Logger initialized") + pgloader.log.debug("PHOQUE") if DRY_RUN: - if VERBOSE: - print "Notice: dry run mode, not connecting to database" + pgloader.log.info("dry run mode, not connecting 
to database") return config, None try: @@ -243,7 +259,7 @@ def parse_config(conffile): pgloader.options.REFORMAT_PATH = rpath except Exception, error: - print "Error: Could not initialize PostgreSQL connection:" + pgloader.log.error("Could not initialize PostgreSQL connection:") print error sys.exit(6) @@ -252,15 +268,20 @@ def parse_config(conffile): def myprint(l, line_prefix = " ", cols = 78): """ pretty print list l elements """ # some code for pretty print + lines = [] + tmp = line_prefix for e in l: if len(tmp) + len(e) > cols: - print tmp + lines.append(tmp) tmp = line_prefix if tmp != line_prefix: tmp += " " tmp += e - print tmp + + lines.append(tmp) + + return lines def duration_pprint(duration): """ pretty print duration (human readable information) """ @@ -384,9 +405,7 @@ def load_data(): else: pgloader.options.REFORMAT_PATH = rpath - if VERBOSE: - print 'Notice: Reformat path is', pgloader.options.REFORMAT_PATH - print + pgloader.log.info('Reformat path is %s', pgloader.options.REFORMAT_PATH) # load some pgloader package modules from pgloader.options import VERBOSE, DEBUG, QUIET, SUMMARY @@ -408,9 +427,9 @@ def load_data(): if s != 'pgsql': sections.append(s) - if VERBOSE: - print 'Will consider following sections:' - myprint(sections) + pgloader.log.info('Will consider following sections:') + for line in myprint(sections): + pgloader.log.info(line) # we count time passed from now on begin = time.time() @@ -419,34 +438,30 @@ def load_data(): sections.sort() for s in sections: try: - if VERBOSE: - print - - pgloader = PGLoader(s, config, dbconn) + loader = PGLoader(s, config, dbconn) - if not pgloader.template: - pgloader.run() - summary[s] = (pgloader.table,) + pgloader.summary() + if not loader.template: + loader.run() + summary[s] = (loader.table,) + loader.summary() else: - if VERBOSE: - print "Skipping section %s, which is a template" % s + pgloader.log.info("Skipping section %s, which is a template" \ + % s) except PGLoader_Error, e: if e == '': - 
print '[%s] Please correct previous errors' % s + pgloader.log.error('[%s] Please correct previous errors' % s) else: - print - print 'Error: %s' % e + pgloader.log.error('%s' % e) if PEDANTIC: pgloader.print_stats() except UnicodeDecodeError, e: - print "Error: can't open '%s' with given input encoding '%s'" \ - % (pgloader.filename, pgloader.input_encoding) + pgloader.log.error("can't open '%s' with given input encoding '%s'" \ + % (loader.filename, loader.input_encoding)) except KeyboardInterrupt: - print "Aborting on user demand (Interrupt)" + pgloader.log.warning("Aborting on user demand (Interrupt)") # total duration td = time.time() - begin @@ -457,10 +472,10 @@ def load_data(): retcode = print_summary(dbconn, sections, summary, td) print except PGLoader_Error, e: - print "Can't print summary: %s" % e + pgloader.log.error("Can't print summary: %s" % e) if VACUUM and not DRY_RUN: - print 'vacuumdb... ' + pgloader.log.info('vacuumdb... ') try: dbconn.vacuum() except KeyboardInterrupt: diff --git a/pgloader/logger.py b/pgloader/logger.py new file mode 100644 index 0000000..b7207fc --- /dev/null +++ b/pgloader/logger.py @@ -0,0 +1,50 @@ +# Author: Dimitri Fontaine +# +# pgloader logging facility +# +# standard error levels are used for code and configuration error messages +# data error logging is managed by tools.Reject class + +import logging + +def init(client_min_messages = logging.INFO, + log_min_messages = logging.DEBUG, filename = '/tmp/pgloader.log'): + """ log to filename (truncated on each run) at log_min_messages and to a console handler at client_min_messages, then return the 'pgloader' logger; basicConfig sets the root logger level to log_min_messages, so records below that level never reach the console handler """ + + fmt = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' + + logging.basicConfig(level = log_min_messages, + format = fmt, + datefmt = '%d-%m-%Y %H:%M:%S', + filename = filename, + filemode = 'w') + + console = logging.StreamHandler() + console.setLevel(client_min_messages) + + formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') + console.setFormatter(formatter) + logging.getLogger('').addHandler(console) + + return logging.getLogger('pgloader') 
+ +def level(name): + """ return the logging level named by a user string (case-insensitive), or logging.NOTSET when the name is not a known level """ + + if name.upper() == 'DEBUG': + return logging.DEBUG + + elif name.upper() == 'INFO': + return logging.INFO + + elif name.upper() == 'WARNING': + return logging.WARNING + + elif name.upper() == 'ERROR': + return logging.ERROR + + elif name.upper() == 'CRITICAL': + return logging.CRITICAL + + else: + return logging.NOTSET diff --git a/pgloader/options.py b/pgloader/options.py index 90edbf4..4aa7288 100644 --- a/pgloader/options.py +++ b/pgloader/options.py @@ -34,3 +34,6 @@ UDC_PREFIX = 'udc_' REFORMAT_PATH = None DEFAULT_REFORMAT_PATH = ['/usr/share/python-support/pgloader/reformat'] + +CLIENT_MIN_MESSAGES = None +LOG_MIN_MESSAGES = DEBUG diff --git a/pgloader/pgloader.py b/pgloader/pgloader.py index d608329..465da89 100644 --- a/pgloader/pgloader.py +++ b/pgloader/pgloader.py @@ -1,4 +1,4 @@ -# Author: Dimitri Fontaine +# Author: Dimitri Fontaine # # pgloader main class # @@ -160,7 +160,7 @@ class PGLoader: print "input_encoding: '%s'" % self.input_encoding # optionnal local option datestyle - if config.has_option(name, 'datestyle'): + if not DRY_RUN and config.has_option(name, 'datestyle'): self.db.datestyle = parse_config_string( config.get(name, 'datestyle')) diff --git a/pgloader/textreader.py b/pgloader/textreader.py index dac7672..bc585ad 100644 --- a/pgloader/textreader.py +++ b/pgloader/textreader.py @@ -286,7 +286,8 @@ class TextReader(DataReader): col_data = columns[ne_colnum] - if self.db.is_null(col_data) or self.db.is_empty(col_data): + if self.db and \ + (self.db.is_null(col_data) or self.db.is_empty(col_data)): if DEBUG: print 'Debug: skipping null or empty column' continue