diff --git a/pgloader.1.txt b/pgloader.1.txt index 25140b2..58d3aca 100644 --- a/pgloader.1.txt +++ b/pgloader.1.txt @@ -104,6 +104,18 @@ refers to a PostgreSQL table into which some data is to be loaded. file where to log messages, see +log_min_messages+. +-r, --reject-log:: + + Filename, with a single "%s" placeholder (replaced by the section name), where to store the bad data + logs (that is, the error messages given by PostgreSQL). If you want to put + a literal percent sign in the file name, write it as '%%'. + +-j, --reject-data:: + + Filename, with a single "%s" placeholder (replaced by the section name), where to store the bad data + (the exact lines that didn't make it from your input file). If you want + to put a literal percent sign in the file name, write it as '%%'. + -s, --summary:: makes pgloader print a 'nice' summary at the end of operations. diff --git a/pgloader.py b/pgloader.py index f62b9fd..77cae58 100755 --- a/pgloader.py +++ b/pgloader.py @@ -58,6 +58,14 @@ def parse_options(): default = "/tmp/pgloader.log", help = "log file, defauts to /tmp/pgloader.log") + parser.add_option("-r", "--reject-log", dest = "reject_log", + default = None, + help = "log file for rejected data error messages") + + parser.add_option("-j", "--reject-data", dest = "reject_data", + default = None, + help = "log file for rejected data, bad input data") + parser.add_option("-s", "--summary", action = "store_true", dest = "summary", default = False, @@ -210,6 +218,30 @@ def parse_options(): pgloader.options.LOG_FILE = opts.logfile + # reject file names must contain one %s which will get replaced by the + # section name + if opts.reject_log: + try: + unused = opts.reject_log % "that would be a section name" + except TypeError, e: + # TypeError: not all arguments converted during string formatting + # TypeError: not enough arguments for format string + print >>sys.stderr, \ + "Error: reject log must contain a '%s' place holder for section name" + sys.exit(1) + pgloader.options.REJECT_LOG_FILE = opts.reject_log + + if opts.reject_data: + try: + unused = 
opts.reject_data % "that would be a section name" + except TypeError, e: + # TypeError: not all arguments converted during string formatting + # TypeError: not enough arguments for format string + print >>sys.stderr, \ + "Error: reject data must contain a '%s' place holder for section name" + sys.exit(1) + pgloader.options.REJECT_DATA_FILE = opts.reject_data + if opts.loglevel: loglevel = pgloader.logger.level(opts.loglevel) pgloader.options.CLIENT_MIN_MESSAGES = loglevel diff --git a/pgloader/options.py b/pgloader/options.py index a1356f0..1b3e3f5 100644 --- a/pgloader/options.py +++ b/pgloader/options.py @@ -50,3 +50,5 @@ LOG_MIN_MESSAGES = DEBUG DEFAULT_LOG_FILE = "/tmp/pgloader.log" LOG_FILE = None +REJECT_LOG_FILE = '%s.rej.log' +REJECT_DATA_FILE = '%s.rej' diff --git a/pgloader/pgloader.py b/pgloader/pgloader.py index 1207c86..2364f73 100644 --- a/pgloader/pgloader.py +++ b/pgloader/pgloader.py @@ -21,6 +21,7 @@ from options import COPY_SEP, FIELD_SEP, CLOB_SEP, NULL, EMPTY_STRING from options import NEWLINE_ESCAPES from options import UDC_PREFIX from options import REFORMAT_PATH +from options import REJECT_LOG_FILE, REJECT_DATA_FILE from options import MAX_PARALLEL_SECTIONS from options import DEFAULT_SECTION_THREADS, SECTION_THREADS, SPLIT_FILE_READING from options import RRQUEUE_SIZE @@ -242,10 +243,10 @@ class PGLoader(threading.Thread): self.reject_data = config.get(name, 'reject_data') if not self.template and 'reject_log' not in self.__dict__: - self.reject_log = os.path.join('/tmp', '%s.rej.log' % name) + self.reject_log = os.path.join('/tmp', REJECT_LOG_FILE % name) if not self.template and 'reject_data' not in self.__dict__: - self.reject_data = os.path.join('/tmp', '%s.rej' % name) + self.reject_data = os.path.join('/tmp', REJECT_DATA_FILE % name) # reject logging if not self.template: