mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-05 02:46:10 +02:00
Add support for PG options as arguments and in the config file, with assorted fixes
This commit is contained in:
parent
d63b21c3f9
commit
411a9a53d7
2
.gitignore
vendored
2
.gitignore
vendored
@ -13,3 +13,5 @@
|
||||
/pgloader/RRRtools.pyc
|
||||
/reformat/mysql.pyc
|
||||
/reformat/pgtime.pyc
|
||||
/pgloader.1
|
||||
/pgloader.1.html
|
||||
|
||||
3
debian/changelog
vendored
3
debian/changelog
vendored
@ -4,6 +4,9 @@ pgloader (2.3.3-1) unstable; urgency=low
|
||||
* Add support for filename arguments, which use defaults
|
||||
* Implement --reject-log and --reject-data
|
||||
* Add support for --max-parallel-sections and --section-threads
|
||||
* Support setting any PG option (-o and config file)
|
||||
* Have --debug show a traceback
|
||||
* Fix a bug where pgloader would freeze on early error (no such file)
|
||||
|
||||
-- Dimitri Fontaine <dim@tapoueh.org> Sun, 4 Apr 2010 19:34:39 +0200
|
||||
|
||||
|
||||
@ -8,10 +8,15 @@ pass = None
|
||||
log_file = /tmp/pgloader.log
|
||||
log_min_messages = DEBUG
|
||||
client_min_messages = WARNING
|
||||
lc_messages = C
|
||||
|
||||
;client_encoding = 'utf-8'
|
||||
client_encoding = 'latin1'
|
||||
lc_messages = C
|
||||
pg_option_client_encoding = 'utf-8'
|
||||
pg_option_standard_conforming_strings = on
|
||||
; This setting has no effect other than allowing to check option precedence
|
||||
pg_option_work_mem = 12MB
|
||||
|
||||
copy_every = 5
|
||||
commit_every = 5
|
||||
#copy_delimiter = %
|
||||
@ -45,6 +50,7 @@ format = csv
|
||||
filename = allcols/allcols.data
|
||||
field_sep = :
|
||||
columns = *
|
||||
pg_option_work_mem = 14MB
|
||||
|
||||
[errors]
|
||||
table = errors
|
||||
|
||||
@ -42,7 +42,7 @@ errors is issued.
|
||||
http://pgfoundry.org/projects/pgloader/[], where you'll find a debian
|
||||
package, a source package and an anonymous CVS.
|
||||
|
||||
== Arguments ==
|
||||
== ARGUMENTS ==
|
||||
|
||||
+pgloader+ as of version +2.3.3+ accepts two kinds of arguments, either
|
||||
section names or file names. If both a section and a file exist with the
|
||||
@ -178,6 +178,14 @@ You can't use both -F and -I at the same time.
|
||||
|
||||
Input data files encoding. Defaults to 'latin9'.
|
||||
|
||||
-o, --pg-options::
|
||||
+
|
||||
Any option to give to the PostgreSQL server by means of the +SET+
|
||||
command. You can use this argument more than once to set more than one
|
||||
option.
|
||||
+
|
||||
Example: -o standard_conforming_strings=on -o client_encoding=utf8
|
||||
|
||||
-t, --section-threads::
|
||||
|
||||
How many threads per section to use, defaults to 1. The command line
|
||||
@ -245,18 +253,30 @@ pass::
|
||||
|
||||
client_encoding::
|
||||
+
|
||||
Set this parameter to have pgloader connects to PostgreSQL using this
|
||||
Set this parameter to have +pgloader+ connect to PostgreSQL using this
|
||||
encoding.
|
||||
+
|
||||
This parameter is optional and defaults to 'latin9'.
|
||||
+
|
||||
As of +pgloader 2.3.3+ you can also use +pg_option_client_encoding+ which is
|
||||
the more general approach.
|
||||
|
||||
datestyle::
|
||||
+
|
||||
Set this parameter to have pgloader connects to PostgreSQL using this
|
||||
Set this parameter to have +pgloader+ connect to PostgreSQL using this
|
||||
datestyle setting.
|
||||
+
|
||||
This parameter is optional and has no default value, thus pgloader will
|
||||
use whatever your PostgreSQL is configured to as default.
|
||||
+
|
||||
As of +pgloader 2.3.3+ you can also use +pg_option_datestyle+ which is
|
||||
the more general approach.
|
||||
|
||||
pg_option_<foo>::
|
||||
|
||||
Replace <foo> with any option you're allowed to set up for the session
|
||||
only with the +SET+ command, and +pgloader+ will do just that for
|
||||
you. Consider for example +pg_option_standard_conforming_strings = on+.
|
||||
|
||||
copy_every::
|
||||
+
|
||||
@ -436,19 +456,30 @@ This parameter is optional and defaults to pipe char +$$'|'$$+.
|
||||
|
||||
client_encoding::
|
||||
+
|
||||
Set this parameter to have pgloader connects to PostgreSQL using this
|
||||
Set this parameter to have +pgloader+ connect to PostgreSQL using this
|
||||
encoding.
|
||||
+
|
||||
This parameter is optional and defaults to 'latin9'. If defined on a
|
||||
table level, this local value will overwrite the global one.
|
||||
This parameter is optional and defaults to 'latin9'.
|
||||
+
|
||||
As of +pgloader 2.3.3+ you can also use +pg_option_client_encoding+ which is
|
||||
the more general approach.
|
||||
|
||||
datestyle::
|
||||
+
|
||||
Set this parameter to have pgloader connects to PostgreSQL using this
|
||||
+datestyle+ setting.
|
||||
Set this parameter to have +pgloader+ connect to PostgreSQL using this
|
||||
datestyle setting.
|
||||
+
|
||||
This parameter is optional and has no default. If defined on a table
|
||||
level, this local value will overwrite the global one.
|
||||
This parameter is optional and has no default value, thus pgloader will
|
||||
use whatever your PostgreSQL is configured to as default.
|
||||
+
|
||||
As of +pgloader 2.3.3+ you can also use +pg_option_datestyle+ which is
|
||||
the more general approach.
|
||||
|
||||
pg_option_<foo>::
|
||||
|
||||
Replace <foo> with any option you're allowed to set up for the session
|
||||
only with the +SET+ command, and +pgloader+ will do just that for
|
||||
you. Consider for example +pg_option_standard_conforming_strings = on+.
|
||||
|
||||
null::
|
||||
+
|
||||
|
||||
25
pgloader.py
25
pgloader.py
@ -111,6 +111,9 @@ def parse_options():
|
||||
default = None,
|
||||
help = "input files encoding")
|
||||
|
||||
parser.add_option("-o", "--pg-options", dest = "pg_options", action = "append",
|
||||
help = "list of PostgreSQL options you want to SET")
|
||||
|
||||
parser.add_option("-t", "--section-threads", dest = "section_threads",
|
||||
default = pgloader.options.SECTION_THREADS,
|
||||
type = "int",
|
||||
@ -252,6 +255,18 @@ def parse_options():
|
||||
elif opts.quiet:
|
||||
pgloader.options.CLIENT_MIN_MESSAGES = logging.ERROR
|
||||
|
||||
if opts.pg_options:
|
||||
pgloader.options.PG_OPTIONS = {}
|
||||
for o in opts.pg_options:
|
||||
try:
|
||||
n, v = [x.strip() for x in o.split('=')]
|
||||
if v == "":
|
||||
raise ValueError
|
||||
pgloader.options.PG_OPTIONS[n] = v
|
||||
except ValueError, e:
|
||||
print >>sys.stderr, \
|
||||
"Error: PostgreSQL options must have the form 'name=value'"
|
||||
sys.exit(1)
|
||||
|
||||
if opts.psycopg1:
|
||||
pgloader.options.PSYCOPG_VERSION = 1
|
||||
@ -285,6 +300,7 @@ def parse_config(conffile):
|
||||
from pgloader.options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC
|
||||
from pgloader.options import NULL, EMPTY_STRING
|
||||
from pgloader.options import CLIENT_MIN_MESSAGES, LOG_FILE
|
||||
from pgloader.options import PG_OPTIONS
|
||||
from pgloader.tools import check_dirname
|
||||
|
||||
# first read the logging configuration
|
||||
@ -605,6 +621,8 @@ def load_data():
|
||||
started[s] .set()
|
||||
finished[s].set()
|
||||
log.error(e)
|
||||
if DEBUG:
|
||||
raise
|
||||
|
||||
except IOError, e:
|
||||
# No space left on device? can't log it
|
||||
@ -632,6 +650,9 @@ def load_data():
|
||||
else:
|
||||
log.error('%s' % e)
|
||||
|
||||
if DEBUG:
|
||||
raise
|
||||
|
||||
if PEDANTIC:
|
||||
# was: threads[s].print_stats()
|
||||
# but now thread[s] is no more alive
|
||||
@ -679,6 +700,10 @@ if __name__ == "__main__":
|
||||
try:
|
||||
ret = load_data()
|
||||
except Exception, e:
|
||||
from pgloader.options import DEBUG
|
||||
print DEBUG
|
||||
if DEBUG:
|
||||
raise
|
||||
sys.stderr.write(str(e) + '\n')
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
@ -11,6 +11,7 @@ from options import TRUNCATE, VACUUM
|
||||
from options import INPUT_ENCODING, PG_CLIENT_ENCODING, DATESTYLE
|
||||
from options import COPY_SEP, FIELD_SEP, CLOB_SEP, NULL, EMPTY_STRING
|
||||
from options import PSYCOPG_VERSION
|
||||
from options import PG_OPTIONS
|
||||
|
||||
from tools import PGLoader_Error
|
||||
from logger import log
|
||||
@ -66,11 +67,9 @@ class db:
|
||||
self.copy_sep = COPY_SEP
|
||||
self.copy_every = copy_every
|
||||
self.commit_every = commit_every
|
||||
self.client_encoding = client_encoding
|
||||
self.datestyle = DATESTYLE
|
||||
self.null = NULL
|
||||
self.empty_string = EMPTY_STRING
|
||||
self.lc_messages = None
|
||||
self.pg_options = {}
|
||||
|
||||
# this allows to specify configuration has columns = *
|
||||
# when true, we don't include column list in COPY statements
|
||||
@ -107,51 +106,18 @@ class db:
|
||||
pass
|
||||
self.dbconn = None
|
||||
|
||||
def set_encoding(self):
|
||||
""" set connection encoding to self.client_encoding """
|
||||
# debug only cause reconnecting happens on every
|
||||
# configured section
|
||||
self.log.debug('Setting client encoding to %s', self.client_encoding)
|
||||
def set_pg_options(self):
|
||||
""" set pg_options """
|
||||
for opt, val in self.pg_options.items():
|
||||
self.log.debug('Setting %s to %s', opt, val)
|
||||
|
||||
sql = 'set session client_encoding to %s'
|
||||
cursor = self.dbconn.cursor()
|
||||
try:
|
||||
cursor.execute(sql, [self.client_encoding])
|
||||
except psycopg.ProgrammingError, e:
|
||||
raise PGLoader_Error, e
|
||||
cursor.close()
|
||||
|
||||
def set_datestyle(self):
|
||||
""" set session datestyle to self.datestyle """
|
||||
|
||||
if self.datestyle is None:
|
||||
return
|
||||
|
||||
# debug only cause reconnecting happens on every
|
||||
# configured section
|
||||
self.log.debug('Setting datestyle to %s', self.datestyle)
|
||||
|
||||
sql = 'set session datestyle to %s'
|
||||
cursor = self.dbconn.cursor()
|
||||
cursor.execute(sql, [self.datestyle])
|
||||
cursor.close()
|
||||
|
||||
def set_lc_messages(self):
|
||||
""" set lc_messages to self.lc_messages """
|
||||
if self.lc_messages is None:
|
||||
return
|
||||
|
||||
# debug only cause reconnecting happens on every
|
||||
# configured section
|
||||
self.log.debug('Setting lc_messages to %s', self.lc_messages)
|
||||
|
||||
sql = 'set session lc_messages to %s'
|
||||
cursor = self.dbconn.cursor()
|
||||
try:
|
||||
cursor.execute(sql, [self.lc_messages])
|
||||
except psycopg.ProgrammingError, e:
|
||||
raise PGLoader_Error, e
|
||||
cursor.close()
|
||||
sql = 'set session %s to %%s' % opt
|
||||
cursor = self.dbconn.cursor()
|
||||
try:
|
||||
cursor.execute(sql, [val])
|
||||
except (psycopg.ProgrammingError, psycopg.DataError), e:
|
||||
raise PGLoader_Error, e
|
||||
cursor.close()
|
||||
|
||||
def get_all_columns(self, tablename):
|
||||
""" select the columns name list from catalog """
|
||||
@ -214,9 +180,7 @@ ORDER BY attnum
|
||||
self.log.debug('Debug: connecting to dns %s', self.dsn)
|
||||
|
||||
self.dbconn = psycopg.connect(self.dsn)
|
||||
self.set_encoding()
|
||||
self.set_datestyle()
|
||||
self.set_lc_messages()
|
||||
self.set_pg_options()
|
||||
|
||||
except psycopg.OperationalError, e:
|
||||
# e.g. too many connections
|
||||
|
||||
@ -8,6 +8,7 @@ PSYCOPG_VERSION = None
|
||||
|
||||
INPUT_ENCODING = None
|
||||
PG_CLIENT_ENCODING = 'latin9'
|
||||
PG_OPTIONS = None
|
||||
DATESTYLE = None
|
||||
|
||||
COPY_SEP = None
|
||||
|
||||
@ -18,6 +18,7 @@ from options import TRUNCATE, VACUUM, TRIGGERS
|
||||
from options import COUNT, FROM_COUNT, FROM_ID
|
||||
from options import INPUT_ENCODING, PG_CLIENT_ENCODING
|
||||
from options import COPY_SEP, FIELD_SEP, CLOB_SEP, NULL, EMPTY_STRING
|
||||
from options import PG_OPTIONS
|
||||
from options import NEWLINE_ESCAPES
|
||||
from options import UDC_PREFIX
|
||||
from options import REFORMAT_PATH
|
||||
@ -163,17 +164,15 @@ class PGLoader(threading.Thread):
|
||||
config.get(section, 'pass'),
|
||||
connect = False)
|
||||
|
||||
if config.has_option(section, 'client_encoding'):
|
||||
self.db.client_encoding = parse_config_string(
|
||||
config.get(section, 'client_encoding'))
|
||||
for opt in ['client_encoding', 'datestyle', 'lc_messages']:
|
||||
if config.has_option(section, opt):
|
||||
self.db.pg_options[opt] = \
|
||||
parse_config_string(config.get(section, opt))
|
||||
|
||||
if config.has_option(section, 'lc_messages'):
|
||||
self.db.lc_messages = parse_config_string(
|
||||
config.get(section, 'lc_messages'))
|
||||
|
||||
if config.has_option(section, 'datestyle'):
|
||||
self.db.datestyle = parse_config_string(
|
||||
config.get(section, 'datestyle'))
|
||||
# PostgreSQL options
|
||||
from tools import parse_pg_options
|
||||
parse_pg_options(self.log, config, section, self.db.pg_options)
|
||||
self.log.debug("_dbconfig: %s" % str(self.db.pg_options))
|
||||
|
||||
if config.has_option(section, 'copy_every'):
|
||||
self.db.copy_every = config.getint(section, 'copy_every')
|
||||
@ -260,29 +259,28 @@ class PGLoader(threading.Thread):
|
||||
# needed to instanciate self.reject while in template section
|
||||
self.reject = None
|
||||
|
||||
# optionnal local option client_encoding
|
||||
if config.has_option(name, 'client_encoding'):
|
||||
self.db.client_encoding = parse_config_string(
|
||||
config.get(name, 'client_encoding'))
|
||||
|
||||
if not DRY_RUN:
|
||||
self.log.debug("client_encoding: '%s'", self.db.client_encoding)
|
||||
|
||||
# optionnal local option input_encoding
|
||||
self.input_encoding = INPUT_ENCODING
|
||||
if config.has_option(name, 'input_encoding'):
|
||||
self.input_encoding = parse_config_string(
|
||||
config.get(name, 'input_encoding'))
|
||||
|
||||
self.log.debug("input_encoding: '%s'", self.input_encoding)
|
||||
|
||||
# optionnal local option datestyle
|
||||
if not DRY_RUN and config.has_option(name, 'datestyle'):
|
||||
self.db.datestyle = parse_config_string(
|
||||
config.get(name, 'datestyle'))
|
||||
# optionnal local option client_encoding and datestyle
|
||||
for opt in ['client_encoding', 'datestyle']:
|
||||
if config.has_option(name, opt):
|
||||
self.db.pg_options[opt] = parse_config_string(config.get(name, opt))
|
||||
|
||||
if not DRY_RUN:
|
||||
self.log.debug("datestyle: '%s'", self.db.datestyle)
|
||||
if not DRY_RUN:
|
||||
self.log.debug("%s: '%s'", opt, self.db.pg_options[opt])
|
||||
|
||||
# optionnal local pg_options
|
||||
# precedence is given to command line parsing, which is in PG_OPTIONS
|
||||
from tools import parse_pg_options
|
||||
parse_pg_options(log, config, name, self.db.pg_options, overwrite=True)
|
||||
if not self.template:
|
||||
if PG_OPTIONS:
|
||||
self.db.pg_options.update(PG_OPTIONS)
|
||||
|
||||
##
|
||||
# data filename
|
||||
@ -849,6 +847,7 @@ class PGLoader(threading.Thread):
|
||||
|
||||
except Exception, e:
|
||||
# resources get freed in self.terminate()
|
||||
self.terminate()
|
||||
self.log.error(e)
|
||||
raise
|
||||
|
||||
|
||||
@ -139,7 +139,25 @@ def parse_config_string(str):
|
||||
|
||||
return str
|
||||
|
||||
|
||||
|
||||
def parse_pg_options(log, config, section, pg_options, overwrite=False):
    """ Get all the pg_option_ prefixed options from the section

    Every option of `section` whose name starts with 'pg_option_' is
    stored in the `pg_options` dict, keyed by the option name with the
    prefix removed, with the raw value returned by config.get().

    log        -- logger, only its warning() method is used
    config     -- ConfigParser-like object (options, get, has_option)
    section    -- name of the configuration section to read
    pg_options -- dict to fill in; it is modified in place
    overwrite  -- when False, entries already present in pg_options are
                  kept; when True, the section value replaces them

    Returns the same pg_options dict that was given.
    """
    # PostgreSQL options must begin with the prefix pg_option_
    prefix = 'pg_option_'

    # hysterical raisins: these settings also exist as plain options, and
    # the plain spelling wins when both are given in the same section
    compat = ('client_encoding', 'lc_messages', 'datestyle')

    for o in config.options(section):
        if not o.startswith(prefix):
            continue

        opt = o[len(prefix):]
        val = config.get(section, o)

        if opt in compat and config.has_option(section, opt):
            # really skip the option here: the warning says it is ignored
            log.warning("Ignoring %s.%s for %s.%s" \
                            % (section, o, section, opt))
            continue

        if overwrite or opt not in pg_options:
            pg_options[opt] = val

    return pg_options
|
||||
|
||||
def read_path(strpath, log, path = [], check = True):
|
||||
""" read a path configuration element, discarding non-existing entries """
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user