Support for processing a filename rather than a section, and add --field-separator

This commit is contained in:
Dimitri Fontaine 2010-04-04 20:42:40 +02:00
parent 99df251963
commit b7b0bbc62d
4 changed files with 70 additions and 9 deletions

12
.gitignore vendored Normal file
View File

@ -0,0 +1,12 @@
/pgloader/__init__.pyc
/pgloader/csvreader.pyc
/pgloader/db.pyc
/pgloader/fixedreader.pyc
/pgloader/lo.pyc
/pgloader/logger.pyc
/pgloader/options.pyc
/pgloader/pgloader.pyc
/pgloader/reader.pyc
/pgloader/textreader.pyc
/pgloader/tools.pyc
/pgloader.1.xml

7
debian/changelog vendored
View File

@ -1,3 +1,10 @@
pgloader (2.3.3-1) unstable; urgency=low
* Implement -f --field-separator to override the default from command line
* Add support for filename arguments, which use defaults
-- Dimitri Fontaine <dim@tapoueh.org> Sun, 4 Apr 2010 19:34:39 +0200
pgloader (2.3.2-1) unstable; urgency=low
* Implement --from support in all readers (Closes: #531034)

View File

@ -11,7 +11,7 @@ pgloader [--version] [-c configuration file]
[-l loglevel] [-L logfile]
[-n dryrun] [-Cn count] [-Fn from] [-In from id]
[-E input files encoding] [-R reformat:path]
[Section1 Section2]
[Section|Filename ...]
== DESCRIPTION ==
@ -42,6 +42,28 @@ errors is issued.
http://pgfoundry.org/projects/pgloader/[], where you'll find a debian
package, a source package and an anonymous CVS.
== Arguments ==
+pgloader+ as of version +2.3.3+ accepts two kinds of arguments, either
section names or file names. If both a section and a file exist with the
same name, preference is given to the section, where you can edit your
settings rather than using default ones.
Section::
+
is the name of a configured Section describing some data to load
+
Section arguments are optional; if no section is given, all configured
sections are processed.
Filename::
The name of a file containing the data to load. +pgloader+ will
internally set up a +Section+ for this filename, with the default field
separator or the given +--field-separator+ and the +columns+ parameter
set to '*', and more importantly the format set to +CSV+. It's the only
supported format with sane enough defaults to apply here.
== OPTIONS ==
In order for pgloader to run, you have to edit a configuration file
@ -135,6 +157,11 @@ preferring to use --from instead.
+
You can't use both -F and -I at the same time.
-f, --field-separator::
Default field separator to use; when not set, +pgloader+ will use
'|'. Useful when using +filename+ arguments rather than +section+ ones.
-E, --encoding::
Input data files encoding. Defaults to 'latin9'.
@ -158,13 +185,6 @@ You can't use both -F and -I at the same time.
Force +pgloader+ to use given version of psycopg, either +1+ or
+2+.
Section::
+
is the name of a configured Section describing some data to load
+
Section arguments are optional, if no section is given all configured
sections are processed.
== GLOBAL CONFIGURATION SECTION ==
The configuration file has a +.ini+ file syntax, its first section has

24
pgloader.py Normal file → Executable file
View File

@ -95,6 +95,10 @@ def parse_options():
default = None,
help = "wait for given id on input to begin")
parser.add_option("-f", "--field-separator", dest = "fsep",
default = pgloader.options.FIELD_SEP,
help = "default field separator to use")
parser.add_option("-E", "--encoding", dest = "encoding",
default = None,
help = "input files encoding")
@ -183,6 +187,7 @@ def parse_options():
pgloader.options.COUNT = opts.count
pgloader.options.FROM_COUNT = opts.fromcount
pgloader.options.FROM_ID = opts.fromid
pgloader.options.FIELD_SEP = opts.fsep
pgloader.options.INPUT_ENCODING = opts.encoding
@ -482,13 +487,30 @@ def load_data():
sections = []
summary = {}
# args are meant to be configuration sections
# args are meant to be configuration sections, or filenames
if len(args) > 0:
for s in args:
if config.has_section(s):
sections.append(s)
else:
log.info("Creating a section for file '%s'" % s)
# a filename was given, apply [pgsql] defaults
# set the tablename as the filename sans extension
# consider columns = *
if not os.path.exists(s):
print >>sys.stderr, \
"Error: '%s' does not exists as a section nor as a file" % s
sys.exit(2)
config.add_section(s)
config.set(s, 'table', os.path.splitext(os.path.basename(s))[0])
config.set(s, 'filename', s)
config.set(s, 'columns', '*')
config.set(s, 'format', 'csv')
sections.append(s)
else:
log.debug("No argument on CLI, will consider all sections")
for s in config.sections():
if s != 'pgsql':
sections.append(s)