From 8ed1e0ff2ca08fea6629fc1f4e010eccb85d02ad Mon Sep 17 00:00:00 2001 From: dim Date: Sun, 19 Nov 2006 21:18:42 +0000 Subject: [PATCH] New version of pgloader (2.0.2): * new developpers * python code replaces tcl code * one config file for many tables (one command line for loading them all) * manpage * debian package * supports multi-line input file (without quotes) * Informix large objects support (loading to TEXT or BYTEA) * configurable amount of rows per COPY instruction * trailing slash optionnal removal (support informix UNLOAD file format) * begin processing at any line in the file, by number or row id * dry-run option, to validate input reading without connecting to database * pedantic option, to stop processing on warning --- LICENSE | 25 - Makefile | 23 + debian/changelog | 23 + debian/compat | 1 + debian/control | 21 + debian/copyright | 32 ++ debian/dirs | 4 + debian/docs | 1 + debian/files | 1 + debian/pgloader.links | 2 + debian/pgloader.manpages | 1 + debian/pycompat | 1 + debian/pyversions | 1 + debian/rules | 98 ++++ doc/Makefile | 43 -- doc/README | 34 -- doc/doc_pgloader.rest | 189 -------- doc/example/create_table.sql | 2 - doc/example/foo.conf | 21 - doc/example/foo.data | 4 - doc/lib/stylesheet.sty | 52 --- examples/README | 55 +++ examples/clob/clob.data | 7 + examples/clob/clob.out | 1 + examples/clob/clob.sql | 4 + examples/cluttered/cluttered.data | 16 + examples/cluttered/cluttered.sql | 5 + examples/errors/errors.data | 7 + examples/errors/errors.sql | 5 + examples/pgloader.conf | 51 ++ examples/simple/simple.data | 7 + examples/simple/simple.sql | 5 + pgloader.1.sgml | 742 ++++++++++++++++++++++++++++++ pgloader.conf | 36 -- pgloader.py | 362 +++++++++++++++ 35 files changed, 1476 insertions(+), 406 deletions(-) delete mode 100644 LICENSE create mode 100644 Makefile create mode 100644 debian/changelog create mode 100644 debian/compat create mode 100644 debian/control create mode 100644 debian/copyright create mode 100644 debian/dirs 
create mode 100644 debian/docs create mode 100644 debian/files create mode 100644 debian/pgloader.links create mode 100644 debian/pgloader.manpages create mode 100644 debian/pycompat create mode 100644 debian/pyversions create mode 100644 debian/rules delete mode 100644 doc/Makefile delete mode 100644 doc/README delete mode 100644 doc/doc_pgloader.rest delete mode 100644 doc/example/create_table.sql delete mode 100644 doc/example/foo.conf delete mode 100644 doc/example/foo.data delete mode 100644 doc/lib/stylesheet.sty create mode 100644 examples/README create mode 100644 examples/clob/clob.data create mode 100644 examples/clob/clob.out create mode 100644 examples/clob/clob.sql create mode 100644 examples/cluttered/cluttered.data create mode 100644 examples/cluttered/cluttered.sql create mode 100644 examples/errors/errors.data create mode 100644 examples/errors/errors.sql create mode 100644 examples/pgloader.conf create mode 100644 examples/simple/simple.data create mode 100644 examples/simple/simple.sql create mode 100644 pgloader.1.sgml delete mode 100644 pgloader.conf create mode 100644 pgloader.py diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 88db6ed..0000000 --- a/LICENSE +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2005, Jan Wieck -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the PostgreSQL Loader nor the names of its - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2c26c20 --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ +DOCS = pgloader.1.sgml +GARBAGE = manpage.links manpage.refs + +# debian setting +DESTDIR = + +libdir = $(DESTDIR)/usr/share/pgloader +exdir = $(DESTDIR)/usr/share/doc/pgloader + +pgloader = pgloader.py +examples = examples +libs = $(wildcard pgloader/*.py) + +install: + install -m 755 $(pgloader) $(libdir) + install -m 755 -d $(libdir)/pgloader + + cp -a $(libs) $(libdir)/pgloader + cp -a $(examples) $(exdir) + +man: $(DOCS) + docbook2man $(DOCS) 2>/dev/null + -rm -f $(GARBAGE) diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..f3c39db --- /dev/null +++ b/debian/changelog @@ -0,0 +1,23 @@ +pgloader (2.0.2) unstable; urgency=low + + * configurable null and empty_string representations + * bugfix on newline_escapes behavior when all lines are not escaped + * new global newline_escapes setting + * uses by default psycopg2, failback to psycopg1 if not available + * client_encoding can now be set on each table + * documentation (manpage) update + + -- Dimitri Fontaine Wed, 15 Nov 2006 22:26:46 +0100 + +pgloader (2.0.1-2) unstable; urgency=low + + * package 
cleaning (lintian warnings and error) + + -- Dimitri Fontaine Tue, 14 Nov 2006 18:14:57 +0100 + +pgloader (2.0.1-1) unstable; urgency=low + + * Initial release + + -- Dimitri Fontaine Mon, 13 Nov 2006 22:56:15 +0100 + diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..7ed6ff8 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +5 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..f330bbb --- /dev/null +++ b/debian/control @@ -0,0 +1,21 @@ +Source: pgloader +Section: misc +Priority: extra +Maintainer: Dimitri Fontaine +Build-Depends: debhelper (>= 5), docbook-to-man (>= 2.0.0), python-support (>= 0.3) +Standards-Version: 3.7.2 + +Package: pgloader +Architecture: all +Depends: python (>=2.4.4), python-psycopg2 | python-psycopg (<< 1.1.21) +Description: loads flat data files into PostgreSQL + pgloader imports data from a flat file and inserts it into a database + table. It uses a flat file per database table, and you can configure as + many Sections as you want, each one associating a table name and a data + file. + . + Data are parsed and rewritten, then given to PostgreSQL COPY command. + Parsing is necessary for dealing with end of lines and eventual trailing + separator characters, and for column reordering: your flat data file may + not have the same column order as the database table has. + diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..def199e --- /dev/null +++ b/debian/copyright @@ -0,0 +1,32 @@ +This package was debianized by Dimitri Fontaine on +Mon, 13 Nov 2006 22:56:15 +0100. + +It was downloaded from http://pgloader.dalibo.org/ + +Upstream Author: Dimitri Fontaine + +Copyright: 2005, Jan Wieck + 2006, Dimitri Fontaine + +License: + Redistribution and use in source and binary forms, with or without + modification, are permitted under the terms of the BSD License. 
+ + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + +On Debian systems, the complete text of the BSD License can be +found in `/usr/share/common-licenses/BSD'. + + +The Debian packaging is (C) 2006, Dimitri Fontaine and +is licensed under the GPL, see `/usr/share/common-licenses/GPL'. diff --git a/debian/dirs b/debian/dirs new file mode 100644 index 0000000..275a1b3 --- /dev/null +++ b/debian/dirs @@ -0,0 +1,4 @@ +usr/bin +usr/sbin +usr/share/pgloader +usr/share/doc/pgloader diff --git a/debian/docs b/debian/docs new file mode 100644 index 0000000..2312955 --- /dev/null +++ b/debian/docs @@ -0,0 +1 @@ +pgloader.1 \ No newline at end of file diff --git a/debian/files b/debian/files new file mode 100644 index 0000000..841a98a --- /dev/null +++ b/debian/files @@ -0,0 +1 @@ +pgloader_2.0.2_all.deb misc extra diff --git a/debian/pgloader.links b/debian/pgloader.links new file mode 100644 index 0000000..202fef5 --- /dev/null +++ b/debian/pgloader.links @@ -0,0 +1,2 @@ +/usr/share/pgloader/pgloader.py /usr/bin/pgloader + diff --git a/debian/pgloader.manpages b/debian/pgloader.manpages new file mode 100644 index 0000000..2312955 --- /dev/null +++ b/debian/pgloader.manpages @@ -0,0 +1 @@ +pgloader.1 \ No newline at end of file diff --git a/debian/pycompat b/debian/pycompat 
new file mode 100644 index 0000000..d8263ee --- /dev/null +++ b/debian/pycompat @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/debian/pyversions b/debian/pyversions new file mode 100644 index 0000000..cd06769 --- /dev/null +++ b/debian/pyversions @@ -0,0 +1 @@ +2.3- \ No newline at end of file diff --git a/debian/rules b/debian/rules new file mode 100644 index 0000000..781b540 --- /dev/null +++ b/debian/rules @@ -0,0 +1,98 @@ +#!/usr/bin/make -f +# -*- makefile -*- +# Sample debian/rules that uses debhelper. +# This file was originally written by Joey Hess and Craig Small. +# As a special exception, when this file is copied by dh-make into a +# dh-make output file, you may use that output file without restriction. +# This special exception was added by Craig Small in version 0.37 of dh-make. + +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 + + + + +CFLAGS = -Wall -g + +ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) + CFLAGS += -O0 +else + CFLAGS += -O2 +endif + +configure: configure-stamp +configure-stamp: + dh_testdir + # Add here commands to configure the package. + + touch configure-stamp + + +build: build-stamp + +build-stamp: configure-stamp + dh_testdir + + # Add here commands to compile the package. + #$(MAKE) + docbook-to-man pgloader.1.sgml > pgloader.1 + + touch $@ + +clean: + dh_testdir + dh_testroot + rm -f build-stamp configure-stamp + + # Add here commands to clean up after the build process. + -$(MAKE) clean + + dh_clean + +install: build + dh_testdir + dh_testroot + dh_clean -k + dh_installdirs + + # Add here commands to install the package into debian/pgloader. + $(MAKE) DESTDIR=$(CURDIR)/debian/pgloader install + + +# Build architecture-independent files here. +binary-indep: build install +# We have nothing to do by default. + +# Build architecture-dependent files here. 
+binary-arch: build install + dh_testdir + dh_testroot + dh_installchangelogs + dh_installdocs + dh_installexamples +# dh_install +# dh_installmenu +# dh_installdebconf +# dh_installlogrotate +# dh_installemacsen +# dh_installpam +# dh_installmime + dh_pysupport +# dh_installinit +# dh_installcron +# dh_installinfo + dh_installman + dh_link + dh_strip + dh_compress + dh_fixperms +# dh_perl +# dh_makeshlibs + dh_installdeb + dh_shlibdeps + dh_gencontrol + dh_md5sums + dh_builddeb + +binary: binary-indep binary-arch +.PHONY: build clean binary-indep binary-arch binary install configure diff --git a/doc/Makefile b/doc/Makefile deleted file mode 100644 index 79af31e..0000000 --- a/doc/Makefile +++ /dev/null @@ -1,43 +0,0 @@ -# $Id: Makefile,v 1.1 2005-11-21 16:05:50 jpargudo Exp $ - -rest = $(wildcard *.rest) -html = $(addsuffix .html, $(basename $(rest))) -pdf = $(addsuffix .pdf, $(basename $(rest))) - -pdf: $(pdf) clean - -html: $(html) - -dist-clean: clean - @rm -f $(pdf) $(html) - -clean: - @rm -f *.aux *.log *~ *.tex *.out *.toc *.dvi - -%.html: %.rest - rest2html --stylesheet lib/stylesheet.sty \ - --no-section-numbering \ - --language=fr \ - $< > $@ - -%.pdf: %.dvi - dvipdf $< - -%.dvi: %.tex - latex $< >> /dev/null - latex $< >> /dev/null - -%.tex: %.rest - rest2latex --use-latex-toc \ - --stylesheet lib/stylesheet.sty \ - --use-latex-footnotes \ - --no-section-numbering \ - --language=fr \ - --input-encoding=iso-8859-15 \ - --table-style=booktabs \ - --output-encoding=iso-8859-15 \ - $< > $@ -help: - @echo " Programmes nécessaires: docbook, latex, dvipdf, kpdf" - -.PHONY: pdf diff --git a/doc/README b/doc/README deleted file mode 100644 index 3c8ca86..0000000 --- a/doc/README +++ /dev/null @@ -1,34 +0,0 @@ -====== -README -====== - -How to compile documentation for pgloader ------------------------------------------ - -**doc_pgloader.rest** - -Some documentation for pgloader project, in english. 
- -ReST format (see -http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html for more info -on this format and tools). - -Since its plain text, you don't need to compile anything to read it ;-) - -But you can still get a PDF or an HTML file: - -compile PDF version : - - $ make doc_pgloader.pdf - -compile HTML version : - - $ make doc_pgloader.html - -Contact the authors -------------------- - -Feel free to send me questions / comments / patches / whatever : - -Jean-Paul Argudo - diff --git a/doc/doc_pgloader.rest b/doc/doc_pgloader.rest deleted file mode 100644 index 92578d6..0000000 --- a/doc/doc_pgloader.rest +++ /dev/null @@ -1,189 +0,0 @@ -======== -pgloader -======== - -:Author: - Jean-Paul Argudo - -:Version: - $Id: doc_pgloader.rest,v 1.2 2006-03-28 21:39:25 jpargudo Exp $ -:Type: - User manual - -:Comment: - pgLoader v.1.x documentation (install, usage and example) - -:Licence: - BSD - -About -===== - -pgloader (http://pgfoundry.org/projects/pgloader/) is a new project allowing -you to import data in a PostgreSQL database. - -You have to launch pgloader as many times you have tables. pgloader handles -just one table at a time. - -All bad records are put together in a file, with a logfile explaining origins -of errors. - - -Installation -============ - -Under Debian, the current installation is a bit tricky (as per 200510xx): :: - - wget http://debian.wow-vision.com.sg/debian/pool/main/p/postgresql libpgtcl_7.4.7-6sarge1_i386.deb - dpgk -i libpgtcl_7.4.7-6sarge1_i386.deb - apt-get install tcllib - wget http://pgfoundry.org/frs/download.php/233/pgloader-1.0.tar.gz - tar zxvf pgloader-1.0.tar.gz - -Then you can eventually put the binary into /usr/local/bin to facilitate -comandlines: :: - - $ cp pgloader-1.0/pgloader /usr/local/bin - -Principle -========= - -You must fill two files per table: - - * a parameter file, let's call it .conf - * a datafile, let's call it
.data - -You need also all necessary parameters to the db connexion you want to use: - -Common ones are the following: - - * host : name of the server where your PostgreSQL db lives (localhost ?) - * user : username (you?) - * password : username's password (mybigsecret) - * dbname : name of the PostgreSQL db - -This parameters are put together in a double-quoted string: - - "host=localhost user=me password=mybigsecret dbname=mydatabase" - -This string as the same type that PQconnectdb awaits for in the libpq. Its -complete documentation can be read at: -http://www.postgresql.org/docs/current/static/libpq.html#LIBPQ-CONNECT - -You can for sure add much more parameters, depending your db configuration. - -Example -======= - -We want to insert records in "foo" table: :: - - test=> \d foo - Table «public.foo» - Colonne | Type | Modificateurs - ---------+---------+--------------- - a | integer | not null - b | date | - c | text | - Index : - «foo_pkey» PRIMARY KEY, btree (a) - -The datafile ------------- - -Our datafile "foo.data" as following records: :: - - 1;1987-12-04;"This is a test of data file" - 2;2005-03-02;"diziz'another test with som'o'lil'quotes" - 42;;"No need to date this" - 67;1999-01-02;Oops I didn't escape this string?! 
- -Please note that: - - * fields are separated with a semicolon - * you can handle presence of empty data: the empty field is represented with - two semicolons following - * we have a record per line - * theres is no other line separator excepted \n - * dates are in ISO format: YYYY-MM-DD (a fix is coming to handle "set datestyle - to" in the conf file) - * you can escape strings, optionnaly, double quoting them - -Configuration file ------------------- - -The corresponding file "foo.conf" for the above datafile is the following: :: - - # ---- - # Conversion parameter file for pgloader - # - # Possible file formats: - # COPY native PostgreSQL COPY format (default) - # CSV Comma separated variables - # MSCSV Comma separated variables alternate format - # - # The COPY command is constructed from the table_name, the - # table_columns and the eventual nulls string definition. - # - # The default column separator character is comma. - # ---- - - table_name = foo - table_columns = a,b,c - file_format = CSV - group_size = 1000 - file_sepchar = ; - #nulls = NULL - quote = " - file_is_utf8 = 0 - -Note that separation character is set to ";" and that quoting is specifyied -with the character double-quote: " - -Inserts will be commited each 1000, per blocks of 1000 rows at a time. - -The datafile nor the database is in utf-8, so the parameter *file_is_utf8* is -set to 0. Set it to 1 otherwise: when both database and datafile are in utf-8. - -Since ``pgctl`` internals run in utf-8, the data must be converted *on the -fly* to utf-8 when reading the datafile, thats why pgloader needs to know how -is the datafile like, utf-8 or not. 
- - -pgloader execution ------------------- - -The execution is quite simple: :: - - $ pgloader foo.conf foo.data "host=localhost user=me password=mybigsecret \ - dbname=mydatabase" - - 4 row(s) loaded - 0 row(s) rejected - -A simple verification of what has been inserted: :: - - test=> select * from foo ; - a | b | c - ----+------------+------------------------------------------ - 1 | 1987-12-04 | This is a test of data file - 2 | 2005-03-02 | diziz'another test with som'o'lil'quotes - 42 | | No need to date this - 67 | 1999-01-02 | Oops I didn't escape this string?! - (4 lines) - -**Note**: You will find this example in the doc/example/ directory. - -when errors occurs ------------------- - -Check the following: - - * if your configuration file is not okay, pgloader will tell you whats wrong - - * if you have a problem with the data you try to import, you'll find in the - .rej file data that have bee rejected. In the .rejlog file given problems - will be explicited: a group of error messages per rejected row. - -Then you'll have to correct errors in .rej file and import *that* file like all -the others: don't reimport anything else, all the good data is already in the -box :) diff --git a/doc/example/create_table.sql b/doc/example/create_table.sql deleted file mode 100644 index 15cb8ad..0000000 --- a/doc/example/create_table.sql +++ /dev/null @@ -1,2 +0,0 @@ -drop table foo; -create table foo (a integer primary key, b date, c text); diff --git a/doc/example/foo.conf b/doc/example/foo.conf deleted file mode 100644 index 33d3e25..0000000 --- a/doc/example/foo.conf +++ /dev/null @@ -1,21 +0,0 @@ -# ---- -# Conversion parameter file for pgloader -# -# Possible file formats: -# COPY native PostgreSQL COPY format (default) -# CSV Comma separated variables -# MSCSV Comma separated variables alternate format -# -# The COPY command is constructed from the table_name, the -# table_columns and the eventual nulls string definition. 
-# -# The default column separator character is comma. -# ---- - -table_name = foo -table_columns = a,b,c -file_format = CSV -group_size = 1000 -file_sepchar = ; -#nulls = NULL -quote = " diff --git a/doc/example/foo.data b/doc/example/foo.data deleted file mode 100644 index 6ee9e96..0000000 --- a/doc/example/foo.data +++ /dev/null @@ -1,4 +0,0 @@ -1;1987-12-04;"This is a test of data file" -2;2005-03-02;"diziz'another test with som'o'lil'quotes" -42;;"No need to date this" -67;1999-01-02;Oops I didn't escape this string?! diff --git a/doc/lib/stylesheet.sty b/doc/lib/stylesheet.sty deleted file mode 100644 index 8147887..0000000 --- a/doc/lib/stylesheet.sty +++ /dev/null @@ -1,52 +0,0 @@ -\let\oldAuthor\author -\renewcommand{\author}[1]{\newcommand{\myAuthor}{#1}\oldAuthor{#1}} -\let\oldTitle\title -\renewcommand{\title}[1]{\newcommand{\myTitle}{#1}\oldTitle{#1}} - -\usepackage{eurosym} -\usepackage[latin9]{inputenc} -\let ¤ = \euro - -\usepackage{fancyhdr} -\pagestyle{fancy} - -\lhead{} -\chead{} -\rhead{\myTitle} -\lfoot{\textsf{pgFoundry} -} -\cfoot{\small{pgloader documentation \\ -http://pgfoundry.org/projects/pgloader/}} -\rfoot{\thepage\ / \pageref*{LastPage}} - -\renewcommand{\headrulewidth}{0.4pt} -\renewcommand{\footrulewidth}{0.4pt} - -\usepackage{helvet} - -\renewcommand{\familydefault}{phv} - -%Parametrage pour une feuille A4 pleine (merci SBI) -\evensidemargin = 30mm -\oddsidemargin = 30mm -\voffset=-1in -\topmargin = 17mm -\headheight = 14.5mm -\headsep = 15mm -\hoffset=-1in -\marginparsep = 0pt -\marginparwidth = 0pt -\footskip = 20mm -\textwidth=162mm -\textheight=200mm -\paperwidth=210mm -\paperheight=297mm -\parindent=0pt -\parskip=5pt -%fin parametrage A4 plein - -\usepackage{lastpage} - -\hypersetup{colorlinks=true} - -\usepackage{indentfirst} diff --git a/examples/README b/examples/README new file mode 100644 index 0000000..5889c20 --- /dev/null +++ b/examples/README @@ -0,0 +1,55 @@ +In this directory you'll find out some pgloader usage 
examples. + +To use them, please first create a pgloader database, then for each example +the tables it needs, then issue the pgloader command: + + $ createdb --encoding=utf-8 pgloader + $ cd examples + $ psql pgloader < simple/simple.sql + $ ../pgloader.py -Tvc pgloader.conf simple + +If you want to load data from all examples, create tables for all of them +first, then run pgloader without argument. + +The provided examples are: + +. simple + + This dataset shows basic case, with trailing separator and data + reordering. + +. errors + + Same test, but with impossible dates. Should report some errors. If it does +not report errors, check you're not using psycopg 1.1.21. + +. clob + + This dataset shows some text large object importing to PostgreSQL text + datatype. + +. cluttered + + A dataset with newline escaped and multi-line input (without quoting) + Beware of data reordering, too. + + +You can launch all those pgloader tests in one run, provided you created the +necessary tables: + + $ for test in simple clob cluttered; do psql pgloader < $test/$test.sql; done + $ ../pgloader.py -Tc pgloader.conf + + [...] + + Table name | duration | size | updates | errors + ==================================================================== + clob | 0.121s | 32 kB | 7 | 0 + cluttered | 0.041s | 32 kB | 3 | 0 + simple | 0.040s | 16 kB | 6 | 0 + ==================================================================== + Total | 0.369s | 80 kB | 16 | 0 + +And you then have a nice summary. 
+ + diff --git a/examples/clob/clob.data b/examples/clob/clob.data new file mode 100644 index 0000000..ce2a1e8 --- /dev/null +++ b/examples/clob/clob.data @@ -0,0 +1,7 @@ +1|0,16,clob.out +2|16,20,clob.out +3|36,23,clob.out +4|59,15,clob.out +5|6e,13,clob.out +6|81,6,clob.out +7|87,d,clob.out \ No newline at end of file diff --git a/examples/clob/clob.out b/examples/clob/clob.out new file mode 100644 index 0000000..d9a15de --- /dev/null +++ b/examples/clob/clob.out @@ -0,0 +1 @@ +This is some clob dataTo be used on several data entryReferences to those are to be foundIn the clob.data fileThey refer to beginlengthand filename. \ No newline at end of file diff --git a/examples/clob/clob.sql b/examples/clob/clob.sql new file mode 100644 index 0000000..d59fb26 --- /dev/null +++ b/examples/clob/clob.sql @@ -0,0 +1,4 @@ +CREATE TABLE clob ( + a integer primary key, + b text +); \ No newline at end of file diff --git a/examples/cluttered/cluttered.data b/examples/cluttered/cluttered.data new file mode 100644 index 0000000..484374c --- /dev/null +++ b/examples/cluttered/cluttered.data @@ -0,0 +1,16 @@ +1^some multi\ +line text with\ +newline escaping^and some other data following^ +2^and another line^clean^ +3^and\ +a last multiline\ +escaped line +with a missing\ +escaping^just to test^ +4^\ ^empty value^ +5^^null value^ +6^multi line\ +escaped value\ +\ +with empty line\ +embeded^last line^ \ No newline at end of file diff --git a/examples/cluttered/cluttered.sql b/examples/cluttered/cluttered.sql new file mode 100644 index 0000000..d327cf6 --- /dev/null +++ b/examples/cluttered/cluttered.sql @@ -0,0 +1,5 @@ +CREATE TABLE cluttered ( + a integer primary key, + b text, + c text +); \ No newline at end of file diff --git a/examples/errors/errors.data b/examples/errors/errors.data new file mode 100644 index 0000000..33ca814 --- /dev/null +++ b/examples/errors/errors.data @@ -0,0 +1,7 @@ +1|some first row text|2006-13-11| +2|some second row text|2006-11-11| +3|some third row 
text|2006-10-12| +4|\ |2006-16-4| +5|some fifth row text|2006-5-12| +6|some sixth row text|2006-13-10| +7|some null date to play with|| \ No newline at end of file diff --git a/examples/errors/errors.sql b/examples/errors/errors.sql new file mode 100644 index 0000000..8daead7 --- /dev/null +++ b/examples/errors/errors.sql @@ -0,0 +1,5 @@ +CREATE TABLE errors ( + a integer primary key, + b date, + c text +); \ No newline at end of file diff --git a/examples/pgloader.conf b/examples/pgloader.conf new file mode 100644 index 0000000..78473da --- /dev/null +++ b/examples/pgloader.conf @@ -0,0 +1,51 @@ +[pgsql] +host = localhost +port = 5432 +base = pgloader +user = dim +pass = None + +client_encoding = 'utf-8' +copy_every = 5 +commit_every = 5 +#copy_delimiter = % + +null = "" +empty_string = "\ " + +newline_escapes = \ + +[simple] +table = simple +filename = simple/simple.data +field_sep = | +trailing_sep = True +columns = a:1, b:3, c:2 + +# those reject settings are defaults one +reject_log = /tmp/simple.rej.log +reject_data = /tmp/simple.rej + +[errors] +table = errors +filename = errors/errors.data +field_sep = | +trailing_sep = True +columns = a:1, b:3, c:2 + +[clob] +table = clob +filename = clob/clob.data +field_sep = | +columns = a:1, b:2 +index = a:1 +blob_columns = b:2:ifx_clob + +[cluttered] +table = cluttered +filename = cluttered/cluttered.data +field_sep = ^ +trailing_sep = True +newline_escapes = c:\ +field_count = 3 +columns = a:1, b:3, c:2 diff --git a/examples/simple/simple.data b/examples/simple/simple.data new file mode 100644 index 0000000..6ef3d1f --- /dev/null +++ b/examples/simple/simple.data @@ -0,0 +1,7 @@ +1|some first row text|2006-11-11| +2|some second row text|2006-11-11| +3|some third row text|2006-10-12| +4|\ |2006-10-4| +5|some fifth row text|2006-5-12| +6|some sixth row text|2006-7-10| +7|some null date to play with|| \ No newline at end of file diff --git a/examples/simple/simple.sql b/examples/simple/simple.sql new file mode 100644 
index 0000000..981bd68 --- /dev/null +++ b/examples/simple/simple.sql @@ -0,0 +1,5 @@ +CREATE TABLE simple ( + a integer primary key, + b date, + c text +); \ No newline at end of file diff --git a/pgloader.1.sgml b/pgloader.1.sgml new file mode 100644 index 0000000..046280a --- /dev/null +++ b/pgloader.1.sgml @@ -0,0 +1,742 @@ + + + +
+ dim@dalibo.com +
+ + Dimitri + Fontaine + + August 2006 + + 2006 + Dimitri Fontaine + +
+ + + pgloader + 1 + + + + pgloader + +Import CSV data and Large Object to PostgreSQL + + + + + + pgloader + configuration file + pedantic + debug + verbose + dry run + count + from + from id + input files encoding + Section1 Section2 + + + + + description + + pgloader imports data from a flat file and + insert it into a database table. It uses a flat file per + database table, and you can configure as many Sections as you + want, each one associating a table name and a data file. + + + + Data are parsed and rewritten, then given to PostgreSQL + COPY command. Parsing is necessary for + dealing with end of lines and eventual trailing separator + characters, and for column reordering: your flat data file may + not have the same column order as the databse table has. + + + + pgloader is also able to load some large + objects data into PostgreSQL, as of now only Informix + UNLOAD data files are supported. This command + gives large objects data location information into the main data + file. pgloader parse it and produces and SQL + UPDATE order per large object, and commit those orders once + every commit_every configuration parameter. + + + + pgloader issue some timing statistics + every commit_every commits (see Configuration + for this setting). At the end of each section processing, a + summary of overall operations, numbers of updates and commits, + time it took in seconds, errors logged and database errors is + issued. + + + + + options + + + In order for pgloader to run, you have to + edit a configuration file (see Configuration) consisting of + Section definitions. Each section refers to a PostgreSQL table + into which some data is to be loaded. + + + + + + + + + specifies the configuration file to use. The default file + name is pgloader.conf, searched into + current working directory. + + + + + + + + + + activates the pedantic mode, where any + warning is considered as a fatal error, thus stopping the + processing of the input file. 
+ + + + + + + + + + makes pgloader say it all about what it + does. debug implies verbose. + + + + + + + + + + makes pgloader very verbose about + what it does. + + + + + + + + + + makes pgloader simulate operations, + that implies no database connection and no data extraction + from blob files. + + + + + + + + + + makes pgloader issue a truncate SQL + command before importing data. + + + + + + + + + + makes pgloader issue a vacuum full + verbose analyse SQL command before importing data. + + + This vacuum is run from shell command + /usr/bin/vacuumdb with connection + informations taken from configuration file (see + Configuration section of this manual page), but without + password prompting. If you use this option, please + configure your pg_hba.conf in a way + no password is prompted (trust). + + + + + + + + + + Number of input lines to process, default is to process + all the input lines. + + + + + + + + + + Input line number from which we begin to process (and + count). pgloader will skip all + preceding lines. + + + You can't use both and + at the same time. + + + + + + + + + + From which id do we begin to process + (and count) input lines. + + + When a composite key is used, you have to give each column + of the key separated by comma, on the form col_name=value. + + + Please notice using the --from-id + option implies pgloader will try to get + row id of each row, it being on the interval processed or + not. This could have some performance impact, and you may + end up prefering to use --from instead. + + + Example: pgloader -I col1:val1,col2:val2 + + + You can't use both and + at the same time. + + + + + + + + + + Input data files encoding. Defaults to 'latin9'. + + + + + + + + + is the name of a configured Section describing some data + to load + + + Section arguments are optionnal, if no section is given + all configured sections are processed. 
+ + + + + + + + configuration + + The configuration file has a .ini file syntax; its first section + has to be the pgsql one, defining how to + access the PostgreSQL database server into which data is + loaded. Then you may define any number of sections, each one + describing a data loading task to be performed by + pgloader. + + + + The [pgsql] section has the following + options; those not described as optional must be set. + + + + + + + host: PostgreSQL database server name, for example + localhost. + + + + + + + + + port: PostgreSQL database server listening port, 5432. You have + to fill this entry. + + + + + + + + + base: The name of the database you want to load data into. + + + + + + + + + user: Connecting PostgreSQL user name. + + + + + + + + + pass: The password of the user. The best is to grant a + trust access privilege in PostgreSQL + pg_hba.conf. Then you can set this + entry to whatever value you want to. + + + + + + + + + client_encoding: Set this parameter to have pgloader + connect to PostgreSQL using this encoding. + + + This parameter is optional and defaults to 'latin9'. + + + + + + + + + copy_every: When issuing COPY PostgreSQL commands, + pgloader will not make a single big + COPY attempt, but copy copy_every lines + at a time. + + + This parameter is optional and defaults to 10000. + + + + + + + + + commit_every: PostgreSQL COMMIT frequency, expressed + in UPDATE orders. A good value is 1000, + which means committing the SQL transaction every 1000 input + lines. + + + pgloader issues a commit every + commit_every updates, on connection closing and when a SQL + error occurs. + + + This parameter is optional and defaults to 1000. + + + + + + + + + copy_delimiter: The field separator to use in the produced COPY FROM statements. If + you don't specify this, the same separator as the one given in + the field_sep parameter will be used. + + + Please note PostgreSQL requires a single char + properly encoded (see your client_encoding + parameter), or it aborts with an error and may even crash. + + + This parameter is optional and defaults to + field_sep.
+ + + + + + + + + newline_escapes: For a description of this parameter's effect, see below (same name, table + local setting). + + + You can set up here a global escape character, to be considered on + each and every column of each and every table defined + thereafter. + + + + + + + + + null: You can configure here how a null value is + represented in your flat data file. + + + This parameter is optional and defaults to + '' (that is, the empty string). + + + + + + + + + empty_string: You can configure here how empty values are represented in + your flat data file. + + + This parameter is optional and defaults to '\ + ' (that is, a backslash followed by a space). + + + + + + + You can then define any number of data sections, and give them an + arbitrary name. Some options are required, some are actually + optional, in which case this is stated below. + + + + + + + The name of the database table into which data is loaded. + + + + + + + + + The absolute path to the input data file. The large object + files are to be found in the same directory. Their name + can be in the form [bc]lob[0-9a-f]{4}.[0-9a-f]{3}, but + this information is not used by + pgloader. + + + + + + + + + In case of errors processing input data, a human readable + log per rejected input data line is written to the + reject_log file. + + + + + + + + + In case of errors processing input data, the rejected + input line is appended to the reject_data file. + + + + + + + + + The field separator used in the data file. The same + separator will be used by the generated + COPY commands, thus + pgloader does not have to deal with + escaping the delimiter it uses (input data has to have + escaped it). + + + This parameter is optional and defaults to pipe char '|'. + + + + + + + + + The UNLOAD command does not escape + newlines when they appear in table data. Hence, you may + obtain multi-line data files, where a single database row + (say tuple if you prefer to) can span multiple physical + lines in the unloaded file.
+ + + If this is the case for you, you may want to configure here the + number of columns per tuple. Then + pgloader will count columns and + buffer line input in order to re-assemble several physical + lines into one data row when needed. + + + This parameter is optional. + + + + + + + + + If this option is set to True, the + input data file is known to append a + field_sep as the last character of each + of its lines. With this option set, this last character is + then not considered as a field separator. + + + This parameter is optional and defaults to False. + + + + + + + + + Set this parameter to have pgloader + connect to PostgreSQL using this encoding. + + + This parameter is optional and defaults to 'latin9'. If defined + on a table level, this local value will override the global + one. + + + + + + + + + You can configure here how a null value is + represented in your flat data file. + + + This parameter is optional and defaults to + '' (that is, the empty string). If defined on a + table level, this local value will override the global one. + + + + + + + + + You can configure here how empty values are represented in + your flat data file. + + + This parameter is optional and defaults to '\ + ' (that is, a backslash followed by a space). If defined on + a table level, this local value will override the global one. + + + + + + + + + Sometimes the input data file has field values containing + newlines, and the export program used (such as the Informix + UNLOAD command) escapes in-field + newlines. So you want pgloader to keep + those newlines, while at the same time preserving them. + + + This option does the described work on specified fields + and considering the escaping character you configure, + following this syntax: + + + newline_escapes = colname:\, other_colname:§ + + + This parameter is optional, and the extra work is only + done when set.
 You can configure + newline_escapes for as many fields as + necessary, and you may configure a different escaping + character each time. + + + Please note that at the moment, + pgloader only supports + newline_escapes of one character in length. + + + When both a global (see [pgsql] section) + newline_escapes parameter and a table local + one are set, pgloader issues a warning and + only considers the global setting. + + + + + + + + + Table index definition, to be used in blob UPDATE'ing. You + define an index column by giving its name and its column + number (as found in your data file, and counting from 1) + separated by a colon. If your table has a composite key, + then you can define multiple columns here, separated by a + comma. + + + index = colname:3, other_colname:5 + + + + + + + + + You can define here table columns, with the same + definition format as in the previous index + parameter. + + + In case you have a lot of columns per table, you will want + to use multiple lines for this parameter value. The Python + ConfigParser module knows how to read + multi-line parameters; you don't have to escape anything. + + + + + + + + + The definition of the columns where to find some blob or + clob reference. This definition is composed of a table + column name, a column number (counting from one) reference + into the Informix UNLOAD data file, and + a large object type, separated by a colon. You can have + several columns in this field, separated by a + comma. + + + Supported large object types are Informix blob and clob; + the expected configuration strings are respectively + ifx_blob for binary (bytea) content + type and ifx_clob for text type values. + + + Here's an example: + + + blob_type = clob_column:3:ifx_blob, other_clob_column:5:ifx_clob + + + + + + + + configuration example + + Please see the given configuration example which should be distributed + in + /usr/share/doc/pgloader/examples/pgloader.conf.
+ + + + + History + + pgloader was at first an Informix to + PostgreSQL migration helper which imported Informix large + objects directly into a PostgreSQL database. + + + + Then as we got some data we couldn't find tools to take care of, + we decided ifx_blob would become + pgloader, as it had to be able to import all + Informix UNLOAD data. Those data contain escaped separators in + unquoted data fields and multi-line fields (\r and \n are not + escaped). + + + + + Bugs + + Please report bugs to Dimitri Fontaine <dim@dalibo.com>. + + + When the last line is alone on a COPY command and its + parsing ends in error (not enough columns read for example), no + information is given back by pgloader. + + + + + Authors + + pgloader is written by Dimitri + Fontaine dim@dalibo.com. + + + +
diff --git a/pgloader.conf b/pgloader.conf deleted file mode 100644 index a432739..0000000 --- a/pgloader.conf +++ /dev/null @@ -1,36 +0,0 @@ -# $Id: pgloader.conf,v 1.4 2006-03-28 21:39:25 jpargudo Exp $ -# ---- -# Conversion parameter file for pgloader -# -# Possible file formats: -# COPY native PostgreSQL COPY format (default) -# CSV Comma separated variables -# MSCSV Comma separated variables alternate format -# -# The COPY command is constructed from the table_name, the -# table_columns and the eventual nulls string definition. -# -# The default column separator character is comma. -# ---- - -table_name = my_table -table_columns = column1, column2, ... - -file_format = COPY -# COPY or CSV or MSCSV - -group_size = 1000 - -# file_sepchar = ; -# , (default) or ; or other - -# nulls = '' -# NULL or '' or other - -# quote = " -# how your strings are quoted in the CSV file - -file_is_utf8 = 0 -# how the datafile and database are encoded: utf8/unicode or not? -# 0=NO # if utf8, both data file and -# 1=YES # database must be in utf8 diff --git a/pgloader.py b/pgloader.py new file mode 100644 index 0000000..dabcec6 --- /dev/null +++ b/pgloader.py @@ -0,0 +1,362 @@ +#! /usr/bin/env python +# -*- coding: ISO-8859-15 -*- +# Author: Dimitri Fontaine + +""" +PostgreSQL data import tool, aimed to replace and extands pgloader. 
+ +Important features : + - CSV file format import using COPY + - multi-line input file + - configurable amount of rows per COPY instruction + - large object to TEXT or BYTEA field handling + (only informix blobs and clobs supported as of now) + - trailing slash optionnal removal (support informix UNLOAD file format) + - begin processing at any line in the file, by number or row id + - dry-run option, to validate input reading without connecting to database + - pedantic option, to stop processing on warning + - reject log and reject data files: you can reprocess refused data later + - COPY errors recovery via redoing COPY with half files until file is + one line long, then reject log this line + +Please read the fine manual page pg_import(1) for command line usage +(options) and configuration file format. +""" + +import os, sys, os.path, time, codecs +from cStringIO import StringIO + +import pgloader.options +import pgloader.tools + +def parse_options(): + """ Parse given options """ + import ConfigParser + from optparse import OptionParser + + usage = "%prog [-c ] Section [Section ...]" + parser = OptionParser(usage = usage) + + parser.add_option("-c", "--config", dest = "config", + default = "pgloader.conf", + help = "configuration file, defauts to pgloader.conf") + + parser.add_option("-p", "--pedantic", action = "store_true", + dest = "pedantic", + default = False, + help = "pedantic mode, stop processing on warning") + + parser.add_option("-d", "--debug", action = "store_true", + dest = "debug", + default = False, + help = "add some debug information (a lot of)") + + parser.add_option("-v", "--verbose", action = "store_true", + dest = "verbose", + default = False, + help = "be verbose and about processing progress") + + parser.add_option("-n", "--dry-run", action = "store_true", + dest = "dryrun", + default = False, + help = "simulate operations, don't connect to the db") + + parser.add_option("-T", "--truncate", action = "store_true", + dest = "truncate", + 
default = False, + help = "truncate tables before importing data") + + parser.add_option("-V", "--vacuum", action = "store_true", + dest = "vacuum", + default = False, + help = "vacuum database after having imported data") + + parser.add_option("-C", "--count", dest = "count", + default = None, type = "int", + help = "number of input lines to process") + + parser.add_option("-F", "--from", dest = "fromcount", + default = 0, type = "int", + help = "number of input lines to skip") + + parser.add_option("-I", "--from-id", dest = "fromid", + default = None, + help = "wait for given id on input to begin") + + parser.add_option("-E", "--encoding", dest = "encoding", + default = None, + help = "input files encoding") + + (opts, args) = parser.parse_args() + + # check existence en read ability of config file + if not os.path.exists(opts.config): + print "Error: Configuration file %s does not exists" % opts.config + print parser.format_help() + sys.exit(1) + + if not os.access(opts.config, os.R_OK): + print "Error: Can't read configuration file %s" % opts.config + print parser.format_help() + sys.exit(1) + + if opts.verbose: + print 'Using %s configuration file' % opts.config + + if opts.fromcount != 0 and opts.fromid is not None: + print "Error: Can't set both options fromcount (-F) AND fromid (-I)" + sys.exit(1) + + pgloader.options.DRY_RUN = opts.dryrun + pgloader.options.DEBUG = opts.debug + # if debug, then verbose + pgloader.options.VERBOSE = opts.verbose or opts.debug + pgloader.options.PEDANTIC = opts.pedantic + + pgloader.options.TRUNCATE = opts.truncate + pgloader.options.VACUUM = opts.vacuum + + pgloader.options.COUNT = opts.count + pgloader.options.FROM_COUNT = opts.fromcount + pgloader.options.FROM_ID = opts.fromid + + pgloader.options.INPUT_ENCODING = opts.encoding + + return opts.config, args + +def parse_config(conffile): + """ Parse the configuration file """ + section = 'pgsql' + + # Now read pgsql configuration section + import ConfigParser + config = 
ConfigParser.ConfigParser() + + try: + config.read(conffile) + except: + print "Error: Given file is not a configuration file" + sys.exit(4) + + if not config.has_section(section): + print "Error: Please provide a [%s] section" % section + sys.exit(5) + + # load some options + # this has to be done after command line parsing + from pgloader.options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC + from pgloader.options import NULL, EMPTY_STRING + + if DRY_RUN: + if VERBOSE: + print "Notice: dry run mode, not connecting to database" + return config, None + + try: + from pgloader.db import db + + dbconn = db(config.get(section, 'host'), + config.getint(section, 'port'), + config.get(section, 'base'), + config.get(section, 'user'), + config.get(section, 'pass'), + connect = False) + + if config.has_option(section, 'client_encoding'): + dbconn.client_encoding = config.get(section, 'client_encoding') + + if config.has_option(section, 'copy_every'): + dbconn.copy_every = config.getint(section, 'copy_every') + + if config.has_option(section, 'commit_every'): + dbconn.commit_every = config.getint(section, 'commit_every') + + if config.has_option(section, 'copy_delimiter'): + dbconn.copy_sep = config.get(section, 'copy_delimiter') + + # Then there are null and empty_string optionnal parameters + # They canbe overriden in specific table configuration + if config.has_option(section, 'null'): + pgloader.options.NULL = pgloader.tools.parse_config_string( + config.get(section, 'null')) + + if config.has_option(section, 'empty_string'): + pgloader.options.EMPTY_STRING = pgloader.tools.parse_config_string( + config.get(section, 'empty_string')) + + # optionnal global newline_escapes + if config.has_option(section, 'newline_escapes'): + setting = pgloader.tools.parse_config_string( + config.get(section, 'newline_escapes')) + pgloader.options.NEWLINE_ESCAPES = setting + + except Exception, error: + print "Error: Could not initialize PostgreSQL connection:" + print error + sys.exit(6) + + 
return config, dbconn + +def myprint(l, line_prefix = " ", cols = 78): + """ pretty print list l elements """ + # some code for pretty print + tmp = line_prefix + for e in l: + if len(tmp) + len(e) > cols: + print tmp + tmp = line_prefix + + if tmp != line_prefix: tmp += " " + tmp += e + print tmp + +def duration_pprint(duration): + """ pretty print duration (human readable information) """ + if duration > 3600: + h = int(duration / 3600) + m = int((duration - 3600 * h) / 60) + s = duration - 3600 * h - 60 * m + 0.5 + return '%2dh%02dm%03.1f' % (h, m, s) + + elif duration > 60: + m = int(duration / 60) + s = duration - 60 * m + return ' %02dm%06.3f' % (m, s) + + else: + return '%10.3f' % duration + +def load_data(): + """ read option line and configuration file, then process data + import of given section, or all sections if no section is given on + command line """ + + # first parse command line options, and set pgloader.options values + # accordingly + conffile, args = parse_options() + + # now init db connection + config, dbconn = parse_config(conffile) + + # load some pgloader package modules + from pgloader.options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC, VACUUM + from pgloader.pgloader import PGLoader + from pgloader.tools import PGLoader_Error + + sections = [] + summary = {} + + # args are meant to be configuration sections + if len(args) > 0: + for s in args: + if config.has_section(s): + sections.append(s) + + else: + for s in config.sections(): + if s != 'pgsql': + sections.append(s) + + if VERBOSE: + print 'Will consider following sections:' + myprint(sections) + + # we count time passed from now on + begin = time.time() + + # we run through sorted section list + sections.sort() + for s in sections: + try: + pgloader = PGLoader(s, config, dbconn) + pgloader.run() + + summary[s] = (pgloader.name,) + pgloader.summary() + except PGLoader_Error, e: + if e == '': + print '[%s] Please correct previous errors' % s + else: + print + print 'Error: %s' % e + + 
if PEDANTIC: + pgloader.print_stats() + + except KeyboardInterrupt: + print "Aborting on user demand (Interrupt)" + + # total duration + td = time.time() - begin + + retcode = 0 + + # print a pretty summary + t= 'Table name | duration | size | updates | errors ' + _= '====================================================================' + + tu = te = ts = 0 # total updates, errors, size + if not DRY_RUN: + dbconn.reset() + cursor = dbconn.dbconn.cursor() + + s_ok = 0 + for s in sections: + if s not in summary: + continue + + s_ok += 1 + if s_ok == 1: + # print pretty sumary header now + print + print t + print _ + + t, d, u, e = summary[s] + d = duration_pprint(d) + + if not DRY_RUN: + sql = "select pg_total_relation_size(%s), " + \ + "pg_size_pretty(pg_total_relation_size(%s));" + cursor.execute(sql, [t, t]) + octets, s = cursor.fetchone() + ts += octets + + if s[5:] == 'bytes': s = s[:-5] + ' B' + else: + s = '-' + + print '%-18s| %ss | %7s | %10d | %10d' % (t, d, s, u, e) + + tu += u + te += e + + if e > 0: + retcode += 1 + + if s_ok > 1: + td = duration_pprint(td) + + # pretty size + cursor.execute("select pg_size_pretty(%s);", [ts]) + [ts] = cursor.fetchone() + if ts[5:] == 'bytes': ts = ts[:-5] + ' B' + + print _ + print 'Total | %ss | %7s | %10d | %10d' % (td, ts, tu, te) + + if not DRY_RUN: + cursor.close() + + print + if VACUUM and not DRY_RUN: + print 'vacuumdb... ' + try: + dbconn.vacuum() + except KeyboardInterrupt: + pass + + return retcode + +if __name__ == "__main__": + sys.exit(load_data()) +