Improve LOAD DBF command (support for http and zip).

This commit is contained in:
Dimitri Fontaine 2013-10-13 22:45:29 +02:00
parent 89aaabd179
commit e1d2bd1318
5 changed files with 94 additions and 50 deletions

View File

@ -117,42 +117,47 @@
;; Stream the content of the DB3 (dbf) file FILENAME into the PostgreSQL
;; table TABLE-NAME of database DBNAME.
;;
;; NOTE(review): this span is a commit diff rendered without +/- markers, so
;; lines of the pre-commit and post-commit versions of STREAM-FILE are
;; interleaved below (several forms therefore appear twice); it is not
;; loadable as written.
;;
;; Keyword arguments:
;;   dbname        database name handed to with-pgsql-transaction,
;;                 with-stats-collection and copy-from-queue
;;   state-before  passed as :state to the "create, truncate" stats collection
;;   table-name    target table; defaults to (pathname-name filename)
;;   create-table  when true, run the SQL returned by db3-create-table
;;   truncate      when true and create-table is false, TRUNCATE the table
;;                 first; also forwarded as :truncate to copy-from-queue
(defun stream-file (filename
&key
dbname
state-before
(table-name (pathname-name filename))
create-table
truncate)
"Open the DB3 and stream its content to a PostgreSQL database."
(with-pgsql-transaction (dbname)
(when create-table
(let ((create-table-sql (db3-create-table filename)))
(log-message :notice "Create table \"~a\"" table-name)
(log-message :info "~a" create-table-sql)
(pgsql-execute create-table-sql)))
;; SUMMARY is true only when *state* is nil on entry; *state* is then
;; rebound to the caller's value, or to a fresh pgstate when there is none.
(let* ((summary (null *state*))
(*state* (or *state* (make-pgstate))))
(when (and truncate (not create-table))
;; we don't TRUNCATE a table we just CREATEd
(let ((truncate-sql (format nil "TRUNCATE ~a;" table-name)))
(log-message :notice "~a" truncate-sql)
(pgsql-execute truncate-sql))))
;; the CREATE TABLE / TRUNCATE step runs in its own transaction and is
;; accounted for under the "create, truncate" label, against STATE-BEFORE
(with-stats-collection (dbname "create, truncate"
:state state-before
:summary summary)
(with-pgsql-transaction (dbname)
(when create-table
(let ((create-table-sql (db3-create-table filename)))
(log-message :notice "Create table \"~a\"" table-name)
(log-message :info "~a" create-table-sql)
(pgsql-execute create-table-sql)))
(let* ((*state* (make-pgstate))
(lp:*kernel* (make-kernel 2))
(channel (lp:make-channel))
(dataq (lq:make-queue :fixed-capacity 4096)))
(when (and truncate (not create-table))
;; we don't TRUNCATE a table we just CREATEd
(let ((truncate-sql (format nil "TRUNCATE ~a;" table-name)))
(log-message :notice "~a" truncate-sql)
(pgsql-execute truncate-sql)))))
(with-stats-collection (dbname table-name :state *state* :summary t)
(log-message :notice "COPY \"~a\" from '~a'" table-name filename)
(lp:submit-task channel #'copy-to-queue filename dataq table-name)
;; one channel and one fixed-capacity queue (4096 entries) are shared by the
;; two tasks submitted below: copy-to-queue and copy-from-queue
(let* ((lp:*kernel* (make-kernel 2))
(channel (lp:make-channel))
(dataq (lq:make-queue :fixed-capacity 4096)))
;; and start another task to push that data from the queue to PostgreSQL
(lp:submit-task channel
#'pgloader.pgsql:copy-from-queue
dbname table-name dataq
:truncate truncate
:transforms (transforms filename))
(with-stats-collection (dbname table-name :state *state* :summary summary)
(log-message :notice "COPY \"~a\" from '~a'" table-name filename)
(lp:submit-task channel #'copy-to-queue filename dataq table-name)
;; now wait until both the tasks are over, and kill the kernel
(loop for tasks below 2 do (lp:receive-result channel)
finally
(log-message :info "COPY \"~a\" done." table-name)
(lp:end-kernel)))))
;; and start another task to push that data from the queue to PostgreSQL
(lp:submit-task channel
#'pgloader.pgsql:copy-from-queue
dbname table-name dataq
:truncate truncate
:transforms (transforms filename))
;; now wait until both the tasks are over, and kill the kernel
(loop for tasks below 2 do (lp:receive-result channel)
finally
(log-message :info "COPY \"~a\" done." table-name)
(lp:end-kernel))))))

View File

@ -831,31 +831,53 @@ Here's a quick description of the format we're parsing here:
(declare (ignore w))
opts)))
;; Grammar rule for the source clause of the command: the keywords
;; LOAD DBF FROM followed by the source designator.
;;
;; NOTE(review): this span is a diff rendered without +/- markers, so two
;; rule heads and two rule bodies are interleaved. Presumably the head using
;; maybe-quoted-filename and the ecase that unwraps (:filename uri) to the
;; bare uri are the removed version, while the head using
;; filename-or-http-uri and the body returning SOURCE unchanged are the
;; added one -- confirm against the real file.
(defrule dbf-source (and kw-load kw-dbf kw-from maybe-quoted-filename)
(defrule dbf-source (and kw-load kw-dbf kw-from filename-or-http-uri)
(:lambda (src)
(destructuring-bind (load dbf from source) src
;; the three keywords only anchor the parse, their values are unused
(declare (ignore load dbf from))
;; source is (:filename #P"pathname/here")
(destructuring-bind (type uri) source
(ecase type
(:filename uri))))))
source)))
;; Grammar rule for a complete LOAD DBF command: a dbf-source, a target, the
;; dbf-options and an optional gucs clause. It returns a (lambda () ...) form
;; that performs the load when called.
;;
;; NOTE(review): this span is a diff rendered without +/- markers, so removed
;; and added lines are interleaved (two rule heads, two destructuring-bind of
;; COMMAND, two let* heads, two stream-file calls); it is not loadable as
;; written.
;;
;; Going by the forms visible here, the generated lambda:
;;   1. creates two pgstate objects, STATE-BEFORE and *STATE*;
;;   2. resolves SOURCE: an (:http url) source is fetched with
;;      pgloader.archive:http-fetch-file inside a "download" stats
;;      collection, a (:filename url) source is used as-is;
;;   3. when the resolved pathname has type "zip", expands it with
;;      pgloader.archive:expand-archive inside an "extract" stats collection
;;      and points SOURCE at a file of the same name with type "dbf", merged
;;      against the value expand-archive returned;
;;   4. binds the *pgconn-...* specials from the target URI and *pg-settings*
;;      to the parsed gucs;
;;   5. calls pgloader.db3:stream-file, then report-full-summary.
(defrule load-dbf-file (and dbf-source target dbf-options)
(defrule load-dbf-file (and dbf-source target dbf-options (? gucs))
(:lambda (command)
(destructuring-bind (source pg-db-uri options) command
(destructuring-bind (source pg-db-uri options gucs) command
(destructuring-bind (&key host port user password dbname table-name
&allow-other-keys)
pg-db-uri
`(lambda ()
(let* ((*pgconn-host* ,host)
(let* ((state-before (pgloader.utils:make-pgstate))
(*state* (pgloader.utils:make-pgstate))
(source
;; the source kind is dispatched on while the form is being built, so
;; only the matching branch ends up in the generated lambda
,(destructuring-bind (kind url) source
(ecase kind
(:http `(with-stats-collection
(,dbname "download" :state state-before)
(pgloader.archive:http-fetch-file ,url)))
(:filename url))))
(source
;; NOTE(review): string= is case-sensitive, so a pathname whose type is
;; spelled "ZIP" would not be expanded -- confirm that is intended
(if (string= "zip" (pathname-type source))
(progn
(with-stats-collection (,dbname "extract"
:state state-before)
;; presumably D is the directory the archive was expanded into -- verify
;; against pgloader.archive:expand-archive
(let ((d (pgloader.archive:expand-archive source)))
(merge-pathnames
(make-pathname :name (pathname-name source)
:type "dbf")
d))))
source))
(*pgconn-host* ,host)
(*pgconn-port* ,port)
(*pgconn-user* ,user)
(*pgconn-pass* ,password))
(pgloader.db3:stream-file ,source
(*pgconn-pass* ,password)
(*pg-settings* ',gucs))
(pgloader.db3:stream-file source
:state-before state-before
:dbname ,dbname
;; :table-name is only spliced in when the target URI carried one
,@(when table-name
(list :table-name table-name))
,@options)))))))
,@options)
(report-full-summary *state* state-before nil
"Total import time")))))))
#|

View File

@ -266,20 +266,22 @@ This command instructs pgloader to load data from a `DBF` file. Here's an
example:
LOAD DBF
FROM '/Users/dim/Downloads/comsimp2013.dbf'
INTO postgresql://dim@localhost:54393/dim?comsimp2013
WITH truncate, create table, table name = 'comsimp2013';
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf
INTO postgresql://dim@localhost:54393/dim
WITH truncate, create table;
The `csv` format command accepts the following clauses and options:
The `dbf` format command accepts the following clauses and options:
- *FROM*
Filename where to load the data from.
Filename where to load the data from. This supports local files, HTTP
URLs and zip files containing a single dbf file of the same name. Fetching
such a zip file from an HTTP address is of course supported.
- *INTO*
The PostgreSQL connection URI must contains the possibly qualified name
of the target table where to load the data into.
The PostgreSQL connection URI. If it doesn't have a table name in the
target, then the name part of the filename will be used as a table name.
- *WITH*
@ -302,6 +304,15 @@ The `csv` format command accepts the following clauses and options:
This option expects as its value the possibly qualified name of the
table to create.
- *SET*
This clause allows you to specify session parameters to be set for all the
sessions opened by pgloader. It expects a comma-separated list of settings,
each given as a parameter name, the equal sign, then the single-quoted value.
The names and values of the parameters are not validated by pgloader,
they are given as-is to PostgreSQL.
## LOAD ARCHIVE
This command instructs pgloader to load data from one or more files contained

5
test/dbf-zip.load Normal file
View File

@ -0,0 +1,5 @@
LOAD DBF
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/historiq2013.zip
INTO postgresql://dim@localhost:54393/dim
WITH truncate, create table
SET client_encoding TO 'latin1';

View File

@ -1,4 +1,5 @@
LOAD DBF
FROM '/Users/dim/Downloads/comsimp2013.dbf'
INTO postgresql://dim@localhost:54393/dim?comsimp2013
WITH truncate, create table, table name = 'comsimp2013';
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf
INTO postgresql://dim@localhost:54393/dim
WITH truncate, create table
SET client_encoding TO 'latin1';