Improve LOAD DBF command (support for http and zip).

This commit is contained in:
Dimitri Fontaine 2013-10-13 22:45:29 +02:00
parent 89aaabd179
commit e1d2bd1318
5 changed files with 94 additions and 50 deletions

View File

@ -117,10 +117,17 @@
(defun stream-file (filename (defun stream-file (filename
&key &key
dbname dbname
state-before
(table-name (pathname-name filename)) (table-name (pathname-name filename))
create-table create-table
truncate) truncate)
"Open the DB3 and stream its content to a PostgreSQL database." "Open the DB3 and stream its content to a PostgreSQL database."
(let* ((summary (null *state*))
(*state* (or *state* (make-pgstate))))
(with-stats-collection (dbname "create, truncate"
:state state-before
:summary summary)
(with-pgsql-transaction (dbname) (with-pgsql-transaction (dbname)
(when create-table (when create-table
(let ((create-table-sql (db3-create-table filename))) (let ((create-table-sql (db3-create-table filename)))
@ -132,14 +139,13 @@
;; we don't TRUNCATE a table we just CREATEd ;; we don't TRUNCATE a table we just CREATEd
(let ((truncate-sql (format nil "TRUNCATE ~a;" table-name))) (let ((truncate-sql (format nil "TRUNCATE ~a;" table-name)))
(log-message :notice "~a" truncate-sql) (log-message :notice "~a" truncate-sql)
(pgsql-execute truncate-sql)))) (pgsql-execute truncate-sql)))))
(let* ((*state* (make-pgstate)) (let* ((lp:*kernel* (make-kernel 2))
(lp:*kernel* (make-kernel 2))
(channel (lp:make-channel)) (channel (lp:make-channel))
(dataq (lq:make-queue :fixed-capacity 4096))) (dataq (lq:make-queue :fixed-capacity 4096)))
(with-stats-collection (dbname table-name :state *state* :summary t) (with-stats-collection (dbname table-name :state *state* :summary summary)
(log-message :notice "COPY \"~a\" from '~a'" table-name filename) (log-message :notice "COPY \"~a\" from '~a'" table-name filename)
(lp:submit-task channel #'copy-to-queue filename dataq table-name) (lp:submit-task channel #'copy-to-queue filename dataq table-name)
@ -154,5 +160,4 @@
(loop for tasks below 2 do (lp:receive-result channel) (loop for tasks below 2 do (lp:receive-result channel)
finally finally
(log-message :info "COPY \"~a\" done." table-name) (log-message :info "COPY \"~a\" done." table-name)
(lp:end-kernel))))) (lp:end-kernel))))))

View File

@ -831,31 +831,53 @@ Here's a quick description of the format we're parsing here:
(declare (ignore w)) (declare (ignore w))
opts))) opts)))
(defrule dbf-source (and kw-load kw-dbf kw-from maybe-quoted-filename) (defrule dbf-source (and kw-load kw-dbf kw-from filename-or-http-uri)
(:lambda (src) (:lambda (src)
(destructuring-bind (load dbf from source) src (destructuring-bind (load dbf from source) src
(declare (ignore load dbf from)) (declare (ignore load dbf from))
;; source is (:filename #P"pathname/here") source)))
(destructuring-bind (type uri) source
(ecase type
(:filename uri))))))
(defrule load-dbf-file (and dbf-source target dbf-options) (defrule load-dbf-file (and dbf-source target dbf-options (? gucs))
(:lambda (command) (:lambda (command)
(destructuring-bind (source pg-db-uri options) command (destructuring-bind (source pg-db-uri options gucs) command
(destructuring-bind (&key host port user password dbname table-name (destructuring-bind (&key host port user password dbname table-name
&allow-other-keys) &allow-other-keys)
pg-db-uri pg-db-uri
`(lambda () `(lambda ()
(let* ((*pgconn-host* ,host) (let* ((state-before (pgloader.utils:make-pgstate))
(*state* (pgloader.utils:make-pgstate))
(source
,(destructuring-bind (kind url) source
(ecase kind
(:http `(with-stats-collection
(,dbname "download" :state state-before)
(pgloader.archive:http-fetch-file ,url)))
(:filename url))))
(source
(if (string= "zip" (pathname-type source))
(progn
(with-stats-collection (,dbname "extract"
:state state-before)
(let ((d (pgloader.archive:expand-archive source)))
(merge-pathnames
(make-pathname :name (pathname-name source)
:type "dbf")
d))))
source))
(*pgconn-host* ,host)
(*pgconn-port* ,port) (*pgconn-port* ,port)
(*pgconn-user* ,user) (*pgconn-user* ,user)
(*pgconn-pass* ,password)) (*pgconn-pass* ,password)
(pgloader.db3:stream-file ,source (*pg-settings* ',gucs))
(pgloader.db3:stream-file source
:state-before state-before
:dbname ,dbname :dbname ,dbname
,@(when table-name ,@(when table-name
(list :table-name table-name)) (list :table-name table-name))
,@options))))))) ,@options)
(report-full-summary *state* state-before nil
"Total import time")))))))
#| #|

View File

@ -266,20 +266,22 @@ This command instructs pgloader to load data from a `DBF` file. Here's an
example: example:
LOAD DBF LOAD DBF
FROM '/Users/dim/Downloads/comsimp2013.dbf' FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf
INTO postgresql://dim@localhost:54393/dim?comsimp2013 INTO postgresql://dim@localhost:54393/dim
WITH truncate, create table, table name = 'comsimp2013'; WITH truncate, create table;
The `csv` format command accepts the following clauses and options: The `dbf` format command accepts the following clauses and options:
- *FROM* - *FROM*
Filename where to load the data from. Filename where to load the data from. This supports local files, HTTP
URLs and zip files containing a single dbf file of the same name. Fetching
such a zip file from an HTTP address is of course supported.
- *INTO* - *INTO*
The PostgreSQL connection URI must contains the possibly qualified name The PostgreSQL connection URI. If it doesn't have a table name in the
of the target table where to load the data into. target, then the name part of the filename will be used as a table name.
- *WITH* - *WITH*
@ -302,6 +304,15 @@ The `csv` format command accepts the following clauses and options:
This option expects as its value the possibly qualified name of the This option expects as its value the possibly qualified name of the
table to create. table to create.
- *SET*
This clause allows specifying session parameters to be set for all the
sessions opened by pgloader. It expects a comma-separated list of entries,
each made of a parameter name, the equal sign, then the single-quoted value.
The names and values of the parameters are not validated by pgloader,
they are given as-is to PostgreSQL.
## LOAD ARCHIVE ## LOAD ARCHIVE
This command instructs pgloader to load data from one or more files contained This command instructs pgloader to load data from one or more files contained

5
test/dbf-zip.load Normal file
View File

@ -0,0 +1,5 @@
LOAD DBF
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/historiq2013.zip
INTO postgresql://dim@localhost:54393/dim
WITH truncate, create table
SET client_encoding TO 'latin1';

View File

@ -1,4 +1,5 @@
LOAD DBF LOAD DBF
FROM '/Users/dim/Downloads/comsimp2013.dbf' FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf
INTO postgresql://dim@localhost:54393/dim?comsimp2013 INTO postgresql://dim@localhost:54393/dim
WITH truncate, create table, table name = 'comsimp2013'; WITH truncate, create table
SET client_encoding TO 'latin1';