mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-08 15:27:00 +02:00
Improve LOAD DBF command (support for http and zip).
This commit is contained in:
parent
89aaabd179
commit
e1d2bd1318
63
db3.lisp
63
db3.lisp
@ -117,42 +117,47 @@
|
||||
(defun stream-file (filename
|
||||
&key
|
||||
dbname
|
||||
state-before
|
||||
(table-name (pathname-name filename))
|
||||
create-table
|
||||
truncate)
|
||||
"Open the DB3 and stream its content to a PostgreSQL database."
|
||||
(with-pgsql-transaction (dbname)
|
||||
(when create-table
|
||||
(let ((create-table-sql (db3-create-table filename)))
|
||||
(log-message :notice "Create table \"~a\"" table-name)
|
||||
(log-message :info "~a" create-table-sql)
|
||||
(pgsql-execute create-table-sql)))
|
||||
(let* ((summary (null *state*))
|
||||
(*state* (or *state* (make-pgstate))))
|
||||
|
||||
(when (and truncate (not create-table))
|
||||
;; we don't TRUNCATE a table we just CREATEd
|
||||
(let ((truncate-sql (format nil "TRUNCATE ~a;" table-name)))
|
||||
(log-message :notice "~a" truncate-sql)
|
||||
(pgsql-execute truncate-sql))))
|
||||
(with-stats-collection (dbname "create, truncate"
|
||||
:state state-before
|
||||
:summary summary)
|
||||
(with-pgsql-transaction (dbname)
|
||||
(when create-table
|
||||
(let ((create-table-sql (db3-create-table filename)))
|
||||
(log-message :notice "Create table \"~a\"" table-name)
|
||||
(log-message :info "~a" create-table-sql)
|
||||
(pgsql-execute create-table-sql)))
|
||||
|
||||
(let* ((*state* (make-pgstate))
|
||||
(lp:*kernel* (make-kernel 2))
|
||||
(channel (lp:make-channel))
|
||||
(dataq (lq:make-queue :fixed-capacity 4096)))
|
||||
(when (and truncate (not create-table))
|
||||
;; we don't TRUNCATE a table we just CREATEd
|
||||
(let ((truncate-sql (format nil "TRUNCATE ~a;" table-name)))
|
||||
(log-message :notice "~a" truncate-sql)
|
||||
(pgsql-execute truncate-sql)))))
|
||||
|
||||
(with-stats-collection (dbname table-name :state *state* :summary t)
|
||||
(log-message :notice "COPY \"~a\" from '~a'" table-name filename)
|
||||
(lp:submit-task channel #'copy-to-queue filename dataq table-name)
|
||||
(let* ((lp:*kernel* (make-kernel 2))
|
||||
(channel (lp:make-channel))
|
||||
(dataq (lq:make-queue :fixed-capacity 4096)))
|
||||
|
||||
;; and start another task to push that data from the queue to PostgreSQL
|
||||
(lp:submit-task channel
|
||||
#'pgloader.pgsql:copy-from-queue
|
||||
dbname table-name dataq
|
||||
:truncate truncate
|
||||
:transforms (transforms filename))
|
||||
(with-stats-collection (dbname table-name :state *state* :summary summary)
|
||||
(log-message :notice "COPY \"~a\" from '~a'" table-name filename)
|
||||
(lp:submit-task channel #'copy-to-queue filename dataq table-name)
|
||||
|
||||
;; now wait until both the tasks are over, and kill the kernel
|
||||
(loop for tasks below 2 do (lp:receive-result channel)
|
||||
finally
|
||||
(log-message :info "COPY \"~a\" done." table-name)
|
||||
(lp:end-kernel)))))
|
||||
;; and start another task to push that data from the queue to PostgreSQL
|
||||
(lp:submit-task channel
|
||||
#'pgloader.pgsql:copy-from-queue
|
||||
dbname table-name dataq
|
||||
:truncate truncate
|
||||
:transforms (transforms filename))
|
||||
|
||||
;; now wait until both the tasks are over, and kill the kernel
|
||||
(loop for tasks below 2 do (lp:receive-result channel)
|
||||
finally
|
||||
(log-message :info "COPY \"~a\" done." table-name)
|
||||
(lp:end-kernel))))))
|
||||
|
44
parser.lisp
44
parser.lisp
@ -831,31 +831,53 @@ Here's a quick description of the format we're parsing here:
|
||||
(declare (ignore w))
|
||||
opts)))
|
||||
|
||||
(defrule dbf-source (and kw-load kw-dbf kw-from maybe-quoted-filename)
|
||||
(defrule dbf-source (and kw-load kw-dbf kw-from filename-or-http-uri)
|
||||
(:lambda (src)
|
||||
(destructuring-bind (load dbf from source) src
|
||||
(declare (ignore load dbf from))
|
||||
;; source is (:filename #P"pathname/here")
|
||||
(destructuring-bind (type uri) source
|
||||
(ecase type
|
||||
(:filename uri))))))
|
||||
source)))
|
||||
|
||||
(defrule load-dbf-file (and dbf-source target dbf-options)
|
||||
(defrule load-dbf-file (and dbf-source target dbf-options (? gucs))
|
||||
(:lambda (command)
|
||||
(destructuring-bind (source pg-db-uri options) command
|
||||
(destructuring-bind (source pg-db-uri options gucs) command
|
||||
(destructuring-bind (&key host port user password dbname table-name
|
||||
&allow-other-keys)
|
||||
pg-db-uri
|
||||
`(lambda ()
|
||||
(let* ((*pgconn-host* ,host)
|
||||
(let* ((state-before (pgloader.utils:make-pgstate))
|
||||
(*state* (pgloader.utils:make-pgstate))
|
||||
(source
|
||||
,(destructuring-bind (kind url) source
|
||||
(ecase kind
|
||||
(:http `(with-stats-collection
|
||||
(,dbname "download" :state state-before)
|
||||
(pgloader.archive:http-fetch-file ,url)))
|
||||
(:filename url))))
|
||||
(source
|
||||
(if (string= "zip" (pathname-type source))
|
||||
(progn
|
||||
(with-stats-collection (,dbname "extract"
|
||||
:state state-before)
|
||||
(let ((d (pgloader.archive:expand-archive source)))
|
||||
(merge-pathnames
|
||||
(make-pathname :name (pathname-name source)
|
||||
:type "dbf")
|
||||
d))))
|
||||
source))
|
||||
(*pgconn-host* ,host)
|
||||
(*pgconn-port* ,port)
|
||||
(*pgconn-user* ,user)
|
||||
(*pgconn-pass* ,password))
|
||||
(pgloader.db3:stream-file ,source
|
||||
(*pgconn-pass* ,password)
|
||||
(*pg-settings* ',gucs))
|
||||
(pgloader.db3:stream-file source
|
||||
:state-before state-before
|
||||
:dbname ,dbname
|
||||
,@(when table-name
|
||||
(list :table-name table-name))
|
||||
,@options)))))))
|
||||
,@options)
|
||||
|
||||
(report-full-summary *state* state-before nil
|
||||
"Total import time")))))))
|
||||
|
||||
|
||||
#|
|
||||
|
@ -266,20 +266,22 @@ This command instructs pgloader to load data from a `DBF` file. Here's an
|
||||
example:
|
||||
|
||||
LOAD DBF
|
||||
FROM '/Users/dim/Downloads/comsimp2013.dbf'
|
||||
INTO postgresql://dim@localhost:54393/dim?comsimp2013
|
||||
WITH truncate, create table, table name = 'comsimp2013';
|
||||
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf
|
||||
INTO postgresql://dim@localhost:54393/dim
|
||||
WITH truncate, create table;
|
||||
|
||||
The `csv` format command accepts the following clauses and options:
|
||||
The `dbf` format command accepts the following clauses and options:
|
||||
|
||||
- *FROM*
|
||||
|
||||
Filename where to load the data from.
|
||||
Filename where to load the data from. This supports local files, HTTP
|
||||
URLs and zip files containing a single dbf file of the same name. Fetching
|
||||
such a zip file from an HTTP address is of course supported.
|
||||
|
||||
- *INTO*
|
||||
|
||||
The PostgreSQL connection URI must contains the possibly qualified name
|
||||
of the target table where to load the data into.
|
||||
The PostgreSQL connection URI. If it doesn't have a table name in the
|
||||
target, then the name part of the filename will be used as a table name.
|
||||
|
||||
- *WITH*
|
||||
|
||||
@ -302,6 +304,15 @@ The `csv` format command accepts the following clauses and options:
|
||||
This option expects as its value the possibly qualified name of the
|
||||
table to create.
|
||||
|
||||
- *SET*
|
||||
|
||||
This clause allows you to specify session parameters to be set for all the
|
||||
sessions opened by pgloader. It expects a comma-separated list of settings,
|
||||
each made of a parameter name, the equal sign, then the single-quoted value.
|
||||
|
||||
The names and values of the parameters are not validated by pgloader,
|
||||
they are given as-is to PostgreSQL.
|
||||
|
||||
## LOAD ARCHIVE
|
||||
|
||||
This command instructs pgloader to load data from one or more files contained
|
||||
|
5
test/dbf-zip.load
Normal file
5
test/dbf-zip.load
Normal file
@ -0,0 +1,5 @@
|
||||
LOAD DBF
|
||||
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/historiq2013.zip
|
||||
INTO postgresql://dim@localhost:54393/dim
|
||||
WITH truncate, create table
|
||||
SET client_encoding TO 'latin1';
|
@ -1,4 +1,5 @@
|
||||
LOAD DBF
|
||||
FROM '/Users/dim/Downloads/comsimp2013.dbf'
|
||||
INTO postgresql://dim@localhost:54393/dim?comsimp2013
|
||||
WITH truncate, create table, table name = 'comsimp2013';
|
||||
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf
|
||||
INTO postgresql://dim@localhost:54393/dim
|
||||
WITH truncate, create table
|
||||
SET client_encoding TO 'latin1';
|
||||
|
Loading…
Reference in New Issue
Block a user