Improve HTTP handling of CSV and Fixed data sources.

In passing also allow --field to specify the whole field list, there's
no point in forcing the user to have as many --field switches on the
command line as they have columns in their data source file.
This commit is contained in:
Dimitri Fontaine 2014-12-27 17:02:19 +01:00
parent 25c39b05e2
commit 40f3c4f769
7 changed files with 81 additions and 17 deletions

View File

@ -183,6 +183,44 @@ For documentation about the available syntaxes for the `--field` and
Note also that the PostgreSQL URI includes the target *tablename*.
### Loading from CSV available through HTTP
The same command as just above can also be run if the CSV file happens to be
found on a remote HTTP location:
.pgloader --type csv \
--field "usps,geoid,aland,awater,aland_sqmi,awater_sqmi,intptlat,intptlong" \
--with "skip header = 1" \
--with "fields terminated by '\t'" \
http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt \
postgresql:///pgloader?districts_longlat
Some more options have to be used in that case, as the file contains a
one-line header (most commonly that's column names, but it could also be a
copyright notice). Also, in that case, we specify all the fields in a single
`--field` option argument.
Again, the PostgreSQL target connection string must contain the *tablename*
option, and you have to ensure that the target table exists and can fit the
data. Here's the SQL command used in that example in case you want to try it
yourself:
create table districts_longlat
(
usps text,
geoid text,
aland bigint,
awater bigint,
aland_sqmi double precision,
awater_sqmi double precision,
intptlat double precision,
intptlong double precision
);
Also notice that the same command will work against an archived version of
the same data, e.g.
<http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt.gz>.
### Migrating from SQLite
The following command will open the SQLite database, discover its tables

View File

@ -329,6 +329,10 @@
;; process the files
(mapcar #'process-command-file arguments)))
(source-definition-error (c)
(log-message :fatal "~a" c)
(uiop:quit 2))
(condition (c)
(when debug (invoke-debugger c))
(uiop:quit 1))))))
@ -348,18 +352,32 @@
;;;
;;; Main API to use from outside of pgloader.
;;;
(define-condition source-definition-error (error)
  ;; Signaled by LOAD-DATA when the command-line source specification is
  ;; invalid or incomplete: a CSV source given without field definitions,
  ;; or a CSV/fixed-width source whose target URI lacks a table name.
  ;; The main entry point catches this condition, logs MESG at :fatal
  ;; level, and exits with status 2.
  ((mesg :initarg :mesg :reader source-definition-error-mesg))
  (:report (lambda (err stream)
             (format stream "~a" (source-definition-error-mesg err)))))
(defun load-data (&key ((:from source)) ((:into target))
encoding fields options gucs casts before after
(start-logger t))
"Load data from SOURCE into TARGET."
(declare (type connection source)
(type pgsql-connection target))
(with-monitor (:start-logger start-logger)
;; some preliminary checks
(when (and (typep source 'csv-connection) (null fields))
(log-message :fatal "This source type requires --fields arguments.")
(return-from load-data))
;; some preliminary checks
(when (and (typep source 'csv-connection) (null fields))
(error 'source-definition-error
:mesg "CSV source type requires fields definitions."))
(when (and (typep source 'csv-connection) (null (pgconn-table-name target)))
(error 'source-definition-error
:mesg "CSV data source require a table name target."))
(when (and (typep source 'fixed-connection) (null (pgconn-table-name target)))
(error 'source-definition-error
:mesg "Fixed-width data source require a table name target."))
(with-monitor (:start-logger start-logger)
(when (and casts (not (member (type-of source)
'(sqlite-connection
mysql-connection

View File

@ -483,8 +483,10 @@
;; Main package
;;
(defpackage #:pgloader
(:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.parser)
(:use #:cl
#:pgloader.params #:pgloader.utils #:pgloader.parser)
(:import-from #:pgloader.pgsql
#:pgconn-table-name
#:pgsql-connection
#:copy-from-file
#:list-databases

View File

@ -421,12 +421,14 @@
gucs before after
((:csv-options options)))
`(lambda ()
(let* ((state-before ,(when before `(pgloader.utils:make-pgstate)))
(let* ((state-before (pgloader.utils:make-pgstate))
(summary (null *state*))
(*state* (or *state* (pgloader.utils:make-pgstate)))
(state-after ,(when after `(pgloader.utils:make-pgstate)))
,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options))
,@(batch-control-bindings options)
(source-db (with-stats-collection ("fetch" :state state-before)
(expand (fetch-file ,csv-conn)))))
(progn
,(sql-code-block pg-db-conn 'state-before before "before load")
@ -435,7 +437,7 @@
(source
(make-instance 'pgloader.csv:copy-csv
:target-db ,pg-db-conn
:source ,(expand (fetch-file csv-conn))
:source source-db
:target ,(pgconn-table-name pg-db-conn)
:encoding ,encoding
:fields ',fields

View File

@ -121,12 +121,14 @@
gucs before after
((:fixed-options options)))
`(lambda ()
(let* ((state-before ,(when before `(pgloader.utils:make-pgstate)))
(let* ((state-before (pgloader.utils:make-pgstate))
(summary (null *state*))
(*state* (or *state* (pgloader.utils:make-pgstate)))
(state-after ,(when after `(pgloader.utils:make-pgstate)))
,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options))
,@(batch-control-bindings options)
(source-db (with-stats-collection ("fetch" :state state-before)
(expand (fetch-file ,fixed-conn)))))
(progn
,(sql-code-block pg-db-conn 'state-before before "before load")
@ -135,7 +137,7 @@
(source
(make-instance 'pgloader.fixed:copy-fixed
:target-db ,pg-db-conn
:source ,(expand (fetch-file fixed-conn))
:source source-db
:target ,(pgconn-table-name pg-db-conn)
:encoding ,encoding
:fields ',fields

View File

@ -273,10 +273,10 @@
(defun parse-cli-fields (type fields)
"Parse the --fields option."
(loop :for field :in fields
:collect (parse (case type
(:csv 'csv-source-field)
(:fixed 'fixed-source-field))
field)))
:append (parse (case type
(:csv 'csv-source-fields)
(:fixed 'fixed-source-fields))
field)))
(defun parse-cli-options (type options)
"Parse options as per the WITH clause when we get them from the CLI."

View File

@ -100,10 +100,12 @@ load database
(*cast-rules* ',casts)
,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options)
(source-db (with-stats-collection ("fetch" :state state-before)
(expand (fetch-file ,sqlite-db-conn))))
(source
(make-instance 'pgloader.sqlite::copy-sqlite
:target-db ,pg-db-conn
:source-db ,(expand (fetch-file sqlite-db-conn)))))
:source-db source-db)))
(pgloader.sqlite:copy-database source
:state-before state-before
:including ',incl