mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-04 10:31:02 +02:00
Improve HTTP handling of CSV and Fixed data sources.
In passing also allow --field to specify the whole field list, there's no point in forcing the user to have as many --field switches on the command line as they have columns in their data source file.
This commit is contained in:
parent
25c39b05e2
commit
40f3c4f769
@ -183,6 +183,44 @@ For documentation about the available syntaxes for the `--field` and
|
||||
|
||||
Note also that the PostgreSQL URI includes the target *tablename*.
|
||||
|
||||
### Loading from CSV available through HTTP
|
||||
|
||||
The same command as just above can also be run if the CSV file happens to be
|
||||
found on a remote HTTP location:
|
||||
|
||||
.pgloader --type csv \
|
||||
--field "usps,geoid,aland,awater,aland_sqmi,awater_sqmi,intptlat,intptlong" \
|
||||
--with "skip header = 1" \
|
||||
--with "fields terminated by '\t'" \
|
||||
http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt \
|
||||
postgresql:///pgloader?districts_longlat
|
||||
|
||||
Some more options have to be used in that case, as the file contains a
|
||||
one-line header (most commonly that's column names, but it could be a copyright
|
||||
notice). Also, in that case, we specify all the fields right into a single
|
||||
`--field` option argument.
|
||||
|
||||
Again, the PostgreSQL target connection string must contain the *tablename*
|
||||
option and you have to ensure that the target table exists and may fit the
|
||||
data. Here's the SQL command used in that example in case you want to try it
|
||||
yourself:
|
||||
|
||||
create table districts_longlat
|
||||
(
|
||||
usps text,
|
||||
geoid text,
|
||||
aland bigint,
|
||||
awater bigint,
|
||||
aland_sqmi double precision,
|
||||
awater_sqmi double precision,
|
||||
intptlat double precision,
|
||||
intptlong double precision
|
||||
);
|
||||
|
||||
Also notice that the same command will work against an archived version of
|
||||
the same data, e.g.
|
||||
[http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt.gz](http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt.gz).
|
||||
|
||||
### Migrating from SQLite
|
||||
|
||||
The following command will open the SQLite database, discover its tables
|
||||
|
||||
@ -329,6 +329,10 @@
|
||||
;; process the files
|
||||
(mapcar #'process-command-file arguments)))
|
||||
|
||||
(source-definition-error (c)
|
||||
(log-message :fatal "~a" c)
|
||||
(uiop:quit 2))
|
||||
|
||||
(condition (c)
|
||||
(when debug (invoke-debugger c))
|
||||
(uiop:quit 1))))))
|
||||
@ -348,18 +352,32 @@
|
||||
;;;
|
||||
;;; Main API to use from outside of pgloader.
|
||||
;;;
|
||||
(define-condition source-definition-error (error)
|
||||
((mesg :initarg :mesg :reader source-definition-error-mesg))
|
||||
(:report (lambda (err stream)
|
||||
(format stream "~a" (source-definition-error-mesg err)))))
|
||||
|
||||
(defun load-data (&key ((:from source)) ((:into target))
|
||||
encoding fields options gucs casts before after
|
||||
(start-logger t))
|
||||
"Load data from SOURCE into TARGET."
|
||||
(declare (type connection source)
|
||||
(type pgsql-connection target))
|
||||
(with-monitor (:start-logger start-logger)
|
||||
;; some preliminary checks
|
||||
(when (and (typep source 'csv-connection) (null fields))
|
||||
(log-message :fatal "This source type requires --fields arguments.")
|
||||
(return-from load-data))
|
||||
|
||||
;; some preliminary checks
|
||||
(when (and (typep source 'csv-connection) (null fields))
|
||||
(error 'source-definition-error
|
||||
:mesg "CSV source type requires fields definitions."))
|
||||
|
||||
(when (and (typep source 'csv-connection) (null (pgconn-table-name target)))
|
||||
(error 'source-definition-error
|
||||
:mesg "CSV data source require a table name target."))
|
||||
|
||||
(when (and (typep source 'fixed-connection) (null (pgconn-table-name target)))
|
||||
(error 'source-definition-error
|
||||
:mesg "Fixed-width data source require a table name target."))
|
||||
|
||||
(with-monitor (:start-logger start-logger)
|
||||
(when (and casts (not (member (type-of source)
|
||||
'(sqlite-connection
|
||||
mysql-connection
|
||||
|
||||
@ -483,8 +483,10 @@
|
||||
;; Main package
|
||||
;;
|
||||
(defpackage #:pgloader
|
||||
(:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.parser)
|
||||
(:use #:cl
|
||||
#:pgloader.params #:pgloader.utils #:pgloader.parser)
|
||||
(:import-from #:pgloader.pgsql
|
||||
#:pgconn-table-name
|
||||
#:pgsql-connection
|
||||
#:copy-from-file
|
||||
#:list-databases
|
||||
|
||||
@ -421,12 +421,14 @@
|
||||
gucs before after
|
||||
((:csv-options options)))
|
||||
`(lambda ()
|
||||
(let* ((state-before ,(when before `(pgloader.utils:make-pgstate)))
|
||||
(let* ((state-before (pgloader.utils:make-pgstate))
|
||||
(summary (null *state*))
|
||||
(*state* (or *state* (pgloader.utils:make-pgstate)))
|
||||
(state-after ,(when after `(pgloader.utils:make-pgstate)))
|
||||
,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||
,@(batch-control-bindings options))
|
||||
,@(batch-control-bindings options)
|
||||
(source-db (with-stats-collection ("fetch" :state state-before)
|
||||
(expand (fetch-file ,csv-conn)))))
|
||||
|
||||
(progn
|
||||
,(sql-code-block pg-db-conn 'state-before before "before load")
|
||||
@ -435,7 +437,7 @@
|
||||
(source
|
||||
(make-instance 'pgloader.csv:copy-csv
|
||||
:target-db ,pg-db-conn
|
||||
:source ,(expand (fetch-file csv-conn))
|
||||
:source source-db
|
||||
:target ,(pgconn-table-name pg-db-conn)
|
||||
:encoding ,encoding
|
||||
:fields ',fields
|
||||
|
||||
@ -121,12 +121,14 @@
|
||||
gucs before after
|
||||
((:fixed-options options)))
|
||||
`(lambda ()
|
||||
(let* ((state-before ,(when before `(pgloader.utils:make-pgstate)))
|
||||
(let* ((state-before (pgloader.utils:make-pgstate))
|
||||
(summary (null *state*))
|
||||
(*state* (or *state* (pgloader.utils:make-pgstate)))
|
||||
(state-after ,(when after `(pgloader.utils:make-pgstate)))
|
||||
,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||
,@(batch-control-bindings options))
|
||||
,@(batch-control-bindings options)
|
||||
(source-db (with-stats-collection ("fetch" :state state-before)
|
||||
(expand (fetch-file ,fixed-conn)))))
|
||||
|
||||
(progn
|
||||
,(sql-code-block pg-db-conn 'state-before before "before load")
|
||||
@ -135,7 +137,7 @@
|
||||
(source
|
||||
(make-instance 'pgloader.fixed:copy-fixed
|
||||
:target-db ,pg-db-conn
|
||||
:source ,(expand (fetch-file fixed-conn))
|
||||
:source source-db
|
||||
:target ,(pgconn-table-name pg-db-conn)
|
||||
:encoding ,encoding
|
||||
:fields ',fields
|
||||
|
||||
@ -273,10 +273,10 @@
|
||||
(defun parse-cli-fields (type fields)
|
||||
"Parse the --fields option."
|
||||
(loop :for field :in fields
|
||||
:collect (parse (case type
|
||||
(:csv 'csv-source-field)
|
||||
(:fixed 'fixed-source-field))
|
||||
field)))
|
||||
:append (parse (case type
|
||||
(:csv 'csv-source-fields)
|
||||
(:fixed 'fixed-source-fields))
|
||||
field)))
|
||||
|
||||
(defun parse-cli-options (type options)
|
||||
"Parse options as per the WITH clause when we get them from the CLI."
|
||||
|
||||
@ -100,10 +100,12 @@ load database
|
||||
(*cast-rules* ',casts)
|
||||
,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||
,@(batch-control-bindings options)
|
||||
(source-db (with-stats-collection ("fetch" :state state-before)
|
||||
(expand (fetch-file ,sqlite-db-conn))))
|
||||
(source
|
||||
(make-instance 'pgloader.sqlite::copy-sqlite
|
||||
:target-db ,pg-db-conn
|
||||
:source-db ,(expand (fetch-file sqlite-db-conn)))))
|
||||
:source-db source-db)))
|
||||
(pgloader.sqlite:copy-database source
|
||||
:state-before state-before
|
||||
:including ',incl
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user