diff --git a/pgloader.1.md b/pgloader.1.md index b6e1977..fe261aa 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -183,6 +183,44 @@ For documentation about the available syntaxes for the `--field` and Note also that the PostgreSQL URI includes the target *tablename*. +### Loading from CSV available through HTTP + +The same command as just above can also be run if the CSV file happens to be +found on a remote HTTP location: + + pgloader --type csv \ + --field "usps,geoid,aland,awater,aland_sqmi,awater_sqmi,intptlat,intptlong" \ + --with "skip header = 1" \ + --with "fields terminated by '\t'" \ + http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt \ + postgresql:///pgloader?districts_longlat + +Some more options have to be used in that case, as the file contains a +one-line header (most commonly that's column names, but it could be a copyright +notice). Also, in that case, we specify all the fields right into a single +`--field` option argument. + +Again, the PostgreSQL target connection string must contain the *tablename* +option and you have to ensure that the target table exists and may fit the +data. Here's the SQL command used in that example in case you want to try it +yourself: + + create table districts_longlat + ( + usps text, + geoid text, + aland bigint, + awater bigint, + aland_sqmi double precision, + awater_sqmi double precision, + intptlat double precision, + intptlong double precision + ); + +Also notice that the same command will work against an archived version of +the same data, e.g. +[http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt.gz](http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt.gz). 
+ ### Migrating from SQLite The following command will open the SQLite database, discover its tables diff --git a/src/main.lisp b/src/main.lisp index df4f349..546b8ae 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -329,6 +329,10 @@ ;; process the files (mapcar #'process-command-file arguments))) + (source-definition-error (c) + (log-message :fatal "~a" c) + (uiop:quit 2)) + (condition (c) (when debug (invoke-debugger c)) (uiop:quit 1)))))) @@ -348,18 +352,32 @@ ;;; ;;; Main API to use from outside of pgloader. ;;; +(define-condition source-definition-error (error) + ((mesg :initarg :mesg :reader source-definition-error-mesg)) + (:report (lambda (err stream) + (format stream "~a" (source-definition-error-mesg err))))) + (defun load-data (&key ((:from source)) ((:into target)) encoding fields options gucs casts before after (start-logger t)) "Load data from SOURCE into TARGET." (declare (type connection source) (type pgsql-connection target)) - (with-monitor (:start-logger start-logger) - ;; some preliminary checks - (when (and (typep source 'csv-connection) (null fields)) - (log-message :fatal "This source type requires --fields arguments.") - (return-from load-data)) + ;; some preliminary checks + (when (and (typep source 'csv-connection) (null fields)) + (error 'source-definition-error + :mesg "CSV source type requires fields definitions.")) + + (when (and (typep source 'csv-connection) (null (pgconn-table-name target))) + (error 'source-definition-error + :mesg "CSV data source require a table name target.")) + + (when (and (typep source 'fixed-connection) (null (pgconn-table-name target))) + (error 'source-definition-error + :mesg "Fixed-width data source require a table name target.")) + + (with-monitor (:start-logger start-logger) (when (and casts (not (member (type-of source) '(sqlite-connection mysql-connection diff --git a/src/package.lisp b/src/package.lisp index 4da083b..2880d28 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -483,8 +483,10 @@ 
;; Main package ;; (defpackage #:pgloader - (:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.parser) + (:use #:cl + #:pgloader.params #:pgloader.utils #:pgloader.parser) (:import-from #:pgloader.pgsql + #:pgconn-table-name #:pgsql-connection #:copy-from-file #:list-databases diff --git a/src/parsers/command-csv.lisp b/src/parsers/command-csv.lisp index a034663..161bee3 100644 --- a/src/parsers/command-csv.lisp +++ b/src/parsers/command-csv.lisp @@ -421,12 +421,14 @@ gucs before after ((:csv-options options))) `(lambda () - (let* ((state-before ,(when before `(pgloader.utils:make-pgstate))) + (let* ((state-before (pgloader.utils:make-pgstate)) (summary (null *state*)) (*state* (or *state* (pgloader.utils:make-pgstate))) (state-after ,(when after `(pgloader.utils:make-pgstate))) ,@(pgsql-connection-bindings pg-db-conn gucs) - ,@(batch-control-bindings options)) + ,@(batch-control-bindings options) + (source-db (with-stats-collection ("fetch" :state state-before) + (expand (fetch-file ,csv-conn))))) (progn ,(sql-code-block pg-db-conn 'state-before before "before load") @@ -435,7 +437,7 @@ (source (make-instance 'pgloader.csv:copy-csv :target-db ,pg-db-conn - :source ,(expand (fetch-file csv-conn)) + :source source-db :target ,(pgconn-table-name pg-db-conn) :encoding ,encoding :fields ',fields diff --git a/src/parsers/command-fixed.lisp b/src/parsers/command-fixed.lisp index 71f7e56..6b73c1e 100644 --- a/src/parsers/command-fixed.lisp +++ b/src/parsers/command-fixed.lisp @@ -121,12 +121,14 @@ gucs before after ((:fixed-options options))) `(lambda () - (let* ((state-before ,(when before `(pgloader.utils:make-pgstate))) + (let* ((state-before (pgloader.utils:make-pgstate)) (summary (null *state*)) (*state* (or *state* (pgloader.utils:make-pgstate))) (state-after ,(when after `(pgloader.utils:make-pgstate))) ,@(pgsql-connection-bindings pg-db-conn gucs) - ,@(batch-control-bindings options)) + ,@(batch-control-bindings options) + (source-db (with-stats-collection 
("fetch" :state state-before) + (expand (fetch-file ,fixed-conn))))) (progn ,(sql-code-block pg-db-conn 'state-before before "before load") @@ -135,7 +137,7 @@ (source (make-instance 'pgloader.fixed:copy-fixed :target-db ,pg-db-conn - :source ,(expand (fetch-file fixed-conn)) + :source source-db :target ,(pgconn-table-name pg-db-conn) :encoding ,encoding :fields ',fields diff --git a/src/parsers/command-parser.lisp b/src/parsers/command-parser.lisp index 54ec7ce..d507521 100644 --- a/src/parsers/command-parser.lisp +++ b/src/parsers/command-parser.lisp @@ -273,10 +273,10 @@ (defun parse-cli-fields (type fields) "Parse the --fields option." (loop :for field :in fields - :collect (parse (case type - (:csv 'csv-source-field) - (:fixed 'fixed-source-field)) - field))) + :append (parse (case type + (:csv 'csv-source-fields) + (:fixed 'fixed-source-fields)) + field))) (defun parse-cli-options (type options) "Parse options as per the WITH clause when we get them from the CLI." diff --git a/src/parsers/command-sqlite.lisp b/src/parsers/command-sqlite.lisp index cb66dad..5da7a19 100644 --- a/src/parsers/command-sqlite.lisp +++ b/src/parsers/command-sqlite.lisp @@ -100,10 +100,12 @@ load database (*cast-rules* ',casts) ,@(pgsql-connection-bindings pg-db-conn gucs) ,@(batch-control-bindings options) + (source-db (with-stats-collection ("fetch" :state state-before) + (expand (fetch-file ,sqlite-db-conn)))) (source (make-instance 'pgloader.sqlite::copy-sqlite :target-db ,pg-db-conn - :source-db ,(expand (fetch-file sqlite-db-conn))))) + :source-db source-db))) (pgloader.sqlite:copy-database source :state-before state-before :including ',incl