diff --git a/package.lisp b/package.lisp index 2f8070b..1fa0639 100644 --- a/package.lisp +++ b/package.lisp @@ -65,8 +65,8 @@ (:import-from #:pgloader.pgsql #:with-pgsql-transaction #:pgsql-execute) - (:export #:parse-command - #:run-command + (:export #:parse-commands + #:run-commands #:with-database-uri)) @@ -140,10 +140,11 @@ #:list-databases #:list-tables) (:import-from #:pgloader.parser - #:run-command - #:parse-command + #:run-commands + #:parse-commands #:with-database-uri) - (:export #:*state* + (:export #:*version-string* + #:*state* #:*csv-path-root* #:*reject-path-root* #:*pgconn-host* @@ -155,8 +156,8 @@ #:*myconn-port* #:*myconn-user* #:*myconn-pass* - #:run-command - #:parse-command + #:run-commands + #:parse-commands #:with-database-uri #:slurp-file-into-string #:copy-from-file diff --git a/params.lisp b/params.lisp index b712497..61d9bce 100644 --- a/params.lisp +++ b/params.lisp @@ -5,7 +5,8 @@ (defpackage #:pgloader.params (:use #:cl) - (:export #:*csv-path-root* + (:export #:*version-string* + #:*csv-path-root* #:*reject-path-root* #:*log-filename* #:*client-min-messages* @@ -25,6 +26,9 @@ (in-package :pgloader.params) +(defparameter *version-string* "3.0.50.1" + "pgloader version string, following the Emacs versioning model.") + ;; we can't use pgloader.utils:make-pgstate yet because params is compiled ;; first in the asd definition, we just make the symbol a special variable. 
(defparameter *state* nil diff --git a/parser.lisp b/parser.lisp index b65a6a6..fd8a6bb 100644 --- a/parser.lisp +++ b/parser.lisp @@ -788,7 +788,13 @@ Here's a quick description of the format we're parsing here: (defrule option-create-table (and kw-create kw-table) (:constant (cons :create-table t))) -(defrule option-table-name (and kw-table kw-name equal-sign qualified-table-name) +(defrule quoted-table-name (and #\' (or qualified-table-name namestring) #\') + (:lambda (qtn) + (destructuring-bind (open name close) qtn + (declare (ignore open close)) + name))) + +(defrule option-table-name (and kw-table kw-name equal-sign quoted-table-name) (:lambda (tn) (destructuring-bind (table name e table-name) tn (declare (ignore table name e)) @@ -1219,17 +1225,17 @@ Here's a quick description of the format we're parsing here: (defrule filename-or-http-uri (or http-uri maybe-quoted-filename)) -(defrule archive-source (and kw-load kw-from kw-archive filename-or-http-uri) +(defrule archive-source (and kw-load kw-archive kw-from filename-or-http-uri) (:lambda (src) (destructuring-bind (load from archive source) src (declare (ignore load from archive)) source))) -(defrule load-from-archive (and archive-source - target - (? before-load-do) - archive-command-list - (? finally-do)) +(defrule load-archive (and archive-source + target + (? before-load-do) + archive-command-list + (? 
finally-do)) (:lambda (archive) (destructuring-bind (source pg-db-uri before commands finally) archive (destructuring-bind (&key host port user password dbname &allow-other-keys) @@ -1301,7 +1307,7 @@ Here's a quick description of the format we're parsing here: (defrule end-of-command (and ignore-whitespace #\; ignore-whitespace) (:constant :eoc)) -(defrule command (and (or load-from-archive +(defrule command (and (or load-archive load-csv-file load-dbf-file load-database @@ -1314,25 +1320,31 @@ Here's a quick description of the format we're parsing here: (defrule commands (+ command)) -(defun parse-command (command) +(defun parse-commands (commands) "Parse a command and return a LAMBDA form that takes no parameter." - (parse 'command command)) + (parse 'commands commands)) -(defun run-command (command) - "Parse given COMMAND then run it." - (let ((func - (typecase command - (function command) - (list (compile nil command)) - (pathname (compile nil (parse-command - (slurp-file-into-string command)))) - (t (compile nil (parse-command command)))))) +(defun run-commands (source) + "SOURCE can be a function, which is run, a list, which is compiled as CL + code then run, a pathname containing one or more commands that are parsed + then run, or a commands string that is then parsed and each command run." 
+ (let* ((funcs + (typecase source + (function (list source)) + + (list (list (compile nil source))) + + (pathname (mapcar (lambda (expr) (compile nil expr)) + (parse-commands (slurp-file-into-string source)))) + + (t (mapcar (lambda (expr) (compile nil expr)) + (parse-commands source)))))) ;; Start the logger (start-logger) - ;; run the command - (funcall func))) + ;; run the commands + (loop for func in funcs do (funcall func)))) (defmacro with-database-uri ((database-uri) &body body) "Run the BODY forms with the connection parameters set to proper values @@ -1356,285 +1368,27 @@ Here's a quick description of the format we're parsing here: ,@body))))) -(defun test-parsing () - (parse-command " -LOAD FROM http:///tapoueh.org/db.t - INTO postgresql://localhost:6432/db?t")) +;;; +;;; Some testing +;;; +(defun test-parsing (&rest tests) + "Try parsing the command(s) from the file test/TEST.load" + (let* ((tdir (directory-namestring + (asdf:system-relative-pathname :pgloader "test/"))) + (tests (or (remove-if #'null tests) (fad:list-directory tdir)))) + (loop + for test in tests + for filename = + (if (fad:pathname-relative-p test) + (make-pathname :directory tdir :name test :type "load") + test) + collect + (cons filename + (ignore-errors + (parse-commands (slurp-file-into-string filename))))))) -(defun test-parsing-load-database () - (parse-command " - LOAD DATABASE FROM mysql://localhost:3306/dbname - INTO postgresql://localhost/db - WITH drop tables, - truncate, - create tables, - create indexes, - reset sequences, - downcase identifiers - SET guc_1 = 'value', guc_2 = 'other value' - CAST column col1 to timestamptz drop default using zero-dates-to-null, - type varchar to text, - type int with extra auto_increment to bigserial, - type datetime to timestamptz drop default using zero-dates-to-null, - type date drop not null drop default using zero-dates-to-null; -")) +(defun list-failing-tests (&rest tests) + "Return the list of test files we can't parse." 
+ (loop for (name . code) in (test-parsing tests) unless code collect name)) -(defun test-parsing-syslog-server () - (parse-command " - LOAD MESSAGES FROM syslog://localhost:10514/ - WHEN MATCHES rsyslog-msg IN apache - REGISTERING timestamp, ip, rest - INTO postgresql://localhost/db?logs.apache - SET guc_1 = 'value', guc_2 = 'other value' - - WHEN MATCHES rsyslog-msg IN others - REGISTERING timestamp, app-name, data - INTO postgresql://localhost/db?logs.others - SET guc_1 = 'value', guc_2 = 'other value' - - WITH apache = rsyslog - DATA = IP REST - IP = 1*3DIGIT \".\" 1*3DIGIT \".\"1*3DIGIT \".\"1*3DIGIT - REST = ~/.*/ - - WITH others = rsyslog; -")) - -(defun test-parsing-load-from-csv () - (parse-command " - LOAD CSV FROM '/Users/dim/dev/CL/pgloader/galaxya/yagoa/communaute_profil.csv' - Into postgresql://dim@localhost:54393/yagoa?communaute_profil - - WITH truncate, - fields optionally enclosed by '\"', - fields escaped by double-quote, - fields terminated by '\t'; -")) - -(defun test-parsing-load-from-dbf () - (parse-command " - LOAD DBF FROM '/Users/dim/Downloads/comsimp2013.dbf' - INTO postgresql://dim@localhost:54393/dim?comsimp2013 - WITH truncate, create table, table name = 'comsimp2013'; ")) - -(defun test-parsing-load-from-csv-full () - (parse-command " - LOAD CSV FROM '*/GeoLiteCity-Blocks.csv' - WITH ENCODING iso-646-us - ( - startIpNum, endIpNum, locId - ) - INTO postgresql://dim@localhost:54393/dim?geolite.blocks - ( - iprange ip4r using (ip-range startIpNum endIpNum), - locId - ) - WITH truncate, - skip header = 2, - fields optionally enclosed by '\"', - fields escaped by backslash-quote, - fields terminated by '\t'; -")) - -(defun test-parsing-load-from-archive () - "Use either http://pgsql.tapoueh.org/temp/foo.zip - or /Users/dim/Downloads/GeoLiteCity-latest.zip - or http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip" - (parse-command " - LOAD FROM ARCHIVE /Users/dim/Downloads/GeoLiteCity-latest.zip - INTO 
postgresql://dim@localhost:54393/ip4r - - BEFORE LOAD DO - $$ create extension if not exists ip4r; $$, - $$ create schema if not exists geolite; $$, - $$ create table if not exists geolite.location - ( - locid integer primary key, - country text, - region text, - city text, - postalcode text, - location point, - metrocode text, - areacode text - ); - $$, - $$ create table if not exists geolite.blocks - ( - iprange ip4r, - locid integer - ); - $$, - $$ drop index if exists geolite.blocks_ip4r_idx; $$, - $$ truncate table geolite.blocks, geolite.location cascade; - $$ - - LOAD CSV FROM FILENAME MATCHING ~/GeoLiteCity-Location.csv/ - WITH ENCODING iso-8859-1 - ( - locId, - country, - region null if blanks, - city null if blanks, - postalCode null if blanks, - latitude, - longitude, - metroCode null if blanks, - areaCode null if blanks - ) - INTO postgresql://dim@localhost:54393/ip4r?geolite.location - ( - locid,country,region,city,postalCode, - location point using (format nil \"(~a,~a)\" longitude latitude), - metroCode,areaCode - ) - WITH skip header = 2, - fields optionally enclosed by '\"', - fields escaped by double-quote, - fields terminated by ',' - - AND LOAD CSV FROM FILENAME MATCHING ~/GeoLiteCity-Blocks.csv/ - WITH ENCODING iso-8859-1 - ( - startIpNum, endIpNum, locId - ) - INTO postgresql://dim@localhost:54393/ip4r?geolite.blocks - ( - iprange ip4r using (ip-range startIpNum endIpNum), - locId - ) - WITH skip header = 2, - fields optionally enclosed by '\"', - fields escaped by double-quote, - fields terminated by ',' - - FINALLY DO - $$ - create index blocks_ip4r_idx on geolite.blocks using gist(iprange); - $$; -")) - -(defun test-parsing-load-from-archive-noprojection () - "Use either http://pgsql.tapoueh.org/temp/foo.zip - or /Users/dim/Downloads/GeoLiteCity-latest.zip - or http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip" - (parse-command " - LOAD FROM ARCHIVE /Users/dim/Downloads/GeoLiteCity-latest.zip - INTO 
postgresql://dim@localhost:54393/dim - - BEFORE LOAD DO - $$ create schema if not exists geonumip; $$, - $$ create table if not exists geonumip.location - ( - locid integer primary key, - country text, - region text, - city text, - postalcode text, - location point, - metrocode text, - areacode text - ); - $$, - $$ create table if not exists geonumip.blocks - ( - startip bigint, - endip bigint, - locid integer - ); - $$, - $$ truncate table geonumip.blocks, geonumip.location cascade; - $$ - - LOAD CSV FROM FILENAME MATCHING ~/GeoLiteCity-Location.csv/ - WITH ENCODING iso-8859-1 - ( - locId, - country, - region null if blanks, - city null if blanks, - postalCode null if blanks, - latitude, - longitude, - metroCode null if blanks, - areaCode null if blanks - ) - INTO postgresql://dim@localhost:54393/dim?geonumip.location - ( - locid,country,region,city,postalCode, - location point using (format nil \"(~a,~a)\" longitude latitude), - metroCode,areaCode - ) - WITH skip header = 2, - fields optionally enclosed by '\"', - fields escaped by double-quote, - fields terminated by ',' - - AND LOAD CSV FROM FILENAME MATCHING ~/GeoLiteCity-Blocks.csv/ - WITH ENCODING iso-8859-1 - INTO postgresql://dim@localhost:54393/dim?geonumip.blocks - WITH skip header = 2, - fields optionally enclosed by '\"', - fields escaped by double-quote, - fields terminated by ','; -")) - -(defun test-parsing-lots () - (parse 'commands " - LOAD CSV FROM '/Users/dim/dev/CL/pgloader/galaxya/yagoa/communaute_profil.csv' - Into postgresql://dim@localhost:54393/yagoa?communaute_profil - - WITH truncate, - fields optionally enclosed by '\"', - fields escaped by '\"', - fields terminated by '\t'; - - LOAD MESSAGES FROM syslog://localhost:10514/ - - WHEN MATCHES rsyslog-msg IN apache - REGISTERING timestamp, ip, rest - INTO postgresql://localhost/db?logs.apache - SET guc_1 = 'value', guc_2 = 'other value' - - WHEN MATCHES rsyslog-msg IN others - REGISTERING timestamp, app-name, data - INTO 
postgresql://localhost/db?logs.others - SET guc_1 = 'value', guc_2 = 'other value' - - WITH apache = rsyslog - DATA = IP REST - IP = 1*3DIGIT \".\" 1*3DIGIT \".\"1*3DIGIT \".\"1*3DIGIT - REST = ~/.*/ - - WITH others = rsyslog; - - LOAD CSV FROM '*/GeoLiteCity-Blocks.csv' - ( - startIpNum, endIpNum, locId - ) - INTO postgresql://dim@localhost:54393/dim?geolite.blocks - ( - iprange ip4r using (ip-range startIpNum endIpNum), - locId - ) - WITH truncate, - skip header = 2, - fields optionally enclosed by '\"', - fields escaped by '\"', - fields terminated by '\\t'; - - LOAD DATABASE FROM mysql://localhost:3306/dbname - INTO postgresql://localhost/db - WITH drop tables, - truncate, - create tables, - create indexes, - reset sequences - SET guc_1 = 'value', guc_2 = 'other value' - CAST column col1 to timestamptz drop default using zero-dates-to-null, - type varchar to text, - type int with extra auto_increment to bigserial, - type datetime to timestamptz drop default using zero-dates-to-null, - type date drop not null drop default using zero-dates-to-null; -")) diff --git a/test/archive.load b/test/archive.load new file mode 100644 index 0000000..0af4d81 --- /dev/null +++ b/test/archive.load @@ -0,0 +1,71 @@ +LOAD ARCHIVE + FROM /Users/dim/Downloads/GeoLiteCity-latest.zip + INTO postgresql://dim@localhost:54393/ip4r + + BEFORE LOAD DO + $$ create extension if not exists ip4r; $$, + $$ create schema if not exists geolite; $$, + $$ create table if not exists geolite.location + ( + locid integer primary key, + country text, + region text, + city text, + postalcode text, + location point, + metrocode text, + areacode text + ); + $$, + $$ create table if not exists geolite.blocks + ( + iprange ip4r, + locid integer + ); + $$, + $$ drop index if exists geolite.blocks_ip4r_idx; $$, + $$ truncate table geolite.blocks, geolite.location cascade; $$ + + LOAD CSV + FROM FILENAME MATCHING ~/GeoLiteCity-Location.csv/ + WITH ENCODING iso-8859-1 + ( + locId, + country, + region null if 
blanks, + city null if blanks, + postalCode null if blanks, + latitude, + longitude, + metroCode null if blanks, + areaCode null if blanks + ) + INTO postgresql://dim@localhost:54393/ip4r?geolite.location + ( + locid,country,region,city,postalCode, + location point using (format nil "(~a,~a)" longitude latitude), + metroCode,areaCode + ) + WITH skip header = 2, + fields optionally enclosed by '"', + fields escaped by double-quote, + fields terminated by ',' + + AND LOAD CSV + FROM FILENAME MATCHING ~/GeoLiteCity-Blocks.csv/ + WITH ENCODING iso-8859-1 + ( + startIpNum, endIpNum, locId + ) + INTO postgresql://dim@localhost:54393/ip4r?geolite.blocks + ( + iprange ip4r using (ip-range startIpNum endIpNum), + locId + ) + WITH skip header = 2, + fields optionally enclosed by '"', + fields escaped by double-quote, + fields terminated by ',' + + FINALLY DO + $$ create index blocks_ip4r_idx on geolite.blocks using gist(iprange); $$; diff --git a/test/csv-with-projection.load b/test/csv-with-projection.load new file mode 100644 index 0000000..429179d --- /dev/null +++ b/test/csv-with-projection.load @@ -0,0 +1,15 @@ +LOAD CSV + FROM '*/GeoLiteCity-Blocks.csv' WITH ENCODING iso-646-us + ( + startIpNum, endIpNum, locId + ) + INTO postgresql://dim@localhost:54393/dim?geolite.blocks + ( + iprange ip4r using (ip-range startIpNum endIpNum), + locId + ) + WITH truncate, + skip header = 2, + fields optionally enclosed by '"', + fields escaped by backslash-quote, + fields terminated by '\t'; diff --git a/test/csv.load b/test/csv.load new file mode 100644 index 0000000..febb11f --- /dev/null +++ b/test/csv.load @@ -0,0 +1,8 @@ +LOAD CSV + FROM '/Users/dim/dev/CL/pgloader/galaxya/yagoa/communaute_profil.csv' + INTO postgresql://dim@localhost:54393/yagoa?communaute_profil + + WITH truncate, + fields optionally enclosed by '"', + fields escaped by double-quote, + fields terminated by '\t'; diff --git a/test/database.load b/test/database.load new file mode 100644 index 0000000..c647e78 
--- /dev/null +++ b/test/database.load @@ -0,0 +1,18 @@ +LOAD DATABASE + FROM mysql://localhost:3306/dbname + INTO postgresql://localhost/db + + WITH drop tables, + truncate, + create tables, + create indexes, + reset sequences, + downcase identifiers + + SET guc_1 = 'value', guc_2 = 'other value' + + CAST column col1 to timestamptz drop default using zero-dates-to-null, + type varchar to text, + type int with extra auto_increment to bigserial, + type datetime to timestamptz drop default using zero-dates-to-null, + type date drop not null drop default using zero-dates-to-null; diff --git a/test/dbf.load b/test/dbf.load new file mode 100644 index 0000000..33f854e --- /dev/null +++ b/test/dbf.load @@ -0,0 +1,4 @@ +LOAD DBF + FROM '/Users/dim/Downloads/comsimp2013.dbf' + INTO postgresql://dim@localhost:54393/dim?comsimp2013 + WITH truncate, create table, table name = 'comsimp2013'; diff --git a/test/hans.goeuro.load b/test/hans.goeuro.load new file mode 100644 index 0000000..655ba78 --- /dev/null +++ b/test/hans.goeuro.load @@ -0,0 +1,11 @@ +load database + from mysql://root@localhost:3306/goeuro + into postgresql://dim@localhost:54393/goeuro + + WITH drop tables, create tables, create indexes, reset sequences + + CAST type datetime to timestamptz drop default drop not null using zero-dates-to-null, + type date drop not null drop default using zero-dates-to-null, + type tinyint to boolean using tinyint-to-boolean, + type year to integer, + type timestamp to timestamptz drop not null using zero-dates-to-null; diff --git a/test/lahman2012-csv.zip b/test/lahman2012-csv.zip deleted file mode 100644 index 9d2dcee..0000000 Binary files a/test/lahman2012-csv.zip and /dev/null differ diff --git a/test/load-from-archive.load b/test/load-from-archive.load deleted file mode 100644 index 35b27eb..0000000 --- a/test/load-from-archive.load +++ /dev/null @@ -1,71 +0,0 @@ - LOAD FROM ARCHIVE /Users/dim/Downloads/GeoLiteCity-latest.zip - INTO postgresql://dim@localhost:54393/ip4r - - 
BEFORE LOAD DO - $$ create extension if not exists ip4r; $$, - $$ create schema if not exists geolite; $$, - $$ create table if not exists geolite.location - ( - locid integer primary key, - country text, - region text, - city text, - postalcode text, - location point, - metrocode text, - areacode text - ); - $$, - $$ create table if not exists geolite.blocks - ( - iprange ip4r, - locid integer - ); - $$, - $$ drop index if exists geolite.blocks_ip4r_idx; $$, - $$ truncate table geolite.blocks, geolite.location cascade; - $$ - - LOAD CSV FROM FILENAME MATCHING ~/GeoLiteCity-Location.csv/ - WITH ENCODING iso-8859-1 - ( - locId, - country, - region null if blanks, - city null if blanks, - postalCode null if blanks, - latitude, - longitude, - metroCode null if blanks, - areaCode null if blanks - ) - INTO postgresql://dim@localhost:54393/ip4r?geolite.location - ( - locid,country,region,city,postalCode, - location point using (format nil "(~a,~a)" longitude latitude), - metroCode,areaCode - ) - WITH skip header = 2, - fields optionally enclosed by '"', - fields escaped by double-quote, - fields terminated by ',' - - AND LOAD CSV FROM FILENAME MATCHING ~/GeoLiteCity-Blocks.csv/ - WITH ENCODING iso-8859-1 - ( - startIpNum, endIpNum, locId - ) - INTO postgresql://dim@localhost:54393/ip4r?geolite.blocks - ( - iprange ip4r using (ip-range startIpNum endIpNum), - locId - ) - WITH skip header = 2, - fields optionally enclosed by '"', - fields escaped by double-quote, - fields terminated by ',' - - FINALLY DO - $$ - create index blocks_ip4r_idx on geolite.blocks using gist(iprange); - $$; diff --git a/test/messages.load b/test/messages.load new file mode 100644 index 0000000..aa5aec6 --- /dev/null +++ b/test/messages.load @@ -0,0 +1,19 @@ +LOAD MESSAGES + FROM syslog://localhost:10514/ + + WHEN MATCHES rsyslog-msg IN apache + REGISTERING timestamp, ip, rest + INTO postgresql://localhost/db?logs.apache + SET guc_1 = 'value', guc_2 = 'other value' + + WHEN MATCHES rsyslog-msg IN 
others + REGISTERING timestamp, app-name, data + INTO postgresql://localhost/db?logs.others + SET guc_1 = 'value', guc_2 = 'other value' + + WITH apache = rsyslog + DATA = IP REST + IP = 1*3DIGIT "." 1*3DIGIT "."1*3DIGIT "."1*3DIGIT + REST = ~/.*/ + + WITH others = rsyslog; diff --git a/test/mix.load b/test/mix.load new file mode 100644 index 0000000..984798b --- /dev/null +++ b/test/mix.load @@ -0,0 +1,34 @@ +load database from mysql://localhost/adv + into postgresql://dim@localhost/adv + +with drop tables, truncate, create tables, create indexes, + reset sequences, + downcase identifiers + set work_mem to '128MB', maintenance_work_mem to '512 MB' + +cast type datetime to timestamptz drop default using zero-dates-to-null, + type date drop not null drop default using zero-dates-to-null, + type tinyint to boolean using tinyint-to-boolean; + + +LOAD DBF + FROM '/Users/dim/Downloads/comsimp2013.dbf' + INTO postgresql://dim@localhost:54393/dim?comsimp2013 + WITH truncate, create table, table name = 'comsimp2013'; + + +LOAD CSV + FROM '*/GeoLiteCity-Blocks.csv' WITH ENCODING iso-646-us + ( + startIpNum, endIpNum, locId + ) + INTO postgresql://dim@localhost:54393/dim?geolite.blocks + ( + iprange ip4r using (ip-range startIpNum endIpNum), + locId + ) + WITH truncate, + skip header = 2, + fields optionally enclosed by '"', + fields escaped by backslash-quote, + fields terminated by '\t';