diff --git a/parser.lisp b/parser.lisp index 28885ca..552d626 100644 --- a/parser.lisp +++ b/parser.lisp @@ -11,67 +11,21 @@ (defvar *data-expected-inline* nil "Set to :inline when parsing an INLINE keyword in a FROM clause.") -#| -Here's a quick description of the format we're parsing here: - - LOAD FROM '/path/to/filename.txt' - stdin - http://url.to/some/file.txt - mysql://[user[:pass]@][host[:port]]/dbname?table-name - postgresql://[user[:pass]@][host[:port]]/dbname?table-name - - [ COMPRESSED WITH zip | bzip2 | gzip ] - - WITH workers = 2, - batch size = 25000, - batch split = 5, - reject file = '/tmp/pgloader/.dat' - log file = '/tmp/pgloader/pgloader.log', - log level = debug | info | notice | warning | error | critical, - truncate, - fields [ optionally ] enclosed by '"', - fields escaped by '\\', - fields terminated by '\t', - lines terminated by '\r\n', - encoding = 'latin9', - drop table, - create table, - create indexes, - reset sequences - - SET guc-1 = 'value', guc-2 = 'value' - - PREPARE CLIENT WITH ( ) - PREPARE SERVER WITH ( ) - - INTO table-name [ WITH SET ] - ( - field-name data-type field-desc [ with column options ], - ... - ) - USING (expression field-name other-field-name) as column-name, - ... 
- - INTO table-name [ WITH SET ] - ( - * - ) - - TODO WHEN - - FINALLY ON CLIENT DO ( ) - ON SERVER DO ( ) - - < data here if loading from stdin > -|# - ;; ;; Some useful rules ;; +(defrule single-line-comment (and "--" (* (not #\Newline)) #\Newline) + (:constant :comment)) + +(defrule multi-line-comment (and "/*" (* (not "*/")) "*/") + (:constant :comment)) + +(defrule comments (or single-line-comment multi-line-comment)) + (defrule keep-a-single-whitespace (+ (or #\space #\tab #\newline #\linefeed)) (:constant " ")) -(defrule whitespace (+ (or #\space #\tab #\newline #\linefeed)) +(defrule whitespace (+ (or #\space #\tab #\newline #\linefeed comments)) (:constant 'whitespace)) (defrule ignore-whitespace (* whitespace) diff --git a/test/allcols.load b/test/allcols.load index 2b8569e..7d08995 100644 --- a/test/allcols.load +++ b/test/allcols.load @@ -1,3 +1,16 @@ +/* + * This test is ported from pgloader 2.x where it was defined as: + * + * [allcols] + * table = allcols + * format = csv + * filename = allcols/allcols.data + * field_sep = : + * columns = * + * pg_option_work_mem = 14MB + * + */ + LOAD CSV FROM inline (a, b, c) INTO postgresql://dim:pgpass@localhost:54393/pgloader?allcols diff --git a/test/archive.load b/test/archive.load index a74fb63..6f78a6b 100644 --- a/test/archive.load +++ b/test/archive.load @@ -1,3 +1,12 @@ +/* + * Loading from a ZIP archive containing CSV files. The full test can be + * done with using the archive found at + * http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip + * + * And a very light version of this data set is found at + * http://pgsql.tapoueh.org/temp/foo.zip for quick testing. 
+ */ + LOAD ARCHIVE FROM http://pgsql.tapoueh.org/temp/foo.zip INTO postgresql://dim@localhost:54393/ip4r diff --git a/test/csv.load b/test/csv.load index 9ba1651..e494fb5 100644 --- a/test/csv.load +++ b/test/csv.load @@ -1,3 +1,20 @@ +/* + * This test is ported from pgloader 2.x where it was defined as: + * + * [csv] + * table = csv + * format = csv + * filename = csv/csv.data + * field_size_limit = 512kB + * field_sep = , + * quotechar = " + * columns = x, y, a, b, d:6, c:5 + * only_cols = 3-6 + * skip_head_lines = 1 + * truncate = True + * + */ + LOAD CSV FROM inline ( diff --git a/test/errors.load b/test/errors.load index 33adba8..4182cd2 100644 --- a/test/errors.load +++ b/test/errors.load @@ -1,3 +1,16 @@ +/* + * This test is ported from pgloader 2.x where it was defined as: + * + * [errors] + * table = errors + * format = text + * filename = errors/errors.data + * field_sep = | + * trailing_sep = True + * columns = a:1, b:3, c:2 + * + */ + LOAD CSV FROM inline (a, c, b, trailing) INTO postgresql://dim:pgpass@localhost:54393/pgloader?errors diff --git a/test/partial.load b/test/partial.load index 9da1fad..208c2e7 100644 --- a/test/partial.load +++ b/test/partial.load @@ -1,3 +1,15 @@ +/* + * This test is ported from pgloader 2.x where it was defined as: + * + * [partial] + * table = partial + * format = text + * filename = partial/partial.data + * field_sep = % + * columns = * + * only_cols = 1-3, 5 + * + */ LOAD CSV FROM inline (a, b, c, d, e) INTO postgresql://dim:pgpass@localhost:54393/pgloader?partial diff --git a/test/reformat.load b/test/reformat.load index 4f6daa3..c98e673 100644 --- a/test/reformat.load +++ b/test/reformat.load @@ -1,3 +1,17 @@ +/* + * This test is ported from pgloader 2.x where it was defined as: + * + * [reformat] + * table = reformat + * format = text + * filename = reformat/reformat.data + * field_sep = | + * columns = id, timestamp + * reformat = timestamp:mysql:timestamp + * + */ + + LOAD CSV FROM inline (id, timestamp) INTO 
postgresql://dim:pgpass@localhost:54393/pgloader?reformat diff --git a/test/serial.load b/test/serial.load index 92961e2..c8b2e79 100644 --- a/test/serial.load +++ b/test/serial.load @@ -1,3 +1,15 @@ +/* + * This test is ported from pgloader 2.x where it was defined as: + * + * [serial] + * table = serial + * format = text + * filename = serial/serial.data + * field_sep = ; + * columns = b:2, c:1 + * + */ + LOAD CSV FROM inline (c, b) INTO postgresql://dim:pgpass@localhost:54393/pgloader?serial diff --git a/test/simple.load b/test/simple.load index 06b8cc5..213de8c 100644 --- a/test/simple.load +++ b/test/simple.load @@ -1,3 +1,25 @@ +/* + * This test is ported from pgloader 2.x where it was defined as: + * + * [simple_tmpl] + * template = True + * format = text + * datestyle = dmy + * field_sep = | + * trailing_sep = True + * + * [simple] + * use_template = simple_tmpl + * table = simple + * filename = simple/simple.data + * columns = a:1, b:3, c:2 + * skip_head_lines = 2 + * + * # those reject settings are defaults one + * reject_log = /tmp/simple.rej.log + * reject_data = /tmp/simple.rej + */ + LOAD CSV FROM inline (a, c, b, trailing) INTO postgresql://dim:pgpass@localhost:54393/pgloader?simple diff --git a/test/udc.load b/test/udc.load index 7db5b27..617ed2f 100644 --- a/test/udc.load +++ b/test/udc.load @@ -1,3 +1,18 @@ +/* + * This test is ported from pgloader 2.x where it was defined as: + * + * [udc] + * table = udc + * format = text + * filename = udc/udc.data + * input_encoding = 'latin1' + * field_sep = % + * columns = b:2, d:1, x:3, y:4 + * udc_c = constant value + * copy_columns = b, c, d + * + */ + LOAD CSV FROM inline WITH ENCODING latin1 (d, b, x, y) diff --git a/test/xzero.load b/test/xzero.load index d5d1105..e32f9ad 100644 Binary files a/test/xzero.load and b/test/xzero.load differ