From bffec4cc630d433a89943deaf66c9790941ab020 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Fri, 22 May 2015 12:31:01 +0200 Subject: [PATCH] Allow for more options in the CSV escape character, fix #38. To allow importing JSON one-liners as-is into the database, it is useful to leverage the CSV parser in a compatible setup. That setup requires being able to use any separator character as the escape character. --- pgloader.1.md | 9 +++++---- src/parsers/command-csv.lisp | 4 +++- test/Makefile | 1 + test/csv-json.load | 30 ++++++++++++++++++++++++++++++ test/regress/expected/csv-json.out | 15 +++++++++++++++ 5 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 test/csv-json.load create mode 100644 test/regress/expected/csv-json.out diff --git a/pgloader.1.md b/pgloader.1.md index 4238bf6..d2645ef 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -878,10 +878,11 @@ The `csv` format command accepts the following clauses and options: - *fields escaped by* - Takes either the special value *backslash-quote* or *double-quote*. - This value is used to recognize escaped field separators when they - are to be found within the data fields themselves. Defaults to - *double-quote*. + Takes the special value *backslash-quote* or *double-quote*, + or any value supported by the *fields terminated by* option (see + below). This value is used to recognize escaped field separators + when they are to be found within the data fields themselves. + Defaults to *double-quote*. 
- *fields terminated by* diff --git a/src/parsers/command-csv.lisp b/src/parsers/command-csv.lisp index 1162700..67c44f6 100644 --- a/src/parsers/command-csv.lisp +++ b/src/parsers/command-csv.lisp @@ -65,7 +65,9 @@ (defrule backslash-quote "backslash-quote" (:constant "\\\"")) (defrule escaped-quote-name (or quote-quote backslash-quote)) (defrule escaped-quote-literal (or (and #\" #\") (and #\\ #\")) (:text t)) -(defrule escaped-quote (or escaped-quote-literal escaped-quote-name)) +(defrule escaped-quote (or escaped-quote-literal + escaped-quote-name + separator)) (defrule option-fields-escaped-by (and kw-fields kw-escaped kw-by escaped-quote) (:lambda (esc) diff --git a/test/Makefile b/test/Makefile index a4cd15c..125a99b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -11,6 +11,7 @@ REGRESS= allcols.load \ csv-error.load \ csv-filename-pattern.load \ csv-header.load \ + csv-json.load \ csv-keep-extra-blanks.load \ csv-nulls.load \ csv-trim-extra-blanks.load \ diff --git a/test/csv-json.load b/test/csv-json.load new file mode 100644 index 0000000..9680418 --- /dev/null +++ b/test/csv-json.load @@ -0,0 +1,30 @@ +LOAD CSV + FROM INLINE + INTO postgresql:///pgloader?json + + WITH truncate, + fields not enclosed, + fields terminated by '0x02', + fields escaped by '0x02' + + BEFORE LOAD DO + $$ drop table if exists json; $$, + $$ CREATE TABLE json (json text); $$; + + + +{"table-name": "fetch","read":0,"imported":0,"errors":0,"time":"0.000s"} +{"table-name": "fetch meta data","read":8,"imported":8,"errors":0,"time":"0.026s"} +{"table-name": "create, truncate","read":0,"imported":0,"errors":0,"time":"0.046s"} +{"table-name": "long","read":0,"imported":2,"errors":0,"time":"0.069s"} +{"table-name": "blobs","read":0,"imported":1,"errors":0,"time":"0.021s"} +{"table-name": "unsigned","read":0,"imported":2,"errors":0,"time":"0.007s"} +{"table-name": "reals","read":0,"imported":3,"errors":0,"time":"0.007s"} +{"table-name": 
"ints","read":0,"imported":3,"errors":0,"time":"0.006s"} +{"table-name": "def","read":0,"imported":2,"errors":0,"time":"0.007s"} +{"table-name": "stamps","read":0,"imported":2,"errors":0,"time":"0.007s"} +{"table-name": "character","read":0,"imported":4,"errors":0,"time":"0.005s"} +{"table-name": "index build completion","read":0,"imported":0,"errors":0,"time":"0.000s"} +{"table-name": "Create Indexes","read":0,"imported":0,"errors":0,"time":"0.000s"} +{"table-name": "Reset Sequences","read":0,"imported":0,"errors":0,"time":"0.015s"} +{"table-name": "Total streaming time","read":0,"imported":19,"errors":0,"time":"0.216s"} diff --git a/test/regress/expected/csv-json.out b/test/regress/expected/csv-json.out new file mode 100644 index 0000000..8e835db --- /dev/null +++ b/test/regress/expected/csv-json.out @@ -0,0 +1,15 @@ +{"table-name": "fetch","read":0,"imported":0,"errors":0,"time":"0.000s"}, +{"table-name": "fetch meta data","read":8,"imported":8,"errors":0,"time":"0.026s"}, +{"table-name": "create, truncate","read":0,"imported":0,"errors":0,"time":"0.046s"}, +{"table-name": "long","read":0,"imported":2,"errors":0,"time":"0.069s"}, +{"table-name": "blobs","read":0,"imported":1,"errors":0,"time":"0.021s"}, +{"table-name": "unsigned","read":0,"imported":2,"errors":0,"time":"0.007s"}, +{"table-name": "reals","read":0,"imported":3,"errors":0,"time":"0.007s"}, +{"table-name": "ints","read":0,"imported":3,"errors":0,"time":"0.006s"}, +{"table-name": "def","read":0,"imported":2,"errors":0,"time":"0.007s"}, +{"table-name": "stamps","read":0,"imported":2,"errors":0,"time":"0.007s"}, +{"table-name": "character","read":0,"imported":4,"errors":0,"time":"0.005s"}, +{"table-name": "index build completion","read":0,"imported":0,"errors":0,"time":"0.000s"}, +{"table-name": "Create Indexes","read":0,"imported":0,"errors":0,"time":"0.000s"}, +{"table-name": "Reset Sequences","read":0,"imported":0,"errors":0,"time":"0.015s"}, +{"table-name": "Total streaming 
time","read":0,"imported":19,"errors":0,"time":"0.216s"}