Expose cl-csv escape mode option, fix #80.

Some CSV files use the CSV escape character inside their fields. In
that case we hit a parsing bug in cl-csv, where backtracking out of
parsing the escape string isn't possible (or at least is
unimplemented).

To handle this case, change the quote parameter from \" to just \ and let
cl-csv use its escape-quote mechanism to decide whether we're escaping
only separators or any data.

See https://github.com/AccelerationNet/cl-csv/issues/17 where the escape
mode feature was introduced for pgloader issue #80 already.
This commit is contained in:
Dimitri Fontaine 2015-06-25 14:08:31 +02:00
parent 250ed1c791
commit d75c100399
4 changed files with 24 additions and 1 deletions

View File

@ -890,6 +890,13 @@ The `csv` format command accepts the following clauses and options:
when they are to be found within the data fields themselves. when they are to be found within the data fields themselves.
Defaults to *double-quote*. Defaults to *double-quote*.
- *csv escape mode*
Takes either the special value *quote* (the default) or *following*.
With *quote*, the CSV parser only parses an escaped field separator;
with *following*, it parses any escaped character (including CSV
data).
- *fields terminated by* - *fields terminated by*
Takes a single character as argument, which must be found inside Takes a single character as argument, which must be found inside

View File

@ -69,6 +69,10 @@
escaped-quote-name escaped-quote-name
separator)) separator))
;; Values accepted after "csv escape mode": the literal text "quote"
;; yields the keyword :quote and the literal text "following" yields
;; the keyword :following.
(defrule escape-mode-quote "quote" (:constant :quote))
(defrule escape-mode-following "following" (:constant :following))
;; An escape mode is either of the two alternatives above, tried in order.
(defrule escape-mode (or escape-mode-quote escape-mode-following))
(defrule option-fields-escaped-by (and kw-fields kw-escaped kw-by escaped-quote) (defrule option-fields-escaped-by (and kw-fields kw-escaped kw-by escaped-quote)
(:lambda (esc) (:lambda (esc)
(bind (((_ _ _ sep) esc)) (bind (((_ _ _ sep) esc))
@ -94,6 +98,11 @@
(defrule option-trim-unquoted-blanks (and kw-trim kw-unquoted kw-blanks) (defrule option-trim-unquoted-blanks (and kw-trim kw-unquoted kw-blanks)
(:constant (cons :trim-blanks t))) (:constant (cons :trim-blanks t)))
;; CSV option made of the kw-csv, kw-escape and kw-mode rules followed by
;; an escape-mode. Produces the pair (:escape-mode . <mode>), where <mode>
;; is whatever the escape-mode rule returned.
(defrule option-csv-escape-mode (and kw-csv kw-escape kw-mode escape-mode)
(:lambda (term)
;; The first three elements of the match are the keywords; only the
;; fourth (the escape mode) is kept.
(bind (((_ _ _ escape-mode) term))
(cons :escape-mode escape-mode))))
(defrule csv-option (or option-batch-rows (defrule csv-option (or option-batch-rows
option-batch-size option-batch-size
option-batch-concurrency option-batch-concurrency
@ -107,7 +116,8 @@
option-fields-escaped-by option-fields-escaped-by
option-fields-terminated-by option-fields-terminated-by
option-trim-unquoted-blanks option-trim-unquoted-blanks
option-keep-unquoted-blanks)) option-keep-unquoted-blanks
option-csv-escape-mode))
(defrule another-csv-option (and comma csv-option) (defrule another-csv-option (and comma csv-option)
(:lambda (source) (:lambda (source)

View File

@ -71,6 +71,8 @@
(def-keyword-rule "by") (def-keyword-rule "by")
(def-keyword-rule "escaped") (def-keyword-rule "escaped")
(def-keyword-rule "terminated") (def-keyword-rule "terminated")
;; NOTE(review): presumably these define the kw-escape and kw-mode rules
;; used by the "csv escape mode" option -- confirm against def-keyword-rule.
(def-keyword-rule "escape")
(def-keyword-rule "mode")
(def-keyword-rule "nullif") (def-keyword-rule "nullif")
(def-keyword-rule "blank") (def-keyword-rule "blank")
(def-keyword-rule "trim") (def-keyword-rule "trim")

View File

@ -57,6 +57,9 @@
(escape :accessor csv-escape ; CSV quote escaping (escape :accessor csv-escape ; CSV quote escaping
:initarg :escape ; :initarg :escape ;
:initform cl-csv:*quote-escape*) :initform cl-csv:*quote-escape*)
(escape-mode :accessor csv-escape-mode ; CSV quote escaping mode
:initarg :escape-mode ;
:initform cl-csv::*escape-mode*)
(trim-blanks :accessor csv-trim-blanks ; CSV blank and NULLs (trim-blanks :accessor csv-trim-blanks ; CSV blank and NULLs
:initarg :trim-blanks ; :initarg :trim-blanks ;
:initform t)) :initform t))
@ -156,6 +159,7 @@
:separator (csv-separator csv) :separator (csv-separator csv)
:quote (csv-quote csv) :quote (csv-quote csv)
:escape (csv-escape csv) :escape (csv-escape csv)
:escape-mode (csv-escape-mode csv)
:unquoted-empty-string-is-nil t :unquoted-empty-string-is-nil t
:quoted-empty-string-is-nil nil :quoted-empty-string-is-nil nil
:trim-outer-whitespace (csv-trim-blanks csv) :trim-outer-whitespace (csv-trim-blanks csv)