mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-09 07:47:00 +02:00
Expose cl-csv escape mode option, fix #80.
Some CSV files use the CSV escape character inside their fields. In that case we hit a parsing bug in cl-csv, where backtracking out of a partially parsed escape string isn't possible (or at least isn't implemented). To handle this case, change the quote parameter from \" to just \ and let cl-csv use its escape-quote mechanism to decide whether we're escaping only separators or any data. See https://github.com/AccelerationNet/cl-csv/issues/17, where the escape mode feature was already introduced for pgloader issue #80.
This commit is contained in:
parent
250ed1c791
commit
d75c100399
@ -890,6 +890,13 @@ The `csv` format command accepts the following clauses and options:
|
|||||||
when they are to be found within the data fields themselves.
|
when they are to be found within the data fields themselves.
|
||||||
Defaults to *double-quote*.
|
Defaults to *double-quote*.
|
||||||
|
|
||||||
|
- *csv escape mode*
|
||||||
|
|
||||||
|
Takes either the special value *quote* (the default) or *following*,
|
||||||
|
which lets the CSV parser accept either only an escaped field
|
||||||
|
separator (with *quote*) or any escaped character, including CSV
|
||||||
|
data (with *following*).
|
||||||
|
|
||||||
- *fields terminated by*
|
- *fields terminated by*
|
||||||
|
|
||||||
Takes a single character as argument, which must be found inside
|
Takes a single character as argument, which must be found inside
|
||||||
|
@ -69,6 +69,10 @@
|
|||||||
escaped-quote-name
|
escaped-quote-name
|
||||||
separator))
|
separator))
|
||||||
|
|
||||||
|
;; Grammar rule: the literal text "quote" yields the constant :quote.
(defrule escape-mode-quote "quote" (:constant :quote))
|
||||||
|
;; Grammar rule: the literal text "following" yields the constant :following.
(defrule escape-mode-following "following" (:constant :following))
|
||||||
|
;; An escape mode is either of the two alternatives, tried in this order:
;; escape-mode-quote, then escape-mode-following.
(defrule escape-mode (or escape-mode-quote escape-mode-following))
|
||||||
|
|
||||||
(defrule option-fields-escaped-by (and kw-fields kw-escaped kw-by escaped-quote)
|
(defrule option-fields-escaped-by (and kw-fields kw-escaped kw-by escaped-quote)
|
||||||
(:lambda (esc)
|
(:lambda (esc)
|
||||||
(bind (((_ _ _ sep) esc))
|
(bind (((_ _ _ sep) esc))
|
||||||
@ -94,6 +98,11 @@
|
|||||||
(defrule option-trim-unquoted-blanks (and kw-trim kw-unquoted kw-blanks)
|
(defrule option-trim-unquoted-blanks (and kw-trim kw-unquoted kw-blanks)
|
||||||
(:constant (cons :trim-blanks t)))
|
(:constant (cons :trim-blanks t)))
|
||||||
|
|
||||||
|
;; Parses the sequence kw-csv kw-escape kw-mode escape-mode and returns the
;; pair (:escape-mode . <mode>), where <mode> is the value produced by the
;; escape-mode rule; the three keyword matches are discarded.
(defrule option-csv-escape-mode (and kw-csv kw-escape kw-mode escape-mode)
|
||||||
|
(:lambda (term)
|
||||||
|
(bind (((_ _ _ escape-mode) term))
|
||||||
|
(cons :escape-mode escape-mode))))
|
||||||
|
|
||||||
(defrule csv-option (or option-batch-rows
|
(defrule csv-option (or option-batch-rows
|
||||||
option-batch-size
|
option-batch-size
|
||||||
option-batch-concurrency
|
option-batch-concurrency
|
||||||
@ -107,7 +116,8 @@
|
|||||||
option-fields-escaped-by
|
option-fields-escaped-by
|
||||||
option-fields-terminated-by
|
option-fields-terminated-by
|
||||||
option-trim-unquoted-blanks
|
option-trim-unquoted-blanks
|
||||||
option-keep-unquoted-blanks))
|
option-keep-unquoted-blanks
|
||||||
|
option-csv-escape-mode))
|
||||||
|
|
||||||
(defrule another-csv-option (and comma csv-option)
|
(defrule another-csv-option (and comma csv-option)
|
||||||
(:lambda (source)
|
(:lambda (source)
|
||||||
|
@ -71,6 +71,8 @@
|
|||||||
(def-keyword-rule "by")
|
(def-keyword-rule "by")
|
||||||
(def-keyword-rule "escaped")
|
(def-keyword-rule "escaped")
|
||||||
(def-keyword-rule "terminated")
|
(def-keyword-rule "terminated")
|
||||||
|
(def-keyword-rule "escape")
|
||||||
|
(def-keyword-rule "mode")
|
||||||
(def-keyword-rule "nullif")
|
(def-keyword-rule "nullif")
|
||||||
(def-keyword-rule "blank")
|
(def-keyword-rule "blank")
|
||||||
(def-keyword-rule "trim")
|
(def-keyword-rule "trim")
|
||||||
|
@ -57,6 +57,9 @@
|
|||||||
(escape :accessor csv-escape ; CSV quote escaping
|
(escape :accessor csv-escape ; CSV quote escaping
|
||||||
:initarg :escape ;
|
:initarg :escape ;
|
||||||
:initform cl-csv:*quote-escape*)
|
:initform cl-csv:*quote-escape*)
|
||||||
|
(escape-mode :accessor csv-escape-mode ; CSV quote escaping mode
|
||||||
|
:initarg :escape-mode ;
|
||||||
|
:initform cl-csv::*escape-mode*)
|
||||||
(trim-blanks :accessor csv-trim-blanks ; CSV blank and NULLs
|
(trim-blanks :accessor csv-trim-blanks ; CSV blank and NULLs
|
||||||
:initarg :trim-blanks ;
|
:initarg :trim-blanks ;
|
||||||
:initform t))
|
:initform t))
|
||||||
@ -156,6 +159,7 @@
|
|||||||
:separator (csv-separator csv)
|
:separator (csv-separator csv)
|
||||||
:quote (csv-quote csv)
|
:quote (csv-quote csv)
|
||||||
:escape (csv-escape csv)
|
:escape (csv-escape csv)
|
||||||
|
:escape-mode (csv-escape-mode csv)
|
||||||
:unquoted-empty-string-is-nil t
|
:unquoted-empty-string-is-nil t
|
||||||
:quoted-empty-string-is-nil nil
|
:quoted-empty-string-is-nil nil
|
||||||
:trim-outer-whitespace (csv-trim-blanks csv)
|
:trim-outer-whitespace (csv-trim-blanks csv)
|
||||||
|
Loading…
Reference in New Issue
Block a user