Implement CSV option keep|trim unquoted blanks.

This commit is contained in:
Dimitri Fontaine 2013-10-12 14:49:52 +02:00
parent 4ff7d9ae69
commit 7d1b34477e
3 changed files with 27 additions and 9 deletions

View File

@ -124,7 +124,8 @@
(skip-lines nil)
(separator #\Tab)
(quote cl-csv:*quote*)
(escape cl-csv:*quote-escape*))
(escape cl-csv:*quote-escape*)
(trim-blanks cl-csv:*trim-blanks*))
"Load data from a text file in CSV format, with support for advanced
projecting capabilities. See `project-fields' for details.
@ -176,7 +177,8 @@
:row-fn (compile nil reformat-then-process)
:separator separator
:quote quote
:escape escape)
:escape escape
:trim-blanks trim-blanks)
((or cl-csv:csv-parse-error type-error) (condition)
;; some form of parse error did happen, TODO: log it
(progn
@ -193,7 +195,8 @@
skip-lines
(separator #\Tab)
(quote cl-csv:*quote*)
(escape cl-csv:*quote-escape*))
(escape cl-csv:*quote-escape*)
(trim-blanks cl-csv:*trim-blanks*))
"Copy data from CSV FILENAME into lparallel.queue DATAQ"
(let ((read
(pgloader.queue:map-push-queue dataq
@ -205,7 +208,8 @@
:skip-lines skip-lines
:separator separator
:quote quote
:escape escape)))
:escape escape
:trim-blanks trim-blanks)))
(pgstate-incf *state* table-name :read read)))
(defun copy-from-file (dbname table-name filename-or-regex
@ -222,7 +226,8 @@
(encoding :utf-8)
(separator #\Tab)
(quote cl-csv:*quote*)
(escape cl-csv:*quote-escape*))
(escape cl-csv:*quote-escape*)
(trim-blanks cl-csv:*trim-blanks*))
"Copy data from CSV file FILENAME into PostgreSQL DBNAME.TABLE-NAME"
(let* ((summary (null *state*))
(*state* (or *state* (pgloader.utils:make-pgstate)))
@ -242,7 +247,8 @@
:skip-lines skip-lines
:separator separator
:quote quote
:escape escape)
:escape escape
:trim-blanks trim-blanks)
;; and start another task to push that data from the queue to PostgreSQL
(lp:submit-task channel

View File

@ -156,6 +156,9 @@ Here's a quick description of the format we're parsing here:
(def-keyword-rule "blanks")
(def-keyword-rule "date")
(def-keyword-rule "format")
;; keywords of the CSV options "keep unquoted blanks" and "trim unquoted
;; blanks"; the option rules refer to them as kw-keep, kw-trim and
;; kw-unquoted (presumably def-keyword-rule, defined outside this view,
;; names each rule kw-<word> -- TODO confirm)
(def-keyword-rule "keep")
(def-keyword-rule "trim")
(def-keyword-rule "unquoted")
;; option for MySQL imports
(def-keyword-rule "schema")
(def-keyword-rule "only")
@ -999,12 +1002,20 @@ Here's a quick description of the format we're parsing here:
(declare (ignore fields ))
sep)))
;; CSV option written "keep unquoted blanks" in a load command: matches the
;; kw-keep, kw-unquoted and kw-blanks rules in sequence and, whatever text
;; was matched, produces the option pair (:trim-blanks . nil).
(defrule option-keep-unquoted-blanks (and kw-keep kw-unquoted kw-blanks)
(:constant (cons :trim-blanks nil)))
;; CSV option "trim unquoted blanks": matches the kw-trim, kw-unquoted and
;; kw-blanks rules in sequence and, whatever text was matched, produces the
;; option pair (:trim-blanks . t).
(defrule option-trim-unquoted-blanks (and kw-trim kw-unquoted kw-blanks)
(:constant (cons :trim-blanks t)))
;; A single CSV option: matches any one of the option rules listed below.
;; Further comma-separated options are parsed by another-csv-option, which
;; wraps this rule.
(defrule csv-option (or option-truncate
option-skip-header
option-fields-not-enclosed
option-fields-enclosed-by
option-fields-escaped-by
option-fields-terminated-by))
option-fields-terminated-by
option-trim-unquoted-blanks
option-keep-unquoted-blanks))
(defrule another-csv-option (and #\, ignore-whitespace csv-option)
(:lambda (source)

View File

@ -2,6 +2,7 @@ LOAD CSV
FROM INLINE
INTO postgresql://dim@localhost:54393/dim?nulls (f1, f2, f3)
WITH truncate,
keep unquoted blanks,
fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ','
@ -13,7 +14,7 @@ LOAD CSV
"quoted empty string","","should be empty string"
"no value between separators",,"should be null"
"quoted blanks"," ","should be blanks"
"unquoted blanks between separators", ,"should be null"
"unquoted blanks", ,"should be null"
"unquoted string",no quote,"should be 'no quote'"
"quoted separator","a,b,c","should be 'a,b,c'"
"keep extra blanks", test string , "should be ' test string '"