From 7d1b34477e9c39774461fff7842faf5fa577b24e Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Sat, 12 Oct 2013 14:49:52 +0200 Subject: [PATCH] Implement CSV option keep|trim unquoted blanks. --- csv.lisp | 18 ++++++++++++------ parser.lisp | 13 ++++++++++++- test/csv-empty-as-null.load | 5 +++-- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/csv.lisp b/csv.lisp index 4475a73..64122b4 100644 --- a/csv.lisp +++ b/csv.lisp @@ -124,7 +124,8 @@ (skip-lines nil) (separator #\Tab) (quote cl-csv:*quote*) - (escape cl-csv:*quote-escape*)) + (escape cl-csv:*quote-escape*) + (trim-blanks cl-csv:*trim-blanks*)) "Load data from a text file in CSV format, with support for advanced projecting capabilities. See `project-fields' for details. @@ -176,7 +177,8 @@ :row-fn (compile nil reformat-then-process) :separator separator :quote quote - :escape escape) + :escape escape + :trim-blanks trim-blanks) ((or cl-csv:csv-parse-error type-error) (condition) ;; some form of parse error did happen, TODO: log it (progn @@ -193,7 +195,8 @@ skip-lines (separator #\Tab) (quote cl-csv:*quote*) - (escape cl-csv:*quote-escape*)) + (escape cl-csv:*quote-escape*) + (trim-blanks cl-csv:*trim-blanks*)) "Copy data from CSV FILENAME into lprallel.queue DATAQ" (let ((read (pgloader.queue:map-push-queue dataq @@ -205,7 +208,8 @@ :skip-lines skip-lines :separator separator :quote quote - :escape escape))) + :escape escape + :trim-blanks trim-blanks))) (pgstate-incf *state* table-name :read read))) (defun copy-from-file (dbname table-name filename-or-regex @@ -222,7 +226,8 @@ (encoding :utf-8) (separator #\Tab) (quote cl-csv:*quote*) - (escape cl-csv:*quote-escape*)) + (escape cl-csv:*quote-escape*) + (trim-blanks cl-csv:*trim-blanks*)) "Copy data from CSV file FILENAME into PostgreSQL DBNAME.TABLE-NAME" (let* ((summary (null *state*)) (*state* (or *state* (pgloader.utils:make-pgstate))) @@ -242,7 +247,8 @@ :skip-lines skip-lines :separator separator :quote quote - :escape escape) + :escape escape + :trim-blanks trim-blanks) ;; and start another task to push that data from the queue to PostgreSQL (lp:submit-task channel diff --git a/parser.lisp b/parser.lisp index e7fd3c0..78c7327 100644 --- a/parser.lisp +++ b/parser.lisp @@ -156,6 +156,9 @@ Here's a quick description of the format we're parsing here: (def-keyword-rule "blanks") (def-keyword-rule "date") (def-keyword-rule "format") + (def-keyword-rule "keep") + (def-keyword-rule "trim") + (def-keyword-rule "unquoted") ;; option for MySQL imports (def-keyword-rule "schema") (def-keyword-rule "only") @@ -999,12 +1002,20 @@ Here's a quick description of the format we're parsing here: (declare (ignore fields )) sep))) +(defrule option-keep-unquoted-blanks (and kw-keep kw-unquoted kw-blanks) + (:constant (cons :trim-blanks nil))) + +(defrule option-trim-unquoted-blanks (and kw-trim kw-unquoted kw-blanks) + (:constant (cons :trim-blanks t))) + (defrule csv-option (or option-truncate option-skip-header option-fields-not-enclosed option-fields-enclosed-by option-fields-escaped-by - option-fields-terminated-by)) + option-fields-terminated-by + option-trim-unquoted-blanks + option-keep-unquoted-blanks)) (defrule another-csv-option (and #\, ignore-whitespace csv-option) (:lambda (source) diff --git a/test/csv-empty-as-null.load b/test/csv-empty-as-null.load index febd638..92ef9f3 100644 --- a/test/csv-empty-as-null.load +++ b/test/csv-empty-as-null.load @@ -2,6 +2,7 @@ LOAD CSV FROM INLINE INTO postgresql://dim@localhost:54393/dim?nulls (f1, f2, f3) WITH truncate, + keep unquoted blanks, fields optionally enclosed by '"', fields escaped by double-quote, fields terminated by ',' @@ -13,7 +14,7 @@ LOAD CSV "quoted empty string","","should be empty string" "no value between separators",,"should be null" "quoted blanks"," ","should be blanks" -"unquoted blanks between separators", ,"should be null" +"unquoted blanks", ,"should be null" "unquoted string",no quote,"should be 'no quote'" "quoted separator","a,b,c","should be 'a,b,c'" - +"keep extra blanks", test string , "should be ' test string '"