From 643875a26669de24d57a22d5367364a26e4a7fa0 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Sat, 8 Feb 2014 17:51:15 +0100 Subject: [PATCH] Improve CSV error handling, thanks to cl-csv continue restart. --- src/sources/csv.lisp | 29 +++++++++++++++-------------- test/csv-error.load | 25 +++++++++++++++++++++++++ test/errors.load | 4 +++- 3 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 test/csv-error.load diff --git a/src/sources/csv.lisp b/src/sources/csv.lisp index 5333be4..047112c 100644 --- a/src/sources/csv.lisp +++ b/src/sources/csv.lisp @@ -95,20 +95,21 @@ :columns (columns csv) :target (target csv) :process-row-fn process-row-fn))) - (handler-case - (cl-csv:read-csv input - :row-fn (compile nil reformat-then-process) - :separator (csv-separator csv) - :quote (csv-quote csv) - :escape (csv-escape csv) - :unquoted-empty-string-is-nil t - :quoted-empty-string-is-nil nil - :trim-outer-whitespace (csv-trim-blanks csv) - :newline (csv-newline csv)) - ((or cl-csv:csv-parse-error) (condition) - (progn - (log-message :error "~a" condition) - (pgstate-setf *state* (target csv) :errs -1)))))))))) + (handler-case + (handler-bind ((cl-csv:csv-parse-error #'cl-csv::continue)) + (cl-csv:read-csv input + :row-fn (compile nil reformat-then-process) + :separator (csv-separator csv) + :quote (csv-quote csv) + :escape (csv-escape csv) + :unquoted-empty-string-is-nil t + :quoted-empty-string-is-nil nil + :trim-outer-whitespace (csv-trim-blanks csv) + :newline (csv-newline csv))) + (condition (e) + (progn + (log-message :error "~a" e) + (pgstate-incf *state* (target csv) :errs 1)))))))))) (defmethod copy-to-queue ((csv copy-csv) queue) "Copy data from given CSV definition into lparallel.queue DATAQ" diff --git a/test/csv-error.load b/test/csv-error.load new file mode 100644 index 0000000..1a4a846 --- /dev/null +++ b/test/csv-error.load @@ -0,0 +1,25 @@ +LOAD CSV + FROM INLINE with encoding 'ascii' + INTO postgresql:///pgloader?jordane + + WITH truncate, + fields terminated by '|', + fields not enclosed, + fields escaped by backslash-quote + + SET work_mem to '128MB', + standard_conforming_strings to 'on' + + BEFORE LOAD DO + $$ drop table if exists jordane; $$, + $$ CREATE TABLE jordane + ( + "NOM" character(20), + "PRENOM" character(20) + ) + $$; + +BORDET|Jordane +BORDET|Audrey +JOURDAIN|héhé¶ +BONNIER|testprenombeaucouptroplong diff --git a/test/errors.load b/test/errors.load index 379a386..965e4d8 100644 --- a/test/errors.load +++ b/test/errors.load @@ -11,7 +11,8 @@ * * * Note that we added ragged lines, empty lines, and lines with extra - * columns. + * columns. The last line opens a quoted value and reaches end-of-file + * without closing it, too. */ LOAD CSV @@ -54,3 +55,4 @@ LOAD CSV 9|and another line|2014-01-22| +10|"plop