From 04ddf940d9551e72a23918e0213acbbac42fcebc Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Thu, 20 Aug 2015 18:17:18 +0200 Subject: [PATCH] Left pad COPY octal chars with 0, fix #275. The COPY TEXT format accepts non-printable characters with an escape sequence wherein pgloader can pass in the octal number for the character in its encoding. When doing that with small numbers like \6, and the non-printable character is then followed by other digits, the sequence becomes e.g. \646, which might not be part of the target encoding... To fix, always left pad the character octal number with zeroes, so that we now send in \00646 which COPY knows how to read: the char at \006 then 4 then 6. Also copy the test case over to pgloader and run it in the test suite. --- src/pgsql/copy-format.lisp | 2 +- test/Makefile | 1 + test/csv-non-printable.load | 29 +++++++++++++++++++++ test/regress/expected/csv-non-printable.out | 7 +++++ 4 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 test/csv-non-printable.load create mode 100644 test/regress/expected/csv-non-printable.out diff --git a/src/pgsql/copy-format.lisp b/src/pgsql/copy-format.lisp index 92e97ca..bf104d7 100644 --- a/src/pgsql/copy-format.lisp +++ b/src/pgsql/copy-format.lisp @@ -78,7 +78,7 @@ details about the format, and format specs." (t (if (<= 32 byte 127) (write-bytes (code-char byte)) - (write-bytes (format nil "\\~o" byte)))))))) + (write-bytes (format nil "\\~3,'0o" byte)))))))) when more? 
do (write-bytes #\Tab) finally (progn (write-bytes #\Newline) (return bytes)))))) diff --git a/test/Makefile b/test/Makefile index bee62aa..a6cc71a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -14,6 +14,7 @@ REGRESS= allcols.load \ csv-header.load \ csv-json.load \ csv-keep-extra-blanks.load \ + csv-non-printable.load \ csv-nulls.load \ csv-trim-extra-blanks.load \ csv.load \ diff --git a/test/csv-non-printable.load b/test/csv-non-printable.load new file mode 100644 index 0000000..f172b4f --- /dev/null +++ b/test/csv-non-printable.load @@ -0,0 +1,29 @@ +-- +-- From https://github.com/dimitri/pgloader/issues/275 +-- + +LOAD CSV + FROM inline with encoding 'LATIN1' + INTO postgresql:///pgloader?tab_csv + WITH truncate, + skip header = 0, + fields terminated by ',', + fields optionally enclosed by '"' + + BEFORE LOAD DO + $$ drop table if exists tab_csv; $$, + $$ create table tab_csv + (c1 varchar(100), + c2 varchar(100), + c3 varchar(100)); + $$ + +; + +col1, 45, "005 45 works" +col1, 44, "006 44 Fails 0xa5" +col1, 45, "006 45 Fails 0xa6" +col1, "45", "006 45 Fails using escape 0xa6" +col1, 46, "006 46 Fails 0xa7" +col1, 47, "006 47 Fails 0xa8" +col1, 4T works, "006 followed by 4 works ? why" \ No newline at end of file diff --git a/test/regress/expected/csv-non-printable.out b/test/regress/expected/csv-non-printable.out new file mode 100644 index 0000000..17308b0 --- /dev/null +++ b/test/regress/expected/csv-non-printable.out @@ -0,0 +1,7 @@ +col1 45 005 45 works +col1 44 006 44 Fails 0xa5 +col1 45 006 45 Fails 0xa6 +col1 45 006 45 Fails using escape 0xa6 +col1 46 006 46 Fails 0xa7 +col1 47 006 47 Fails 0xa8 +col1 4T works 006 followed by 4 works ? why