From 04ddf940d9551e72a23918e0213acbbac42fcebc Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Thu, 20 Aug 2015 18:17:18 +0200
Subject: [PATCH] Left pad COPY octal chars with 0, fix #275.

The COPY TEXT format accepts non-printable characters as an escape
sequence wherein pgloader passes the octal number for the character
in its encoding. When doing that with small numbers like \6, if the
non-printable character is followed by other digits, the sequence
becomes e.g. \646, which might not be part of the target encoding...

To fix, always left-pad the character's octal number with zeroes to
three digits, so that we now send in \00646, which COPY knows how to
read: the char at \006, then 4, then 6.

Also copy the test case over to pgloader and run it in the test suite.
---
 src/pgsql/copy-format.lisp                  |  2 +-
 test/Makefile                               |  1 +
 test/csv-non-printable.load                 | 29 +++++++++++++++++++++
 test/regress/expected/csv-non-printable.out |  7 +++++
 4 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 test/csv-non-printable.load
 create mode 100644 test/regress/expected/csv-non-printable.out

diff --git a/src/pgsql/copy-format.lisp b/src/pgsql/copy-format.lisp
index 92e97ca..bf104d7 100644
--- a/src/pgsql/copy-format.lisp
+++ b/src/pgsql/copy-format.lisp
@@ -78,7 +78,7 @@ details about the format, and format specs."
                            (t
                             (if (<= 32 byte 127)
                                 (write-bytes (code-char byte))
-                                (write-bytes (format nil "\\~o" byte))))))))
+                                (write-bytes (format nil "\\~3,'0o" byte))))))))
          when more? do (write-bytes #\Tab)
          finally       (progn (write-bytes #\Newline)
                               (return bytes))))))
diff --git a/test/Makefile b/test/Makefile
index bee62aa..a6cc71a 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -14,6 +14,7 @@ REGRESS= allcols.load 		    \
          csv-header.load            \
          csv-json.load              \
          csv-keep-extra-blanks.load \
+         csv-non-printable.load     \
          csv-nulls.load             \
          csv-trim-extra-blanks.load \
          csv.load 		    \
diff --git a/test/csv-non-printable.load b/test/csv-non-printable.load
new file mode 100644
index 0000000..f172b4f
--- /dev/null
+++ b/test/csv-non-printable.load
@@ -0,0 +1,29 @@
+--
+-- From https://github.com/dimitri/pgloader/issues/275
+--
+
+LOAD CSV
+   FROM inline with encoding 'LATIN1'
+   INTO postgresql:///pgloader?tab_csv
+   WITH truncate,
+        skip header = 0,
+        fields terminated by ',',
+        fields optionally enclosed by '"'
+
+   BEFORE LOAD DO
+     $$ drop table if exists tab_csv; $$,
+     $$ create table tab_csv
+          (c1 varchar(100),
+           c2 varchar(100),
+           c3 varchar(100));
+     $$
+
+;
+
+col1, 45, "005 45 works"
+col1, 44, "006 44 Fails 0xa5"
+col1, 45, "006 45 Fails 0xa6"
+col1, "45", "006 45 Fails using escape 0xa6"
+col1, 46, "006 46 Fails 0xa7"
+col1, 47, "006 47 Fails 0xa8"
+col1, 4T works, "006 followed by 4 works ? why"
\ No newline at end of file
diff --git a/test/regress/expected/csv-non-printable.out b/test/regress/expected/csv-non-printable.out
new file mode 100644
index 0000000..17308b0
--- /dev/null
+++ b/test/regress/expected/csv-non-printable.out
@@ -0,0 +1,7 @@
+col1	45	005 45 works
+col1	44	006 44 Fails 0xa5
+col1	45	006 45 Fails 0xa6
+col1	45	006 45 Fails using escape 0xa6
+col1	46	006 46 Fails 0xa7
+col1	47	006 47 Fails 0xa8
+col1	4T works	006 followed by 4 works ? why