From e9e9e364b0a089209198e28e42d4554e3e8a6353 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine <dim@tapoueh.org>
Date: Mon, 26 May 2014 15:04:06 +0200
Subject: [PATCH] Add optional clauses HAVING FIELDS and TARGET COLUMNS.

---
 pgloader.1                     |  4 +++-
 pgloader.1.md                  |  5 ++++-
 src/parser.lisp                | 21 +++++++++++++++------
 test/csv-districts.load        |  2 ++
 test/csv-filename-pattern.load |  4 +++-
 5 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/pgloader.1 b/pgloader.1
index f8b33bb..46afd02 100644
--- a/pgloader.1
+++ b/pgloader.1
@@ -410,10 +410,12 @@ This command instructs pgloader to load data from a \fBCSV\fR file\. Here\'s an
 
 LOAD CSV
    FROM \'GeoLiteCity\-Blocks\.csv\' WITH ENCODING iso\-646\-us
+        HAVING FIELDS
         (
            startIpNum, endIpNum, locId
         )
    INTO postgresql://user@localhost:54393/dbname?geolite\.blocks
+        TARGET COLUMNS
         (
            iprange ip4r using (ip\-range startIpNum endIpNum),
            locId
@@ -481,7 +483,7 @@ The optional \fIIN DIRECTORY\fR clause allows specifying which directory to walk
 .IP "" 0
 .
 .IP
-The \fIFROM\fR option also supports an optional comma separated list of \fIfield\fR names describing what is expected in the \fBCSV\fR data file\.
+The \fIFROM\fR option also supports an optional comma separated list of \fIfield\fR names describing what is expected in the \fBCSV\fR data file, optionally introduced by the clause \fBHAVING FIELDS\fR\.
 .
 .IP
 Each field name can be either only one name or a name following with specific reader options for that field\. Supported per\-field reader options are:
diff --git a/pgloader.1.md b/pgloader.1.md
index 4bbdb69..b875e63 100644
--- a/pgloader.1.md
+++ b/pgloader.1.md
@@ -363,10 +363,12 @@ example:
 
     LOAD CSV
        FROM 'GeoLiteCity-Blocks.csv' WITH ENCODING iso-646-us
+            HAVING FIELDS
             (
                startIpNum, endIpNum, locId
             )
        INTO postgresql://user@localhost:54393/dbname?geolite.blocks
+            TARGET COLUMNS
             (
                iprange ip4r using (ip-range startIpNum endIpNum),
                locId
@@ -419,7 +421,8 @@ The `csv` format command accepts the following clauses and options:
         directory must exists.
 
 	The *FROM* option also supports an optional comma separated list of
-	*field* names describing what is expected in the `CSV` data file.
+	*field* names describing what is expected in the `CSV` data file,
+	optionally introduced by the clause `HAVING FIELDS`.
 
 	Each field name can be either only one name or a name following with
 	specific reader options for that field. Supported per-field reader
diff --git a/src/parser.lisp b/src/parser.lisp
index 0d4a85d..0d28a14 100644
--- a/src/parser.lisp
+++ b/src/parser.lisp
@@ -77,6 +77,8 @@
   (def-keyword-rule "registering")
   (def-keyword-rule "cast")
   (def-keyword-rule "column")
+  (def-keyword-rule "target")
+  (def-keyword-rule "columns")
   (def-keyword-rule "type")
   (def-keyword-rule "extra")
   (def-keyword-rule "include")
@@ -102,6 +104,7 @@
   (def-keyword-rule "decoding")
   (def-keyword-rule "truncate")
   (def-keyword-rule "lines")
+  (def-keyword-rule "having")
   (def-keyword-rule "fields")
   (def-keyword-rule "optionally")
   (def-keyword-rule "enclosed")
@@ -1524,10 +1527,13 @@ load database
 (defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
   (:constant :close-paren))
 
-(defrule csv-source-field-list (and open-paren csv-source-fields close-paren)
+(defrule having-fields (and kw-having kw-fields) (:constant nil)) ; kw-having then kw-fields; the production is always NIL
+
+(defrule csv-source-field-list (and (? having-fields) ; the having-fields prefix may be omitted
+                                    open-paren csv-source-fields close-paren)
   (:lambda (source)
-    (destructuring-bind (open field-defs close) source
-      (declare (ignore open close))
+    (destructuring-bind (having open field-defs close) source ; four slots: optional prefix, "(", fields, ")"
+      (declare (ignore having open close)) ; only the field definitions are returned
       field-defs)))
 
 ;;
@@ -1607,10 +1613,13 @@ load database
     (destructuring-bind (col1 cols) source
       (list* col1 cols))))
 
-(defrule csv-target-column-list (and open-paren csv-target-columns close-paren)
+(defrule target-columns (and kw-target kw-columns) (:constant nil)) ; kw-target then kw-columns; the production is always NIL
+
+(defrule csv-target-column-list (and (? target-columns) ; the target-columns prefix may be omitted
+                                     open-paren csv-target-columns close-paren)
   (:lambda (source)
-    (destructuring-bind (open columns close) source
-      (declare (ignore open close))
+    (destructuring-bind (target-columns open columns close) source ; four slots: optional prefix, "(", columns, ")"
+      (declare (ignore target-columns open close)) ; only the column definitions are returned
       columns)))
 ;;
 ;; The main command parsing
diff --git a/test/csv-districts.load b/test/csv-districts.load
index 8f6cb9b..3787f32 100644
--- a/test/csv-districts.load
+++ b/test/csv-districts.load
@@ -9,6 +9,7 @@
 
 LOAD CSV
      FROM data/2013_Gaz_113CDs_national.txt
+     HAVING FIELDS
       (
          usps,          -- United States Postal Service State Abbreviation
          geoid,         -- Geographic Identifier
@@ -21,6 +22,7 @@ LOAD CSV
       )
 
      INTO postgresql:///pgloader?districts
+     TARGET COLUMNS
       (
          usps, geoid, aland, awater, aland_sqmi, awater_sqmi,
          location point using (format nil "(~a,~a)" intptlong intptlat)
diff --git a/test/csv-filename-pattern.load b/test/csv-filename-pattern.load
index bed0ce4..e283ff6 100644
--- a/test/csv-filename-pattern.load
+++ b/test/csv-filename-pattern.load
@@ -1,5 +1,7 @@
 load csv
-    from all filenames matching ~<matching.*csv$> in directory 'data' (id, field)
+    from all filenames matching ~<matching.*csv$>
+         in directory 'data'
+         having fields (id, field)
     into postgresql:///pgloader?matching
     with fields optionally enclosed by '"', 
          fields terminated by ',',