Add optional clauses USING FIELDS and TARGET COLUMNS.

This commit is contained in:
Dimitri Fontaine 2014-05-26 15:04:06 +02:00
parent b17383fa90
commit e9e9e364b0
5 changed files with 27 additions and 9 deletions

View File

@ -410,10 +410,12 @@ This command instructs pgloader to load data from a \fBCSV\fR file\. Here\'s an
LOAD CSV
FROM \'GeoLiteCity\-Blocks\.csv\' WITH ENCODING iso\-646\-us
HAVING FIELDS
(
startIpNum, endIpNum, locId
)
INTO postgresql://user@localhost:54393/dbname?geolite\.blocks
TARGET COLUMNS
(
iprange ip4r using (ip\-range startIpNum endIpNum),
locId
@ -481,7 +483,7 @@ The optional \fIIN DIRECTORY\fR clause allows specifying which directory to walk
.IP "" 0
.
.IP
The \fIFROM\fR option also supports an optional comma separated list of \fIfield\fR names describing what is expected in the \fBCSV\fR data file\.
The \fIFROM\fR option also supports an optional comma separated list of \fIfield\fR names describing what is expected in the \fBCSV\fR data file, optionally introduced by the clause \fBHAVING FIELDS\fR\.
.
.IP
Each field name can be either only one name or a name followed by specific reader options for that field\. Supported per\-field reader options are:

View File

@ -363,10 +363,12 @@ example:
LOAD CSV
FROM 'GeoLiteCity-Blocks.csv' WITH ENCODING iso-646-us
HAVING FIELDS
(
startIpNum, endIpNum, locId
)
INTO postgresql://user@localhost:54393/dbname?geolite.blocks
TARGET COLUMNS
(
iprange ip4r using (ip-range startIpNum endIpNum),
locId
@ -419,7 +421,8 @@ The `csv` format command accepts the following clauses and options:
directory must exist.
The *FROM* option also supports an optional comma separated list of
*field* names describing what is expected in the `CSV` data file.
*field* names describing what is expected in the `CSV` data file,
optionally introduced by the clause `HAVING FIELDS`.
Each field name can be either only one name or a name followed by
specific reader options for that field. Supported per-field reader

View File

@ -77,6 +77,8 @@
(def-keyword-rule "registering")
(def-keyword-rule "cast")
(def-keyword-rule "column")
(def-keyword-rule "target")
(def-keyword-rule "columns")
(def-keyword-rule "type")
(def-keyword-rule "extra")
(def-keyword-rule "include")
@ -102,6 +104,7 @@
(def-keyword-rule "decoding")
(def-keyword-rule "truncate")
(def-keyword-rule "lines")
(def-keyword-rule "having")
(def-keyword-rule "fields")
(def-keyword-rule "optionally")
(def-keyword-rule "enclosed")
@ -1524,10 +1527,13 @@ load database
(defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
(:constant :close-paren))
(defrule csv-source-field-list (and open-paren csv-source-fields close-paren)
(defrule having-fields (and kw-having kw-fields) (:constant nil))
(defrule csv-source-field-list (and (? having-fields)
open-paren csv-source-fields close-paren)
(:lambda (source)
(destructuring-bind (open field-defs close) source
(declare (ignore open close))
(destructuring-bind (having open field-defs close) source
(declare (ignore having open close))
field-defs)))
;;
@ -1607,10 +1613,13 @@ load database
(destructuring-bind (col1 cols) source
(list* col1 cols))))
(defrule csv-target-column-list (and open-paren csv-target-columns close-paren)
(defrule target-columns (and kw-target kw-columns) (:constant nil))
(defrule csv-target-column-list (and (? target-columns)
open-paren csv-target-columns close-paren)
(:lambda (source)
(destructuring-bind (open columns close) source
(declare (ignore open close))
(destructuring-bind (target-columns open columns close) source
(declare (ignore target-columns open close))
columns)))
;;
;; The main command parsing

View File

@ -9,6 +9,7 @@
LOAD CSV
FROM data/2013_Gaz_113CDs_national.txt
HAVING FIELDS
(
usps, -- United States Postal Service State Abbreviation
geoid, -- Geographic Identifier
@ -21,6 +22,7 @@ LOAD CSV
)
INTO postgresql:///pgloader?districts
TARGET COLUMNS
(
usps, geoid, aland, awater, aland_sqmi, awater_sqmi,
location point using (format nil "(~a,~a)" intptlong intptlat)

View File

@ -1,5 +1,7 @@
load csv
from all filenames matching ~<matching.*csv$> in directory 'data' (id, field)
from all filenames matching ~<matching.*csv$>
in directory 'data'
having fields (id, field)
into postgresql:///pgloader?matching
with fields optionally enclosed by '"',
fields terminated by ',',