From e9e9e364b0a089209198e28e42d4554e3e8a6353 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Mon, 26 May 2014 15:04:06 +0200 Subject: [PATCH] Add optional clauses HAVING FIELDS and TARGET COLUMNS. --- pgloader.1 | 4 +++- pgloader.1.md | 5 ++++- src/parser.lisp | 21 +++++++++++++++------ test/csv-districts.load | 2 ++ test/csv-filename-pattern.load | 4 +++- 5 files changed, 27 insertions(+), 9 deletions(-) diff --git a/pgloader.1 b/pgloader.1 index f8b33bb..46afd02 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -410,10 +410,12 @@ This command instructs pgloader to load data from a \fBCSV\fR file\. Here\'s an LOAD CSV FROM \'GeoLiteCity\-Blocks\.csv\' WITH ENCODING iso\-646\-us + HAVING FIELDS ( startIpNum, endIpNum, locId ) INTO postgresql://user@localhost:54393/dbname?geolite\.blocks + TARGET COLUMNS ( iprange ip4r using (ip\-range startIpNum endIpNum), locId @@ -481,7 +483,7 @@ The optional \fIIN DIRECTORY\fR clause allows specifying which directory to walk .IP "" 0 . .IP -The \fIFROM\fR option also supports an optional comma separated list of \fIfield\fR names describing what is expected in the \fBCSV\fR data file\. +The \fIFROM\fR option also supports an optional comma separated list of \fIfield\fR names describing what is expected in the \fBCSV\fR data file, optionally introduced by the clause \fBHAVING FIELDS\fR\. . .IP Each field name can be either only one name or a name following with specific reader options for that field\. 
Supported per\-field reader options are: diff --git a/pgloader.1.md b/pgloader.1.md index 4bbdb69..b875e63 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -363,10 +363,12 @@ example: LOAD CSV FROM 'GeoLiteCity-Blocks.csv' WITH ENCODING iso-646-us + HAVING FIELDS ( startIpNum, endIpNum, locId ) INTO postgresql://user@localhost:54393/dbname?geolite.blocks + TARGET COLUMNS ( iprange ip4r using (ip-range startIpNum endIpNum), locId @@ -419,7 +421,8 @@ The `csv` format command accepts the following clauses and options: directory must exists. The *FROM* option also supports an optional comma separated list of - *field* names describing what is expected in the `CSV` data file. + *field* names describing what is expected in the `CSV` data file, + optionally introduced by the clause `HAVING FIELDS`. Each field name can be either only one name or a name following with specific reader options for that field. Supported per-field reader diff --git a/src/parser.lisp b/src/parser.lisp index 0d4a85d..0d28a14 100644 --- a/src/parser.lisp +++ b/src/parser.lisp @@ -77,6 +77,8 @@ (def-keyword-rule "registering") (def-keyword-rule "cast") (def-keyword-rule "column") + (def-keyword-rule "target") + (def-keyword-rule "columns") (def-keyword-rule "type") (def-keyword-rule "extra") (def-keyword-rule "include") @@ -102,6 +104,7 @@ (def-keyword-rule "decoding") (def-keyword-rule "truncate") (def-keyword-rule "lines") + (def-keyword-rule "having") (def-keyword-rule "fields") (def-keyword-rule "optionally") (def-keyword-rule "enclosed") @@ -1524,10 +1527,13 @@ load database (defrule close-paren (and ignore-whitespace #\) ignore-whitespace) (:constant :close-paren)) -(defrule csv-source-field-list (and open-paren csv-source-fields close-paren) +(defrule having-fields (and kw-having kw-fields) (:constant nil)) + +(defrule csv-source-field-list (and (? 
having-fields) + open-paren csv-source-fields close-paren) (:lambda (source) - (destructuring-bind (open field-defs close) source - (declare (ignore open close)) + (destructuring-bind (having open field-defs close) source + (declare (ignore having open close)) field-defs))) ;; @@ -1607,10 +1613,13 @@ load database (destructuring-bind (col1 cols) source (list* col1 cols)))) -(defrule csv-target-column-list (and open-paren csv-target-columns close-paren) +(defrule target-columns (and kw-target kw-columns) (:constant nil)) + +(defrule csv-target-column-list (and (? target-columns) + open-paren csv-target-columns close-paren) (:lambda (source) - (destructuring-bind (open columns close) source - (declare (ignore open close)) + (destructuring-bind (target-columns open columns close) source + (declare (ignore target-columns open close)) columns))) ;; ;; The main command parsing diff --git a/test/csv-districts.load b/test/csv-districts.load index 8f6cb9b..3787f32 100644 --- a/test/csv-districts.load +++ b/test/csv-districts.load @@ -9,6 +9,7 @@ LOAD CSV FROM data/2013_Gaz_113CDs_national.txt + HAVING FIELDS ( usps, -- United States Postal Service State Abbreviation geoid, -- Geographic Identifier @@ -21,6 +22,7 @@ LOAD CSV ) INTO postgresql:///pgloader?districts + TARGET COLUMNS ( usps, geoid, aland, awater, aland_sqmi, awater_sqmi, location point using (format nil "(~a,~a)" intptlong intptlat) diff --git a/test/csv-filename-pattern.load b/test/csv-filename-pattern.load index bed0ce4..e283ff6 100644 --- a/test/csv-filename-pattern.load +++ b/test/csv-filename-pattern.load @@ -1,5 +1,7 @@ load csv - from all filenames matching ~ in directory 'data' (id, field) + from all filenames matching ~ + in directory 'data' + having fields (id, field) into postgresql:///pgloader?matching with fields optionally enclosed by '"', fields terminated by ',',