diff --git a/pgloader.1 b/pgloader.1 index 4cd029a..f8b33bb 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -464,7 +464,9 @@ The whole \fImatching\fR clause must follow the following rule: . .nf -[ ALL FILENAMES | [ FIRST ] FILENAME ] MATCHING ~<> +[ ALL FILENAMES | [ FIRST ] FILENAME ] +MATCHING regexp +[ IN DIRECTORY \'\.\.\.\' ] . .fi . @@ -473,6 +475,9 @@ The whole \fImatching\fR clause must follow the following rule: .IP The \fImatching\fR clause applies given \fIregular expression\fR (see above for exact syntax, several options can be used here) to filenames\. It\'s then possible to load data from only the first match of all of them\. . +.IP +The optional \fIIN DIRECTORY\fR clause allows specifying which directory to walk for finding the data files, and can be either relative to where the command file is read from, or absolute\. The given directory must exist\. +. .IP "" 0 . .IP diff --git a/pgloader.1.md b/pgloader.1.md index ff2deaf..4bbdb69 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -404,12 +404,19 @@ The `csv` format command accepts the following clauses and options: The whole *matching* clause must follow the following rule: - [ ALL FILENAMES | [ FIRST ] FILENAME ] MATCHING ~<> + [ ALL FILENAMES | [ FIRST ] FILENAME ] + MATCHING regexp + [ IN DIRECTORY '...' ] The *matching* clause applies given *regular expression* (see above for exact syntax, several options can be used here) to filenames. It's then possible to load data from only the first match of all of them. + + The optional *IN DIRECTORY* clause allows specifying which directory + to walk for finding the data files, and can be either relative to + where the command file is read from, or absolute. The given + directory must exist. The *FROM* option also supports an optional comma separated list of *field* names describing what is expected in the `CSV` data file. 
diff --git a/src/parser.lisp b/src/parser.lisp
index 3f2906e..0d4a85d 100644
--- a/src/parser.lisp
+++ b/src/parser.lisp
@@ -73,6 +73,7 @@
   (def-keyword-rule "messages")
   (def-keyword-rule "matches")
   (def-keyword-rule "in")
+  (def-keyword-rule "directory")
   (def-keyword-rule "registering")
   (def-keyword-rule "cast")
   (def-keyword-rule "column")
@@ -1654,7 +1655,36 @@ load database
                     ;; as a string
                     (list* :regex :all (cdr regex)))))
 
-(defrule filename-matching (or first-filename-matching all-filename-matching))
+;;; IN DIRECTORY clause: drops the two keywords and returns whatever
+;;; maybe-quoted-filename produced for the directory.
+(defrule in-directory (and kw-in kw-directory maybe-quoted-filename)
+  (:lambda (in-d)
+    (destructuring-bind (in d dir) in-d
+      (declare (ignore in d))
+      dir)))
+
+;;; MATCHING clause followed by an optional IN DIRECTORY clause.
+;;; Returns (:regex first-or-all regex root) where ROOT is the result of
+;;; uiop:directory-exists-p on the given directory (merged with *cwd*
+;;; when it is not absolute).  The directory defaults to *cwd* when the
+;;; clause is omitted; an error is signaled when it cannot be found.
+(defrule filename-matching (and (or first-filename-matching
+                                    all-filename-matching)
+                                (? in-directory))
+  (:lambda (filename-matching)
+    (destructuring-bind (matching directory) filename-matching
+      (let ((directory (or directory `(:filename ,*cwd*))))
+        (destructuring-bind (m-type first-or-all regex) matching
+          (assert (eq m-type :regex))
+          (destructuring-bind (d-type dir) directory
+            (assert (eq d-type :filename))
+            (let ((root (uiop:directory-exists-p
+                         (if (uiop:absolute-pathname-p dir) dir
+                             (uiop:merge-pathnames* dir *cwd*)))))
+              (unless root
+                (error "Directory ~s does not exist."
+                       (uiop:native-namestring dir)))
+              `(:regex ,first-or-all ,regex ,root))))))))
 
 (defrule csv-file-source (or stdin
                              inline
@@ -1698,7 +1728,6 @@ load database
               (summary (null *state*))
               (*state* (or *state* (pgloader.utils:make-pgstate)))
               (state-after ,(when after `(pgloader.utils:make-pgstate)))
-              (*csv-path-root* ,*cwd*)
               ,@(pgsql-connection-bindings pg-db-uri gucs)
               ,@(batch-control-bindings options))
 
diff --git a/src/sources/sources.lisp b/src/sources/sources.lisp
index 4342849..e8507a5 100644
--- a/src/sources/sources.lisp
+++ b/src/sources/sources.lisp
@@ -188,7 +188,7 @@
     (ecase type
       (:inline (car part)) ; because of &rest
       (:stdin *standard-input*)
-      (:regex (destructuring-bind (keep regex) part
+      (:regex (destructuring-bind (keep regex root) part
                 (filter-directory regex :keep keep :root root)))
       (:filename (let* ((filename (first part))
                         (realname
diff --git a/test/csv-filename-pattern.load b/test/csv-filename-pattern.load
index 548782b..bed0ce4 100644
--- a/test/csv-filename-pattern.load
+++ b/test/csv-filename-pattern.load
@@ -1,5 +1,5 @@
 load csv
-     from all filenames matching ~ (id, field)
+     from all filenames matching ~ in directory 'data' (id, field)
      into postgresql:///pgloader?matching
      with fields optionally enclosed by '"',
           fields terminated by ',',