Allow IN DIRECTORY sub-clause for the FILENAME MATCHING clause.

With this, the user can now specify the directory in which files are going to be read and matched against the regular expression. This was not necessary in the archive expansion mode, but it is required now that the feature is exposed in more cases.
2026-05-04 18:36:12 +02:00 · 2014-05-26 14:45:12 +02:00 · 2014-05-26 14:45:12 +02:00 · b17383fa90
commit b17383fa90
parent a0518f2e98
5 changed files with 40 additions and 6 deletions
--- a/pgloader.1
+++ b/pgloader.1
@ -464,7 +464,9 @@ The whole \fImatching\fR clause must follow the following rule:
 .
 .nf

-[ ALL FILENAMES | [ FIRST ] FILENAME ] MATCHING ~<>
+[ ALL FILENAMES | [ FIRST ] FILENAME ]
+MATCHING regexp
+[ IN DIRECTORY \'\.\.\.\' ]
 .
 .fi
 .
@ -473,6 +475,9 @@ The whole \fImatching\fR clause must follow the following rule:
 .IP
 The \fImatching\fR clause applies given \fIregular expression\fR (see above for exact syntax, several options can be used here) to filenames\. It\'s then possible to load data from only the first match of all of them\.
 .
+.IP
+The optional \fIIN DIRECTORY\fR clause allows specifying which directory to walk for finding the data files, and can be either relative to where the command file is read from, or absolute\. The given directory must exist\.
+.
 .IP "" 0
 .
 .IP
--- a/pgloader.1.md
+++ b/pgloader.1.md
@ -404,12 +404,19 @@ The `csv` format command accepts the following clauses and options:

        The whole *matching* clause must follow the following rule:

-	        [ ALL FILENAMES | [ FIRST ] FILENAME ] MATCHING ~<>
+	        [ ALL FILENAMES | [ FIRST ] FILENAME ]
+            MATCHING regexp
+            [ IN DIRECTORY '...' ]

        The *matching* clause applies given *regular expression* (see above
        for exact syntax, several options can be used here) to filenames.
        It's then possible to load data from only the first match of all of
        them.
+        
+        The optional *IN DIRECTORY* clause allows specifying which directory
+        to walk for finding the data files, and can be either relative to
+        where the command file is read from, or absolute. The given
+        directory must exist.

 	The *FROM* option also supports an optional comma separated list of
 	*field* names describing what is expected in the `CSV` data file.
--- a/src/parser.lisp
+++ b/src/parser.lisp
@ -73,6 +73,7 @@
  (def-keyword-rule "messages")
  (def-keyword-rule "matches")
  (def-keyword-rule "in")
+  (def-keyword-rule "directory")
  (def-keyword-rule "registering")
  (def-keyword-rule "cast")
  (def-keyword-rule "column")
@ -1654,7 +1655,29 @@ load database
      ;; as a string
      (list* :regex :all (cdr regex)))))

-(defrule filename-matching (or first-filename-matching all-filename-matching))
+;; IN DIRECTORY sub-clause: the two keyword results are dropped and only
+;; the value produced by maybe-quoted-filename is returned.
+(defrule in-directory (and kw-in kw-directory maybe-quoted-filename)
+  (:lambda (clause)
+    (third clause)))
+
+;; FILENAME MATCHING plus optional IN DIRECTORY: yields (:regex first-or-all
+;; regex root); directory defaults to *cwd*, relative ones merge against it.
+(defrule filename-matching (and (or first-filename-matching
+                                    all-filename-matching)
+                                (? in-directory))
+  (:lambda (filename-matching)
+    (destructuring-bind ((m-type first-or-all regex) directory) filename-matching
+      (assert (eq m-type :regex))
+      (destructuring-bind (d-type dir) (or directory `(:filename ,*cwd*))
+        (assert (eq d-type :filename))
+        (let ((root (uiop:directory-exists-p
+                     (if (uiop:absolute-pathname-p dir) dir
+                         (uiop:merge-pathnames* dir *cwd*)))))
+          (unless root                  ; refuse a directory that is missing
+            (error "Directory ~s does not exist."
+                   (uiop:native-namestring dir)))
+          `(:regex ,first-or-all ,regex ,root))))))

 (defrule csv-file-source (or stdin
 			     inline
@ -1698,7 +1721,6 @@ load database
 		  (summary       (null *state*))
 		  (*state*       (or *state* (pgloader.utils:make-pgstate)))
 		  (state-after   ,(when after `(pgloader.utils:make-pgstate)))
-                  (*csv-path-root* ,*cwd*)
                  ,@(pgsql-connection-bindings pg-db-uri gucs)
                  ,@(batch-control-bindings options))

--- a/src/sources/sources.lisp
+++ b/src/sources/sources.lisp
@ -188,7 +188,7 @@
    (ecase type
      (:inline   (car part))		; because of &rest
      (:stdin    *standard-input*)
-      (:regex    (destructuring-bind (keep regex) part
+      (:regex    (destructuring-bind (keep regex root) part
 		   (filter-directory regex :keep keep :root root)))
      (:filename (let* ((filename (first part))
                        (realname
--- a/test/csv-filename-pattern.load
+++ b/test/csv-filename-pattern.load
@ -1,5 +1,5 @@
 load csv
-    from all filenames matching ~<data/matching.*csv$> (id, field)
+    from all filenames matching ~<matching.*csv$> in directory 'data' (id, field)
    into postgresql:///pgloader?matching
    with fields optionally enclosed by '"', 
         fields terminated by ',',