mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-04 18:36:12 +02:00
Allow IN DIRECTORY sub-clause for the FILENAME MATCHING clause.
With this, the user can now specify the directory in which the files are going to be looked up and matched against the regular expression. This used not to be necessary in the archive expansion mode, but is required now that the feature is exposed in more cases.
This commit is contained in:
parent
a0518f2e98
commit
b17383fa90
@ -464,7 +464,9 @@ The whole \fImatching\fR clause must follow the following rule:
|
||||
.
|
||||
.nf
|
||||
|
||||
[ ALL FILENAMES | [ FIRST ] FILENAME ] MATCHING ~<>
|
||||
[ ALL FILENAMES | [ FIRST ] FILENAME ]
|
||||
MATCHING regexp
|
||||
[ IN DIRECTORY \'\.\.\.\' ]
|
||||
.
|
||||
.fi
|
||||
.
|
||||
@ -473,6 +475,9 @@ The whole \fImatching\fR clause must follow the following rule:
|
||||
.IP
|
||||
The \fImatching\fR clause applies given \fIregular expression\fR (see above for exact syntax, several options can be used here) to filenames\. It\'s then possible to load data from only the first match of all of them\.
|
||||
.
|
||||
.IP
|
||||
The optional \fIIN DIRECTORY\fR clause allows specifying which directory to walk for finding the data files, and can be either relative to where the command file is read from, or absolute\. The given directory must exist\.
|
||||
.
|
||||
.IP "" 0
|
||||
.
|
||||
.IP
|
||||
|
||||
@ -404,12 +404,19 @@ The `csv` format command accepts the following clauses and options:
|
||||
|
||||
The whole *matching* clause must follow the following rule:
|
||||
|
||||
[ ALL FILENAMES | [ FIRST ] FILENAME ] MATCHING ~<>
|
||||
[ ALL FILENAMES | [ FIRST ] FILENAME ]
|
||||
MATCHING regexp
|
||||
[ IN DIRECTORY '...' ]
|
||||
|
||||
The *matching* clause applies given *regular expression* (see above
|
||||
for exact syntax, several options can be used here) to filenames.
|
||||
It's then possible to load data from only the first match of all of
|
||||
them.
|
||||
|
||||
The optional *IN DIRECTORY* clause allows specifying which directory
|
||||
to walk for finding the data files, and can be either relative to
|
||||
where the command file is read from, or absolute. The given
|
||||
directory must exist.
|
||||
|
||||
The *FROM* option also supports an optional comma separated list of
|
||||
*field* names describing what is expected in the `CSV` data file.
|
||||
|
||||
@ -73,6 +73,7 @@
|
||||
(def-keyword-rule "messages")
|
||||
(def-keyword-rule "matches")
|
||||
(def-keyword-rule "in")
|
||||
(def-keyword-rule "directory")
|
||||
(def-keyword-rule "registering")
|
||||
(def-keyword-rule "cast")
|
||||
(def-keyword-rule "column")
|
||||
@ -1654,7 +1655,29 @@ load database
|
||||
;; as a string
|
||||
(list* :regex :all (cdr regex)))))
|
||||
|
||||
(defrule filename-matching (or first-filename-matching all-filename-matching))
|
||||
;; Grammar rule for the optional `IN DIRECTORY <filename>` sub-clause:
;; the two keywords followed by a (possibly quoted) filename.
;; The keyword matches are discarded; only the parse result of
;; maybe-quoted-filename is returned.
;; NOTE(review): the filename-matching rule destructures this value as
;; (:filename dir) -- confirm against maybe-quoted-filename's definition.
(defrule in-directory (and kw-in kw-directory maybe-quoted-filename)
|
||||
(:lambda (in-d)
|
||||
(destructuring-bind (in d dir) in-d
|
||||
(declare (ignore in d))
|
||||
dir)))
|
||||
|
||||
;; Grammar rule for the whole matching clause: a FIRST/ALL filename
;; matching spec, optionally followed by an IN DIRECTORY sub-clause.
;; Produces `(:regex first-or-all regex root)`, where ROOT is the
;; directory to search, as returned by uiop:directory-exists-p.
;; Signals an error while parsing when that directory does not exist.
(defrule filename-matching (and (or first-filename-matching
|
||||
all-filename-matching)
|
||||
(? in-directory))
|
||||
(:lambda (filename-matching)
|
||||
(destructuring-bind (matching directory) filename-matching
|
||||
;; No IN DIRECTORY clause given: fall back to *cwd*.
(let ((directory (or directory `(:filename ,*cwd*))))
|
||||
(destructuring-bind (m-type first-or-all regex) matching
|
||||
(assert (eq m-type :regex))
|
||||
(destructuring-bind (d-type dir) directory
|
||||
(assert (eq d-type :filename))
|
||||
;; Absolute directories are used as given; anything else is merged
;; with *cwd* before checking that it exists.
(let ((root (uiop:directory-exists-p
|
||||
(if (uiop:absolute-pathname-p dir) dir
|
||||
(uiop:merge-pathnames* dir *cwd*)))))
|
||||
(unless root
|
||||
;; The message reports DIR as written by the user, not the merged path.
(error "Directory ~s does not exists."
|
||||
(uiop:native-namestring dir)))
|
||||
`(:regex ,first-or-all ,regex ,root))))))))
|
||||
|
||||
(defrule csv-file-source (or stdin
|
||||
inline
|
||||
@ -1698,7 +1721,6 @@ load database
|
||||
(summary (null *state*))
|
||||
(*state* (or *state* (pgloader.utils:make-pgstate)))
|
||||
(state-after ,(when after `(pgloader.utils:make-pgstate)))
|
||||
(*csv-path-root* ,*cwd*)
|
||||
,@(pgsql-connection-bindings pg-db-uri gucs)
|
||||
,@(batch-control-bindings options))
|
||||
|
||||
|
||||
@ -188,7 +188,7 @@
|
||||
(ecase type
|
||||
(:inline (car part)) ; because of &rest
|
||||
(:stdin *standard-input*)
|
||||
(:regex (destructuring-bind (keep regex) part
|
||||
(:regex (destructuring-bind (keep regex root) part
|
||||
(filter-directory regex :keep keep :root root)))
|
||||
(:filename (let* ((filename (first part))
|
||||
(realname
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
load csv
|
||||
from all filenames matching ~<data/matching.*csv$> (id, field)
|
||||
from all filenames matching ~<matching.*csv$> in directory 'data' (id, field)
|
||||
into postgresql:///pgloader?matching
|
||||
with fields optionally enclosed by '"',
|
||||
fields terminated by ',',
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user