mirror of
https://github.com/dimitri/pgloader.git
synced 2026-02-27 01:01:18 +01:00
Port tests from pgloader 2.x, implement trailing-sep.
This commit is contained in:
parent
bfaf996265
commit
89aaabd179
115
parse-ini.lisp
115
parse-ini.lisp
@ -57,14 +57,14 @@
|
||||
:template template)))
|
||||
(datestyle (read-value-for-param config section "datestyle"
|
||||
:template template)))
|
||||
|
||||
(setf (params-gucs params)
|
||||
(append
|
||||
(when encoding (list (cons "client_encoding" encoding)))
|
||||
(when datestyle (list (cons "datestyle" datestyle)))
|
||||
(get-gucs config section)
|
||||
(when template (get-gucs config template))
|
||||
(get-gucs config *global-section*)))))
|
||||
(merge-gucs
|
||||
(get-gucs config section)
|
||||
(when template (get-gucs config template))
|
||||
(get-gucs config *global-section*))))))
|
||||
|
||||
(defun get-gucs (config section)
|
||||
"Get PostgreSQL settings from SECTION."
|
||||
@ -73,6 +73,14 @@
|
||||
when (and (< 10 (length option)) (string= "pg_option_" option :end2 10))
|
||||
collect (cons (subseq option 10) value)))
|
||||
|
||||
(defun merge-gucs (&rest gucs)
  "Consolidate any number of GUC alists, given as GUCS, into a single alist.

   Each element of GUCS is a list of (name . value) conses, or NIL. The
   lists are scanned in the order given; when a GUC name appears more than
   once, only its first occurrence is kept. Names are compared with
   STRING=, so the comparison is case sensitive."
  (let ((merged '()))
    ;; MERGED is built in reverse order, hence the final NREVERSE.
    (dolist (guc-list gucs (nreverse merged))
      (dolist (guc guc-list)
        ;; only keep a setting when its name has not been seen yet
        (unless (member (car guc) merged :key #'car :test #'string=)
          (push guc merged))))))
|
||||
|
||||
(defun user-defined-columns (config section)
|
||||
"Fetch all option that begin with udc_ as user defined columns"
|
||||
(loop for (option . value) in (ini:items config section)
|
||||
@ -96,7 +104,7 @@
|
||||
for name in (list-columns dbname table-name)
|
||||
collect (cons name pos))))
|
||||
|
||||
(defun parse-columns-spec (string config section)
|
||||
(defun parse-columns-spec (string config section &key trailing-sep)
|
||||
"Parse old-style columns specification, such as:
|
||||
* --> nil
|
||||
x, y, a, b, d:6, c:5 --> \"x, y, a, b, d, c\"
|
||||
@ -107,7 +115,9 @@
|
||||
(if (string= string "*")
|
||||
(get-pgsql-column-specs config section)
|
||||
(split-columns-specs string))))
|
||||
(values (mapcar #'car (sort (copy-list colspecs) #'< :key #'cdr))
|
||||
(values (append
|
||||
(mapcar #'car (sort (copy-list colspecs) #'< :key #'cdr))
|
||||
(when trailing-sep '("trailing")))
|
||||
(mapcar #'car colspecs))))
|
||||
|
||||
(defun parse-only-cols (columns only-cols)
|
||||
@ -181,6 +191,8 @@
|
||||
|
||||
;; now parse fields and columns
|
||||
(let* ((template (params-use-template params))
|
||||
(trailing-sep (read-value-for-param config section "trailing_sep"
|
||||
:template template))
|
||||
(columns (read-value-for-param config section "columns"
|
||||
:template template))
|
||||
(user-defined (append
|
||||
@ -195,11 +207,14 @@
|
||||
|
||||
;; make sense of the old cruft
|
||||
(multiple-value-bind (fields columns)
|
||||
(parse-columns-spec columns config section)
|
||||
(setf (params-fields params) fields)
|
||||
(setf (params-columns params)
|
||||
(compute-columns columns only-cols copy-columns user-defined
|
||||
config section))))
|
||||
(parse-columns-spec columns config section :trailing-sep trailing-sep)
|
||||
(setf (params-fields params) fields)
|
||||
(setf (params-columns params) (compute-columns columns
|
||||
only-cols
|
||||
copy-columns
|
||||
user-defined
|
||||
config
|
||||
section))))
|
||||
params))
|
||||
|
||||
(defun get-connection-params (config section)
|
||||
@ -265,8 +280,12 @@
|
||||
(skip-lines (when value
|
||||
(format nil "skip header = ~a" value))))))
|
||||
|
||||
(defun write-command-to-string (config section &key with-data-inline)
|
||||
"Return the new syntax for the command found in SECTION."
|
||||
(defun write-command-to-string (config section
|
||||
&key with-data-inline (end-command t))
|
||||
"Return the new syntax for the command found in SECTION.
|
||||
|
||||
When WITH-DATA-INLINE is true, instead of using the SECTION's filename
|
||||
option, use the constant INLINE in the command."
|
||||
(let ((params (parse-section config section)))
|
||||
(when (and (params-filename params)
|
||||
(params-separator params))
|
||||
@ -290,9 +309,12 @@
|
||||
when option collect it))
|
||||
|
||||
;; GUCs
|
||||
(format s "~% SET ~{~a~^,~&~10T~};"
|
||||
(format s "~% SET ~{~a~^,~&~10T~}"
|
||||
(loop for (name . setting) in (params-gucs params)
|
||||
collect (format nil "~a to '~a'" name setting)))))))
|
||||
collect (format nil "~a to '~a'" name setting)))
|
||||
|
||||
;; End the command with a semicolon, unless asked not to
|
||||
(format s "~@[;~]" end-command)))))
|
||||
|
||||
(defun convert-ini-into-commands (filename)
|
||||
"Read the INI file at FILENAME and convert each section of it to a command
|
||||
@ -303,9 +325,21 @@
|
||||
for command = (write-command-to-string config section)
|
||||
when command collect it))))
|
||||
|
||||
(defun convert-ini-into-files (filename target-directory &key with-data-inline)
|
||||
(defun convert-ini-into-files (filename target-directory
|
||||
&key
|
||||
with-data-inline
|
||||
include-sql-file)
|
||||
"Reads the INI file at FILENAME and creates files names <section>.load for
|
||||
each section in the INI file, in TARGET-DIRECTORY."
|
||||
each section in the INI file, in TARGET-DIRECTORY.
|
||||
|
||||
When WITH-DATA-INLINE is true, read the CSV file listed as the section's
|
||||
filename and insert its content in the command itself, as inline data.
|
||||
|
||||
When INCLUDE-SQL-FILE is :if-exists, try to find a sibling file to the
|
||||
data file, with the same name and with the \"sql\" type, and use its
|
||||
content in a BEFORE LOAD DO clause.
|
||||
|
||||
When INCLUDE-SQL-FILE is t, not finding the SQL file is an error."
|
||||
(let ((config (read-ini-file filename)))
|
||||
|
||||
;; first mkdir -p
|
||||
@ -317,7 +351,8 @@
|
||||
:name section
|
||||
:type "load")
|
||||
for command = (write-command-to-string config section
|
||||
:with-data-inline with-data-inline)
|
||||
:with-data-inline with-data-inline
|
||||
:end-command nil)
|
||||
when command
|
||||
do (with-open-file (c target
|
||||
:direction :output
|
||||
@ -325,11 +360,41 @@
|
||||
:if-does-not-exist :create
|
||||
:external-format :utf-8)
|
||||
(format c "~a" command)
|
||||
(when with-data-inline
|
||||
(let* ((params (parse-section config section))
|
||||
(datafile
|
||||
(merge-pathnames (params-filename params)
|
||||
(directory-namestring filename))))
|
||||
(format c "~%~%~%~%~a"
|
||||
(slurp-file-into-string datafile)))))
|
||||
|
||||
(let* ((params (parse-section config section))
|
||||
(datafile
|
||||
(merge-pathnames (params-filename params)
|
||||
(directory-namestring filename)))
|
||||
(sqlfile
|
||||
(make-pathname :directory (directory-namestring datafile)
|
||||
:name (pathname-name datafile)
|
||||
:type "sql"))
|
||||
(sql-file-exists (probe-file sqlfile))
|
||||
(sql-commands (when sql-file-exists
|
||||
(slurp-file-into-string sqlfile))))
|
||||
;; First, when asked to, emit the sibling SQL file as a BEFORE LOAD DO clause
|
||||
(if include-sql-file
|
||||
(if sql-file-exists
|
||||
(progn
|
||||
(format c "~%~% BEFORE LOAD DO")
|
||||
(format c "~{~&~3T$$ ~a; $$~^,~};~%"
|
||||
(remove-if
|
||||
(lambda (x)
|
||||
(string= ""
|
||||
(string-trim '(#\Space
|
||||
#\Return
|
||||
#\Linefeed) x)))
|
||||
(sq:split-sequence #\; sql-commands))))
|
||||
(unless (eq sql-file-exists :if-exists)
|
||||
(error "File not found: ~s" sqlfile)))
|
||||
;; don't include sql file
|
||||
(format c ";~%"))
|
||||
|
||||
(when with-data-inline
|
||||
(let* ((params (parse-section config section))
|
||||
(datafile
|
||||
(merge-pathnames (params-filename params)
|
||||
(directory-namestring filename))))
|
||||
(format c "~%~%~%~%~a"
|
||||
(slurp-file-into-string datafile))))))
|
||||
and collect target)))
|
||||
|
||||
@ -1423,7 +1423,9 @@ Here's a quick description of the format we're parsing here:
|
||||
;; normal error processing happen
|
||||
(parse 'commands content)))))
|
||||
|
||||
(defun run-commands (source)
|
||||
(defun run-commands (source
|
||||
&key
|
||||
((:client-min-messages *client-min-messages*) *client-min-messages*))
|
||||
"SOURCE can be a function, which is run, a list, which is compiled as CL
|
||||
code then run, a pathname containing one or more commands that are parsed
|
||||
then run, or a commands string that is then parsed and each command run."
|
||||
|
||||
4
test/README.md
Normal file
4
test/README.md
Normal file
@ -0,0 +1,4 @@
|
||||
# pgloader tests
|
||||
|
||||
In the `parse` directory are tests for the parser only, in the current
|
||||
directory are tests that can be run to import data.
|
||||
32
test/allcols.load
Normal file
32
test/allcols.load
Normal file
@ -0,0 +1,32 @@
|
||||
LOAD CSV
|
||||
FROM inline (a, b, c)
|
||||
INTO postgresql://dim:pgpass@localhost:54393/pgloader?allcols
|
||||
(a, b, c)
|
||||
|
||||
WITH fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by ':'
|
||||
|
||||
SET client_encoding to 'latin1',
|
||||
work_mem to '14MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create table if not exists allcols (
|
||||
a integer primary key,
|
||||
b date,
|
||||
c text
|
||||
);
|
||||
$$;
|
||||
|
||||
|
||||
|
||||
|
||||
1:2008-02-18:first entry
|
||||
2:2008-02-19:second one
|
||||
3:2008-02-20:another
|
||||
4:2008-02-21:still running
|
||||
5:2008-02-22:well, some more
|
||||
6:2008-02-23:antepenultima
|
||||
7:2008-02-24:next to last
|
||||
8:2008-02-25:hey, it's today!
|
||||
@ -1,9 +1,46 @@
|
||||
LOAD CSV
|
||||
FROM '/Users/dim/dev/CL/pgloader/galaxya/yagoa/communaute_profil.csv'
|
||||
INTO postgresql://dim@localhost:54393/yagoa?communaute_profil
|
||||
FROM inline
|
||||
(
|
||||
x,
|
||||
y,
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d
|
||||
)
|
||||
INTO postgresql://dim:pgpass@localhost:54393/pgloader?csv
|
||||
(
|
||||
a,
|
||||
b,
|
||||
d,
|
||||
c
|
||||
)
|
||||
|
||||
WITH truncate,
|
||||
fields not enclosed,
|
||||
fields terminated by '\t'
|
||||
WITH truncate,
|
||||
skip header = 1,
|
||||
fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by ','
|
||||
|
||||
SET work_mem to '32 MB', maintenance_work_mem to '64 MB';
|
||||
SET client_encoding to 'latin1',
|
||||
work_mem to '12MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ CREATE TABLE csv (
|
||||
a bigint,
|
||||
b bigint,
|
||||
c char(2),
|
||||
d text
|
||||
); $$;
|
||||
|
||||
|
||||
|
||||
|
||||
Stupid useless header with a © sign
|
||||
"2.6.190.56","2.6.190.63","33996344","33996351","GB","United Kingdom"
|
||||
"3.0.0.0","4.17.135.31","50331648","68257567","US","United States"
|
||||
"4.17.135.32","4.17.135.63","68257568","68257599","CA","Canada"
|
||||
"4.17.135.64","4.17.142.255","68257600","68259583","US","United States"
|
||||
"4.17.143.0","4.17.143.15","68259584","68259599","CA","Canada"
|
||||
"4.17.143.16","4.18.32.71","68259600","68296775","US","United States"
|
||||
|
||||
31
test/errors.load
Normal file
31
test/errors.load
Normal file
@ -0,0 +1,31 @@
|
||||
LOAD CSV
|
||||
FROM inline (a, c, b, trailing)
|
||||
INTO postgresql://dim:pgpass@localhost:54393/pgloader?errors
|
||||
(a, b, c)
|
||||
|
||||
WITH fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by '|'
|
||||
|
||||
SET client_encoding to 'latin1',
|
||||
work_mem to '12MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create table if not exists errors (
|
||||
a integer primary key,
|
||||
b date,
|
||||
c text
|
||||
);
|
||||
$$;
|
||||
|
||||
|
||||
|
||||
|
||||
1|some first row text|2006-13-11|
|
||||
2|some second row text|2006-11-11|
|
||||
3|some third row text|2006-10-12|
|
||||
4|\ |2006-16-4|
|
||||
5|some fifth row text|2006-5-12|
|
||||
6|some sixth row text|2006-13-10|
|
||||
7|some null date to play with||
|
||||
9
test/parse/csv.load
Normal file
9
test/parse/csv.load
Normal file
@ -0,0 +1,9 @@
|
||||
LOAD CSV
|
||||
FROM '/Users/dim/dev/CL/pgloader/galaxya/yagoa/communaute_profil.csv'
|
||||
INTO postgresql://dim@localhost:54393/yagoa?communaute_profil
|
||||
|
||||
WITH truncate,
|
||||
fields not enclosed,
|
||||
fields terminated by '\t'
|
||||
|
||||
SET work_mem to '32 MB', maintenance_work_mem to '64 MB';
|
||||
33
test/partial.load
Normal file
33
test/partial.load
Normal file
@ -0,0 +1,33 @@
|
||||
LOAD CSV
|
||||
FROM inline (a, b, c, d, e)
|
||||
INTO postgresql://dim:pgpass@localhost:54393/pgloader?partial
|
||||
(a, b, c, e)
|
||||
|
||||
WITH fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by '%'
|
||||
|
||||
SET client_encoding to 'latin1',
|
||||
work_mem to '12MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create table if not exists partial (
|
||||
a integer primary key,
|
||||
b text,
|
||||
c text,
|
||||
d text,
|
||||
e text
|
||||
);
|
||||
$$;
|
||||
|
||||
|
||||
|
||||
|
||||
1%foo%bar%baz%hop
|
||||
2%foo%bar%baz%hop
|
||||
3%foo%bar%baz%hop
|
||||
4%foo%bar%baz%hop
|
||||
5%foo%bar%baz%hop
|
||||
6%foo%bar%baz%hop
|
||||
7%foo%bar%baz%hop
|
||||
31
test/reformat.load
Normal file
31
test/reformat.load
Normal file
@ -0,0 +1,31 @@
|
||||
LOAD CSV
|
||||
FROM inline (id, timestamp)
|
||||
INTO postgresql://dim:pgpass@localhost:54393/pgloader?reformat
|
||||
(
|
||||
id,
|
||||
timestamp timestamptz using (date-with-no-separator timestamp)
|
||||
)
|
||||
|
||||
WITH fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by '|'
|
||||
|
||||
SET client_encoding to 'latin1',
|
||||
work_mem to '12MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create table if not exists reformat (
|
||||
id integer primary key,
|
||||
timestamp timestamp with time zone
|
||||
);
|
||||
$$;
|
||||
|
||||
|
||||
|
||||
|
||||
1|20071119150718
|
||||
2|20041002153048
|
||||
3|20060111060850
|
||||
4|20060111060958
|
||||
5|00000000000000
|
||||
31
test/serial.load
Normal file
31
test/serial.load
Normal file
@ -0,0 +1,31 @@
|
||||
LOAD CSV
|
||||
FROM inline (c, b)
|
||||
INTO postgresql://dim:pgpass@localhost:54393/pgloader?serial
|
||||
(b, c)
|
||||
|
||||
WITH fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by ';'
|
||||
|
||||
SET client_encoding to 'latin1',
|
||||
work_mem to '12MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create table if not exists serial (
|
||||
a serial primary key,
|
||||
b date,
|
||||
c text
|
||||
);
|
||||
$$;
|
||||
|
||||
|
||||
|
||||
|
||||
some first row text;2006-11-11
|
||||
some second row text;2006-11-11
|
||||
some third row text;2006-10-12
|
||||
\ ;2006-10-4
|
||||
some fifth row text;2006-5-12
|
||||
some sixth row text;2006-7-10
|
||||
some null date to play with;
|
||||
36
test/simple.load
Normal file
36
test/simple.load
Normal file
@ -0,0 +1,36 @@
|
||||
LOAD CSV
|
||||
FROM inline (a, c, b, trailing)
|
||||
INTO postgresql://dim:pgpass@localhost:54393/pgloader?simple
|
||||
(a, b, c)
|
||||
|
||||
WITH truncate,
|
||||
skip header = 2,
|
||||
fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by '|'
|
||||
|
||||
SET client_encoding to 'latin1',
|
||||
datestyle to 'dmy',
|
||||
work_mem to '12MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ CREATE TABLE if not exists simple (
|
||||
a integer primary key,
|
||||
b date,
|
||||
c text
|
||||
);
|
||||
$$;
|
||||
|
||||
|
||||
|
||||
|
||||
This is a stupid useless header like you sometime find in CSV files
|
||||
id|data|date|
|
||||
1|some first row text|2006-11-11|
|
||||
2|some second row text|13/11/2006|
|
||||
3|some third row text|12-10-2006|
|
||||
4|\ |2006-10-4|
|
||||
5|some fifth row text|2006-5-12|
|
||||
6|some sixth row text|10/7/6|
|
||||
7|some null date to play with||
|
||||
34
test/udc.load
Normal file
34
test/udc.load
Normal file
@ -0,0 +1,34 @@
|
||||
LOAD CSV
|
||||
FROM inline WITH ENCODING latin1
|
||||
(d, b, x, y)
|
||||
INTO postgresql://dim:pgpass@localhost:54393/pgloader?udc
|
||||
(
|
||||
b,
|
||||
c text using "constant value",
|
||||
d
|
||||
)
|
||||
|
||||
WITH fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by '%'
|
||||
|
||||
SET client_encoding to 'latin1',
|
||||
work_mem to '12MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create table if not exists udc (
|
||||
b integer primary key,
|
||||
c text,
|
||||
d integer
|
||||
);
|
||||
$$;
|
||||
|
||||
|
||||
|
||||
|
||||
1%5%foo%bar
|
||||
2%10%bar%toto
|
||||
3%4%toto%titi
|
||||
4%18%titi%baz
|
||||
5%2%baz%foo
|
||||
BIN
test/xzero.load
Normal file
BIN
test/xzero.load
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user