Port tests from pgloader 2.x, implement trailing-sep.

This commit is contained in:
Dimitri Fontaine 2013-10-13 22:10:07 +02:00
parent bfaf996265
commit 89aaabd179
19 changed files with 377 additions and 32 deletions

View File

@ -57,14 +57,14 @@
:template template)))
(datestyle (read-value-for-param config section "datestyle"
:template template)))
(setf (params-gucs params)
(append
(when encoding (list (cons "client_encoding" encoding)))
(when datestyle (list (cons "datestyle" datestyle)))
(get-gucs config section)
(when template (get-gucs config template))
(get-gucs config *global-section*)))))
(merge-gucs
(get-gucs config section)
(when template (get-gucs config template))
(get-gucs config *global-section*))))))
(defun get-gucs (config section)
"Get PostgreSQL settings from SECTION."
@ -73,6 +73,14 @@
when (and (< 10 (length option)) (string= "pg_option_" option :end2 10))
collect (cons (subseq option 10) value)))
(defun merge-gucs (&rest gucs)
  "Merge several guc lists into a consolidated one. When the same GUC is
   found more than once, we keep the one found first."
  ;; Walk the lists in order, keeping only the first (name . setting)
  ;; entry seen for any given GUC name.
  (let ((merged '()))
    (dolist (alist gucs (nreverse merged))
      (dolist (guc alist)
        (unless (member (car guc) merged :key #'car :test #'string=)
          (push guc merged))))))
(defun user-defined-columns (config section)
"Fetch all option that begin with udc_ as user defined columns"
(loop for (option . value) in (ini:items config section)
@ -96,7 +104,7 @@
for name in (list-columns dbname table-name)
collect (cons name pos))))
(defun parse-columns-spec (string config section)
(defun parse-columns-spec (string config section &key trailing-sep)
"Parse old-style columns specification, such as:
* --> nil
x, y, a, b, d:6, c:5 --> \"x, y, a, b, d, c\"
@ -107,7 +115,9 @@
(if (string= string "*")
(get-pgsql-column-specs config section)
(split-columns-specs string))))
(values (mapcar #'car (sort (copy-list colspecs) #'< :key #'cdr))
(values (append
(mapcar #'car (sort (copy-list colspecs) #'< :key #'cdr))
(when trailing-sep '("trailing")))
(mapcar #'car colspecs))))
(defun parse-only-cols (columns only-cols)
@ -181,6 +191,8 @@
;; now parse fields and columns
(let* ((template (params-use-template params))
(trailing-sep (read-value-for-param config section "trailing_sep"
:template template))
(columns (read-value-for-param config section "columns"
:template template))
(user-defined (append
@ -195,11 +207,14 @@
;; make sense of the old cruft
(multiple-value-bind (fields columns)
(parse-columns-spec columns config section)
(setf (params-fields params) fields)
(setf (params-columns params)
(compute-columns columns only-cols copy-columns user-defined
config section))))
(parse-columns-spec columns config section :trailing-sep trailing-sep)
(setf (params-fields params) fields)
(setf (params-columns params) (compute-columns columns
only-cols
copy-columns
user-defined
config
section))))
params))
(defun get-connection-params (config section)
@ -265,8 +280,12 @@
(skip-lines (when value
(format nil "skip header = ~a" value))))))
(defun write-command-to-string (config section &key with-data-inline)
"Return the new syntax for the command found in SECTION."
(defun write-command-to-string (config section
&key with-data-inline (end-command t))
"Return the new syntax for the command found in SECTION.
When WITH-DATA-INLINE is true, instead of using the SECTION's filename
option, use the constant INLINE in the command."
(let ((params (parse-section config section)))
(when (and (params-filename params)
(params-separator params))
@ -290,9 +309,12 @@
when option collect it))
;; GUCs
(format s "~% SET ~{~a~^,~&~10T~};"
(format s "~% SET ~{~a~^,~&~10T~}"
(loop for (name . setting) in (params-gucs params)
collect (format nil "~a to '~a'" name setting)))))))
collect (format nil "~a to '~a'" name setting)))
;; End the command with a semicolon, unless asked not to
(format s "~@[;~]" end-command)))))
(defun convert-ini-into-commands (filename)
"Read the INI file at FILENAME and convert each section of it to a command
@ -303,9 +325,21 @@
for command = (write-command-to-string config section)
when command collect it))))
(defun convert-ini-into-files (filename target-directory &key with-data-inline)
(defun convert-ini-into-files (filename target-directory
&key
with-data-inline
include-sql-file)
"Reads the INI file at FILENAME and creates files names <section>.load for
each section in the INI file, in TARGET-DIRECTORY."
each section in the INI file, in TARGET-DIRECTORY.
When WITH-DATA-INLINE is true, read the CSV file listed as the section's
filename and insert its content in the command itself, as inline data.
When INCLUDE-SQL-FILE is :if-exists, try to find a sibling file to the
data file, with the same name and with the \"sql\" type, and use its
content in a BEFORE LOAD DO clause.
When INCLUDE-SQL-FILE is t, not finding the SQL file is an error."
(let ((config (read-ini-file filename)))
;; first mkdir -p
@ -317,7 +351,8 @@
:name section
:type "load")
for command = (write-command-to-string config section
:with-data-inline with-data-inline)
:with-data-inline with-data-inline
:end-command nil)
when command
do (with-open-file (c target
:direction :output
@ -325,11 +360,41 @@
:if-does-not-exist :create
:external-format :utf-8)
(format c "~a" command)
(when with-data-inline
(let* ((params (parse-section config section))
(datafile
(merge-pathnames (params-filename params)
(directory-namestring filename))))
(format c "~%~%~%~%~a"
(slurp-file-into-string datafile)))))
(let* ((params (parse-section config section))
(datafile
(merge-pathnames (params-filename params)
(directory-namestring filename)))
(sqlfile
(make-pathname :directory (directory-namestring datafile)
:name (pathname-name datafile)
:type "sql"))
(sql-file-exists (probe-file sqlfile))
(sql-commands (when sql-file-exists
(slurp-file-into-string sqlfile))))
;; First
(if include-sql-file
(if sql-file-exists
(progn
(format c "~%~% BEFORE LOAD DO")
(format c "~{~&~3T$$ ~a; $$~^,~};~%"
(remove-if
(lambda (x)
(string= ""
(string-trim '(#\Space
#\Return
#\Linefeed) x)))
(sq:split-sequence #\; sql-commands))))
(unless (eq sql-file-exists :if-exists)
(error "File not found: ~s" sqlfile)))
;; don't include sql file
(format c ";~%"))
(when with-data-inline
(let* ((params (parse-section config section))
(datafile
(merge-pathnames (params-filename params)
(directory-namestring filename))))
(format c "~%~%~%~%~a"
(slurp-file-into-string datafile))))))
and collect target)))

View File

@ -1423,7 +1423,9 @@ Here's a quick description of the format we're parsing here:
;; normal error processing happen
(parse 'commands content)))))
(defun run-commands (source)
(defun run-commands (source
&key
((:client-min-messages *client-min-messages*) *client-min-messages*))
"SOURCE can be a function, which is run, a list, which is compiled as CL
code then run, a pathname containing one or more commands that are parsed
then run, or a commands string that is then parsed and each command run."

4
test/README.md Normal file
View File

@ -0,0 +1,4 @@
# pgloader tests
In the `parser` directory are tests for the parser only; in the current
directory are tests that can be run to import data.

32
test/allcols.load Normal file
View File

@ -0,0 +1,32 @@
LOAD CSV
FROM inline (a, b, c)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?allcols
(a, b, c)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ':'
SET client_encoding to 'latin1',
work_mem to '14MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists allcols (
a integer primary key,
b date,
c text
);
$$;
1:2008-02-18:first entry
2:2008-02-19:second one
3:2008-02-20:another
4:2008-02-21:still running
5:2008-02-22:well, some more
6:2008-02-23:antepenultima
7:2008-02-24:next to last
8:2008-02-25:hey, it's today!

View File

@ -1,9 +1,46 @@
LOAD CSV
FROM '/Users/dim/dev/CL/pgloader/galaxya/yagoa/communaute_profil.csv'
INTO postgresql://dim@localhost:54393/yagoa?communaute_profil
FROM inline
(
x,
y,
a,
b,
c,
d
)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?csv
(
a,
b,
d,
c
)
WITH truncate,
fields not enclosed,
fields terminated by '\t'
WITH truncate,
skip header = 1,
fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ','
SET work_mem to '32 MB', maintenance_work_mem to '64 MB';
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ CREATE TABLE csv (
a bigint,
b bigint,
c char(2),
d text
); $$;
Stupid useless header with a © sign
"2.6.190.56","2.6.190.63","33996344","33996351","GB","United Kingdom"
"3.0.0.0","4.17.135.31","50331648","68257567","US","United States"
"4.17.135.32","4.17.135.63","68257568","68257599","CA","Canada"
"4.17.135.64","4.17.142.255","68257600","68259583","US","United States"
"4.17.143.0","4.17.143.15","68259584","68259599","CA","Canada"
"4.17.143.16","4.18.32.71","68259600","68296775","US","United States"

31
test/errors.load Normal file
View File

@ -0,0 +1,31 @@
LOAD CSV
FROM inline (a, c, b, trailing)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?errors
(a, b, c)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '|'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists errors (
a integer primary key,
b date,
c text
);
$$;
1|some first row text|2006-13-11|
2|some second row text|2006-11-11|
3|some third row text|2006-10-12|
4|\ |2006-16-4|
5|some fifth row text|2006-5-12|
6|some sixth row text|2006-13-10|
7|some null date to play with||

9
test/parse/csv.load Normal file
View File

@ -0,0 +1,9 @@
LOAD CSV
FROM '/Users/dim/dev/CL/pgloader/galaxya/yagoa/communaute_profil.csv'
INTO postgresql://dim@localhost:54393/yagoa?communaute_profil
WITH truncate,
fields not enclosed,
fields terminated by '\t'
SET work_mem to '32 MB', maintenance_work_mem to '64 MB';

33
test/partial.load Normal file
View File

@ -0,0 +1,33 @@
LOAD CSV
FROM inline (a, b, c, d, e)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?partial
(a, b, c, e)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '%'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists partial (
a integer primary key,
b text,
c text,
d text,
e text
);
$$;
1%foo%bar%baz%hop
2%foo%bar%baz%hop
3%foo%bar%baz%hop
4%foo%bar%baz%hop
5%foo%bar%baz%hop
6%foo%bar%baz%hop
7%foo%bar%baz%hop

31
test/reformat.load Normal file
View File

@ -0,0 +1,31 @@
LOAD CSV
FROM inline (id, timestamp)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?reformat
(
id,
timestamp timestamptz using (date-with-no-separator timestamp)
)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '|'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists reformat (
id integer primary key,
timestamp timestamp with time zone
);
$$;
1|20071119150718
2|20041002153048
3|20060111060850
4|20060111060958
5|00000000000000

31
test/serial.load Normal file
View File

@ -0,0 +1,31 @@
LOAD CSV
FROM inline (c, b)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?serial
(b, c)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ';'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists serial (
a serial primary key,
b date,
c text
);
$$;
some first row text;2006-11-11
some second row text;2006-11-11
some third row text;2006-10-12
\ ;2006-10-4
some fifth row text;2006-5-12
some sixth row text;2006-7-10
some null date to play with;

36
test/simple.load Normal file
View File

@ -0,0 +1,36 @@
LOAD CSV
FROM inline (a, c, b, trailing)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?simple
(a, b, c)
WITH truncate,
skip header = 2,
fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '|'
SET client_encoding to 'latin1',
datestyle to 'dmy',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ CREATE TABLE if not exists simple (
a integer primary key,
b date,
c text
);
$$;
This is a stupid useless header like you sometime find in CSV files
id|data|date|
1|some first row text|2006-11-11|
2|some second row text|13/11/2006|
3|some third row text|12-10-2006|
4|\ |2006-10-4|
5|some fifth row text|2006-5-12|
6|some sixth row text|10/7/6|
7|some null date to play with||

34
test/udc.load Normal file
View File

@ -0,0 +1,34 @@
LOAD CSV
FROM inline WITH ENCODING latin1
(d, b, x, y)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?udc
(
b,
c text using "constant value",
d
)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '%'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists udc (
b integer primary key,
c text,
d integer
);
$$;
1%5%foo%bar
2%10%bar%toto
3%4%toto%titi
4%18%titi%baz
5%2%baz%foo

BIN
test/xzero.load Normal file

Binary file not shown.