mirror of
https://github.com/dimitri/pgloader.git
synced 2026-05-04 10:31:02 +02:00
Improve guessing of CSV parameters.
In this commit we fail the guess faster, which allows testing against a much larger sample. The sample size is still hard-coded, but is now 1000 lines. Also add a test case; see #618.
This commit is contained in:
parent
8004a9dd59
commit
b685c8801d
@ -49,11 +49,11 @@
|
||||
:separator separator
|
||||
:escape escape)
|
||||
((or cl-csv:csv-parse-error type-error) ()
|
||||
nil)))))
|
||||
(return-from try-csv-params nil))))))
|
||||
(and rows
|
||||
(every (lambda (row) (= cols (length row))) rows))))
|
||||
|
||||
(defun guess-csv-params (filename-or-stream nb-cols &key (sample-size 10))
|
||||
(defun guess-csv-params (filename-or-stream nb-cols &key (sample-size 1000))
|
||||
"Try a bunch of field separators with LINES and return the first one that
|
||||
returns COLS number of columns"
|
||||
(let ((sample
|
||||
|
||||
@ -11,6 +11,7 @@ REGRESS= allcols.load \
|
||||
csv-error.load \
|
||||
csv-escape-mode.load \
|
||||
csv-filename-pattern.load \
|
||||
csv-guess.load \
|
||||
csv-header.load \
|
||||
csv-json.load \
|
||||
csv-keep-extra-blanks.load \
|
||||
|
||||
21
test/csv-guess.load
Normal file
21
test/csv-guess.load
Normal file
@ -0,0 +1,21 @@
|
||||
LOAD CSV
|
||||
FROM data/track.csv
|
||||
INTO postgresql:///pgloader?csv.track
|
||||
|
||||
WITH truncate
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create schema if not exists csv; $$,
|
||||
$$ drop table if exists csv.track; $$,
|
||||
$$ create table csv.track (
|
||||
trackid bigserial primary key,
|
||||
track text,
|
||||
album text,
|
||||
media text,
|
||||
genre text,
|
||||
composer text,
|
||||
milliseconds bigint,
|
||||
bytes bigint,
|
||||
unitprice numeric
|
||||
);
|
||||
$$;
|
||||
3503
test/data/track.csv
Normal file
3503
test/data/track.csv
Normal file
File diff suppressed because it is too large
Load Diff
3503
test/regress/expected/csv-guess.out
Normal file
3503
test/regress/expected/csv-guess.out
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user