mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-07 06:47:00 +02:00
Fix loading data from stdin: fix #53.
The stdin support really was one brick shy of a load, and in particular with-open-file was used against a stream when using that option.
This commit is contained in:
parent
b5dec87915
commit
429232c3de
@ -111,6 +111,7 @@
|
||||
#:copy-to
|
||||
#:copy-database
|
||||
#:filter-column-list
|
||||
#:with-open-file-or-stream
|
||||
#:get-pathname
|
||||
#:get-absolute-pathname
|
||||
#:project-fields
|
||||
|
@ -65,12 +65,13 @@
|
||||
|
||||
Finally returns how many rows were read and processed."
|
||||
(let ((filenames (case (source-type csv)
|
||||
(:stdin (list (source csv)))
|
||||
(:inline (list (car (source csv))))
|
||||
(:regex (source csv))
|
||||
(t (list (source csv))))))
|
||||
(loop for filename in filenames
|
||||
do
|
||||
(with-open-file
|
||||
(with-open-file-or-stream
|
||||
;; we just ignore files that don't exist
|
||||
(input filename
|
||||
:direction :input
|
||||
|
@ -47,12 +47,13 @@
|
||||
|
||||
Returns how many rows were read and processed."
|
||||
(let ((filenames (case (source-type fixed)
|
||||
(:stdin (list (source fixed))) ; one source, wrapped in a list like the default clause
|
||||
(:inline (list (car (source fixed))))
|
||||
(:regex (source fixed))
|
||||
(t (list (source fixed))))))
|
||||
(loop for filename in filenames
|
||||
do
|
||||
(with-open-file
|
||||
(with-open-file-or-stream
|
||||
;; we just ignore files that don't exist
|
||||
(input filename
|
||||
:direction :input
|
||||
|
@ -133,6 +133,19 @@
|
||||
;;;
|
||||
;;; Some common tools for file based sources, such as CSV and FIXED
|
||||
;;;
|
||||
(defmacro with-open-file-or-stream ((&whole arguments
                                            stream filename-or-stream
                                            &key &allow-other-keys)
                                    &body body)
  "Run BODY with the STREAM variable bound to an input source.

   FILENAME-OR-STREAM is evaluated exactly once. When its value is of type
   STREAM, the STREAM variable is bound to that very stream, which is left
   open when BODY returns. Otherwise the value is handed to WITH-OPEN-FILE
   together with any remaining keyword arguments (such as :direction), and
   the file is closed when BODY exits."
  (let ((source (gensym "SOURCE")))
    `(let ((,source ,filename-or-stream))
       (typecase ,source
         ;; already a stream (e.g. stdin): use it as is, don't open or close
         (stream (let ((,stream ,source))
                   ,@body))

         ;; anything else goes to with-open-file; (cddr arguments) holds the
         ;; keyword options given after the two leading variables
         (t      (with-open-file (,stream ,source ,@(cddr arguments))
                   ,@body))))))
|
||||
|
||||
(defun get-pathname (dbname table-name &key (csv-path-root *csv-path-root*))
|
||||
"Return a pathname where to read or write the file data"
|
||||
(make-pathname
|
||||
|
@ -5,7 +5,7 @@ OUT = $(TESTS:.load=.out)
|
||||
REMOTE = archive.load bossa-all.load bossa.load census-places.load dbf-zip.load
|
||||
LOCAL = $(filter-out $(REMOTE:.load=.out),$(OUT))
|
||||
|
||||
PGLOADER ?= ../build/pgloader.exe
|
||||
PGLOADER ?= ../build/bin/pgloader
|
||||
|
||||
local: prepare $(LOCAL)
|
||||
|
||||
@ -48,6 +48,9 @@ sakila.out: sakila sakila.load
|
||||
-$(PGLOADER) sakila.load
|
||||
@echo
|
||||
|
||||
# The load file reads FROM stdin, so pipe the data file into pgloader.
# The data file is listed as a prerequisite so that make reports a missing
# file itself; only the load file ($<) is passed on the command line.
csv-districts-stdin.out: csv-districts-stdin.load data/2013_Gaz_113CDs_national.txt
	cat $(word 2,$^) | $(PGLOADER) $<
|
||||
|
||||
# General case where we do NOT expect any error
|
||||
%.out: %.load
|
||||
$(PGLOADER) $<
|
||||
|
47
test/csv-districts-stdin.load
Normal file
47
test/csv-districts-stdin.load
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* The data file comes from the US census website:
|
||||
*
|
||||
* http://www.census.gov/geo/maps-data/data/gazetteer2013.html
|
||||
*
|
||||
* We import it directly into pgloader git repository so that we have at
|
||||
* least a CSV test where we read a local file through stdin...
|
||||
*/
|
||||
|
||||
LOAD CSV
|
||||
FROM stdin
|
||||
(
|
||||
usps, -- United States Postal Service State Abbreviation
|
||||
geoid, -- Geographic Identifier
|
||||
aland, -- Land Area (square meters)
|
||||
awater, -- Water Area (square meters)
|
||||
aland_sqmi, -- SQMI Land Area (square miles)
|
||||
awater_sqmi, -- SQMI Water Area (square miles)
|
||||
intptlat, -- Latitude (decimal degrees)
|
||||
intptlong -- Longitude (decimal degrees)
|
||||
)
|
||||
|
||||
INTO postgresql:///pgloader?districts
|
||||
(
|
||||
usps, geoid, aland, awater, aland_sqmi, awater_sqmi,
|
||||
location point using (format nil "(~a,~a)" intptlong intptlat)
|
||||
)
|
||||
|
||||
WITH truncate,
|
||||
skip header = 1,
|
||||
batch rows = 200,
|
||||
batch size = 1024 kB,
|
||||
batch concurrency = 3,
|
||||
fields terminated by '\t'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ drop table if exists districts; $$,
|
||||
$$ create table districts (
|
||||
usps text,
|
||||
geoid text,
|
||||
aland bigint,
|
||||
awater bigint,
|
||||
aland_sqmi double precision,
|
||||
awater_sqmi double precision,
|
||||
location point
|
||||
);
|
||||
$$;
|
Loading…
Reference in New Issue
Block a user