Fix loading data from stdin: fix #53.

The stdin support really was one brick shy of a load, and in particular
with-open-file was used against a stream when using that option.
This commit is contained in:
Dimitri Fontaine 2014-04-27 23:38:02 +02:00
parent b5dec87915
commit 429232c3de
6 changed files with 69 additions and 3 deletions

View File

@ -111,6 +111,7 @@
#:copy-to #:copy-to
#:copy-database #:copy-database
#:filter-column-list #:filter-column-list
#:with-open-file-or-stream
#:get-pathname #:get-pathname
#:get-absolute-pathname #:get-absolute-pathname
#:project-fields #:project-fields

View File

@ -65,12 +65,13 @@
Finally returns how many rows where read and processed." Finally returns how many rows where read and processed."
(let ((filenames (case (source-type csv) (let ((filenames (case (source-type csv)
(:stdin (list (source csv)))
(:inline (list (car (source csv)))) (:inline (list (car (source csv))))
(:regex (source csv)) (:regex (source csv))
(t (list (source csv)))))) (t (list (source csv))))))
(loop for filename in filenames (loop for filename in filenames
do do
(with-open-file (with-open-file-or-stream
;; we just ignore files that don't exist ;; we just ignore files that don't exist
(input filename (input filename
:direction :input :direction :input

View File

@ -47,12 +47,13 @@
Returns how many rows where read and processed." Returns how many rows where read and processed."
(let ((filenames (case (source-type fixed) (let ((filenames (case (source-type fixed)
(:stdin (list (source csv)))
(:inline (list (car (source fixed)))) (:inline (list (car (source fixed))))
(:regex (source fixed)) (:regex (source fixed))
(t (list (source fixed)))))) (t (list (source fixed))))))
(loop for filename in filenames (loop for filename in filenames
do do
(with-open-file (with-open-file-or-stream
;; we just ignore files that don't exist ;; we just ignore files that don't exist
(input filename (input filename
:direction :input :direction :input

View File

@ -133,6 +133,19 @@
;;; ;;;
;;; Some common tools for file based sources, such as CSV and FIXED ;;; Some common tools for file based sources, such as CSV and FIXED
;;; ;;;
(defmacro with-open-file-or-stream ((&whole arguments
                                     stream filename-or-stream
                                     &key &allow-other-keys)
                                    &body body)
  "Run BODY with the STREAM variable bound to an input source.

   FILENAME-OR-STREAM is evaluated exactly once. When its value is of type
   STREAM, the STREAM variable is bound to that very stream and it is not
   closed on exit: it belongs to the caller. Otherwise the value is handed
   to WITH-OPEN-FILE together with the remaining open options found in
   ARGUMENTS, and BODY runs within that WITH-OPEN-FILE form."
  ;; Hold the evaluated source in a gensym so that the form is neither
  ;; evaluated twice nor able to capture a name used in BODY.
  (let ((source (gensym "SOURCE")))
    `(let ((,source ,filename-or-stream))
       (typecase ,source
         ;; Already a stream: use the one we were given rather than
         ;; assuming it has to be *standard-input*.
         (stream (let ((,stream ,source))
                   ,@body))
         ;; (cddr arguments) skips the variable and the source form, which
         ;; leaves only the keyword options meant for OPEN.
         (t (with-open-file (,stream ,source ,@(cddr arguments))
              ,@body))))))
(defun get-pathname (dbname table-name &key (csv-path-root *csv-path-root*)) (defun get-pathname (dbname table-name &key (csv-path-root *csv-path-root*))
"Return a pathname where to read or write the file data" "Return a pathname where to read or write the file data"
(make-pathname (make-pathname

View File

@ -5,7 +5,7 @@ OUT = $(TESTS:.load=.out)
REMOTE = archive.load bossa-all.load bossa.load census-places.load dbf-zip.load REMOTE = archive.load bossa-all.load bossa.load census-places.load dbf-zip.load
LOCAL = $(filter-out $(REMOTE:.load=.out),$(OUT)) LOCAL = $(filter-out $(REMOTE:.load=.out),$(OUT))
PGLOADER ?= ../build/pgloader.exe PGLOADER ?= ../build/bin/pgloader
local: prepare $(LOCAL) local: prepare $(LOCAL)
@ -48,6 +48,9 @@ sakila.out: sakila sakila.load
-$(PGLOADER) sakila.load -$(PGLOADER) sakila.load
@echo @echo
# Feed the data file to pgloader on its standard input. A redirection is used
# rather than a pipe from cat so that a missing data file fails the recipe
# instead of running pgloader against empty input.
csv-districts-stdin.out: csv-districts-stdin.load
	$(PGLOADER) $< < data/2013_Gaz_113CDs_national.txt
# General case where we do NOT expect any error # General case where we do NOT expect any error
%.out: %.load %.out: %.load
$(PGLOADER) $< $(PGLOADER) $<

View File

@ -0,0 +1,47 @@
/*
* The data file comes from the US census website:
*
* http://www.census.gov/geo/maps-data/data/gazetteer2013.html
*
* This command reads its CSV data from the standard input (FROM stdin)
* rather than from a file named in the command. The test Makefile feeds
* data/2013_Gaz_113CDs_national.txt to pgloader on its standard input,
* so that we have at least a CSV test where we read from stdin.
*/
LOAD CSV
FROM stdin
(
usps, -- United States Postal Service State Abbreviation
geoid, -- Geographic Identifier
aland, -- Land Area (square meters)
awater, -- Water Area (square meters)
aland_sqmi, -- SQMI Land Area (square miles)
awater_sqmi, -- SQMI Water Area (square miles)
intptlat, -- Latitude (decimal degrees)
intptlong -- Longitude (decimal degrees)
)
INTO postgresql:///pgloader?districts
(
usps, geoid, aland, awater, aland_sqmi, awater_sqmi,
location point using (format nil "(~a,~a)" intptlong intptlat)
)
WITH truncate,
skip header = 1,
batch rows = 200,
batch size = 1024 kB,
batch concurrency = 3,
fields terminated by '\t'
BEFORE LOAD DO
$$ drop table if exists districts; $$,
$$ create table districts (
usps text,
geoid text,
aland bigint,
awater bigint,
aland_sqmi double precision,
awater_sqmi double precision,
location point
);
$$;