Mirror of https://github.com/dimitri/pgloader.git
In order to later be able to have more worker threads sharing the load (multiple readers and/or writers, maybe more specialized threads too), have all the stats be managed centrally by a single thread. We already have a "monitor" thread that gets passed log messages so that the output buffer is not subject to race conditions; extend its use to also deal with statistics messages.

In the current code, we send a message each time we read a row. In some future commits we should probably reduce the messaging here to something like one message per batch in the common case.

Also, as a nice side effect of the code simplification and refactoring, this fixes #283, wherein the before/after sections of individual CSV files within an ARCHIVE command were not counted in the reporting.
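The pattern the commit describes boils down to one consumer thread owning the counters while reader/writer threads only post messages. Below is a minimal sketch of that pattern in Python; it is not pgloader's actual Common Lisp implementation, and the message shape, labels, and function names are invented for illustration.

# Sketch: a single "monitor" thread owns the stats; workers only send messages.
import queue
import threading
from collections import Counter

stats_queue = queue.Queue()
STOP = object()          # sentinel telling the monitor to shut down

def monitor():
    # Single consumer: it is the only thread touching `counts`,
    # so no locks are needed and the reporting cannot race.
    counts = Counter()
    while True:
        msg = stats_queue.get()
        if msg is STOP:
            break
        label, rows = msg
        counts[label] += rows
    print(dict(counts))

def worker(label, batches):
    # Readers/writers never update shared counters directly; they only post
    # messages. One message per batch (rather than per row) is the cheaper
    # variant the commit message suggests as future work.
    for batch in batches:
        stats_queue.put((label, len(batch)))

mon = threading.Thread(target=monitor)
mon.start()

workers = [threading.Thread(target=worker, args=("geolite.location", [range(100)] * 3)),
           threading.Thread(target=worker, args=("geolite.blocks", [range(500)] * 2))]
for w in workers:
    w.start()
for w in workers:
    w.join()

stats_queue.put(STOP)
mon.join()               # prints {'geolite.location': 300, 'geolite.blocks': 1000}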
/*
 * Loading from a ZIP archive containing CSV files. The full test can be
 * done using the archive found at
 * http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip
 *
 * And a very light version of this data set is found at
 * http://pgsql.tapoueh.org/temp/foo.zip for quick testing.
 */

LOAD ARCHIVE
   FROM http://pgsql.tapoueh.org/temp/foo.zip
   INTO postgresql:///ip4r

   BEFORE LOAD
     DO $$ create extension if not exists ip4r; $$,
        $$ create schema if not exists geolite; $$

     EXECUTE 'geolite.sql'

   LOAD CSV
        FROM FILENAME MATCHING ~/GeoLiteCity-Location.csv/
             WITH ENCODING iso-8859-1
             (
                locId,
                country,
                region     [ null if blanks ],
                city       [ null if blanks ],
                postalCode [ null if blanks ],
                latitude,
                longitude,
                metroCode  [ null if blanks ],
                areaCode   [ null if blanks ]
             )
        INTO postgresql:///ip4r?geolite.location
             (
                locid,country,region,city,postalCode,
                location point using (format nil "(~a,~a)" longitude latitude),
                metroCode,areaCode
             )
        WITH skip header = 2,
             fields optionally enclosed by '"',
             fields escaped by double-quote,
             fields terminated by ','

   AND LOAD CSV
        FROM FILENAME MATCHING ~/GeoLiteCity-Blocks.csv/
             WITH ENCODING iso-8859-1
             (
                startIpNum, endIpNum, locId
             )
        INTO postgresql:///ip4r?geolite.blocks
             (
                iprange ip4r using (ip-range startIpNum endIpNum),
                locId
             )
        WITH skip header = 2,
             fields optionally enclosed by '"',
             fields escaped by double-quote,
             fields terminated by ','

   AFTER LOAD DO
     $$ create index blocks_ip4r_idx on geolite.blocks using gist(iprange); $$;
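A command file like this is passed directly to the pgloader command line, for example as pgloader archive.load (the filename archive.load is only an illustration). This assumes the target ip4r database already exists and that the geolite.sql script referenced by EXECUTE can be found by pgloader in your setup; the BEFORE LOAD and AFTER LOAD sections of this ARCHIVE command are exactly the parts whose reporting is fixed by #283 above.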