mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-07 23:07:00 +02:00
Add a Citus distribution test case, from the citus tutorial.
This commit is contained in:
parent
d3b21ac54d
commit
7b487ddaca
1
test/citus/.gitignore
vendored
Normal file
1
test/citus/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
*.csv
|
20
test/citus/Makefile
Normal file
20
test/citus/Makefile
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
DATASET = companies campaigns ads clicks impressions geo_ips
|
||||||
|
CSV = $(addsuffix .csv,$(DATASET))
|
||||||
|
DROP = DROP TABLE IF EXISTS companies, campaigns, ads, clicks, impressions, geo_ips
|
||||||
|
|
||||||
|
all: schema data ;
|
||||||
|
|
||||||
|
schema:
|
||||||
|
psql --single-transaction -c "$(DROP)" -d hackathon
|
||||||
|
psql --single-transaction -f company.sql -d hackathon
|
||||||
|
|
||||||
|
data: fetch
|
||||||
|
psql -f copy.sql -d hackathon
|
||||||
|
../../build/bin/pgloader ./data.load
|
||||||
|
|
||||||
|
fetch: $(CSV) ;
|
||||||
|
|
||||||
|
%.csv:
|
||||||
|
curl -O https://examples.citusdata.com/mt_ref_arch/$@
|
||||||
|
|
||||||
|
.PHONY: schema data fetch
|
42
test/citus/README.md
Normal file
42
test/citus/README.md
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# Citus Multi-Tenant Automatic Distribution
|
||||||
|
|
||||||
|
In this test case we follow the following documentation:
|
||||||
|
|
||||||
|
https://docs.citusdata.com/en/v7.5/use_cases/multi_tenant.html
|
||||||
|
|
||||||
|
We install the schema before Citus migration, and load the data without the
|
||||||
|
backfilling that is already done. For that we use pgloader to ignore the
|
||||||
|
company_id column in the tables that didn't have this column prior to the
|
||||||
|
Citus migration effort.
|
||||||
|
|
||||||
|
Then the following `company.load` file contains the pgloader command that
|
||||||
|
runs a full migration from PostgreSQL to Citus:
|
||||||
|
|
||||||
|
```
|
||||||
|
load database
|
||||||
|
from pgsql:///hackathon
|
||||||
|
into pgsql://localhost:9700/dim
|
||||||
|
|
||||||
|
with include drop, reset no sequences
|
||||||
|
|
||||||
|
distribute companies using id;
|
||||||
|
```
|
||||||
|
|
||||||
|
Tables are marked distributed, the company_id column is added where it's
|
||||||
|
needed, primary keys and foreign keys definitions are altered to the new
|
||||||
|
model, and finally the data is backfilled automatically in the target table
|
||||||
|
thanks to generating queries like the following:
|
||||||
|
|
||||||
|
~~~
|
||||||
|
SELECT "campaigns".company_id::text,
|
||||||
|
"impressions".id::text,
|
||||||
|
"impressions".ad_id::text,
|
||||||
|
"impressions".seen_at::text,
|
||||||
|
"impressions".site_url::text,
|
||||||
|
"impressions".cost_per_impression_usd::text,
|
||||||
|
"impressions".user_ip::text,
|
||||||
|
"impressions".user_data::text
|
||||||
|
FROM "public"."impressions"
|
||||||
|
JOIN "public"."ads" ON impressions.ad_id = ads.id
|
||||||
|
JOIN "public"."campaigns" ON ads.campaign_id = campaigns.id
|
||||||
|
~~~
|
12
test/citus/company.load
Normal file
12
test/citus/company.load
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
load database
|
||||||
|
from pgsql:///hackathon
|
||||||
|
into pgsql://localhost:9700/dim
|
||||||
|
|
||||||
|
with include drop, reset no sequences
|
||||||
|
|
||||||
|
distribute companies using id
|
||||||
|
-- distribute campaigns using company_id
|
||||||
|
-- distribute ads using company_id from campaigns
|
||||||
|
-- distribute clicks using company_id from ads, campaigns
|
||||||
|
-- distribute impressions using company_id from ads, campaigns
|
||||||
|
;
|
51
test/citus/company.sql
vendored
Normal file
51
test/citus/company.sql
vendored
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
CREATE TABLE companies (
|
||||||
|
id bigserial PRIMARY KEY,
|
||||||
|
name text NOT NULL,
|
||||||
|
image_url text,
|
||||||
|
created_at timestamp without time zone NOT NULL,
|
||||||
|
updated_at timestamp without time zone NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE campaigns (
|
||||||
|
id bigserial PRIMARY KEY,
|
||||||
|
company_id bigint REFERENCES companies (id),
|
||||||
|
name text NOT NULL,
|
||||||
|
cost_model text NOT NULL,
|
||||||
|
state text NOT NULL,
|
||||||
|
monthly_budget bigint,
|
||||||
|
blacklisted_site_urls text[],
|
||||||
|
created_at timestamp without time zone NOT NULL,
|
||||||
|
updated_at timestamp without time zone NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE ads (
|
||||||
|
id bigserial PRIMARY KEY,
|
||||||
|
campaign_id bigint REFERENCES campaigns (id),
|
||||||
|
name text NOT NULL,
|
||||||
|
image_url text,
|
||||||
|
target_url text,
|
||||||
|
impressions_count bigint DEFAULT 0,
|
||||||
|
clicks_count bigint DEFAULT 0,
|
||||||
|
created_at timestamp without time zone NOT NULL,
|
||||||
|
updated_at timestamp without time zone NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE clicks (
|
||||||
|
id bigserial PRIMARY KEY,
|
||||||
|
ad_id bigint REFERENCES ads (id),
|
||||||
|
clicked_at timestamp without time zone NOT NULL,
|
||||||
|
site_url text NOT NULL,
|
||||||
|
cost_per_click_usd numeric(20,10),
|
||||||
|
user_ip inet NOT NULL,
|
||||||
|
user_data jsonb NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE impressions (
|
||||||
|
id bigserial PRIMARY KEY,
|
||||||
|
ad_id bigint REFERENCES ads (id),
|
||||||
|
seen_at timestamp without time zone NOT NULL,
|
||||||
|
site_url text NOT NULL,
|
||||||
|
cost_per_impression_usd numeric(20,10),
|
||||||
|
user_ip inet NOT NULL,
|
||||||
|
user_data jsonb NOT NULL
|
||||||
|
);
|
5
test/citus/copy.sql
vendored
Normal file
5
test/citus/copy.sql
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
\copy companies from 'companies.csv' with csv
|
||||||
|
\copy campaigns from 'campaigns.csv' with csv
|
||||||
|
-- \copy ads from 'ads.csv' with csv
|
||||||
|
-- \copy clicks from 'clicks.csv' with csv
|
||||||
|
-- \copy impressions from 'impressions.csv' with csv
|
68
test/citus/data.load
Normal file
68
test/citus/data.load
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
--
|
||||||
|
-- Ads
|
||||||
|
--
|
||||||
|
load csv
|
||||||
|
from ads.csv
|
||||||
|
(
|
||||||
|
id, company_id, campaign_id, name, image_url, target_url,
|
||||||
|
impressions_count, clicks_count, created_at, updated_at
|
||||||
|
)
|
||||||
|
|
||||||
|
into postgresql:///hackathon
|
||||||
|
|
||||||
|
target table ads
|
||||||
|
target columns
|
||||||
|
(
|
||||||
|
id, campaign_id, name, image_url, target_url,
|
||||||
|
impressions_count, clicks_count, created_at, updated_at
|
||||||
|
)
|
||||||
|
|
||||||
|
with fields optionally enclosed by '"',
|
||||||
|
fields escaped by double-quote,
|
||||||
|
fields terminated by ',';
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Clicks
|
||||||
|
--
|
||||||
|
load csv
|
||||||
|
from clicks.csv
|
||||||
|
(
|
||||||
|
id, company_id, ad_id, clicked_at, site_url, cost_per_click_usd,
|
||||||
|
user_ip, user_data
|
||||||
|
)
|
||||||
|
|
||||||
|
into postgresql:///hackathon
|
||||||
|
|
||||||
|
target table clicks
|
||||||
|
target columns
|
||||||
|
(
|
||||||
|
id, ad_id, clicked_at, site_url, cost_per_click_usd, user_ip, user_data
|
||||||
|
)
|
||||||
|
|
||||||
|
with fields optionally enclosed by '"',
|
||||||
|
fields escaped by double-quote,
|
||||||
|
fields terminated by ',';
|
||||||
|
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Impressions
|
||||||
|
--
|
||||||
|
load csv
|
||||||
|
from impressions.csv
|
||||||
|
(
|
||||||
|
id, company_id, ad_id, seen_at, site_url,
|
||||||
|
cost_per_impression_usd, user_ip, user_data
|
||||||
|
)
|
||||||
|
|
||||||
|
into postgresql:///hackathon
|
||||||
|
|
||||||
|
target table impressions
|
||||||
|
target columns
|
||||||
|
(
|
||||||
|
id, ad_id, seen_at, site_url, cost_per_impression_usd, user_ip, user_data
|
||||||
|
)
|
||||||
|
|
||||||
|
with drop indexes,
|
||||||
|
fields optionally enclosed by '"',
|
||||||
|
fields escaped by double-quote,
|
||||||
|
fields terminated by ',';
|
Loading…
Reference in New Issue
Block a user