mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-07 23:07:00 +02:00
Add a Citus distribution test case, from the citus tutorial.
This commit is contained in:
parent
d3b21ac54d
commit
7b487ddaca
1
test/citus/.gitignore
vendored
Normal file
1
test/citus/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
*.csv
|
20
test/citus/Makefile
Normal file
20
test/citus/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
# Citus multi-tenant test case: fetch the tutorial CSV data set and load
# it into PostgreSQL (schema first, then data via psql and pgloader).

DATASET := companies campaigns ads clicks impressions geo_ips

CSV := $(addsuffix .csv,$(DATASET))

# Target database name; override with `make DB=otherdb`.
DB ?= hackathon

DROP := DROP TABLE IF EXISTS companies, campaigns, ads, clicks, impressions, geo_ips

# NOTE: `all' relies on left-to-right ordering (schema before data) and is
# therefore not `-j' safe; `data' deliberately does not depend on `schema'
# so that `make data' alone never drops existing tables.
all: schema data ;

schema:
	psql --single-transaction -c "$(DROP)" -d $(DB)
	psql --single-transaction -f company.sql -d $(DB)

data: fetch
	psql -f copy.sql -d $(DB)
	../../build/bin/pgloader ./data.load

fetch: $(CSV) ;

# Download each CSV file: -f makes curl fail on HTTP errors instead of
# saving the error page as data, and the tmp-then-rename keeps a partial
# download from masquerading as an up-to-date target.
%.csv:
	curl -fsSL -o $@.tmp https://examples.citusdata.com/mt_ref_arch/$@
	mv $@.tmp $@

.PHONY: all schema data fetch
|
42
test/citus/README.md
Normal file
42
test/citus/README.md
Normal file
@ -0,0 +1,42 @@
|
||||
# Citus Multi-Tenant Automatic Distribution
|
||||
|
||||
In this test case we follow the Citus multi-tenant use-case tutorial:
|
||||
|
||||
https://docs.citusdata.com/en/v7.5/use_cases/multi_tenant.html
|
||||
|
||||
We install the schema as it stood before the Citus migration, and load the
data while skipping the company_id backfilling that has already been done
in the exported CSV files. For that we use pgloader to ignore the
company_id column in the tables that didn't have this column prior to the
Citus migration effort.
|
||||
|
||||
Then the following `company.load` file contains the pgloader command that
|
||||
runs a full migration from PostgreSQL to Citus:
|
||||
|
||||
```
|
||||
load database
|
||||
from pgsql:///hackathon
|
||||
into pgsql://localhost:9700/dim
|
||||
|
||||
with include drop, reset no sequences
|
||||
|
||||
distribute companies using id;
|
||||
```
|
||||
|
||||
Tables are marked distributed, the company_id column is added where it's
|
||||
needed, primary keys and foreign keys definitions are altered to the new
|
||||
model, and finally the data is backfilled automatically in the target table
|
||||
thanks to generating queries like the following:
|
||||
|
||||
~~~
|
||||
SELECT "campaigns".company_id::text,
|
||||
"impressions".id::text,
|
||||
"impressions".ad_id::text,
|
||||
"impressions".seen_at::text,
|
||||
"impressions".site_url::text,
|
||||
"impressions".cost_per_impression_usd::text,
|
||||
"impressions".user_ip::text,
|
||||
"impressions".user_data::text
|
||||
FROM "public"."impressions"
|
||||
JOIN "public"."ads" ON impressions.ad_id = ads.id
|
||||
JOIN "public"."campaigns" ON ads.campaign_id = campaigns.id
|
||||
~~~
|
12
test/citus/company.load
Normal file
12
test/citus/company.load
Normal file
@ -0,0 +1,12 @@
|
||||
-- pgloader command: migrate the hackathon database to a Citus cluster,
-- distributing the multi-tenant schema by company.
load database
     from pgsql:///hackathon
     into pgsql://localhost:9700/dim

 with include drop, reset no sequences

 -- Per README.md, distributing the root table is enough: pgloader marks
 -- the dependent tables distributed and backfills company_id where it is
 -- needed. The commented-out clauses below spell out the explicit
 -- per-table equivalent — presumably derived automatically; confirm
 -- against the pgloader distribute documentation.
 distribute companies using id
 -- distribute campaigns using company_id
 -- distribute ads using company_id from campaigns
 -- distribute clicks using company_id from ads, campaigns
 -- distribute impressions using company_id from ads, campaigns
 ;
|
51
test/citus/company.sql
vendored
Normal file
51
test/citus/company.sql
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
-- Schema for the Citus multi-tenant example (ad analytics): companies own
-- campaigns, campaigns own ads, and ads collect clicks and impressions.
--
-- NOTE: this is the *pre-migration* shape of the schema — the child
-- tables reference only their direct parent. The company_id column and
-- backfilling on ads/clicks/impressions happen later, during the Citus
-- migration (see README.md and company.load).

-- Tenant root table; its id becomes the Citus distribution key.
CREATE TABLE companies (
    id bigserial PRIMARY KEY,
    name text NOT NULL,
    image_url text,
    created_at timestamp without time zone NOT NULL,
    updated_at timestamp without time zone NOT NULL
);

CREATE TABLE campaigns (
    id bigserial PRIMARY KEY,
    company_id bigint REFERENCES companies (id),
    name text NOT NULL,
    cost_model text NOT NULL,
    state text NOT NULL,
    monthly_budget bigint,
    blacklisted_site_urls text[],
    created_at timestamp without time zone NOT NULL,
    updated_at timestamp without time zone NOT NULL
);

CREATE TABLE ads (
    id bigserial PRIMARY KEY,
    campaign_id bigint REFERENCES campaigns (id),
    name text NOT NULL,
    image_url text,
    target_url text,
    impressions_count bigint DEFAULT 0,
    clicks_count bigint DEFAULT 0,
    created_at timestamp without time zone NOT NULL,
    updated_at timestamp without time zone NOT NULL
);

CREATE TABLE clicks (
    id bigserial PRIMARY KEY,
    ad_id bigint REFERENCES ads (id),
    clicked_at timestamp without time zone NOT NULL,
    site_url text NOT NULL,
    cost_per_click_usd numeric(20,10),
    user_ip inet NOT NULL,
    user_data jsonb NOT NULL
);

CREATE TABLE impressions (
    id bigserial PRIMARY KEY,
    ad_id bigint REFERENCES ads (id),
    seen_at timestamp without time zone NOT NULL,
    site_url text NOT NULL,
    cost_per_impression_usd numeric(20,10),
    user_ip inet NOT NULL,
    user_data jsonb NOT NULL
);
|
5
test/citus/copy.sql
vendored
Normal file
5
test/citus/copy.sql
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
-- Load the raw CSV files with psql's client-side \copy.
-- Only companies and campaigns are loaded here: the CSV exports for the
-- other three tables carry the backfilled company_id column, which the
-- pre-migration schema does not have, so those are loaded by pgloader
-- (see data.load) which skips that column.
\copy companies from 'companies.csv' with csv
\copy campaigns from 'campaigns.csv' with csv
-- \copy ads from 'ads.csv' with csv
-- \copy clicks from 'clicks.csv' with csv
-- \copy impressions from 'impressions.csv' with csv
|
68
test/citus/data.load
Normal file
68
test/citus/data.load
Normal file
@ -0,0 +1,68 @@
|
||||
--
-- Load the three CSV exports whose files contain the backfilled
-- company_id column: each load names company_id among the source fields
-- but leaves it out of the target columns, so the pre-migration schema
-- (see company.sql) only receives the columns it defines.
--

--
-- Ads
--
load csv
     from ads.csv
     (
        id, company_id, campaign_id, name, image_url, target_url,
        impressions_count, clicks_count, created_at, updated_at
     )

into postgresql:///hackathon

     target table ads
     target columns
     (
        id, campaign_id, name, image_url, target_url,
        impressions_count, clicks_count, created_at, updated_at
     )

with fields optionally enclosed by '"',
     fields escaped by double-quote,
     fields terminated by ',';

--
-- Clicks
--
load csv
     from clicks.csv
     (
        id, company_id, ad_id, clicked_at, site_url, cost_per_click_usd,
        user_ip, user_data
     )

into postgresql:///hackathon

     target table clicks
     target columns
     (
        id, ad_id, clicked_at, site_url, cost_per_click_usd, user_ip, user_data
     )

with fields optionally enclosed by '"',
     fields escaped by double-quote,
     fields terminated by ',';


--
-- Impressions
--
load csv
     from impressions.csv
     (
        id, company_id, ad_id, seen_at, site_url,
        cost_per_impression_usd, user_ip, user_data
     )

into postgresql:///hackathon

     target table impressions
     target columns
     (
        id, ad_id, seen_at, site_url, cost_per_impression_usd, user_ip, user_data
     )

-- NOTE(review): only this load uses `drop indexes` — presumably because
-- impressions is the largest table; confirm the omission on ads/clicks
-- is intentional.
with drop indexes,
     fields optionally enclosed by '"',
     fields escaped by double-quote,
     fields terminated by ',';
|
Loading…
Reference in New Issue
Block a user