mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-07 23:07:00 +02:00
Add initial support for Citus distribution from pgloader.
The idea is for pgloader to tweak the schema from a description of the sharding model, the distribute clause. Here's an example of such a clause: distribute company using id distribute campaign using company_id distribute ads using company_id from campaign distribute clicks using company_id from ads, campaign Given such commands, pgloader adds the distribution key to the table when needed, to the primary key definition of the table, and also to the foreign keys that are pointing to the changed primary key. Then when SELECTing the data from the source database, the idea is for pgloader to automatically JOIN the base table with the source table in which the distribution key is found, in case it was just added in the schema. Finally, pgloader also calls the following Citus commands: SELECT create_distributed_table('company', 'id'); SELECT create_distributed_table('campaign', 'company_id'); SELECT create_distributed_table('ads', 'company_id'); SELECT create_distributed_table('clicks', 'company_id');
This commit is contained in:
parent
344d0ca61b
commit
381ac9d1a2
@ -69,6 +69,7 @@
|
|||||||
(:file "quoting" :depends-on ("utils"))
|
(:file "quoting" :depends-on ("utils"))
|
||||||
(:file "catalog" :depends-on ("quoting"))
|
(:file "catalog" :depends-on ("quoting"))
|
||||||
(:file "alter-table" :depends-on ("catalog"))
|
(:file "alter-table" :depends-on ("catalog"))
|
||||||
|
(:file "citus" :depends-on ("catalog"))
|
||||||
|
|
||||||
;; State, monitoring, reporting
|
;; State, monitoring, reporting
|
||||||
(:file "reject" :depends-on ("state"))
|
(:file "reject" :depends-on ("state"))
|
||||||
@ -95,6 +96,7 @@
|
|||||||
:components
|
:components
|
||||||
((:file "connection")
|
((:file "connection")
|
||||||
(:file "pgsql-ddl")
|
(:file "pgsql-ddl")
|
||||||
|
(:file "pgsql-ddl-citus")
|
||||||
(:file "pgsql-schema")
|
(:file "pgsql-schema")
|
||||||
(:file "merge-catalogs" :depends-on ("pgsql-schema"))
|
(:file "merge-catalogs" :depends-on ("pgsql-schema"))
|
||||||
(:file "pgsql-trigger")
|
(:file "pgsql-trigger")
|
||||||
@ -239,6 +241,7 @@
|
|||||||
(:file "command-cast-rules")
|
(:file "command-cast-rules")
|
||||||
(:file "command-materialize-views")
|
(:file "command-materialize-views")
|
||||||
(:file "command-alter-table")
|
(:file "command-alter-table")
|
||||||
|
(:file "command-distribute")
|
||||||
(:file "command-mysql")
|
(:file "command-mysql")
|
||||||
(:file "command-including-like")
|
(:file "command-including-like")
|
||||||
(:file "command-mssql")
|
(:file "command-mssql")
|
||||||
|
@ -19,7 +19,8 @@
|
|||||||
set-table-oids
|
set-table-oids
|
||||||
materialize-views
|
materialize-views
|
||||||
foreign-keys
|
foreign-keys
|
||||||
include-drop)
|
include-drop
|
||||||
|
distribute)
|
||||||
"Prepare the target PostgreSQL database: create tables casting datatypes
|
"Prepare the target PostgreSQL database: create tables casting datatypes
|
||||||
from the MySQL definitions, prepare index definitions and create target
|
from the MySQL definitions, prepare index definitions and create target
|
||||||
tables for materialized views.
|
tables for materialized views.
|
||||||
@ -114,7 +115,15 @@
|
|||||||
:use-result-as-rows t)
|
:use-result-as-rows t)
|
||||||
(create-views catalog
|
(create-views catalog
|
||||||
:include-drop include-drop
|
:include-drop include-drop
|
||||||
:client-min-messages :error))))
|
:client-min-messages :error)))
|
||||||
|
|
||||||
|
;; Citus Support
|
||||||
|
(when distribute
|
||||||
|
(with-stats-collection ("Citus Distribute Tables" :section :pre)
|
||||||
|
(let ((citus-sql
|
||||||
|
(loop :for rule :in distribute
|
||||||
|
:collect (format-create-sql rule))))
|
||||||
|
(pgsql-execute citus-sql :client-min-messages :notice)))))
|
||||||
|
|
||||||
;; log the catalog we just fetched and (maybe) merged
|
;; log the catalog we just fetched and (maybe) merged
|
||||||
(log-message :data "CATALOG: ~s" catalog))
|
(log-message :data "CATALOG: ~s" catalog))
|
||||||
@ -213,9 +222,10 @@
|
|||||||
:reset-sequences reset-sequences))))
|
:reset-sequences reset-sequences))))
|
||||||
|
|
||||||
|
|
||||||
(defun process-catalog (copy catalog &key alter-table alter-schema)
|
(defun process-catalog (copy catalog &key alter-table alter-schema distribute)
|
||||||
"Do all the PostgreSQL catalog tweaking here: casts, index WHERE clause
|
"Do all the PostgreSQL catalog tweaking here: casts, index WHERE clause
|
||||||
rewriting, pgloader level alter schema and alter table commands."
|
rewriting, pgloader level alter schema and alter table commands."
|
||||||
|
|
||||||
;; cast the catalog into something PostgreSQL can work on
|
;; cast the catalog into something PostgreSQL can work on
|
||||||
(cast catalog)
|
(cast catalog)
|
||||||
|
|
||||||
@ -229,7 +239,11 @@
|
|||||||
;; if asked, now alter the catalog with given rules: the alter-table
|
;; if asked, now alter the catalog with given rules: the alter-table
|
||||||
;; keyword parameter actually contains a set of alter table rules.
|
;; keyword parameter actually contains a set of alter table rules.
|
||||||
(when alter-table
|
(when alter-table
|
||||||
(alter-table catalog alter-table)))
|
(alter-table catalog alter-table))
|
||||||
|
|
||||||
|
;; we also support schema changes necessary for Citus distribution
|
||||||
|
(when distribute
|
||||||
|
(pgloader.catalog::citus-distribute-schema catalog distribute)))
|
||||||
|
|
||||||
|
|
||||||
;;;
|
;;;
|
||||||
@ -256,6 +270,7 @@
|
|||||||
(foreign-keys t)
|
(foreign-keys t)
|
||||||
(reindex nil)
|
(reindex nil)
|
||||||
(after-schema nil)
|
(after-schema nil)
|
||||||
|
distribute
|
||||||
only-tables
|
only-tables
|
||||||
including
|
including
|
||||||
excluding
|
excluding
|
||||||
@ -326,7 +341,8 @@
|
|||||||
;; that's CAST rules, index WHERE clause rewriting and ALTER commands
|
;; that's CAST rules, index WHERE clause rewriting and ALTER commands
|
||||||
(process-catalog copy catalog
|
(process-catalog copy catalog
|
||||||
:alter-table alter-table
|
:alter-table alter-table
|
||||||
:alter-schema alter-schema)
|
:alter-schema alter-schema
|
||||||
|
:distribute distribute)
|
||||||
|
|
||||||
;; if asked, first drop/create the tables on the PostgreSQL side
|
;; if asked, first drop/create the tables on the PostgreSQL side
|
||||||
(handler-case
|
(handler-case
|
||||||
@ -341,7 +357,8 @@
|
|||||||
:include-drop include-drop
|
:include-drop include-drop
|
||||||
:foreign-keys foreign-keys
|
:foreign-keys foreign-keys
|
||||||
:set-table-oids set-table-oids
|
:set-table-oids set-table-oids
|
||||||
:materialize-views materialize-views)
|
:materialize-views materialize-views
|
||||||
|
:distribute distribute)
|
||||||
|
|
||||||
;; if there's an AFTER SCHEMA DO/EXECUTE command, now is the time
|
;; if there's an AFTER SCHEMA DO/EXECUTE command, now is the time
|
||||||
;; to run it.
|
;; to run it.
|
||||||
|
@ -98,6 +98,7 @@
|
|||||||
#:table-index-list
|
#:table-index-list
|
||||||
#:table-fkey-list
|
#:table-fkey-list
|
||||||
#:table-trigger-list
|
#:table-trigger-list
|
||||||
|
#:table-citus-rule
|
||||||
|
|
||||||
#:extension-name
|
#:extension-name
|
||||||
#:extension-schema
|
#:extension-schema
|
||||||
@ -208,6 +209,15 @@
|
|||||||
#:match-rule-action
|
#:match-rule-action
|
||||||
#:match-rule-args
|
#:match-rule-args
|
||||||
|
|
||||||
|
#:citus-reference-table
|
||||||
|
#:citus-distributed-table
|
||||||
|
#:make-citus-reference-table
|
||||||
|
#:make-citus-distributed-table
|
||||||
|
#:citus-reference-table-table
|
||||||
|
#:citus-distributed-table-table
|
||||||
|
#:citus-distributed-table-using
|
||||||
|
#:citus-distributed-table-from
|
||||||
|
|
||||||
#:format-table-name))
|
#:format-table-name))
|
||||||
|
|
||||||
(defpackage #:pgloader.state
|
(defpackage #:pgloader.state
|
||||||
|
48
src/parsers/command-distribute.lisp
Normal file
48
src/parsers/command-distribute.lisp
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#|
|
||||||
|
distribute billers using id
|
||||||
|
distribute bills using biller_id
|
||||||
|
distribute receivable_accounts using biller_id
|
||||||
|
distribute payments using biller_id
|
||||||
|
|
||||||
|
distribute splits using biller_id
|
||||||
|
from receivable_accounts
|
||||||
|
|
||||||
|
distribute ach_accounts as reference table
|
||||||
|
|#
|
||||||
|
|
||||||
|
(in-package :pgloader.parser)
|
||||||
|
|
||||||
|
(defun create-table-from-dsn-table-name (dsn-table-name
                                         &optional (schema-name "public"))
  "Return the table that CREATE-TABLE builds from the CDR of the second
   element of DSN-TABLE-NAME. When that table has no schema, attach a new
   schema without catalog, using SCHEMA-NAME as its source name and
   SCHEMA-NAME passed through APPLY-IDENTIFIER-CASE as its name."
  ;; NOTE(review): the callers in this file pass a whole DISTRIBUTE rule
  ;; production, so the second element is presumably the result of the
  ;; dsn-table-name rule -- confirm against that rule's definition.
  (let* ((name-spec (cdr (second dsn-table-name)))
         (new-table (create-table name-spec)))
    (when (null (table-schema new-table))
      (let ((default-schema
             (make-schema :catalog nil
                          :source-name schema-name
                          :name (apply-identifier-case schema-name))))
        (setf (table-schema new-table) default-schema)))
    new-table))
|
||||||
|
|
||||||
|
(defrule distribute-reference (and kw-distribute dsn-table-name
                                   kw-as kw-reference kw-table)
  ;; distribute <table> as reference table
  (:lambda (production)
    (let ((target (create-table-from-dsn-table-name production)))
      (make-citus-reference-table :table target))))
|
||||||
|
|
||||||
|
(defrule distribute-using (and kw-distribute dsn-table-name
                               kw-using maybe-quoted-namestring)
  ;; distribute <table> using <column>
  (:lambda (production)
    ;; the column name is the fourth element of the matched sequence
    (let ((target     (create-table-from-dsn-table-name production))
          (key-column (make-column :name (nth 3 production))))
      (make-citus-distributed-table :table target
                                    :using key-column))))
|
||||||
|
|
||||||
|
(defrule distribute-using-from (and kw-distribute dsn-table-name
                                    kw-using maybe-quoted-namestring
                                    kw-from (+ maybe-quoted-namestring))
  ;; distribute <table> using <column> from <table> ...
  ;;
  ;; NOTE(review): the FROM part is matched with (+ ...), and everything it
  ;; matched is spread as the arguments of a single CREATE-TABLE call --
  ;; confirm what CREATE-TABLE does when more than one name is given.
  (:lambda (production)
    (let ((target     (create-table-from-dsn-table-name production))
          (key-column (make-column :name (nth 3 production)))
          (from-names (nth 5 production)))
      (make-citus-distributed-table :table target
                                    :using key-column
                                    :from (apply #'create-table from-names)))))
|
||||||
|
|
||||||
|
(defrule distribute-commands (+ (or distribute-using-from
                                    distribute-using
                                    distribute-reference))
  ;; one or more distribute rules; the result is the list of parsed rules
  ;; with the :distribute keyword in front
  (:lambda (rules)
    (list* :distribute rules)))
|
@ -103,6 +103,9 @@
|
|||||||
(def-keyword-rule "trim")
|
(def-keyword-rule "trim")
|
||||||
(def-keyword-rule "unquoted")
|
(def-keyword-rule "unquoted")
|
||||||
(def-keyword-rule "delimiter")
|
(def-keyword-rule "delimiter")
|
||||||
|
;; option for Citus support
|
||||||
|
(def-keyword-rule "distribute")
|
||||||
|
(def-keyword-rule "reference")
|
||||||
;; option for MySQL imports
|
;; option for MySQL imports
|
||||||
(def-keyword-rule "schema")
|
(def-keyword-rule "schema")
|
||||||
(def-keyword-rule "schemas")
|
(def-keyword-rule "schemas")
|
||||||
|
@ -80,7 +80,8 @@
|
|||||||
decoding-tables-as
|
decoding-tables-as
|
||||||
before-load
|
before-load
|
||||||
after-schema
|
after-schema
|
||||||
after-load))
|
after-load
|
||||||
|
distribute-commands))
|
||||||
(:lambda (clauses-list)
|
(:lambda (clauses-list)
|
||||||
(alexandria:alist-plist clauses-list)))
|
(alexandria:alist-plist clauses-list)))
|
||||||
|
|
||||||
@ -109,6 +110,7 @@
|
|||||||
alter-table alter-schema
|
alter-table alter-schema
|
||||||
((:including incl))
|
((:including incl))
|
||||||
((:excluding excl))
|
((:excluding excl))
|
||||||
|
distribute
|
||||||
&allow-other-keys)
|
&allow-other-keys)
|
||||||
`(lambda ()
|
`(lambda ()
|
||||||
(let* ((*default-cast-rules* ',*pgsql-default-cast-rules*)
|
(let* ((*default-cast-rules* ',*pgsql-default-cast-rules*)
|
||||||
@ -133,6 +135,7 @@
|
|||||||
:set-table-oids t
|
:set-table-oids t
|
||||||
:on-error-stop on-error-stop
|
:on-error-stop on-error-stop
|
||||||
:after-schema ',after-schema
|
:after-schema ',after-schema
|
||||||
|
:distribute ',distribute
|
||||||
,@(remove-batch-control-option options))
|
,@(remove-batch-control-option options))
|
||||||
|
|
||||||
,(sql-code-block pg-dst-db-conn :post after "after load"))))
|
,(sql-code-block pg-dst-db-conn :post after "after load"))))
|
||||||
@ -143,7 +146,7 @@
|
|||||||
pg-dst-db-uri
|
pg-dst-db-uri
|
||||||
&key
|
&key
|
||||||
gucs casts before after after-schema options
|
gucs casts before after after-schema options
|
||||||
alter-table alter-schema
|
alter-table alter-schema distribute
|
||||||
including excluding decoding)
|
including excluding decoding)
|
||||||
source
|
source
|
||||||
(cond (*dry-run*
|
(cond (*dry-run*
|
||||||
@ -158,6 +161,7 @@
|
|||||||
:options options
|
:options options
|
||||||
:alter-table alter-table
|
:alter-table alter-table
|
||||||
:alter-schema alter-schema
|
:alter-schema alter-schema
|
||||||
|
:distribute distribute
|
||||||
:including including
|
:including including
|
||||||
:excluding excluding
|
:excluding excluding
|
||||||
:decoding decoding))))))
|
:decoding decoding))))))
|
||||||
|
18
src/pgsql/pgsql-ddl-citus.lisp
Normal file
18
src/pgsql/pgsql-ddl-citus.lisp
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
;;;
|
||||||
|
;;; PostgreSQL Citus support for calling functions.
|
||||||
|
;;;
|
||||||
|
|
||||||
|
(in-package :pgloader.pgsql)
|
||||||
|
|
||||||
|
(defun citus-sql-literal (value)
  "Return VALUE, printed as with the ~a format directive, as a single-quoted
   SQL string constant. Single quotes found in the printed text are doubled,
   so that a table or column name containing a quote cannot terminate the
   constant early."
  (with-output-to-string (out)
    (write-char #\' out)
    (loop :for char :across (princ-to-string value)
       :do (when (char= char #\')
             (write-char #\' out))
           (write-char char out))
    (write-char #\' out)))

(defmethod format-create-sql ((rule citus-reference-table)
                              &key (stream nil) if-not-exists)
  "Format the create_reference_table() query for the table of RULE.
   STREAM is given to FORMAT as its destination: with the default NIL the
   query is returned as a string. IF-NOT-EXISTS is accepted and ignored."
  (declare (ignore if-not-exists))
  (format stream "SELECT create_reference_table(~a);"
          (citus-sql-literal
           (format-table-name (citus-reference-table-table rule)))))

(defmethod format-create-sql ((rule citus-distributed-table)
                              &key (stream nil) if-not-exists)
  "Format the create_distributed_table() query for the table of RULE, with
   the name of the USING column of RULE as second argument. STREAM is given
   to FORMAT as its destination: with the default NIL the query is returned
   as a string. IF-NOT-EXISTS is accepted and ignored."
  (declare (ignore if-not-exists))
  (format stream "SELECT create_distributed_table(~a, ~a);"
          (citus-sql-literal
           (format-table-name (citus-distributed-table-table rule)))
          (citus-sql-literal
           (column-name (citus-distributed-table-using rule)))))
|
@ -198,7 +198,7 @@
|
|||||||
(loop
|
(loop
|
||||||
:for (schema-name name oid
|
:for (schema-name name oid
|
||||||
table-schema table-name
|
table-schema table-name
|
||||||
primary unique sql conname condef)
|
primary unique cols sql conname condef)
|
||||||
:in (query nil
|
:in (query nil
|
||||||
(format nil
|
(format nil
|
||||||
(sql "/pgsql/list-all-indexes.sql")
|
(sql "/pgsql/list-all-indexes.sql")
|
||||||
@ -222,7 +222,7 @@
|
|||||||
:table table
|
:table table
|
||||||
:primary primary
|
:primary primary
|
||||||
:unique unique
|
:unique unique
|
||||||
:columns nil
|
:columns (split-sequence:split-sequence #\, cols)
|
||||||
:sql sql
|
:sql sql
|
||||||
:conname (unless (eq :null conname)
|
:conname (unless (eq :null conname)
|
||||||
(ensure-quoted conname))
|
(ensure-quoted conname))
|
||||||
|
@ -9,6 +9,11 @@
|
|||||||
r.relname,
|
r.relname,
|
||||||
indisprimary,
|
indisprimary,
|
||||||
indisunique,
|
indisunique,
|
||||||
|
(select string_agg(attname, ',')
|
||||||
|
from pg_attribute
|
||||||
|
where attrelid = r.oid
|
||||||
|
and array[attnum::integer] <@ indkey::integer[]
|
||||||
|
) as cols,
|
||||||
pg_get_indexdef(indexrelid),
|
pg_get_indexdef(indexrelid),
|
||||||
c.conname,
|
c.conname,
|
||||||
pg_get_constraintdef(c.oid)
|
pg_get_constraintdef(c.oid)
|
||||||
|
@ -50,7 +50,8 @@
|
|||||||
(defstruct table source-name name schema oid comment storage-parameter-list
|
(defstruct table source-name name schema oid comment storage-parameter-list
|
||||||
;; field is for SOURCE
|
;; field is for SOURCE
|
||||||
;; column is for TARGET
|
;; column is for TARGET
|
||||||
field-list column-list index-list fkey-list trigger-list)
|
;; citus is an extra slot for citus support
|
||||||
|
field-list column-list index-list fkey-list trigger-list citus-rule)
|
||||||
|
|
||||||
;;;
|
;;;
|
||||||
;;; When migrating from PostgreSQL to PostgreSQL we might have to install
|
;;; When migrating from PostgreSQL to PostgreSQL we might have to install
|
||||||
|
89
src/utils/citus.lisp
Normal file
89
src/utils/citus.lisp
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
;;;
|
||||||
|
;;; Citus support in pgloader lets you declare what needs to change in the
|
||||||
|
;;; source schema in terms of Citus concepts: reference and distributed
|
||||||
|
;;; tables.
|
||||||
|
;;;
|
||||||
|
|
||||||
|
#|
|
||||||
|
distribute billers using id
|
||||||
|
distribute bills using biller_id
|
||||||
|
distribute receivable_accounts using biller_id
|
||||||
|
distribute payments using biller_id
|
||||||
|
|
||||||
|
distribute splits using biller_id
|
||||||
|
from receivable_accounts
|
||||||
|
|
||||||
|
distribute ach_accounts as reference table
|
||||||
|
|#
|
||||||
|
|
||||||
|
|
||||||
|
(in-package #:pgloader.catalog)
|
||||||
|
|
||||||
|
;; Rule for a Citus reference table: TABLE is the table the rule applies to.
(defstruct citus-reference-table table)
|
||||||
|
;; Rule for a Citus distributed table: TABLE is the table the rule applies
;; to, USING is the column to distribute on, and FROM is the table where the
;; definition of that column is looked up when TABLE does not have it.
(defstruct citus-distributed-table table using from)
|
||||||
|
|
||||||
|
(defun citus-distribute-schema (catalog distribution-rules)
  "Apply each rule of DISTRIBUTION-RULES, in order, to the table of CATALOG
   that has the same schema name and table name as the table of the rule."
  (dolist (rule distribution-rules)
    (let* ((rule-table (citus-rule-table rule))
           (target     (citus-find-table catalog rule-table)))
      (apply-citus-rule rule target))))
|
||||||
|
|
||||||
|
(defun citus-rule-table (rule)
  "Return the table slot of RULE, which must be either a
   citus-distributed-table or a citus-reference-table."
  (etypecase rule
    (citus-distributed-table
     (citus-distributed-table-table rule))
    (citus-reference-table
     (citus-reference-table-table rule))))
|
||||||
|
|
||||||
|
(defun citus-find-table (catalog table)
  "Look up in CATALOG the table that has the table name of TABLE, within
   the schema that has the name of TABLE's schema."
  (let* ((wanted-table  (table-name table))
         (wanted-schema (schema-name (table-schema table)))
         (schema        (find-schema catalog wanted-schema)))
    (find-table schema wanted-table)))
|
||||||
|
|
||||||
|
(defgeneric apply-citus-rule (rule table)
  (:documentation
   "Change the catalog entry TABLE as required by the Citus rule RULE."))
|
||||||
|
|
||||||
|
(defmethod apply-citus-rule ((rule citus-reference-table) (table table))
  "Record RULE in the citus-rule slot of TABLE."
  ;; a reference table needs no other change in the catalog
  (setf (table-citus-rule table) rule))
|
||||||
|
|
||||||
|
(defun add-citus-distribution-column (rule table distkey)
  "Copy the definition of the column named DISTKEY from the FROM table of
   RULE to the front of the column list of TABLE. Assertions fail when RULE
   has no FROM table, when that table is not found in the catalog of TABLE,
   or when it has no field named DISTKEY."
  (let ((from (citus-distributed-table-from rule)))
    (assert (not (null from)) ()
            "Citus: column ~s not found in table ~s, and no FROM table given."
            distkey (table-name table))
    (let* ((catalog     (schema-catalog (table-schema table)))
           ;; the FROM table may have been built without a schema: in that
           ;; case search for it in the schema of the distributed table
           (from-schema (or (table-schema from) (table-schema table)))
           (from-table  (find-table (find-schema catalog
                                                 (schema-name from-schema))
                                    (table-name from))))
      (assert (not (null from-table)) ()
              "Citus: FROM table ~s not found in the catalog."
              (table-name from))
      (let ((column-definition (find distkey
                                     (table-field-list from-table)
                                     :test #'string=
                                     :key #'column-name)))
        (assert (not (null column-definition)) ()
                "Citus: column ~s not found in FROM table ~s."
                distkey (table-name from))
        (push (make-column :name (column-name column-definition)
                           :type-name (column-type-name column-definition)
                           :nullable (column-nullable column-definition)
                           :transform (column-transform column-definition))
              (table-column-list table))))))

(defmethod apply-citus-rule ((rule citus-distributed-table) (table table))
  "Record RULE in the citus-rule slot of TABLE, then make sure that TABLE
   has the column named by the USING part of RULE and that this column is
   part of the primary key of TABLE.

   When no field of TABLE has that name, the column definition is copied
   from the FROM table of RULE. When the primary key does not list the
   column, its name is added in first position and the index sql and
   condef are reset to NIL, as they need to be computed again.

   An assertion fails when TABLE has no primary key index."
  (setf (table-citus-rule table) rule)

  (let ((distkey (column-name (citus-distributed-table-using rule))))

    ;; check if the USING column exists, or if we need to add it to our
    ;; model: the check used to be followed by an assertion that made the
    ;; second case unreachable
    (unless (find distkey (table-field-list table)
                  :test #'string=
                  :key #'column-name)
      (add-citus-distribution-column rule table distkey))

    ;; add it to the PKEY definition, in first position; the index is
    ;; checked before any of its slots is read
    (let ((index (find-if #'index-primary (table-index-list table))))
      (assert (not (null index)) ()
              "Citus: table ~s has no primary key to add column ~s to."
              (table-name table) distkey)
      (unless (find distkey (index-columns index) :test #'string=)
        (push distkey (index-columns index))
        ;; now remove origin schema sql and condef, we need to redo them
        (setf (index-sql index) nil)
        (setf (index-condef index) nil)))))
|
Loading…
Reference in New Issue
Block a user