pgloader/pgloader.asd
Dimitri Fontaine 381ac9d1a2 Add initial support for Citus distribution from pgloader.
The idea is for pgloader to tweak the schema from a description of the
sharding model, the distribute clause. Here's an example of such a clause:

   distribute company using id
   distribute campaign using company_id
   distribute ads using company_id from campaign
   distribute clicks using company_id from ads, campaign

Given such commands, pgloader adds the distribution key to the table when
needed, to the primary key definition of the table, and also to the foreign
keys that are pointing to the changed primary key.

Then when SELECTing the data from the source database, the idea is for
pgloader to automatically JOIN the base table with the source table in which
the distribution key is found, in case the key was just added to the schema.

Finally, pgloader also calls the following Citus commands:

  SELECT create_distributed_table('company', 'id');
  SELECT create_distributed_table('campaign', 'company_id');
  SELECT create_distributed_table('ads', 'company_id');
  SELECT create_distributed_table('clicks', 'company_id');
2018-10-10 14:35:12 -07:00

276 lines
12 KiB
Common Lisp

;;;; pgloader.asd
;;; ASDF system definition for pgloader.
;;;
;;; All of the source code lives in the "src" module.  Its sub-modules and
;;; files declare their load-order constraints explicitly with :depends-on,
;;; and modules flagged :serial t additionally load their own components in
;;; the order they are listed.
(asdf:defsystem #:pgloader
  :serial t
  :description "Load data into PostgreSQL"
  :author "Dimitri Fontaine <dim@tapoueh.org>"
  :license "The PostgreSQL Licence"
  :depends-on (#:uiop                   ; host system integration
               #:cl-log                 ; logging
               #:postmodern             ; PostgreSQL protocol implementation
               #:cl-postgres            ; low level bits for COPY streaming
               #:simple-date            ; FIXME: recheck dependency
               #:qmynd                  ; MySQL protocol implementation
               #:split-sequence         ; some parsing is made easy
               #:cl-csv                 ; full CSV reader
               #:cl-fad                 ; file and directories
               #:lparallel              ; threads, workers, queues
               #:esrap                  ; parser generator
               #:alexandria             ; utils
               #:drakma                 ; http client, download archives
               #:flexi-streams          ; streams
               #:usocket                ; UDP / syslog
               #:local-time             ; UDP date parsing
               #:command-line-arguments ; for the main function
               #:abnf                   ; ABNF parser generator (for syslog)
               #:db3                    ; DBF version 3 file reader
               #:ixf                    ; IBM IXF file format reader
               #:py-configparser        ; Read old-style INI config files
               #:sqlite                 ; Query a SQLite file
               #:cl-base64              ; Decode base64 data
               #:trivial-backtrace      ; For --debug cli usage
               #:cl-markdown            ; To produce the website
               #:metabang-bind          ; the bind macro
               #:mssql                  ; M$ SQL connectivity
               #:uuid                   ; Transforming MS SQL unique identifiers
               #:quri                   ; decode URI parameters
               #:cl-ppcre               ; Perl Compatible Regular Expressions
               #:cl-mustache            ; Logic-less templates
               #:yason                  ; JSON routines
               #:closer-mop             ; introspection
               #:zs3                    ; integration with AWS S3 for Redshift
               )
  :components
  ((:module "src"
    :components
    ((:file "params")
     (:file "package" :depends-on ("params"))

     ;; patches applied on top of third-party systems
     (:module "monkey"
      :components
      ((:file "bind")
       (:file "mssql")))

     (:module "utils"
      :depends-on ("package" "params")
      :components
      ((:file "charsets")
       (:file "logs")
       (:file "utils")
       (:file "state")

       ;; user defined transforms package and pgloader
       ;; provided ones
       (:file "transforms")

       ;; PostgreSQL related utils
       (:file "read-sql-files")
       (:file "queries")
       (:file "quoting"     :depends-on ("utils"))
       (:file "catalog"     :depends-on ("quoting"))
       (:file "alter-table" :depends-on ("catalog"))
       (:file "citus"       :depends-on ("catalog"))

       ;; State, monitoring, reporting
       (:file "reject"             :depends-on ("state"))
       (:file "pretty-print-state" :depends-on ("state"))
       (:file "report"  :depends-on ("state" "pretty-print-state" "utils" "catalog"))
       (:file "monitor" :depends-on ("logs" "state" "reject" "report"))
       (:file "threads" :depends-on ("monitor"))
       (:file "archive" :depends-on ("monitor"))

       ;; generic connection api
       (:file "connection" :depends-on ("monitor" "archive"))))

     ;; package pgloader.pgsql
     (:module "pgsql"
      :depends-on ("package" "params" "utils")
      :serial t
      :components
      ((:file "connection")
       (:file "pgsql-ddl")
       (:file "pgsql-ddl-citus")
       (:file "pgsql-schema")
       (:file "merge-catalogs" :depends-on ("pgsql-schema"))
       (:file "pgsql-trigger")
       (:file "pgsql-index-filter")
       (:file "pgsql-finalize-catalogs")
       (:file "pgsql-create-schema" :depends-on ("pgsql-trigger"))))

     ;; Source format specific implementations
     (:module "sources"
      :depends-on ("monkey"             ; mssql driver patches
                   "params"
                   "package"
                   "pgsql"
                   "utils")
      :components
      ((:module "common"
        :serial t
        :components
        ((:file "api")
         (:file "methods")
         (:file "md-methods")
         (:file "casting-rules")
         (:file "files-and-pathnames")
         (:file "project-fields")))

       (:module "csv"
        :depends-on ("common")
        :components
        ((:file "csv-guess")
         ;; (:file "csv-database")
         (:file "csv")))

       (:file "fixed" :depends-on ("common" "csv"))
       (:file "copy"  :depends-on ("common" "csv"))

       (:module "db3"
        :depends-on ("common" "csv")
        :components
        ((:file "db3-schema")
         (:file "db3" :depends-on ("db3-schema"))))

       (:module "ixf"
        :depends-on ("common")
        :components
        ((:file "ixf-schema")
         (:file "ixf" :depends-on ("ixf-schema"))))

       ;; (:file "syslog") ; experimental...

       (:module "sqlite"
        :serial t
        :depends-on ("common")
        :components
        ((:file "sqlite-cast-rules")
         (:file "sqlite-schema")
         (:file "sqlite")))

       (:module "mssql"
        :serial t
        :depends-on ("common")
        :components
        ((:file "mssql-cast-rules")
         (:file "mssql-schema")
         (:file "mssql")
         (:file "mssql-index-filters")))

       (:module "mysql"
        :serial t
        :depends-on ("common")
        :components
        ((:file "mysql-cast-rules")
         (:file "mysql-connection")
         (:file "mysql-schema")
         (:file "mysql")))

       (:module "pgsql"
        :serial t
        :depends-on ("common")
        :components
        ((:file "pgsql-cast-rules")
         (:file "pgsql")))))

     ;; package pgloader.copy
     (:module "pg-copy"
      :depends-on ("params" "package" "utils" "pgsql" "sources")
      :serial t
      :components
      ((:file "copy-batch")
       (:file "copy-format")
       (:file "copy-db-write")
       (:file "copy-rows-in-stream")
       (:file "copy-rows-in-batch")
       (:file "copy-rows-in-batch-through-s3")
       (:file "copy-retry-batch")
       (:file "copy-from-queue")))

     (:module "load"
      :depends-on ("params" "package" "utils" "pgsql" "sources")
      :serial t
      :components
      ((:file "api")
       (:file "copy-data")
       (:file "load-file")
       (:file "migrate-database")))

     (:module "parsers"
      :depends-on ("params" "package" "utils" "pgsql" "sources" "monkey")
      :serial t
      :components
      ((:file "parse-ini")
       (:file "template")
       (:file "command-utils")
       (:file "command-keywords")
       (:file "command-regexp")
       (:file "parse-pgpass")
       (:file "command-db-uri")
       (:file "command-source")
       (:file "command-options")
       (:file "command-sql-block")
       (:file "command-sexp")
       (:file "command-csv")
       (:file "command-ixf")
       (:file "command-fixed")
       (:file "command-copy")
       (:file "command-dbf")
       (:file "command-cast-rules")
       (:file "command-materialize-views")
       (:file "command-alter-table")
       (:file "command-distribute")
       (:file "command-mysql")
       (:file "command-including-like")
       (:file "command-mssql")
       (:file "command-sqlite")
       (:file "command-pgsql")
       (:file "command-archive")
       (:file "command-parser")
       (:file "parse-sqlite-type-name")
       (:file "date-format")))

     ;; the main entry file, used when building a stand-alone
     ;; executable image
     (:file "api"
      :depends-on ("params" "package" "utils" "parsers" "sources"))

     (:module "regress"
      :depends-on ("params" "package" "utils" "pgsql" "api")
      :components ((:file "regress")))

     (:file "main"
      :depends-on ("params" "package" "utils" "parsers" "sources" "api" "regress"))))))