From 42e9e521e07e14a10dc8f70e1db162b69e389572 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Mon, 11 Apr 2016 17:40:52 +0200 Subject: [PATCH] Add option "max parallel create index". By default, pgloader will start as many parallel CREATE INDEX commands as the maximum number of indexes you have on any single table that takes part in the load. As this number might be so great as to exhaust the target PostgreSQL server (e.g. maintenance_work_mem), we add an option to limit that to something reasonable when the source schema isn't. Fix #386 in which 150 indexes are found on a single source table. --- pgloader.1 | 8 +++++++- pgloader.1.md | 6 ++++++ src/parsers/command-copy.lisp | 1 + src/parsers/command-csv.lisp | 1 + src/parsers/command-fixed.lisp | 1 + src/parsers/command-keywords.lisp | 2 ++ src/parsers/command-mssql.lisp | 1 + src/parsers/command-mysql.lisp | 1 + src/parsers/command-options.lisp | 6 ++++++ src/parsers/command-sqlite.lisp | 1 + src/pgsql/schema.lisp | 9 +++++++-- src/sources/common/api.lisp | 1 + src/sources/common/db-methods.lisp | 4 +++- src/sources/common/md-methods.lisp | 3 +++ test/sakila.load | 3 ++- 15 files changed, 43 insertions(+), 5 deletions(-) diff --git a/pgloader.1 b/pgloader.1 index d3182b4..6c24293 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -1,7 +1,7 @@ .\" generated with Ronn/v0.7.3 .\" http://github.com/rtomayko/ronn/tree/0.7.3 . -.TH "PGLOADER" "1" "March 2016" "ff" "" +.TH "PGLOADER" "1" "April 2016" "ff" "" . .SH "NAME" \fBpgloader\fR \- PostgreSQL data loader @@ -498,6 +498,9 @@ With \fBworkers = 8, concurrency = 1\fR, we then are able to work on several uni .P As the \fBCREATE INDEX\fR threads started by pgloader are only waiting until PostgreSQL is done with the real work, those threads are \fINOT\fR counted into the concurrency levels as detailed here\. . +.P +By default, pgloader starts as many \fBCREATE INDEX\fR threads as the maximum number of indexes found on any single table of your source schema\. 
It is possible to set the \fBmax parallel create index\fR \fIWITH\fR option to another number in case there are just too many of them to create\. +. .SH "SOURCE FORMATS" pgloader supports the following input formats: . @@ -635,6 +638,9 @@ In addition, the following settings are available: .IP "\(bu" 4 \fIconcurrency = C\fR . +.IP "\(bu" 4 +\fImax parallel create index = I\fR +. .IP "" 0 . .IP diff --git a/pgloader.1.md b/pgloader.1.md index 86d044d..4c10bf6 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -449,6 +449,11 @@ As the `CREATE INDEX` threads started by pgloader are only waiting until PostgreSQL is done with the real work, those threads are *NOT* counted into the concurrency levels as detailed here. +By default, pgloader starts as many `CREATE INDEX` threads as the maximum +number of indexes found on any single table of your source schema. It is +possible to set the `max parallel create index` *WITH* option to another +number in case there are just too many of them to create. + ## SOURCE FORMATS pgloader supports the following input formats: @@ -568,6 +573,7 @@ Some clauses are common to all commands: - *workers = W* - *concurrency = C* + - *max parallel create index = I* See section A NOTE ABOUT PARALLELISM for more details. 
diff --git a/src/parsers/command-copy.lisp b/src/parsers/command-copy.lisp index 2ab5559..997522f 100644 --- a/src/parsers/command-copy.lisp +++ b/src/parsers/command-copy.lisp @@ -38,6 +38,7 @@ option-batch-rows option-batch-size option-batch-concurrency + option-max-parallel-create-index option-truncate option-drop-indexes option-disable-triggers diff --git a/src/parsers/command-csv.lisp b/src/parsers/command-csv.lisp index 75271b9..2dec5e2 100644 --- a/src/parsers/command-csv.lisp +++ b/src/parsers/command-csv.lisp @@ -108,6 +108,7 @@ option-batch-rows option-batch-size option-batch-concurrency + option-max-parallel-create-index option-truncate option-disable-triggers option-drop-indexes diff --git a/src/parsers/command-fixed.lisp b/src/parsers/command-fixed.lisp index dc25809..92a2cb7 100644 --- a/src/parsers/command-fixed.lisp +++ b/src/parsers/command-fixed.lisp @@ -48,6 +48,7 @@ option-batch-rows option-batch-size option-batch-concurrency + option-max-parallel-create-index option-truncate option-drop-indexes option-disable-triggers diff --git a/src/parsers/command-keywords.lisp b/src/parsers/command-keywords.lisp index 375eb75..a4c2e2a 100644 --- a/src/parsers/command-keywords.lisp +++ b/src/parsers/command-keywords.lisp @@ -54,6 +54,8 @@ (def-keyword-rule "rows") (def-keyword-rule "size") (def-keyword-rule "concurrency") + (def-keyword-rule "max") + (def-keyword-rule "parallel") (def-keyword-rule "reject") (def-keyword-rule "file") (def-keyword-rule "log") diff --git a/src/parsers/command-mssql.lisp b/src/parsers/command-mssql.lisp index 4a792fe..a9688d0 100644 --- a/src/parsers/command-mssql.lisp +++ b/src/parsers/command-mssql.lisp @@ -19,6 +19,7 @@ option-batch-rows option-batch-size option-batch-concurrency + option-max-parallel-create-index option-truncate option-disable-triggers option-data-only diff --git a/src/parsers/command-mysql.lisp b/src/parsers/command-mysql.lisp index 51eb53e..5f6fb72 100644 --- a/src/parsers/command-mysql.lisp +++ 
b/src/parsers/command-mysql.lisp @@ -12,6 +12,7 @@ option-batch-rows option-batch-size option-batch-concurrency + option-max-parallel-create-index option-truncate option-disable-triggers option-data-only diff --git a/src/parsers/command-options.lisp b/src/parsers/command-options.lisp index 88c9e35..54d6b25 100644 --- a/src/parsers/command-options.lisp +++ b/src/parsers/command-options.lisp @@ -37,6 +37,12 @@ (bind (((_ _ nb) concurrency)) (cons :concurrency (parse-integer (text nb)))))) +(defrule option-max-parallel-create-index + (and kw-max kw-parallel kw-create kw-index equal-sign + (+ (digit-char-p character))) + (:lambda (opt) + (cons :max-parallel-create-index (parse-integer (text (sixth opt)))))) + (defrule option-batch-rows (and kw-batch kw-rows equal-sign (+ (digit-char-p character))) (:lambda (batch-rows) diff --git a/src/parsers/command-sqlite.lisp b/src/parsers/command-sqlite.lisp index 967ddf9..d3d8f68 100644 --- a/src/parsers/command-sqlite.lisp +++ b/src/parsers/command-sqlite.lisp @@ -18,6 +18,7 @@ load database option-batch-rows option-batch-size option-batch-concurrency + option-max-parallel-create-index option-truncate option-disable-triggers option-data-only diff --git a/src/pgsql/schema.lisp b/src/pgsql/schema.lisp index e72829a..6866ac5 100644 --- a/src/pgsql/schema.lisp +++ b/src/pgsql/schema.lisp @@ -473,14 +473,19 @@ (with-stats-collection ("drop indexes" :section section) (drop-indexes section table))))))) -(defun create-indexes-again (target table &key (section :post) drop-indexes) +(defun create-indexes-again (target table + &key + max-parallel-create-index + (section :post) + drop-indexes) "Create the indexes that we dropped previously." (when (and (table-index-list table) drop-indexes) (let* ((*preserve-index-names* t) ;; we get the list of indexes from PostgreSQL catalogs, so don't ;; question their spelling, just quote them. 
(*identifier-case* :quote) - (idx-kernel (make-kernel (count-indexes table))) + (idx-kernel (make-kernel (or max-parallel-create-index + (count-indexes table)))) (idx-channel (let ((lp:*kernel* idx-kernel)) (lp:make-channel)))) (let ((pkeys diff --git a/src/sources/common/api.lisp b/src/sources/common/api.lisp index c990bb3..7dc4b72 100644 --- a/src/sources/common/api.lisp +++ b/src/sources/common/api.lisp @@ -79,6 +79,7 @@ &key worker-count concurrency + max-parallel-create-index truncate data-only schema-only diff --git a/src/sources/common/db-methods.lisp b/src/sources/common/db-methods.lisp index 599ee64..a865069 100644 --- a/src/sources/common/db-methods.lisp +++ b/src/sources/common/db-methods.lisp @@ -139,6 +139,7 @@ (on-error-stop *on-error-stop*) (worker-count 4) (concurrency 1) + max-parallel-create-index (truncate nil) (disable-triggers nil) (data-only nil) @@ -178,7 +179,8 @@ (max-indexes (when create-indexes (max-indexes-per-table catalog))) (idx-kernel (when (and max-indexes (< 0 max-indexes)) - (make-kernel max-indexes))) + (make-kernel (or max-parallel-create-index + max-indexes)))) (idx-channel (when idx-kernel (let ((lp:*kernel* idx-kernel)) (lp:make-channel))))) diff --git a/src/sources/common/md-methods.lisp b/src/sources/common/md-methods.lisp index 5ec8e8b..6d670c8 100644 --- a/src/sources/common/md-methods.lisp +++ b/src/sources/common/md-methods.lisp @@ -76,6 +76,8 @@ disable-triggers drop-indexes + max-parallel-create-index + ;; generic API, but ignored here (worker-count 4) (concurrency 1) @@ -146,4 +148,5 @@ ;; re-create the indexes from the target table entry (create-indexes-again (target-db copy) (target copy) + :max-parallel-create-index max-parallel-create-index :drop-indexes drop-indexes)) diff --git a/test/sakila.load b/test/sakila.load index ee85b7a..6f0f8e6 100644 --- a/test/sakila.load +++ b/test/sakila.load @@ -7,7 +7,8 @@ load database -- WITH batch rows = 10000 - WITH concurrency = 1, workers = 6 + WITH concurrency = 1, 
workers = 6, + max parallel create index = 4 SET maintenance_work_mem to '128MB', work_mem to '12MB',