Implement resetting all sequences at the end of the streaming.

This commit is contained in:
Dimitri Fontaine 2013-02-26 17:06:26 +01:00
parent 8042dff854
commit f4ac2b67db
3 changed files with 58 additions and 4 deletions

View File

@ -421,8 +421,11 @@ GROUP BY table_name, index_name;" dbname)))
(let ((*standard-output* s) (*error-output* s))
(setf res ,@body)))
res))
(pgstate-incf ,state ,table-name :rows res :secs secs))
(report-pgtable-stats ,state ,table-name)))
(pgstate-incf ,state ,table-name :rows res :secs secs)
(report-pgtable-stats ,state ,table-name)
;; once those numbers are reporting, forget about them in the total
(pgstate-decf ,state ,table-name :rows res))))
(defun stream-database (dbname
&key
@ -469,6 +472,11 @@ GROUP BY table_name, index_name;" dbname)))
(with-silent-timing *state* dbname
(format nil "~:[~;DROP then ~]CREATE INDEXES" include-drop)
(pgsql-create-indexes dbname :include-drop include-drop)))
;; don't forget to reset sequences
(with-silent-timing *state* dbname "RESET SEQUENCES"
(pgloader.pgsql:reset-all-sequences dbname))
;; and report the total time spent on the operation
(report-pgstate-stats *state* "Total streaming time"))))

View File

@ -123,6 +123,7 @@
#:list-databases
#:list-tables
#:list-tables-cols
#:reset-all-sequences
#:execute
#:get-date-columns
#:format-row))

View File

@ -12,8 +12,8 @@
(defparameter *copy-batch-split* 5
"Number of batches in which to split a batch with bad data")
(defparameter *pgconn*
'("gdb" "none" "localhost" :port 5432)
(defvar *pgconn*
'("dim" "none" "localhost" :port 5432)
"Connection string to the local database")
;(setq *pgconn* '("dim" "none" "localhost" :port 54393))
@ -81,6 +81,51 @@ select relname, array_agg(case when typname in ('date', 'timestamptz')
")
collect (cons relname cols))))
(defun reset-all-sequences (dbname)
"Reset all sequences to the max value of the column they are attached to."
(pomo:with-connection
(get-connection-string dbname)
(pomo:with-transaction ()
;; have the processing all happen server-side
(pomo:execute "
DO $$
DECLARE
r record;
BEGIN
FOR r in
SELECT 'select '
|| trim(trailing ')'
from replace(pg_get_expr(d.adbin, d.adrelid),
'nextval', 'setval'))
|| ', (select max(' || a.attname || ') from only '
|| nspname || '.' || relname || '));' as sql
FROM pg_class c
JOIN pg_namespace n on n.oid = c.relnamespace
JOIN pg_attribute a on a.attrelid = c.oid
JOIN pg_attrdef d on d.adrelid = a.attrelid
and d.adnum = a.attnum
and a.atthasdef
WHERE relkind = 'r' and a.attnum > 0
and pg_get_expr(d.adbin, d.adrelid) ~ '^nextval'
LOOP
EXECUTE r.sql;
END LOOP;
END;
$$; ")
;; we still insist on returning how many sequences were reset
(pomo:query "
SELECT count(*)
FROM pg_class c
JOIN pg_namespace n on n.oid = c.relnamespace
JOIN pg_attribute a on a.attrelid = c.oid
JOIN pg_attrdef d on d.adrelid = a.attrelid
and d.adnum = a.attnum
and a.atthasdef
WHERE relkind = 'r' and a.attnum > 0
and pg_get_expr(d.adbin, d.adrelid) ~ '^nextval'"
:single))))
(defun get-date-columns (table-name pgsql-table-list)
"Given a PGSQL-TABLE-LIST as per function list-tables, return a list of
row numbers containing dates (those have to be reformated)"