Implement support for non-btree indexes (e.g. MySQL spatial keys).

When pgloader fetches the index list from a source database, it doesn't
fetch information about access methods for the indexes: I don't even know if
the overlap in between index access methods from one RDMBS to another covers
more than just btree...

It could happen that MySQL indexes a "geometry" column tho. This datatype is
converted automatically to "point" by pgloader, which is good. But the index
creation would fail with the following error message:

  Database error 42704: data type point has no default operator class for access method "btree"

In this patch when setting up the target schema we issue a PostgreSQL
catalog query to dynamically list those datatypes without btree support and
fetch their opclasses, with an hard-coded preference to GiST, then GIN, so
as to be able to automatically use the proper access method when btree isn't
available. And now pgloader transparently issues the proper statement:

  CREATE INDEX idx_168468_idx_location ON pagila.address USING gist(location);

Currently this exploration is limited to indexes with a single column. To
implement the general case we would need a more complex lookup: we would
have to find the intersection of all the supported access methods for all
involved columns.

Of course we might need to do that someday. One step at a time is plenty
good enough tho.
This commit is contained in:
Dimitri Fontaine 2017-07-06 00:42:43 +02:00
parent 8405c331a9
commit 26d372bca3
5 changed files with 55 additions and 3 deletions

View File

@ -71,6 +71,7 @@
#:catalog-name
#:catalog-schema-list
#:catalog-types-without-btree
#:schema-name
#:schema-catalog
@ -410,6 +411,7 @@
;; postgresql identifiers
#:list-reserved-keywords
#:list-typenames-without-btree-support
;; postgresql user provided gucs
#:sanitize-user-gucs

View File

@ -338,6 +338,29 @@
;;;
;;; PostgreSQL version specific support, that we get once connected
;;;
(defun list-typenames-without-btree-support ()
"Fetch PostgresQL data types without btree support, so that it's possible
to later CREATE INDEX ... ON ... USING gist(...), or even something else
than gist. "
(loop :for (typename access-methods) :in
(pomo:query "
select typname,
array_agg(amname order by amname <> 'gist', amname <> 'gin')
from pg_type
join pg_opclass on pg_opclass.opcintype = pg_type.oid
join pg_am on pg_am.oid = pg_opclass.opcmethod
where substring(typname from 1 for 1) <> '_'
and not exists
(
select amname
from pg_am am
join pg_opclass c on am.oid = c.opcmethod
join pg_type t on c.opcintype = t.oid
where amname = 'btree' and t.oid = pg_type.oid
)
group by typname;")
:collect (cons typename access-methods)))
(defun list-reserved-keywords (pgconn)
"Connect to PostgreSQL DBNAME and fetch reserved keywords."
(handler-case

View File

@ -153,7 +153,8 @@
(build-identifier "_"
"idx"
(table-oid (index-table index))
(index-name index)))))
(index-name index))))
(access-method (index-access-method index)))
(cond
((or (index-primary index)
(and (index-condef index) (index-unique index)))
@ -185,10 +186,11 @@
(t
(or (index-sql index)
(format stream
"CREATE~:[~; UNIQUE~] INDEX ~a ON ~a (~{~a~^, ~})~@[ WHERE ~a~];"
"CREATE~:[~; UNIQUE~] INDEX ~a ON ~a ~@[USING ~a~](~{~a~^, ~})~@[ WHERE ~a~];"
(index-unique index)
index-name
(format-table-name table)
access-method
(index-columns index)
(index-filter index)))))))
@ -214,6 +216,28 @@
(format stream "DROP INDEX~:[~; IF EXISTS~] ~@[~s.~]~s~@[ CASCADE~];"
if-exists schema-name index-name cascade)))))
(defun index-access-method (index)
"Compute PostgreSQL access method for index. If defaults to btree, but
some types such as POINTS or BOX have no support for btree. If a MySQL
point column has an index in MySQL, then create a GiST index for it in
PostgreSQL."
(when (= 1 (length (index-columns index)))
;; we only process single-index columns at the moment, which is a simpler
;; problem space and usefull enough to get started.
(let* ((idx-cols (index-columns index))
(tbl-cols (table-column-list (index-table index)))
(idx-types (loop :for idx-col :in idx-cols
:collect (column-type-name
(find idx-col tbl-cols
:test #'string-equal
:key #'column-name))))
(nobtree (catalog-types-without-btree
(schema-catalog (table-schema (index-table index))))))
(let* ((idx-type (first idx-types))
(method (cdr (assoc idx-type nobtree :test #'string=))))
(when method
(aref method 0))))))
;;;
;;; Foreign Keys

View File

@ -27,6 +27,9 @@
(log-message :notice "Prepare PostgreSQL database.")
(with-pgsql-transaction (:pgconn (target-db copy))
(setf (catalog-types-without-btree catalog)
(list-typenames-without-btree-support))
(when create-schemas
(with-stats-collection ("Create Schemas" :section :pre
:use-result-as-read t

View File

@ -42,7 +42,7 @@
;;; Column structures details depend on the specific source type and are
;;; implemented in each source separately.
;;;
(defstruct catalog name schema-list)
(defstruct catalog name schema-list types-without-btree)
(defstruct schema source-name name catalog table-list view-list)
(defstruct table source-name name schema oid comment storage-parameter-list
;; field is for SOURCE