From a4983130748ebab67034cba1de308061ad804e16 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Thu, 14 Sep 2017 15:40:34 +0200 Subject: [PATCH] Implement support for MySQL FULLTEXT indexes. PostgreSQL btree indexes are limited in the size of the values they can index: values must fit in an index page (8kB). So when porting a MySQL full text index over full documents, we might run into an error like the following: index row size 2872 exceeds maximum 2712 for index "idx_5199509_search" To fix, query MySQL for the index type which is FULLTEXT rather than BTREE in those cases, and port it over to a PostgreSQL Full Text index with a hard-coded 'simple' configuration, such as the following test case: CREATE INDEX idx_75421_search ON mysql.fcm_batches USING gin(to_tsvector('simple', raw_payload)); Of course users might want to use a better configuration, including a proper dictionary for the documents. When using PostgreSQL each document may have its own configuration attached and yet they can all get indexed into the same index, so that's a task for the application developers, not for pgloader. In passing, fix the list-typenames-without-btree-support.sql query to return separate entries for each index type rather than an {array,representation} of the result, as Postmodern won't turn the PostgreSQL array into a Common Lisp array by default. I keep wondering how it worked before. Fix #569. 
--- src/package.lisp | 1 + src/pgsql/pgsql-ddl.lisp | 63 +++++++++++-------- .../list-typenames-without-btree-support.sql | 2 +- src/sources/mysql/mysql-schema.lisp | 3 +- src/sources/mysql/sql/list-all-indexes.sql | 3 +- src/utils/catalog.lisp | 3 +- 6 files changed, 45 insertions(+), 30 deletions(-) diff --git a/src/package.lisp b/src/package.lisp index 1aa7217..813614f 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -109,6 +109,7 @@ #:column-extra #:index-name + #:index-type #:index-oid #:index-schema #:index-table diff --git a/src/pgsql/pgsql-ddl.lisp b/src/pgsql/pgsql-ddl.lisp index a6f1b2a..45b123f 100644 --- a/src/pgsql/pgsql-ddl.lisp +++ b/src/pgsql/pgsql-ddl.lisp @@ -163,8 +163,7 @@ (build-identifier "_" "idx" (table-oid (index-table index)) - (index-name index)))) - (access-method (index-access-method index))) + (index-name index))))) (cond ((or (index-primary index) (and (index-condef index) (index-unique index))) @@ -195,14 +194,16 @@ (t (or (index-sql index) - (format stream - "CREATE~:[~; UNIQUE~] INDEX ~a ON ~a ~@[USING ~a~](~{~a~^, ~})~@[ WHERE ~a~];" - (index-unique index) - index-name - (format-table-name table) - access-method - (index-columns index) - (index-filter index))))))) + (multiple-value-bind (access-method expression) + (index-access-method index) + (format stream + "CREATE~:[~; UNIQUE~] INDEX ~a ON ~a ~@[USING ~a~](~{~a~^, ~})~@[ WHERE ~a~];" + (index-unique index) + index-name + (format-table-name table) + access-method + (or expression (index-columns index)) + (index-filter index)))))))) (defmethod format-drop-sql ((index index) &key (stream nil) cascade if-exists) (let* ((schema-name (schema-name (index-schema index))) @@ -225,22 +226,32 @@ point column has an index in MySQL, then create a GiST index for it in PostgreSQL." (when (= 1 (length (index-columns index))) - ;; we only process single-index columns at the moment, which is a simpler - ;; problem space and usefull enough to get started. 
- (let* ((idx-cols (index-columns index)) - (tbl-cols (table-column-list (index-table index))) - (idx-types (loop :for idx-col :in idx-cols - :collect (column-type-name - (find idx-col tbl-cols - :test #'string-equal - :key #'column-name)))) - (nobtree (catalog-types-without-btree - (schema-catalog (table-schema (index-table index)))))) - (let* ((idx-type (first idx-types)) - (method (when (stringp idx-type) - (cdr (assoc idx-type nobtree :test #'string=))))) - (when method - (aref method 0)))))) + (cond ((string= "FULLTEXT" (index-type index)) + ;; we have a MySQL Full Text index, so we create a GIN index + (values "gin" + (list + (format nil "to_tsvector('simple', ~a)" + (first (index-columns index)))))) + + (t + ;; we only process single-index columns at the moment, which is a + ;; simpler problem space and usefull enough to get started. + (let* ((idx-cols (index-columns index)) + (tbl-cols (table-column-list (index-table index))) + (idx-types (loop :for idx-col :in idx-cols + :collect (column-type-name + (find idx-col tbl-cols + :test #'string-equal + :key #'column-name)))) + (nobtree (catalog-types-without-btree + (schema-catalog (table-schema (index-table index)))))) + (let* ((idx-type (first idx-types)) + (method (when (stringp idx-type) + (cdr (assoc idx-type nobtree :test #'string=))))) + (when method + (values method idx-cols))))) + (t + (values))))) ;;; diff --git a/src/pgsql/sql/list-typenames-without-btree-support.sql b/src/pgsql/sql/list-typenames-without-btree-support.sql index bd55fe0..5819100 100644 --- a/src/pgsql/sql/list-typenames-without-btree-support.sql +++ b/src/pgsql/sql/list-typenames-without-btree-support.sql @@ -1,5 +1,5 @@ select typname, - array_agg(amname order by amname <> 'gist', amname <> 'gin') + (array_agg(amname order by amname <> 'gist', amname <> 'gin'))[1] from pg_type join pg_opclass on pg_opclass.opcintype = pg_type.oid join pg_am on pg_am.oid = pg_opclass.opcmethod diff --git a/src/sources/mysql/mysql-schema.lisp 
b/src/sources/mysql/mysql-schema.lisp index 0e79a31..8559ccd 100644 --- a/src/sources/mysql/mysql-schema.lisp +++ b/src/sources/mysql/mysql-schema.lisp @@ -116,7 +116,7 @@ excluding) "Get the list of MySQL index definitions per table." (loop - :for (table-name name non-unique cols) + :for (table-name name index-type non-unique cols) :in (mysql-query (format nil (sql "/mysql/list-all-indexes.sql") (db-name *connection*) @@ -131,6 +131,7 @@ (make-index :name name ; further processing is needed :schema schema :table table + :type index-type :primary (string= name "PRIMARY") :unique (string= "0" non-unique) :columns (mapcar diff --git a/src/sources/mysql/sql/list-all-indexes.sql b/src/sources/mysql/sql/list-all-indexes.sql index 2d3fd32..49b4218 100644 --- a/src/sources/mysql/sql/list-all-indexes.sql +++ b/src/sources/mysql/sql/list-all-indexes.sql @@ -6,7 +6,8 @@ -- filter-list-to-where-clause incuding -- excluding -- filter-list-to-where-clause excluding - SELECT table_name, index_name, sum(non_unique), + SELECT table_name, index_name, index_type, + sum(non_unique), cast(GROUP_CONCAT(column_name order by seq_in_index) as char) FROM information_schema.statistics WHERE table_schema = '~a' diff --git a/src/utils/catalog.lisp b/src/utils/catalog.lisp index bdf9b96..b2e24c7 100644 --- a/src/utils/catalog.lisp +++ b/src/utils/catalog.lisp @@ -81,7 +81,8 @@ ;;; install them again at proper times. ;;; (defstruct index - name oid schema table primary unique columns sql conname condef filter fk-deps) + name oid schema table type primary unique + columns sql conname condef filter fk-deps) ;;; ;;; Triggers and trigger procedures, no args support (yet?)