From 5e18cfd7d451bcd22f9d90142f2f98f8614fa323 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Mon, 21 Mar 2016 23:39:45 +0100 Subject: [PATCH] Implement support for partial indexes. MS SQL has a notion of a "filtered index" that matches the notion of a PostgreSQL partial index: the index only applies to the rows matching the index WHERE clause, or filter. The WHERE clause in both case are limited to simple expressions over a base table's row at a time, so we implement a limited WHERE clause parser for MS SQL filters and a transformation routine to rewrite the clause in PostgreSQL slang. In passing, we transform the filter constants using the same transformation functions as in the CAST rules, so that e.g. a MS SQL bit(1) value that got transformed into a PostgreSQL boolean is properly translated, as in the following example: MS SQL: "([deleted]=(0))" (that's from the catalogs) PostgreSQL: deleted = 'f' Of course the parser is still very badly tested, let's see what happens in the wild now. (Should) Fix #365. --- pgloader.asd | 4 +- src/package.lisp | 11 ++ src/pgsql/schema.lisp | 48 +++++- src/sources/common/db-methods.lisp | 5 +- src/sources/mssql/mssql-index-filters.lisp | 162 +++++++++++++++++++++ src/sources/mssql/mssql-schema.lisp | 8 +- 6 files changed, 228 insertions(+), 10 deletions(-) create mode 100644 src/sources/mssql/mssql-index-filters.lisp diff --git a/pgloader.asd b/pgloader.asd index 0bdf198..ea56386 100644 --- a/pgloader.asd +++ b/pgloader.asd @@ -190,7 +190,9 @@ :depends-on ("mssql-cast-rules")) (:file "mssql" :depends-on ("mssql-cast-rules" - "mssql-schema")))) + "mssql-schema")) + (:file "mssql-index-filters" + :depends-on ("mssql")))) (:module "mysql" :depends-on ("common") diff --git a/src/package.lisp b/src/package.lisp index 0631394..eb360fb 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -367,8 +367,12 @@ #:format-pgsql-drop-fkey #:drop-pgsql-fkeys #:create-pgsql-fkeys + #:pgsql-index + #:pgsql-index-filter #:make-pgsql-index #:index-table-name + #:translate-index-filter + #:process-index-definitions #:format-pgsql-create-index #:create-indexes-in-kernel #:set-table-oids @@ -680,6 +684,13 @@ #:copy-database #:list-tables)) +(defpackage #:pgloader.mssql.index-filter + (:use #:cl #:esrap #:pgloader.utils #:pgloader.mssql) + (:import-from #:pgloader.pgsql + #:pgsql-index + #:pgsql-index-filter + #:translate-index-filter)) + (defpackage #:pgloader.syslog (:use #:cl #:pgloader.params #:pgloader.utils) (:import-from #:pgloader.pgsql diff --git a/src/pgsql/schema.lisp b/src/pgsql/schema.lisp index b95748e..3dcd095 100644 --- a/src/pgsql/schema.lisp +++ b/src/pgsql/schema.lisp @@ -257,7 +257,7 @@ ;; the struct is used both for supporting new index creation from non ;; PostgreSQL system and for drop/create indexes when using the 'drop ;; indexes' option (in CSV mode and the like) - name schema table-oid primary unique columns sql conname condef) + name schema table-oid primary unique columns sql conname condef filter) (defgeneric format-pgsql-create-index (table index) (:documentation @@ -267,6 +267,16 @@ (:documentation "Return the PostgreSQL command to drop an Index.")) +(defgeneric translate-index-filter (table index sql-dialect) + (:documentation + "Translate the filter clause of INDEX in PostgreSQL slang.")) + +(defmethod translate-index-filter ((table table) + (index pgsql-index) + (sql-dialect t)) + "Implement a default facility that does nothing." + nil) + (defmethod format-pgsql-create-index ((table table) (index pgsql-index)) "Generate the PostgreSQL statement list to rebuild a Foreign Key" (let* ((index-name (if (and *preserve-index-names* @@ -286,11 +296,13 @@ ;; ensure good concurrency here, don't take the ACCESS EXCLUSIVE ;; LOCK on the table before we have the index done already (or (pgsql-index-sql index) - (format nil "CREATE UNIQUE INDEX ~@[~a.~]~a ON ~a (~{~a~^, ~});" + (format nil + "CREATE UNIQUE INDEX ~@[~a.~]~a ON ~a (~{~a~^, ~})~@[ WHERE ~a~];" (pgsql-index-schema index) index-name (format-table-name table) - (pgsql-index-columns index))) + (pgsql-index-columns index) + (pgsql-index-filter index))) (format nil ;; don't use the index schema name here, PostgreSQL doesn't ;; like it, might be implicit from the table's schema @@ -308,12 +320,14 @@ (t (or (pgsql-index-sql index) - (format nil "CREATE~:[~; UNIQUE~] INDEX ~@[~a.~]~a ON ~a (~{~a~^, ~});" + (format nil + "CREATE~:[~; UNIQUE~] INDEX ~@[~a.~]~a ON ~a (~{~a~^, ~})~@[ WHERE ~a~];" (pgsql-index-unique index) (pgsql-index-schema index) index-name (format-table-name table) - (pgsql-index-columns index))))))) + (pgsql-index-columns index) + (pgsql-index-filter index))))))) (defmethod format-pgsql-drop-index ((table table) (index pgsql-index)) "Generate the PostgreSQL statement to DROP the index." @@ -330,6 +344,30 @@ (t (format nil "DROP INDEX ~@[~a.~]~a;" schema-name index-name))))) + +;;; +;;; API to rewrite index WHERE clauses (filter) +;;; +(defgeneric process-index-definitions (object &key sql-dialect) + (:documentation "Rewrite all indexes filters in given catalog OBJECT.")) + +(defmethod process-index-definitions ((catalog catalog) &key sql-dialect) + "Rewrite all index filters in CATALOG." + (loop :for schema :in (catalog-schema-list catalog) + :do (process-index-definitions schema :sql-dialect sql-dialect))) + +(defmethod process-index-definitions ((schema schema) &key sql-dialect) + "Rewrite all index filters in CATALOG." + (loop :for table :in (schema-table-list schema) + :do (process-index-definitions table :sql-dialect sql-dialect))) + +(defmethod process-index-definitions ((table table) &key sql-dialect) + "Rewrite all index filter in TABLE." + (loop :for index :in (table-index-list table) + :do (let ((pg-filter (translate-index-filter table index sql-dialect))) + (log-message :info "tranlate-index-filter: ~s" pg-filter) + (setf (pgsql-index-filter index) pg-filter)))) + ;;; ;;; Parallel index building. ;;; diff --git a/src/sources/common/db-methods.lisp b/src/sources/common/db-methods.lisp index 105d177..4ad39e2 100644 --- a/src/sources/common/db-methods.lisp +++ b/src/sources/common/db-methods.lisp @@ -169,6 +169,9 @@ ;; cast the catalog into something PostgreSQL can work on (cast catalog) + ;; support code for index filters (where clauses) + (process-index-definitions catalog :sql-dialect (class-name (class-of copy))) + ;; if asked, now alter the catalog with given rules: the alter-table ;; keyword parameter actually contains a set of alter table rules. (when alter-table @@ -272,7 +275,7 @@ :use-result-as-read t :use-result-as-rows t) (loop :for count :below (count-indexes catalog) - :do (lp:receive-result idx-channel)) + :do (lp:receive-result idx-channel)) (lp:end-kernel :wait t) (count-indexes catalog)))) diff --git a/src/sources/mssql/mssql-index-filters.lisp b/src/sources/mssql/mssql-index-filters.lisp new file mode 100644 index 0000000..558fc00 --- /dev/null +++ b/src/sources/mssql/mssql-index-filters.lisp @@ -0,0 +1,162 @@ +;;; +;;; Tools to translate MS SQL index definitions into PostgreSQL +;;; CREATE INDEX ... WHERE ... +;;; clauses. +;;; + +(in-package #:pgloader.mssql.index-filter) + +(defmethod translate-index-filter ((table table) + (index pgsql-index) + (sql-dialect (eql 'copy-mssql))) + "Transform given MS SQL index filter to PostgreSQL slang." + (labels ((process-expr (expression) + (destructuring-bind (operator identifier &optional argument) + expression + (let* ((pg-id (apply-identifier-case identifier)) + (col (find pg-id + (table-column-list table) + :key #'column-name + :test #'string=))) + (assert (not (null col))) + (list operator pg-id (when argument + (funcall (column-transform col) + argument))))))) + (when (pgsql-index-filter index) + (let* ((raw-expr (parse-index-filter-clause (pgsql-index-filter index))) + (pg-expr + (loop :for node :in raw-expr + :collect (typecase node + (string node) ; that's "and" / "or" + (list (process-expr node)))))) + ;; now reformat the expression into SQL + (with-output-to-string (s) + (loop :for node :in pg-expr + :do (typecase node + (string (format s " ~a " node)) + (list + (destructuring-bind (op id &optional arg) node + (format s "~a ~a~@[ '~a'~]" id op arg)))))))))) + +(defun parse-index-filter-clause (filter) + "Parse the filter clause for a MS SQL index, see + https://technet.microsoft.com/en-us/library/cc280372.aspx" + (parse 'mssql-index-where-clause filter)) + + +;;; +;;; Esrap parser for the index WHERE clause +;;; +;;; Examples: +;;; "([deleted]=(0))" +;;; EndDate IS NOT NULL +;;; EndDate IN ('20000825', '20000908', '20000918') +;;; EndDate IS NOT NULL AND ComponentID = 5 AND StartDate > '01/01/2008' +;;; +(defrule whitespace (+ (or #\Space #\Tab #\Newline)) (:constant #\Space)) + +(defrule punct (or #\, #\- #\_) (:text t)) + +(defrule namestring (and (or #\_ (alpha-char-p character)) + (* (or (alpha-char-p character) + (digit-char-p character) + punct))) + (:text t)) + +(defrule mssql-bracketed-identifier (and "[" namestring "]") + (:lambda (id) (text (second id)))) + +(defrule mssql-identifier (or mssql-bracketed-identifier namestring)) + +(defrule = "=" (:constant :=)) +(defrule <= "<=" (:constant :<=)) +(defrule >= ">=" (:constant :>=)) +(defrule < "<" (:constant :<)) +(defrule > ">" (:constant :>)) +(defrule <> "<>" (:constant :<>)) +(defrule != "!=" (:constant :<>)) + +(defrule mssql-operator (and (? whitespace) (or = <= < >= > <> !=)) + (:lambda (op) (second op))) + +(defrule number (+ (digit-char-p character)) (:text t)) +(defrule quoted (and "'" (+ (not "'")) "'") (:lambda (q) (text (second q)))) + +(defrule mssql-constant-parens (and (? whitespace) + "(" + (or number quoted) + ")") + (:function third)) + +(defrule mssql-constant-no-parens (and (? whitespace) (or number quoted)) + (:function second)) + +(defrule mssql-constant (or mssql-constant-parens mssql-constant-no-parens)) + +(defrule mssql-is-not-null (and (? whitespace) + (~ "is") whitespace + (~ "not") whitespace + (~ "null")) + (:constant :is-not-null)) + +(defrule mssql-is-null (and (? whitespace) + (~ "is") whitespace + (~ "null")) + (:constant :is-null)) + +(defrule mssql-where-is-null (and mssql-identifier (or mssql-is-null + mssql-is-not-null)) + (:lambda (is-null) (list (second is-null) (first is-null)))) + +(defrule mssql-where-id-op-const + (and mssql-identifier mssql-operator mssql-constant) + (:lambda (op) (list (second op) (first op) (third op)))) + +(defrule mssql-where-const-op-id + (and mssql-constant mssql-operator mssql-identifier) + ;; always put identifier first + (:lambda (op) (list (second op) (third op) (first op)))) + +(defrule mssql-where-op (or mssql-where-id-op-const + mssql-where-const-op-id)) + +(defrule another-constant (and "," (? whitespace) mssql-constant) + (:function third)) + +(defrule mssql-in-list (and "(" mssql-constant (* another-constant) ")") + (:lambda (in) (list* (second in) (third in)))) + +(defrule mssql-where-in (and (? whitespace) (~ "in") mssql-in-list) + (:function third)) + +(defrule mssql-where-between (and (? whitespace) + mssql-identifier + (~ "between") + mssql-constant + (~ "and") + mssql-constant) + (:function rest)) + +(defrule mssql-index-filter (and (? "(") + (? whitespace) + (or mssql-where-op + mssql-where-is-null + mssql-where-in + mssql-where-between) + (? whitespace) + (? ")")) + (:function third)) + +(defrule another-index-filter (and (? whitespace) + (or (~ "and") (~ "or")) + (? whitespace) + mssql-index-filter) + (:lambda (another) (list (second another) (fourth another)))) + +(defrule mssql-index-where-clause (and mssql-index-filter + (* another-index-filter)) + (:lambda (where) + (list* (first where) + (loop :for (logical-op filter) :in (second where) + :collect logical-op + :collect filter)))) diff --git a/src/sources/mssql/mssql-schema.lisp b/src/sources/mssql/mssql-schema.lisp index da49202..02ec254 100644 --- a/src/sources/mssql/mssql-schema.lisp +++ b/src/sources/mssql/mssql-schema.lisp @@ -156,14 +156,15 @@ order by c.table_schema, c.table_name, c.ordinal_position" (defun list-all-indexes (catalog &key including excluding) "Get the list of MSSQL index definitions per table." (loop - :for (schema-name table-name index-name col unique pkey) + :for (schema-name table-name index-name col unique pkey filter) :in (mssql-query (format nil " select schema_name(schema_id) as SchemaName, o.name as TableName, REPLACE(i.name, '.', '_') as IndexName, co.[name] as ColumnName, i.is_unique, - i.is_primary_key + i.is_primary_key, + i.filter_definition from sys.indexes i join sys.objects o on i.object_id = o.object_id @@ -199,7 +200,8 @@ order by SchemaName, (pg-index (make-pgsql-index :name index-name :primary (= pkey 1) :unique (= unique 1) - :columns nil)) + :columns nil + :filter filter)) (index (maybe-add-index table index-name pg-index :key #'pgloader.pgsql::pgsql-index-name))) (push-to-end col (pgloader.pgsql::pgsql-index-columns index)))