Implement support for partial indexes.

MS SQL has a notion of a "filtered index" that matches the notion of a
PostgreSQL partial index: the index only applies to the rows matching
the index WHERE clause, or filter.

The WHERE clause in both case are limited to simple expressions over a
base table's row at a time, so we implement a limited WHERE clause
parser for MS SQL filters and a transformation routine to rewrite the
clause in PostgreSQL slang.

In passing, we transform the filter constants using the same
transformation functions as in the CAST rules, so that e.g. a MS SQL
bit(1) value that got transformed into a PostgreSQL boolean is properly
translated, as in the following example:

  MS SQL:     "([deleted]=(0))"  (that's from the catalogs)
  PostgreSQL: deleted = 'f'

Of course the parser is still very badly tested, let's see what happens
in the wild now.

(Should) Fix #365.
This commit is contained in:
Dimitri Fontaine 2016-03-21 23:39:45 +01:00
parent 8fc9a474d9
commit 5e18cfd7d4
6 changed files with 228 additions and 10 deletions

View File

@ -190,7 +190,9 @@
:depends-on ("mssql-cast-rules"))
(:file "mssql"
:depends-on ("mssql-cast-rules"
"mssql-schema"))))
"mssql-schema"))
(:file "mssql-index-filters"
:depends-on ("mssql"))))
(:module "mysql"
:depends-on ("common")

View File

@ -367,8 +367,12 @@
#:format-pgsql-drop-fkey
#:drop-pgsql-fkeys
#:create-pgsql-fkeys
#:pgsql-index
#:pgsql-index-filter
#:make-pgsql-index
#:index-table-name
#:translate-index-filter
#:process-index-definitions
#:format-pgsql-create-index
#:create-indexes-in-kernel
#:set-table-oids
@ -680,6 +684,13 @@
#:copy-database
#:list-tables))
(defpackage #:pgloader.mssql.index-filter
(:use #:cl #:esrap #:pgloader.utils #:pgloader.mssql)
(:import-from #:pgloader.pgsql
#:pgsql-index
#:pgsql-index-filter
#:translate-index-filter))
(defpackage #:pgloader.syslog
(:use #:cl #:pgloader.params #:pgloader.utils)
(:import-from #:pgloader.pgsql

View File

@ -257,7 +257,7 @@
;; the struct is used both for supporting new index creation from non
;; PostgreSQL system and for drop/create indexes when using the 'drop
;; indexes' option (in CSV mode and the like)
name schema table-oid primary unique columns sql conname condef)
name schema table-oid primary unique columns sql conname condef filter)
(defgeneric format-pgsql-create-index (table index)
(:documentation
@ -267,6 +267,16 @@
(:documentation
"Return the PostgreSQL command to drop an Index."))
(defgeneric translate-index-filter (table index sql-dialect)
(:documentation
"Translate the filter clause of INDEX in PostgreSQL slang."))
(defmethod translate-index-filter ((table table)
(index pgsql-index)
(sql-dialect t))
"Implement a default facility that does nothing."
nil)
(defmethod format-pgsql-create-index ((table table) (index pgsql-index))
"Generate the PostgreSQL statement list to rebuild a Foreign Key"
(let* ((index-name (if (and *preserve-index-names*
@ -286,11 +296,13 @@
;; ensure good concurrency here, don't take the ACCESS EXCLUSIVE
;; LOCK on the table before we have the index done already
(or (pgsql-index-sql index)
(format nil "CREATE UNIQUE INDEX ~@[~a.~]~a ON ~a (~{~a~^, ~});"
(format nil
"CREATE UNIQUE INDEX ~@[~a.~]~a ON ~a (~{~a~^, ~})~@[ WHERE ~a~];"
(pgsql-index-schema index)
index-name
(format-table-name table)
(pgsql-index-columns index)))
(pgsql-index-columns index)
(pgsql-index-filter index)))
(format nil
;; don't use the index schema name here, PostgreSQL doesn't
;; like it, might be implicit from the table's schema
@ -308,12 +320,14 @@
(t
(or (pgsql-index-sql index)
(format nil "CREATE~:[~; UNIQUE~] INDEX ~@[~a.~]~a ON ~a (~{~a~^, ~});"
(format nil
"CREATE~:[~; UNIQUE~] INDEX ~@[~a.~]~a ON ~a (~{~a~^, ~})~@[ WHERE ~a~];"
(pgsql-index-unique index)
(pgsql-index-schema index)
index-name
(format-table-name table)
(pgsql-index-columns index)))))))
(pgsql-index-columns index)
(pgsql-index-filter index)))))))
(defmethod format-pgsql-drop-index ((table table) (index pgsql-index))
"Generate the PostgreSQL statement to DROP the index."
@ -330,6 +344,30 @@
(t
(format nil "DROP INDEX ~@[~a.~]~a;" schema-name index-name)))))
;;;
;;; API to rewrite index WHERE clauses (filter)
;;;
(defgeneric process-index-definitions (object &key sql-dialect)
(:documentation "Rewrite all indexes filters in given catalog OBJECT."))
(defmethod process-index-definitions ((catalog catalog) &key sql-dialect)
"Rewrite all index filters in CATALOG."
(loop :for schema :in (catalog-schema-list catalog)
:do (process-index-definitions schema :sql-dialect sql-dialect)))
(defmethod process-index-definitions ((schema schema) &key sql-dialect)
"Rewrite all index filters in CATALOG."
(loop :for table :in (schema-table-list schema)
:do (process-index-definitions table :sql-dialect sql-dialect)))
(defmethod process-index-definitions ((table table) &key sql-dialect)
"Rewrite all index filter in TABLE."
(loop :for index :in (table-index-list table)
:do (let ((pg-filter (translate-index-filter table index sql-dialect)))
(log-message :info "tranlate-index-filter: ~s" pg-filter)
(setf (pgsql-index-filter index) pg-filter))))
;;;
;;; Parallel index building.
;;;

View File

@ -169,6 +169,9 @@
;; cast the catalog into something PostgreSQL can work on
(cast catalog)
;; support code for index filters (where clauses)
(process-index-definitions catalog :sql-dialect (class-name (class-of copy)))
;; if asked, now alter the catalog with given rules: the alter-table
;; keyword parameter actually contains a set of alter table rules.
(when alter-table
@ -272,7 +275,7 @@
:use-result-as-read t
:use-result-as-rows t)
(loop :for count :below (count-indexes catalog)
:do (lp:receive-result idx-channel))
:do (lp:receive-result idx-channel))
(lp:end-kernel :wait t)
(count-indexes catalog))))

View File

@ -0,0 +1,162 @@
;;;
;;; Tools to translate MS SQL index definitions into PostgreSQL
;;; CREATE INDEX ... WHERE ...
;;; clauses.
;;;
(in-package #:pgloader.mssql.index-filter)
(defmethod translate-index-filter ((table table)
(index pgsql-index)
(sql-dialect (eql 'copy-mssql)))
"Transform given MS SQL index filter to PostgreSQL slang."
(labels ((process-expr (expression)
(destructuring-bind (operator identifier &optional argument)
expression
(let* ((pg-id (apply-identifier-case identifier))
(col (find pg-id
(table-column-list table)
:key #'column-name
:test #'string=)))
(assert (not (null col)))
(list operator pg-id (when argument
(funcall (column-transform col)
argument)))))))
(when (pgsql-index-filter index)
(let* ((raw-expr (parse-index-filter-clause (pgsql-index-filter index)))
(pg-expr
(loop :for node :in raw-expr
:collect (typecase node
(string node) ; that's "and" / "or"
(list (process-expr node))))))
;; now reformat the expression into SQL
(with-output-to-string (s)
(loop :for node :in pg-expr
:do (typecase node
(string (format s " ~a " node))
(list
(destructuring-bind (op id &optional arg) node
(format s "~a ~a~@[ '~a'~]" id op arg))))))))))
(defun parse-index-filter-clause (filter)
"Parse the filter clause for a MS SQL index, see
https://technet.microsoft.com/en-us/library/cc280372.aspx"
(parse 'mssql-index-where-clause filter))
;;;
;;; Esrap parser for the index WHERE clause
;;;
;;; Examples:
;;; "([deleted]=(0))"
;;; EndDate IS NOT NULL
;;; EndDate IN ('20000825', '20000908', '20000918')
;;; EndDate IS NOT NULL AND ComponentID = 5 AND StartDate > '01/01/2008'
;;;
(defrule whitespace (+ (or #\Space #\Tab #\Newline)) (:constant #\Space))
(defrule punct (or #\, #\- #\_) (:text t))
(defrule namestring (and (or #\_ (alpha-char-p character))
(* (or (alpha-char-p character)
(digit-char-p character)
punct)))
(:text t))
(defrule mssql-bracketed-identifier (and "[" namestring "]")
(:lambda (id) (text (second id))))
(defrule mssql-identifier (or mssql-bracketed-identifier namestring))
(defrule = "=" (:constant :=))
(defrule <= "<=" (:constant :<=))
(defrule >= ">=" (:constant :>=))
(defrule < "<" (:constant :<))
(defrule > ">" (:constant :>))
(defrule <> "<>" (:constant :<>))
(defrule != "!=" (:constant :<>))
(defrule mssql-operator (and (? whitespace) (or = <= < >= > <> !=))
(:lambda (op) (second op)))
(defrule number (+ (digit-char-p character)) (:text t))
(defrule quoted (and "'" (+ (not "'")) "'") (:lambda (q) (text (second q))))
(defrule mssql-constant-parens (and (? whitespace)
"("
(or number quoted)
")")
(:function third))
(defrule mssql-constant-no-parens (and (? whitespace) (or number quoted))
(:function second))
(defrule mssql-constant (or mssql-constant-parens mssql-constant-no-parens))
(defrule mssql-is-not-null (and (? whitespace)
(~ "is") whitespace
(~ "not") whitespace
(~ "null"))
(:constant :is-not-null))
(defrule mssql-is-null (and (? whitespace)
(~ "is") whitespace
(~ "null"))
(:constant :is-null))
(defrule mssql-where-is-null (and mssql-identifier (or mssql-is-null
mssql-is-not-null))
(:lambda (is-null) (list (second is-null) (first is-null))))
(defrule mssql-where-id-op-const
(and mssql-identifier mssql-operator mssql-constant)
(:lambda (op) (list (second op) (first op) (third op))))
(defrule mssql-where-const-op-id
(and mssql-constant mssql-operator mssql-identifier)
;; always put identifier first
(:lambda (op) (list (second op) (third op) (first op))))
(defrule mssql-where-op (or mssql-where-id-op-const
mssql-where-const-op-id))
(defrule another-constant (and "," (? whitespace) mssql-constant)
(:function third))
(defrule mssql-in-list (and "(" mssql-constant (* another-constant) ")")
(:lambda (in) (list* (second in) (third in))))
(defrule mssql-where-in (and (? whitespace) (~ "in") mssql-in-list)
(:function third))
(defrule mssql-where-between (and (? whitespace)
mssql-identifier
(~ "between")
mssql-constant
(~ "and")
mssql-constant)
(:function rest))
(defrule mssql-index-filter (and (? "(")
(? whitespace)
(or mssql-where-op
mssql-where-is-null
mssql-where-in
mssql-where-between)
(? whitespace)
(? ")"))
(:function third))
(defrule another-index-filter (and (? whitespace)
(or (~ "and") (~ "or"))
(? whitespace)
mssql-index-filter)
(:lambda (another) (list (second another) (fourth another))))
(defrule mssql-index-where-clause (and mssql-index-filter
(* another-index-filter))
(:lambda (where)
(list* (first where)
(loop :for (logical-op filter) :in (second where)
:collect logical-op
:collect filter))))

View File

@ -156,14 +156,15 @@ order by c.table_schema, c.table_name, c.ordinal_position"
(defun list-all-indexes (catalog &key including excluding)
"Get the list of MSSQL index definitions per table."
(loop
:for (schema-name table-name index-name col unique pkey)
:for (schema-name table-name index-name col unique pkey filter)
:in (mssql-query (format nil "
select schema_name(schema_id) as SchemaName,
o.name as TableName,
REPLACE(i.name, '.', '_') as IndexName,
co.[name] as ColumnName,
i.is_unique,
i.is_primary_key
i.is_primary_key,
i.filter_definition
from sys.indexes i
join sys.objects o on i.object_id = o.object_id
@ -199,7 +200,8 @@ order by SchemaName,
(pg-index (make-pgsql-index :name index-name
:primary (= pkey 1)
:unique (= unique 1)
:columns nil))
:columns nil
:filter filter))
(index (maybe-add-index table index-name pg-index
:key #'pgloader.pgsql::pgsql-index-name)))
(push-to-end col (pgloader.pgsql::pgsql-index-columns index)))