pgloader/src/parsers/command-keywords.lisp
Dimitri Fontaine 6d66280fa5 Review parallelism and memory behavior.
The previous patch made format-vector-row allocate its memory in one go
rather than byte after byte with vector-push-extend. In this patch we review
our usage of batches and parallelism.

Now the reader pushes each row directly to the lparallel queue and writers
concurrently consume from it, cook batches in COPY format, and then send
that chunk of data down to PostgreSQL. When looking at runtime profiles, the
time spent writing in PostgreSQL is a fraction of the time spent reading
from MySQL, so we consider that the writing thread has enough time to do the
data munging without slowing us down.

The most interesting factor here is the memory behavior of pgloader, which
seems more stable than before, and easier to cope with for SBCL's GC.

Note that batch concurrency is no more, replaced by prefetch rows: the
reader thread no longer builds batches and the count of items in the reader
queue is now a number of rows, not of batches of them.

Anyway, with this patch in I can't reproduce the following issues:

Fixes #337, Fixes #420.
2017-06-27 23:10:33 +02:00

152 lines
4.4 KiB
Common Lisp

;;;
;;; Parse the pgloader commands grammar
;;;
(in-package :pgloader.parser)
;;;
;;; Keywords
;;;
(defmacro def-keyword-rule (keyword)
  "Expand into a DEFRULE form for the grammar keyword KEYWORD.

KEYWORD is the keyword text, e.g. \"load\". The generated rule is named
kw-<KEYWORD>; its expression is KEYWORD wrapped in a (~ ...) terminal with
an ignore-whitespace rule on each side, and its production is the constant
keyword symbol :<KEYWORD>.

Both symbols are built by reading formatted text with READ-FROM-STRING, so
the rule name is interned in whatever *PACKAGE* is current when the macro
is expanded."
  (flet ((read-symbol (control-string)
           ;; Only the primary value (the object read) is used by callers.
           (read-from-string (format nil control-string keyword))))
    (let ((rule (read-symbol "kw-~a"))
          (value (read-symbol ":~a")))
      `(defrule ,rule (and ignore-whitespace (~ ,keyword) ignore-whitespace)
         (:constant ',value)))))
;;; Define one kw-<keyword> rule per grammar keyword through
;;; def-keyword-rule.  The EVAL-WHEN lists all three situations, so the
;;; definitions are evaluated at compile time, at load time, and when the
;;; file is evaluated directly (presumably so rules compiled later can
;;; already refer to them -- confirm against the other parser files).
;;;
;;; Each keyword appears exactly once: a second def-keyword-rule for the
;;; same keyword would only redefine an identical rule.
(eval-when (:load-toplevel :compile-toplevel :execute)
  (def-keyword-rule "load")
  (def-keyword-rule "data")
  (def-keyword-rule "from")
  (def-keyword-rule "csv")
  (def-keyword-rule "dbf")
  (def-keyword-rule "ixf")
  (def-keyword-rule "fixed")
  (def-keyword-rule "copy")
  (def-keyword-rule "into")
  (def-keyword-rule "with")
  (def-keyword-rule "when")
  (def-keyword-rule "set")
  (def-keyword-rule "database")
  (def-keyword-rule "messages")
  (def-keyword-rule "matches")
  (def-keyword-rule "in")
  (def-keyword-rule "directory")
  (def-keyword-rule "registering")
  (def-keyword-rule "cast")
  (def-keyword-rule "column")
  (def-keyword-rule "target")
  (def-keyword-rule "columns")
  (def-keyword-rule "type")
  (def-keyword-rule "extra")
  (def-keyword-rule "include")
  (def-keyword-rule "drop")
  (def-keyword-rule "not")
  (def-keyword-rule "to")
  (def-keyword-rule "no")
  (def-keyword-rule "null")
  (def-keyword-rule "default")
  (def-keyword-rule "typemod")
  (def-keyword-rule "using")
  (def-keyword-rule "getenv")
  (def-keyword-rule "on")
  (def-keyword-rule "error")
  (def-keyword-rule "stop")
  (def-keyword-rule "parameters")
  ;; option for loading from a file
  (def-keyword-rule "workers")
  (def-keyword-rule "batch")
  (def-keyword-rule "rows")
  (def-keyword-rule "prefetch")
  (def-keyword-rule "size")
  (def-keyword-rule "concurrency")
  (def-keyword-rule "max")
  (def-keyword-rule "parallel")
  (def-keyword-rule "reject")
  (def-keyword-rule "file")
  (def-keyword-rule "log")
  (def-keyword-rule "level")
  (def-keyword-rule "encoding")
  (def-keyword-rule "timezone")
  (def-keyword-rule "decoding")
  (def-keyword-rule "truncate")
  (def-keyword-rule "disable")
  (def-keyword-rule "triggers")
  (def-keyword-rule "lines")
  (def-keyword-rule "having")
  (def-keyword-rule "fields")
  (def-keyword-rule "optionally")
  (def-keyword-rule "enclosed")
  (def-keyword-rule "by")
  (def-keyword-rule "escaped")
  (def-keyword-rule "terminated")
  (def-keyword-rule "escape")
  (def-keyword-rule "mode")
  (def-keyword-rule "nullif")
  (def-keyword-rule "blank")
  (def-keyword-rule "trim")
  (def-keyword-rule "both")
  (def-keyword-rule "left")
  (def-keyword-rule "right")
  (def-keyword-rule "whitespace")
  (def-keyword-rule "for")
  (def-keyword-rule "skip")
  (def-keyword-rule "header")
  (def-keyword-rule "if")
  (def-keyword-rule "as")
  (def-keyword-rule "blanks")
  (def-keyword-rule "date")
  (def-keyword-rule "format")
  (def-keyword-rule "keep")
  (def-keyword-rule "unquoted")
  (def-keyword-rule "delimiter")
  ;; option for MySQL imports
  (def-keyword-rule "schema")
  (def-keyword-rule "schemas")
  (def-keyword-rule "only")
  (def-keyword-rule "alter")
  (def-keyword-rule "create")
  (def-keyword-rule "rename")
  (def-keyword-rule "materialize")
  (def-keyword-rule "reset")
  (def-keyword-rule "table")
  (def-keyword-rule "name")
  (def-keyword-rule "names")
  (def-keyword-rule "tables")
  (def-keyword-rule "views")
  (def-keyword-rule "index")
  (def-keyword-rule "indexes")
  (def-keyword-rule "preserve")
  (def-keyword-rule "uniquify")
  (def-keyword-rule "sequences")
  (def-keyword-rule "foreign")
  (def-keyword-rule "keys")
  (def-keyword-rule "downcase")
  (def-keyword-rule "quote")
  (def-keyword-rule "identifiers")
  (def-keyword-rule "including")
  (def-keyword-rule "excluding")
  (def-keyword-rule "like")
  ;; option for loading from an archive
  (def-keyword-rule "archive")
  (def-keyword-rule "before")
  (def-keyword-rule "after")
  (def-keyword-rule "finally")
  (def-keyword-rule "and")
  (def-keyword-rule "do")
  (def-keyword-rule "execute")
  (def-keyword-rule "filename")
  (def-keyword-rule "filenames")
  (def-keyword-rule "matching")
  (def-keyword-rule "first")
  (def-keyword-rule "all"))
;;; Matches the literal text "auto_increment" followed by zero or more tab
;;; or space characters, and produces the constant :auto-increment.
;;; Unlike the rules generated by def-keyword-rule, this one uses a plain
;;; string terminal rather than (~ ...) and does not consume leading
;;; whitespace.
(defrule kw-auto-increment (and "auto_increment" (* (or #\Tab #\Space)))
(:constant :auto-increment))
;;; Accepts either spelling, "pgsql" or "postgresql", each as a (~ ...)
;;; terminal.  No (:constant ...) option is given and surrounding
;;; whitespace is not consumed by this rule.
(defrule kw-postgresql (or (~ "pgsql") (~ "postgresql")))
;;; Matches "mysql" as a (~ ...) terminal.  No (:constant ...) option is
;;; given and surrounding whitespace is not consumed by this rule.
(defrule kw-mysql (~ "mysql"))