Add a COPY command parser, using esrap.

This commit is contained in:
Dimitri Fontaine 2013-05-09 15:44:17 +02:00
parent 06bbc8cdda
commit 22246ccd2d
4 changed files with 193 additions and 1 deletions

View File

@ -53,6 +53,32 @@ Some notes about what I intend to be working on next.
- commands: `LOAD` and `INI` formats
- compat with `SQL*Loader` format
Here's an example of the grammar to consider:
COPY cluttered
FROM 'cluttered/cluttered.data'
(a, c newline escaped by \, b)
AS text
WITH field_sep = ^, field_count = 3;
LOAD foo
FROM 'path/to/file'
AS text
CASE WHEN 1:2 = "43"
THEN table(a, c)
SPEC (a sep ';',
b sep '=', -- field is not loaded
c sep ';')
WHEN 001:003 = "HDR"
THEN table(a, c)
SPEC (a, b, c)
WITH field_sep = ','
END
SET maintenance_work_mem TO '128 MB';
Pick one, or maybe have the two of them?
### error management
- error management with a local buffer (done)
@ -94,6 +120,20 @@ offer some other languages (cl-awk etc).
- user-defined columns (constants, functions of other rows)
- column re-ordering
Have a try at something approaching:
WITH data AS (
COPY FROM ...
RETURNING x, y
)
SELECT foo(x), bar(y)
FROM data
WHERE ...
A part of that needs to happen client-side, another part server-side, and
the grammar has to make it clear what happens where. Maybe add a WHERE
clause to the `COPY` or `LOAD` grammar for the client.
#### UI
- add a web controller with pretty monitoring

View File

@ -31,6 +31,11 @@
#:report-pgtable-stats
#:report-pgstate-stats))
;; Package for the pgloader command grammar.  It uses esrap (the rule
;; definitions) and pgloader.params in addition to CL.
;; NOTE(review): parse-load and parse-copy are exported here, but no
;; definition of either appears in the parser.lisp added by this commit --
;; confirm where they are (or will be) defined.
(defpackage #:pgloader.parser
(:use #:cl #:esrap #:pgloader.params)
(:export #:parse-load
#:parse-copy))
(defpackage #:pgloader.queue
(:use #:cl)
(:export #:map-pop-queue

144
parser.lisp Normal file
View File

@ -0,0 +1,144 @@
;;;
;;; Parse the pgloader commands grammar
;;;
(in-package :pgloader.parser)
;; Port number the connection-string rules fall back to when the parsed
;; text carries no explicit port.
(defparameter *default-postgresql-port* 5432)
;;
;; Some useful rules
;;
;; One or more space, tab or newline characters.  The matched text is
;; dropped: the rule always produces the symbol WHITESPACE.
;; Carriage returns are not part of the accepted set.
(defrule whitespace (+ (or #\space #\tab #\newline))
(:constant 'whitespace))
;; Optional whitespace: zero or more WHITESPACE matches.  Always produces
;; NIL, so callers only need to ignore the corresponding slot.
(defrule ignore-whitespace (* whitespace)
(:constant nil))
;; A single punctuation character -- comma, dash or underscore -- produced
;; as text.
;; NOTE(review): because the comma is accepted here, any rule built on PUNCT
;; will swallow commas too -- confirm that is intended.
(defrule punct (or #\, #\- #\_)
(:text t))
;; A bare name: one alphabetic character followed by any mix of alphabetic
;; characters, digits and PUNCT characters.  Produces the matched text as a
;; single string.
;; (The form previously ended with one unmatched closing parenthesis.)
(defrule namestring (and (alpha-char-p character)
                         (* (or (alpha-char-p character)
                                (digit-char-p character)
                                punct)))
  (:text t))
;; A NAMESTRING wrapped in single quotes.  Only the inner name is produced;
;; both quote characters are dropped.
(defrule quoted-namestring (and #\' namestring #\')
  (:lambda (matched)
    ;; matched is (open-quote name close-quote)
    (second matched)))
;; A name, either bare (NAMESTRING) or single-quoted (QUOTED-NAMESTRING).
;; The bare form is tried first.  Produces the name as text; for the quoted
;; form that is the name without its quotes, since QUOTED-NAMESTRING already
;; strips them.
(defrule name (or namestring quoted-namestring)
(:text t))
;; A NAME optionally preceded by whitespace.  Produces just the name.
;; (The form previously ended with two unmatched closing parentheses.)
(defrule trimmed-name (and ignore-whitespace name)
  (:destructure (whitespace name)
    (declare (ignore whitespace))
    name))
;;
;; Parse PostgreSQL database connection strings
;;
;; at postgresql://[user[:password]@][netloc][:port][/dbname][?param1=value1&...]
;;
;; http://www.postgresql.org/docs/9.2/static/libpq-connect.html#LIBPQ-CONNSTRING
;;
;; The ":port" part of a connection string.  Produces (:port N), where N is
;; the parsed integer, or *default-postgresql-port* when the colon is not
;; followed by any digit.
(defrule dsn-port (and ":" (* (digit-char-p character)))
  (:lambda (matched)
    ;; matched is (":" list-of-digit-characters)
    (let ((digits (second matched)))
      (list :port (if digits
                      (parse-integer (coerce digits 'string))
                      *default-postgresql-port*)))))
;; The "user[:password]@" part of a connection string.  Produces
;; (:user USERNAME :password PASSWORD); PASSWORD is NIL when the password is
;; absent or empty.
(defrule dsn-user-password (and namestring
                                (? (and ":" (? namestring)))
                                "@")
  (:destructure (username password at-sign)
    (declare (ignore at-sign))
    ;; password is NIL or a list such as (":" "secret")
    (list :user username :password (second password))))
;; A host name: one NAMESTRING, optionally followed by a dot and another
;; HOSTNAME (the rule recurses on itself).  Produces the whole match as a
;; single string.
(defrule hostname (and namestring (? (and "." hostname)))
(:text t))
;; The "host[:port]" part of a connection string.  Produces
;; (:host HOST :port PORT), with PORT falling back to
;; *default-postgresql-port* when no DSN-PORT was matched.
(defrule dsn-hostname (and hostname (? dsn-port))
  (:lambda (matched)
    (let ((host (first matched))
          (port-plist (second matched))) ; NIL or (:port N)
      (list* :host host
             (or port-plist
                 (list :port *default-postgresql-port*))))))
;; The "/dbname" part of a connection string.  Produces (:dbname NAME).
(defrule dsn-dbname (and "/" namestring)
  (:lambda (matched)
    ;; matched is ("/" dbname); the slash is dropped
    (list :dbname (second matched))))
;; A complete connection string:
;;
;;   postgresql://[user[:password]@][host[:port]]/dbname
;;
;; Produces a plist with the keys :user :password :host :port :dbname, always
;; in that order.  Missing parts default to NIL for the user and password,
;; "localhost" for the host and *default-postgresql-port* for the port.  The
;; port default previously repeated the literal 5432; it now uses the same
;; parameter as the DSN-PORT and DSN-HOSTNAME rules.
(defrule postgresql-connection-uri (and "postgresql://"
                                        (? dsn-user-password)
                                        (? dsn-hostname)
                                        dsn-dbname)
  (:lambda (uri)
    (destructuring-bind (&key user
                              password
                              (host "localhost")
                              (port *default-postgresql-port*)
                              dbname)
        ;; Skip the "postgresql://" prefix, (first uri); each remaining
        ;; element is a plist or NIL, so appending them yields one plist.
        (append (second uri) (third uri) (fourth uri))
      (list :user user
            :password password
            :host host
            :port port
            :dbname dbname))))
;; The "AT <connection-uri>" clause of a command.  Produces the connection
;; plist built by POSTGRESQL-CONNECTION-URI.
;;
;; The rule previously referenced CONNECTION-URI, which is not defined in
;; this file; it now uses POSTGRESQL-CONNECTION-URI.  The keyword is accepted
;; both as "at" and as "AT", the spelling used in the grammar comments.
(defrule target-dsn (and (or "at" "AT")
                         ignore-whitespace
                         postgresql-connection-uri)
  (:destructure (at whitespace uri)
    (declare (ignore at whitespace))
    uri))
;;
;; The main target parsing
;;
;; COPY target-table-name AT connection-uri
;; COPY foo AT postgresql://user@localhost:5432/dbname
;;
;; The "COPY table-name [AT connection-uri]" part of the command.  Produces
;; a plist that starts with :table-name and continues with the connection
;; plist when a TARGET-DSN clause was given.
(defrule target (and "COPY" trimmed-name (? (and ignore-whitespace target-dsn)))
  (:lambda (matched)
    ;; matched is ("COPY" table-name dsn-clause)
    (destructuring-bind (table-name dsn-clause) (rest matched)
      ;; dsn-clause is NIL or (whitespace connection-plist)
      (list* :table-name table-name (second dsn-clause)))))
;;
;; Source parsing (filename)
;;
;; parsing filename
(defun filename-character-p (char)
  "Return true when CHAR is accepted inside a quoted filename: one of the
extra punctuation characters listed below (including the space), or any
alphanumeric character."
  ;; In the literal below the backslash only escapes the following dot for
  ;; the Lisp reader, so the backslash character itself is not in the set.
  (cond ((member char (coerce "/\.-_!@#$%^&*() " 'list)))
        (t (alphanumericp char))))
;; A single-quoted filename made only of characters that satisfy
;; FILENAME-CHARACTER-P.  Produces the result of PARSE-NAMESTRING on the
;; text between the quotes.
(defrule filename (and #\'
                       (* (filename-character-p character))
                       #\')
  (:lambda (matched)
    ;; matched is (open-quote list-of-characters close-quote)
    (let ((chars (second matched)))
      (parse-namestring (coerce chars 'string)))))
;; A FILENAME optionally preceded by whitespace.  Produces just the filename
;; value.
(defrule trimmed-filename (and ignore-whitespace filename)
  (:lambda (matched)
    ;; matched is (whitespace filename)
    (second matched)))
;; The "FROM 'filename'" clause.  Produces the filename value from
;; TRIMMED-FILENAME; the keyword is dropped.
(defrule source (and "FROM" trimmed-filename)
  (:lambda (matched)
    ;; matched is ("FROM" filename)
    (second matched)))
;;
;; Putting it all together, the COPY command
;;
;; The output format is Lisp code using the pgloader API.
;;
;; The whole COPY command: a TARGET, optional whitespace, then a SOURCE.
;;
;; Produces a LAMBDA form (as data, not a compiled function).  Its keyword
;; parameters are named after the *pgconn-...* variables and default to the
;; host, port, user and password parsed from the command.  Its body calls
;; pgloader.pgsql:copy-from-file with the parsed database name, table name
;; and source file.
;; NOTE(review): the *pgconn-...* symbols presumably come from
;; pgloader.params, which this package uses -- confirm.
(defrule copy (and target ignore-whitespace source)
(:destructure (target whitespace source)
(declare (ignore whitespace))
;; TARGET is a plist; any key it does not contain is bound to NIL here.
(destructuring-bind (&key table-name user password host port dbname)
target
`(lambda (&key (*pgconn-host* ,host)
(*pgconn-port* ,port)
(*pgconn-user* ,user)
(*pgconn-pass* ,password))
(pgloader.pgsql:copy-from-file ,dbname ,table-name ,source)))))

View File

@ -13,13 +13,16 @@
#:cl-mysql ; CFFI binding to libmysqlclient-dev
#:split-sequence ; some parsing is made easy
#:cl-csv ; full CSV reader
#:lparallel) ; threads, workers, queues
#:lparallel ; threads, workers, queues
#:esrap ; parser generator
)
:components ((:file "params")
(:file "package" :depends-on ("params"))
(:file "utils" :depends-on ("package"))
(:file "pgloader" :depends-on ("package" "utils"))
;; those are one-package-per-file
(:file "parser" :depends-on ("package" "params"))
(:file "queue" :depends-on ("package")) ; package pgloader.queue
(:file "csv" :depends-on ("package")) ; package pgloader.csv