diff --git a/README.md b/README.md index 29835c9..0ad4a94 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,32 @@ Some notes about what I intend to be working on next. - commands: `LOAD` and `INI` formats - compat with `SQL*Loader` format +Here's an example of the grammar to consider: + + COPY cluttered + FROM 'cluttered/cluttered.data' + (a, c newline escaped by \, b) + AS text + WITH field_sep = ^, field_count = 3; + + LOAD foo + FROM 'path/to/file' + AS text + CASE WHEN 1:2 = "43" + THEN table(a, c) + SPEC (a sep ';', + b sep '=', -- field is not loaded + c sep ';') + + WHEN 001:003 = "HDR" + THEN table(a, c) + SPEC (a, b, c) + WITH field_sep = ',' + END + SET maintenance_work_mem TO '128 MB'; + +Pick one, or maybe have the two of them? + ### error management - error management with a local buffer (done) @@ -94,6 +120,20 @@ offer some other languages (cl-awk etc). - user-defined columns (constants, functions of other rows) - column re-ordering +Have a try at something approaching: + + WITH data AS ( + COPY FROM ... + RETURNING x, y + ) + SELECT foo(x), bar(y) + FROM data + WHERE ... + +A part of that needs to happen client-side, another part server-side, and +the grammar has to make it clear what happens where. Maybe add a WHERE +clause to the `COPY` or `LOAD` grammar for the client.
+
 #### UI
 
  - add a web controler with pretty monitoring
diff --git a/package.lisp b/package.lisp
index c0f0322..bb04017 100644
--- a/package.lisp
+++ b/package.lisp
@@ -31,6 +31,11 @@
            #:report-pgtable-stats
            #:report-pgstate-stats))
 
+(defpackage #:pgloader.parser
+  (:use #:cl #:esrap #:pgloader.params)
+  (:export #:parse-load
+           #:parse-copy))
+
 (defpackage #:pgloader.queue
   (:use #:cl)
   (:export #:map-pop-queue
diff --git a/parser.lisp b/parser.lisp
new file mode 100644
index 0000000..460620d
--- /dev/null
+++ b/parser.lisp
@@ -0,0 +1,152 @@
+;;;
+;;; Parse the pgloader commands grammar
+;;;
+
+(in-package :pgloader.parser)
+
+(defparameter *default-postgresql-port* 5432)
+
+;;
+;; Some useful rules
+;;
+(defrule whitespace (+ (or #\space #\tab #\newline))
+  (:constant 'whitespace))
+
+(defrule ignore-whitespace (* whitespace)
+  (:constant nil))
+
+(defrule punct (or #\, #\- #\_)
+  (:text t))
+
+;; A bare identifier: a letter followed by letters, digits or punct characters.
+(defrule namestring (and (alpha-char-p character)
+                         (* (or (alpha-char-p character)
+                                (digit-char-p character)
+                                punct)))
+  (:text t))
+
+(defrule quoted-namestring (and #\' namestring #\')
+  (:destructure (open name close) (declare (ignore open close)) name))
+
+(defrule name (or namestring quoted-namestring)
+  (:text t))
+
+;; A name with any leading whitespace skipped.
+(defrule trimmed-name (and ignore-whitespace name)
+  (:destructure (whitespace name) (declare (ignore whitespace)) name))
+
+;;
+;; Parse PostgreSQL database connection strings
+;;
+;; at postgresql://[user[:password]@][netloc][:port][/dbname][?param1=value1&...]
+;;
+;; http://www.postgresql.org/docs/9.2/static/libpq-connect.html#LIBPQ-CONNSTRING
+;;
+;; ":5432" => (:port 5432); a lone ":" falls back to the default port.
+(defrule dsn-port (and ":" (* (digit-char-p character)))
+  (:destructure (colon digits &aux (port (coerce digits 'string)))
+    (declare (ignore colon))
+    (list :port (if (null digits)
+                    *default-postgresql-port*
+                    (parse-integer port)))))
+
+;; "user@" or "user:password@" => (:user user :password password-or-nil)
+(defrule dsn-user-password (and namestring
+                                (? (and ":" (? namestring)))
+                                "@")
+  (:lambda (args)
+    (destructuring-bind (username &optional password)
+        (butlast args)
+      ;; password looks like '(":" "password")
+      (list :user username :password (cadr password)))))
+
+(defrule hostname (and namestring (? (and "." hostname)))
+  (:text t))
+
+;; "host" or "host:port" => (:host host :port port), defaulting the port.
+(defrule dsn-hostname (and hostname (? dsn-port))
+  (:destructure (hostname &optional port)
+    (append (list :host hostname)
+            (or port
+                (list :port *default-postgresql-port*)))))
+
+(defrule dsn-dbname (and "/" namestring)
+  (:destructure (slash dbname)
+    (declare (ignore slash))
+    (list :dbname dbname)))
+
+;; Merge the optional URI parts into one complete connection property list.
+(defrule postgresql-connection-uri (and "postgresql://"
+                                        (? dsn-user-password)
+                                        (? dsn-hostname)
+                                        dsn-dbname)
+  (:lambda (uri)
+    (destructuring-bind (&key user
+                              password
+                              (host "localhost")
+                              (port *default-postgresql-port*)
+                              dbname)
+        ;; ignore the postgresql:// prefix, (first uri)
+        (append (second uri) (third uri) (fourth uri))
+      (list :user user
+            :password password
+            :host host
+            :port port
+            :dbname dbname))))
+
+;; The "at <uri>" clause, keyword in either case => connection property list.
+(defrule target-dsn (and (or "at" "AT") ignore-whitespace postgresql-connection-uri)
+  (:destructure (at whitespace uri) (declare (ignore at whitespace)) uri))
+
+;;
+;; The main target parsing
+;;
+;; COPY target-table-name AT connection-uri
+;; COPY foo AT postgresql://user@localhost:5432/dbname
+;;
+(defrule target (and "COPY" trimmed-name (? (and ignore-whitespace target-dsn)))
+  (:destructure (copy target &optional dsn)
+    (declare (ignore copy))
+    (append (list :table-name target) (cadr dsn))))
+
+;;
+;; Source parsing (filename)
+;;
+
+;; parsing filename
+(defun filename-character-p (char)
+  (let ((extras (coerce "/\.-_!@#$%^&*() " 'list)))
+    (or (member char extras)
+        (alphanumericp char))))
+
+;; A single-quoted file name, returned as a pathname.
+(defrule filename (and #\'
+                       (* (filename-character-p character))
+                       #\')
+  (:destructure (open f close)
+    (declare (ignore open close))
+    (parse-namestring (coerce f 'string))))
+
+(defrule trimmed-filename (and ignore-whitespace filename)
+  (:destructure (whitespace filename) (declare (ignore whitespace)) filename))
+
+(defrule source (and "FROM" trimmed-filename)
+  (:destructure (from source)
+    (declare (ignore from))
+    source))
+
+;;
+;; Putting it all together, the COPY command
+;;
+;; The output format is Lisp code using the pgloader API.
+;;
+(defrule copy (and target ignore-whitespace source)
+  (:destructure (target whitespace source)
+    (declare (ignore whitespace))
+    (destructuring-bind (&key table-name user password host port dbname)
+        target
+      `(lambda (&key (*pgconn-host* ,host)
+                     (*pgconn-port* ,port)
+                     (*pgconn-user* ,user)
+                     (*pgconn-pass* ,password))
+         (pgloader.pgsql:copy-from-file ,dbname ,table-name ,source)))))
diff --git a/pgloader.asd b/pgloader.asd
index 5e8fb37..5806d6b 100644
--- a/pgloader.asd
+++ b/pgloader.asd
@@ -13,13 +13,16 @@
                  #:cl-mysql             ; CFFI binding to libmysqlclient-dev
                  #:split-sequence       ; some parsing is made easy
                  #:cl-csv               ; full CSV reader
-                 #:lparallel)           ; threads, workers, queues
+                 #:lparallel            ; threads, workers, queues
+                 #:esrap                ; parser generator
+                 )
     :components ((:file "params")
                  (:file "package" :depends-on ("params"))
                  (:file "utils" :depends-on ("package"))
                  (:file "pgloader" :depends-on ("package" "utils"))
 
                  ;; those are one-package-per-file
+                 (:file "parser" :depends-on ("package" "params"))
                  (:file "queue" :depends-on ("package")) ; package pgloader.queue
                  (:file "csv"   :depends-on ("package")) ; package pgloader.csv