From 0fd3e1877fce536c9d39bb7200b501ededea5207 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Tue, 22 Oct 2013 21:14:05 +0200 Subject: [PATCH] Remove "internal" libs that are now published separately at github. --- Makefile | 13 +- lib/abnf/README.md | 197 -------------- lib/abnf/abnf.asd | 14 - lib/abnf/abnf.lisp | 586 ------------------------------------------ lib/abnf/example.lisp | 38 --- lib/abnf/package.lisp | 8 - lib/db3/README.md | 77 ------ lib/db3/db3.asd | 12 - lib/db3/db3.lisp | 218 ---------------- lib/db3/package.lisp | 11 - 10 files changed, 10 insertions(+), 1164 deletions(-) delete mode 100644 lib/abnf/README.md delete mode 100644 lib/abnf/abnf.asd delete mode 100644 lib/abnf/abnf.lisp delete mode 100644 lib/abnf/example.lisp delete mode 100644 lib/abnf/package.lisp delete mode 100644 lib/db3/README.md delete mode 100644 lib/db3/db3.asd delete mode 100644 lib/db3/db3.lisp delete mode 100644 lib/db3/package.lisp diff --git a/Makefile b/Makefile index 7745edc..de32e8e 100644 --- a/Makefile +++ b/Makefile @@ -14,16 +14,23 @@ docs: pandoc pgloader.1.md -o pgloader.html pandoc pgloader.1.md -o pgloader.pdf +~/quicklisp/local-projects/cl-abnf: + git clone https://github.com/dimitri/cl-abnf.git $@ + +~/quicklisp/local-projects/cl-db3: + git clone https://github.com/dimitri/cl-db3.git $@ + ~/quicklisp/local-projects/Postmodern: git clone https://github.com/marijnh/Postmodern.git $@ cd ~/quicklisp/local-projects/Postmodern/ && patch -p1 < $(POMO_PATCH) -postmodern: ~/quicklisp/local-projects/Postmodern ; - ~/quicklisp/local-projects/cl-csv: git clone -b empty-strings-and-nil https://github.com/dimitri/cl-csv.git $@ +cl-abnf: ~/quicklisp/local-projects/cl-abnf ; +cl-db3: ~/quicklisp/local-projects/cl-db3 ; cl-csv: ~/quicklisp/local-projects/cl-csv ; +postmodern: ~/quicklisp/local-projects/Postmodern ; ~/quicklisp/setup.lisp: curl -o ~/quicklisp.lisp http://beta.quicklisp.org/quicklisp.lisp @@ -39,7 +46,7 @@ $(ASDF_CONF): asdf-config: $(ASDF_CONF) ; -$(LIBS): quicklisp $(ASDF_CONF) postmodern cl-csv +$(LIBS): quicklisp $(ASDF_CONF) cl-abnf cl-db3 postmodern cl-csv sbcl --load ~/quicklisp/setup.lisp \ --eval '(ql:quickload "pgloader")' \ --eval '(quit)' diff --git a/lib/abnf/README.md b/lib/abnf/README.md deleted file mode 100644 index 0be3624..0000000 --- a/lib/abnf/README.md +++ /dev/null @@ -1,197 +0,0 @@ -# ABNF DEFINITION OF ABNF - -This Common Lisp librairie implements a parser generator for the ABNF -grammar format as described in [http://tools.ietf.org/html/rfc2234](RFC -2234). - -The generated parser is a regular expression scanner provided by the -[http://weitz.de/cl-ppcre/](cl-ppcre) lib, which means that we can't parse -recursive grammar definition. One such definition is the ABNF definition as -given by the RFC. Fortunately, as you have this lib, you most probably don't -need to generate another parser to handle that particular ABNF grammar. - -## Installation - -The system has been made Quicklisp ready. - - $ cd ~/quicklisp/local-projects/ - $ git clone git://git.tapoueh.org/pgloader.git - * (ql:quickload "abnf") - -Currently the ABNF system is maintained as part of the `pgloader` tool as a -central piece of its syslog message parser facility. - -## Usage - -The `parse-abnf-grammar` function expects the grammar to be parsed as a -string, and also needs the top level rule name of the grammar you're -interested into, as a symbol or a string. You can also give a list of rule -names that you want to capture, they will be capture in the order in which -they are needed to expand the given top-level rule. - -The `parse-abnf-grammar` function returns a `cl-ppcre` scanner. - -~~~ {#example.lisp .commonlisp .numberLines} -(defvar *timestamp-abnf* - " TIMESTAMP = NILVALUE / FULL-DATE \"T\" FULL-TIME - FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY - DATE-FULLYEAR = 4DIGIT - DATE-MONTH = 2DIGIT ; 01-12 - DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on - ; month/year - FULL-TIME = PARTIAL-TIME TIME-OFFSET - PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND - [TIME-SECFRAC] - TIME-HOUR = 2DIGIT ; 00-23 - TIME-MINUTE = 2DIGIT ; 00-59 - TIME-SECOND = 2DIGIT ; 00-59 - TIME-SECFRAC = \".\" 1*6DIGIT - TIME-OFFSET = \"Z\" / TIME-NUMOFFSET - TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE - - NILVALUE = \"-\" " - "A timestamp ABNF grammar.") - -(let ((scanner (abnf:parse-abnf-grammar *timestamp-abnf* - :timestamp - :registering-rules '(:full-date)))) - (cl-ppcre:register-groups-bind (date) - (scanner "2013-09-08T00:02:03.123456Z+02:00") - date)) -~~~ - -In the previous usage example the `let` block returns `"2013-09-08"`. - -## ABNF grammar - -This library supports the ABNF grammar as given in RFC 2234, with additional -support for plain regular expressions. - -### Parsed grammar - -Here's the RFC syntax: - - rulelist = 1*( rule / (*c-wsp c-nl) ) - - rule = rulename defined-as elements c-nl - ; continues if next line starts - ; with white space - - rulename = ALPHA *(ALPHA / DIGIT / "-") - - defined-as = *c-wsp ("=" / "=/") *c-wsp - ; basic rules definition and - ; incremental alternatives - - elements = alternation *c-wsp - - c-wsp = WSP / (c-nl WSP) - - c-nl = comment / CRLF - ; comment or newline - - comment = ";" *(WSP / VCHAR) CRLF - - alternation = concatenation - *(*c-wsp "/" *c-wsp concatenation) - - concatenation = repetition *(1*c-wsp repetition) - - repetition = [repeat] element - - repeat = 1*DIGIT / (*DIGIT "*" *DIGIT) - - element = rulename / group / option / - char-val / num-val / prose-val / regex - ; regex is an addition of this lib, see above - - group = "(" *c-wsp alternation *c-wsp ")" - - option = "[" *c-wsp alternation *c-wsp "]" - - char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE - ; quoted string of SP and VCHAR - ; without DQUOTE - - num-val = "%" (bin-val / dec-val / hex-val) - - bin-val = "b" 1*BIT - [ 1*("." 1*BIT) / ("-" 1*BIT) ] - ; series of concatenated bit values - ; or single ONEOF range - - dec-val = "d" 1*DIGIT - [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] - - hex-val = "x" 1*HEXDIG - [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] - - prose-val = "<" *(%x20-3D / %x3F-7E) ">" - ; bracketed string of SP and VCHAR - ; without angles - ; prose description, to be used as - ; last resort - -### Core rules - -Those parts of the grammar are always provided, they are the *defaults* -rules of the ABNF definition. - - ALPHA = %x41-5A / %x61-7A ; A-Z / a-z - - BIT = "0" / "1" - - CHAR = %x01-7F - ; any 7-bit US-ASCII character, excluding NUL - - CR = %x0D - ; carriage return - - CRLF = CR LF - ; Internet standard newline - - CTL = %x00-1F / %x7F - ; controls - - DIGIT = %x30-39 - ; 0-9 - - DQUOTE = %x22 - ; " (Double Quote) - - HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" - - HTAB = %x09 - ; horizontal tab - - LF = %x0A - ; linefeed - - LWSP = *(WSP / CRLF WSP) - ; linear white space (past newline) - - OCTET = %x00-FF - ; 8 bits of data - - SP = %x20 - -### Regex Support - -We add support for plain regexp in the `element` rule. A regexp is expected -to follow the form: - - regex = "~" delimiter expression delimiter - -The *expression* shouldn't contain the *delimiter* of course, and the -allowed delimiters are `~//`, `~[]`, `~{}`, `~()`, `~<>`, `~""`, `~''`, -`~||` and `~##`. If you have to build a regexp with more than one of those -delimiters in it, you can just concatenate multiple parts together like in -this example: - - complex-regex = ~/foo{bar}/ ~{baz/quux} - -That will be used in exactly the same way as the following example: - - complex-regex = ~ - - diff --git a/lib/abnf/abnf.asd b/lib/abnf/abnf.asd deleted file mode 100644 index df47444..0000000 --- a/lib/abnf/abnf.asd +++ /dev/null @@ -1,14 +0,0 @@ -;;;; abnf.asd - -(asdf:defsystem #:abnf - :serial t - :description "ABNF Parser Generator, per RFC2234" - :author "Dimitri Fontaine " - :license "WTFPL" - :depends-on (#:esrap ; parser generator - #:cl-ppcre ; regular expression - ) - :components ((:file "package") - (:file "abnf" :depends-on ("package")))) - - diff --git a/lib/abnf/abnf.lisp b/lib/abnf/abnf.lisp deleted file mode 100644 index c544a6f..0000000 --- a/lib/abnf/abnf.lisp +++ /dev/null @@ -1,586 +0,0 @@ -;;; -;;; Augmented BNF for Syntax Specifications: ABNF -;;; -;;; Parsing ABNF syntaxes so that we can offer users to edit them -;;; -;;; see http://tools.ietf.org/html/draft-ietf-syslog-protocol-15#page-10 -;;; and http://tools.ietf.org/html/rfc2234 -;;; -(in-package #:abnf) - -(defvar *abnf-rfc2234-abnf-definition* - " rulelist = 1*( rule / (*c-wsp c-nl) ) - - rule = rulename defined-as elements c-nl - ; continues if next line starts - ; with white space - - rulename = ALPHA *(ALPHA / DIGIT / \"-\") - - defined-as = *c-wsp (\"=\" / \"=/\") *c-wsp - ; basic rules definition and - ; incremental alternatives - - elements = alternation *c-wsp - - c-wsp = WSP / (c-nl WSP) - - c-nl = comment / CRLF - ; comment or newline - - comment = \";\" *(WSP / VCHAR) CRLF - - alternation = concatenation - *(*c-wsp \"/\" *c-wsp concatenation) - - concatenation = repetition *(1*c-wsp repetition) - - repetition = [repeat] element - - repeat = 1*DIGIT / (*DIGIT \"*\" *DIGIT) - - element = rulename / group / option / - char-val / num-val / prose-val - - group = \"(\" *c-wsp alternation *c-wsp \")\" - - option = \"[\" *c-wsp alternation *c-wsp \"]\" - - char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE - ; quoted string of SP and VCHAR without DQUOTE - - num-val = \"%\" (bin-val / dec-val / hex-val) - - bin-val = \"b\" 1*BIT - [ 1*(\".\" 1*BIT) / (\"-\" 1*BIT) ] - ; series of concatenated bit values - ; or single ONEOF range - - dec-val = \"d\" 1*DIGIT - [ 1*(\".\" 1*DIGIT) / (\"-\" 1*DIGIT) ] - - hex-val = \"x\" 1*HEXDIG - [ 1*(\".\" 1*HEXDIG) / (\"-\" 1*HEXDIG) ] - - prose-val = \"<\" *(%x20-3D / %x3F-7E) \">\" - ; bracketed string of SP and VCHAR without angles - ; prose description, to be used as last resort - " - "See http://tools.ietf.org/html/rfc2234#section-4") - -(defvar *abnf-rfc-syslog-draft-15* - "SYSLOG-MSG = HEADER SP STRUCTURED-DATA [SP MSG] - - HEADER = VERSION SP FACILITY SP SEVERITY SP - TRUNCATE SP TIMESTAMP SP HOSTNAME - SP APP-NAME SP PROCID SP MSGID - VERSION = NONZERO-DIGIT 0*2DIGIT - FACILITY = \"0\" / (NONZERO-DIGIT 0*9DIGIT) - ; range 0..2147483647 ; - SEVERITY = \"0\" / \"1\" / \"2\" / \"3\" / \"4\" / \"5\" / - \"6\" / \"7\" - TRUNCATE = 2DIGIT - HOSTNAME = 1*255PRINTUSASCII - - APP-NAME = 1*48PRINTUSASCII - PROCID = \"-\" / 1*128PRINTUSASCII - MSGID = \"-\" / 1*32PRINTUSASCII - - TIMESTAMP = FULL-DATE \"T\" FULL-TIME - FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY - DATE-FULLYEAR = 4DIGIT - DATE-MONTH = 2DIGIT ; 01-12 - DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on - ; month/year ; - FULL-TIME = PARTIAL-TIME TIME-OFFSET - PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND - [TIME-SECFRAC] - TIME-HOUR = 2DIGIT ; 00-23 - TIME-MINUTE = 2DIGIT ; 00-59 - TIME-SECOND = 2DIGIT ; 00-58, 00-59, 00-60 based on leap - ; second rules ; - TIME-SECFRAC = \".\" 1*6DIGIT - TIME-OFFSET = \"Z\" / TIME-NUMOFFSET - TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE - - - STRUCTURED-DATA = 1*SD-ELEMENT / \"-\" - SD-ELEMENT = \"[\" SD-ID *(SP SD-PARAM) \"]\" - SD-PARAM = PARAM-NAME \"=\" %d34 PARAM-VALUE %d34 - SD-ID = SD-NAME - PARAM-NAME = SD-NAME - PARAM-VALUE = UTF-8-STRING ; characters '\"', '\' and - ; ']' MUST be escaped. ; - SD-NAME = 1*32PRINTUSASCII - ; except '=', SP, ']', %d34 (\") ; - - MSG = UTF-8-STRING - UTF-8-STRING = *OCTET ; Any VALID UTF-8 String - ; \"shortest form\" MUST be used ; - - OCTET = %d00-255 - SP = %d32 - PRINTUSASCII = %d33-126 - NONZERO-DIGIT = \"1\" / \"2\" / \"3\" / \"4\" / \"5\" / - \"6\" / \"7\" / \"8\" / \"9\" - DIGIT = \"0\" / NONZERO-DIGIT" - "See http://tools.ietf.org/html/draft-ietf-syslog-protocol-15#page-10") - -(defvar *abnf-rsyslog* - (concatenate 'string - "RSYSLOG-MSG = \"<\" PRIVAL \">\" VERSION SP TIMESTAMP - SP HOSTNAME SP APP-NAME SP PROCID SP MSGID - SP [SD-ID SP] DATA - - DATA = ~/.*/ - - PRIVAL = 1*3DIGIT ; range 0 .. 191" - '(#\Newline #\Newline) - *abnf-rfc-syslog-draft-15*) - "See http://www.rsyslog.com/doc/syslog_protocol.html") - -(defvar *abnf-rfc5424-syslog-protocol* - " SYSLOG-MSG = HEADER SP STRUCTURED-DATA [SP MSG] - - HEADER = PRI VERSION SP TIMESTAMP SP HOSTNAME - SP APP-NAME SP PROCID SP MSGID - PRI = \"<\" PRIVAL \">\" - PRIVAL = 1*3DIGIT ; range 0 .. 191 - VERSION = NONZERO-DIGIT 0*2DIGIT - HOSTNAME = NILVALUE / 1*255PRINTUSASCII - - APP-NAME = NILVALUE / 1*48PRINTUSASCII - PROCID = NILVALUE / 1*128PRINTUSASCII - MSGID = NILVALUE / 1*32PRINTUSASCII - - TIMESTAMP = NILVALUE / FULL-DATE \"T\" FULL-TIME - FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY - DATE-FULLYEAR = 4DIGIT - DATE-MONTH = 2DIGIT ; 01-12 - DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on - ; month/year - FULL-TIME = PARTIAL-TIME TIME-OFFSET - PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND - [TIME-SECFRAC] - TIME-HOUR = 2DIGIT ; 00-23 - TIME-MINUTE = 2DIGIT ; 00-59 - TIME-SECOND = 2DIGIT ; 00-59 - TIME-SECFRAC = \".\" 1*6DIGIT - TIME-OFFSET = \"Z\" / TIME-NUMOFFSET - TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE - - - STRUCTURED-DATA = NILVALUE / 1*SD-ELEMENT - SD-ELEMENT = \"[\" SD-ID *(SP SD-PARAM) \"]\" - SD-PARAM = PARAM-NAME \"=\" %d34 PARAM-VALUE %d34 - SD-ID = SD-NAME - PARAM-NAME = SD-NAME - PARAM-VALUE = UTF-8-STRING ; characters '\"', '\' and - ; ']' MUST be escaped. - SD-NAME = 1*32PRINTUSASCII - ; except '=', SP, ']', %d34 (\") - - MSG = MSG-ANY / MSG-UTF8 - MSG-ANY = *OCTET ; not starting with BOM - MSG-UTF8 = BOM UTF-8-STRING - BOM = %xEF.BB.BF - - UTF-8-STRING = *OCTET ; UTF-8 string as specified - ; in RFC 3629 - - OCTET = %d00-255 - SP = %d32 - PRINTUSASCII = %d33-126 - NONZERO-DIGIT = %d49-57 - DIGIT = %d48 / NONZERO-DIGIT - NILVALUE = \"-\"" - "See http://tools.ietf.org/html/rfc5424#section-6") - -#| - -This table comes from http://tools.ietf.org/html/rfc2234#page-11 and 12. - - ALPHA = %x41-5A / %x61-7A ; A-Z / a-z - BIT = "0" / "1" - CHAR = %x01-7F - CR = %x0D - CRLF = CR LF - CTL = %x00-1F / %x7F - DIGIT = %x30-39 - DQUOTE = %x22 - HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" - HTAB = %x09 - LF = %x0A - LWSP = *(WSP / CRLF WSP) - OCTET = %x00-FF - SP = %x20 - VCHAR = %x21-7E - WSP = SP / HTAB -|# - -(defvar *abnf-default-rules* - `((:abnf-alpha (:char-class (:range #\A #\Z) (:range #\a #\z))) - (:abnf-bit (:char-class #\0 #\1)) - (:abnf-char (:char-class (:range ,(code-char #x1) ,(code-char #x7f)))) - (:abnf-cr #\Newline) - (:abnf-crlf (:sequence #\Newline #\Return)) - (:abnf-ctl (:char-class (:range ,(code-char #x0) ,(code-char #x1f)) - ,(code-char #x7f))) - (:abnf-digit (:char-class (:range #\0 #\9))) - (:abnf-dquote #\") - (:abnf-hexdig (:char-class (:range #\0 #\9) (:range #\A #\F))) - (:abnf-htab #\Tab) - (:abnf-lf #\Newline) - (:abnf-lwsp (:regex "\s+")) - (:abnf-octet (:char-class (:range ,(code-char #x0) ,(code-char #xff)))) - (:abnf-sp #\Space) - (:abnf-vchar (:char-class (:range ,(code-char #x21) ,(code-char #x7e)))) - (:abnf-wsp (:char-class #\Space #\Tab))) - "An alist of the usual rules needed for ABNF grammars") - -(defun rule-name-character-p (character) - (or (alphanumericp character) - (char= character #\-))) - -(defun vcharp (character) - (<= #x21 (char-code character) #x7E)) - -(defrule vchar (+ (vcharp character)) (:text t)) -(defrule wsp (or #\Space #\Tab) (:constant :wsp)) - -(defrule comment (and ";" (* (or wsp vchar)) #\Newline) (:constant :comment)) -(defrule c-nl (or comment #\Newline) (:constant :c-nl)) -(defrule c-wsp (or wsp c-nl) (:constant :c-wsp)) -(defrule n-wsp (* c-wsp) (:constant :c-wsp)) - -(defun rule-name-symbol (rule-name &key find-symbol) - "Turn the string we read in the ABNF into internal symbol." - (let ((symbol-fun (if find-symbol #'find-symbol #'intern)) - (symbol-name (string-upcase (format nil "abnf-~a" rule-name)))) - (funcall symbol-fun symbol-name :keyword))) - -(defrule rule-name (and (alpha-char-p character) - (+ (rule-name-character-p character))) - (:lambda (name) - (rule-name-symbol (text name)))) - -(defrule equal (and n-wsp #\= n-wsp) (:constant :equal)) -(defrule end-of-rule n-wsp (:constant :eor)) - -(defrule digit (digit-char-p character) - (:lambda (digit) - (parse-integer (text digit)))) - -(defrule digits (+ (digit-char-p character)) - (:lambda (digits) - (code-char (parse-integer (text digits))))) - -(defun char-val-char-p (character) - (let ((code (char-code character))) - (or (<= #x20 code #x21) - (<= #x23 code #x7E)))) - -(defrule char-val (and #\" (* (char-val-char-p character)) #\") - (:lambda (char) - (destructuring-bind (open val close) char - (declare (ignore open close)) - (text val)))) - -(defrule dec-string (and digits (+ (and "." digits))) - (:lambda (string) - (destructuring-bind (first rest) string - `(:sequence ,first ,@(mapcar #'cadr rest))))) - -(defrule dec-range (and digits "-" digits) - (:lambda (range) - (destructuring-bind (min sep max) range - (declare (ignore sep)) - `(:char-class (:range ,min ,max))))) - -(defrule dec-val (and "d" (or dec-string dec-range digits)) - (:lambda (dv) - (destructuring-bind (d val) dv - (declare (ignore d)) - val))) - -(defun hexadecimal-char-p (character) - (member character #. (quote (coerce "0123456789abcdefABCDEF" 'list)))) - -(defrule hexdigits (+ (hexadecimal-char-p character)) - (:lambda (hx) - (code-char (parse-integer (text hx) :radix 16)))) - -(defrule hex-string (and hexdigits (+ (and "." hexdigits))) - (:lambda (string) - (destructuring-bind (first rest) string - `(:sequence ,first ,@(mapcar #'cadr rest))))) - -(defrule hex-range (and hexdigits range-sep hexdigits) - (:lambda (range) - (destructuring-bind (min sep max) range - (declare (ignore sep)) - `(:char-class (:range ,min ,max))))) - -(defrule hex-val (and "x" (or hex-string hex-range hexdigits)) - (:lambda (dv) - (destructuring-bind (d val) dv - (declare (ignore d)) - val))) - -(defrule num-val (and "%" (or dec-val hex-val)) - (:lambda (nv) - (destructuring-bind (percent val) nv - (declare (ignore percent)) - val))) - -;;; allow to parse rule definitions without a separating blank line -(defrule rule-name-reference (and rule-name (! equal)) - (:lambda (ref) - (destructuring-bind (rule-name nil) ref - rule-name))) - -;;; what about allowing regular expressions directly? -(defun process-quoted-regex (pr) - "Helper function to process different kinds of quotes for regexps" - (destructuring-bind (open regex close) pr - (declare (ignore open close)) - `(:regex ,(text regex)))) - -(defrule single-quoted-regex (and #\' (+ (not #\')) #\') - (:function process-quoted-regex)) - -(defrule double-quoted-regex (and #\" (+ (not #\")) #\") - (:function process-quoted-regex)) - -(defrule parens-quoted-regex (and #\( (+ (not #\))) #\)) - (:function process-quoted-regex)) - -(defrule braces-quoted-regex (and #\{ (+ (not #\})) #\}) - (:function process-quoted-regex)) - -(defrule chevron-quoted-regex (and #\< (+ (not #\>)) #\>) - (:function process-quoted-regex)) - -(defrule brackets-quoted-regex (and #\[ (+ (not #\])) #\]) - (:function process-quoted-regex)) - -(defrule slash-quoted-regex (and #\/ (+ (not #\/)) #\/) - (:function process-quoted-regex)) - -(defrule pipe-quoted-regex (and #\| (+ (not #\|)) #\|) - (:function process-quoted-regex)) - -(defrule sharp-quoted-regex (and #\# (+ (not #\#)) #\#) - (:function process-quoted-regex)) - -(defrule quoted-regex (and "~" (or single-quoted-regex - double-quoted-regex - parens-quoted-regex - braces-quoted-regex - chevron-quoted-regex - brackets-quoted-regex - slash-quoted-regex - pipe-quoted-regex - sharp-quoted-regex)) - (:lambda (qr) - (destructuring-bind (tilde regex) qr - (declare (ignore tilde)) - regex))) - -(defrule element (or rule-name-reference char-val num-val quoted-regex)) - -(defrule number (+ (digit-char-p character)) - (:lambda (number) - (parse-integer (text number)))) - -(defrule repeat-var (and (? number) "*" (? number)) - (:lambda (rv) - (destructuring-bind (min star max) rv - (declare (ignore star)) - (cons (or min 0) max)))) - -(defrule repeat-specific number - (:lambda (number) - (cons number number))) - -(defrule repeat (or repeat-var repeat-specific)) - -(defrule repetition (and (? repeat) toplevel-element) - (:lambda (repetition) - (destructuring-bind (repeat element) repetition - (if repeat - (destructuring-bind (min . max) repeat - `(:non-greedy-repetition ,min ,max ,element)) - ;; no repeat clause - element)))) - -(defrule concatenation-element (and n-wsp repetition) - (:lambda (ce) - (destructuring-bind (n-wsp rep) ce - (declare (ignore n-wsp)) - rep))) - -(defrule concatenation (and repetition (* concatenation-element)) - (:lambda (concat) - (destructuring-bind (rep1 rlist) concat - (if rlist - `(:sequence ,@(list* rep1 rlist)) - ;; concatenation of a single element - rep1)))) - -(defrule alternation-element (and n-wsp "/" n-wsp concatenation) - (:lambda (ae) - (destructuring-bind (ws1 sl ws2 concatenation) ae - (declare (ignore ws1 sl ws2)) - concatenation))) - -(defrule alternation (and concatenation (* alternation-element)) - (:lambda (alternation) - (destructuring-bind (c1 clist) alternation - (if clist - `(:alternation ,@(list* c1 clist)) - c1)))) - -(defrule group (and "(" n-wsp alternation n-wsp ")") - (:lambda (group) - (destructuring-bind (open ws1 a ws2 close) group - (declare (ignore open close ws1 ws2)) - ;; we need the grouping when parsing the ABNF syntax, but once this - ;; parsing is done there's no ambiguity possible left and we don't - ;; need the grouping anymore in the resulting regular-expression parse - ;; tree. - a))) - -(defrule option (and "[" n-wsp alternation n-wsp "]") - (:lambda (option) - (destructuring-bind (open ws1 a ws2 close) option - (declare (ignore open close ws1 ws2)) - `(:non-greedy-repetition 0 1 ,a)))) - -(defrule toplevel-element (or group option element)) - -(defrule alternations (and n-wsp alternation) - (:lambda (alts) - (destructuring-bind (n-wsp alt) alts - (declare (ignore n-wsp)) - alt))) - -(defrule elements (and (+ alternations) end-of-rule) - (:lambda (alist) - (destructuring-bind (alts eor) alist - (declare (ignore eor)) - alts))) - -(defrule rule (and n-wsp rule-name equal elements) - (:lambda (rule) - (destructuring-bind (n-wsp rule-name eq definition) rule - (declare (ignore n-wsp eq)) - (cons rule-name definition)))) - -(defrule rule-list (+ rule)) - -;;; -;;; Now that we are able to transform ABNF rule set into an alist of -;;; cl-ppcre parse trees and references to other rules in the set, we need -;;; to expand each symbol's definition to get a real cl-ppcre scanner parse -;;; tree. -;;; -(defun expand-rule-definition (definition - rule-set - registering-rules - already-expanded-rules) - "Expand given rule DEFINITION within given RULE-SET" - (typecase definition - (list - ;; walk the definition and expand its elements - (loop - for element in definition - collect (expand-rule-definition element - rule-set - registering-rules - already-expanded-rules))) - - (symbol - (if (member definition '(:sequence - :alternation - :regex - :char-class - :range - :non-greedy-repetition)) - ;; that's a cl-ppcre scanner parse-tree symbol - ;; only put in that list those cl-ppcre symbols we actually produce - definition - - ;; here we have to actually expand the symbol - (progn - ;; first protect against infinite recursion - (when (member definition already-expanded-rules) - (error "Can not expand recursive rule: ~S." definition)) - - (destructuring-bind (rule-name rule-definition) - (or (assoc definition rule-set) - (assoc definition *abnf-default-rules*)) - (let* ((already-expanded-rules - (cons definition already-expanded-rules)) - - (expanded-definition - (expand-rule-definition rule-definition - rule-set - registering-rules - already-expanded-rules))) - (if (member rule-name registering-rules) - `(:register ,expanded-definition) - expanded-definition)))))) - - ;; all other types of data are "constants" in our parse-tree - (t definition))) - -(defun expand-rule (rule-name rule-set &optional registering-rules) - "Given a rule, expand it completely removing references to other parsed - rules" - (let ((rule (rule-name-symbol rule-name :find-symbol t))) - (destructuring-bind (rule-name definition) - (assoc rule rule-set) - `(:sequence - :start-anchor - ,(expand-rule-definition definition - rule-set - (loop - for rr in registering-rules - collect (rule-name-symbol rr :find-symbol t)) - (list rule-name)))))) - -(defun parse-abnf-grammar (abnf-string top-level-rule - &key registering-rules junk-allowed) - "Parse STRING as an ABNF grammar as defined in RFC 2234. Returns a cl-ppcre - scanner that will only match strings conforming to given grammar. - - See http://tools.ietf.org/html/rfc2234 for details about the ABNF specs. - Added to that grammar is support for regular expression, that are - expected in the ELEMENT production and spelled ~/regex/. The allowed - delimiters are: ~// ~[] ~{} ~() ~<> ~\"\" ~'' ~|| and ~##." - (let ((rule-set - (parse 'rule-list abnf-string :junk-allowed junk-allowed))) - (cl-ppcre:create-scanner - (expand-rule top-level-rule - ;; in case of duplicates only keep the latest addition - (remove-duplicates rule-set :key #'car) - registering-rules)))) - -(defun test (&key (times 10000)) - "This serves as a test and an example: if you're going to use the same - scanner more than one, be sure to compute it only once." - (let* ((cl-ppcre:*use-bmh-matchers* t) - (cl-ppcre:*optimize-char-classes* t) - (scanner - (parse-abnf-grammar *abnf-rfc-syslog-draft-15* - :timestamp - :registering-rules '(:full-date - :partial-time - :time-offset)))) - (loop - repeat times - do (cl-ppcre:register-groups-bind - (date time zone) - (scanner "2013-09-08T00:02:03.123456Z+02:00") - (list date time zone))))) diff --git a/lib/abnf/example.lisp b/lib/abnf/example.lisp deleted file mode 100644 index 2ec2cd7..0000000 --- a/lib/abnf/example.lisp +++ /dev/null @@ -1,38 +0,0 @@ -;;;; abnf-example.lisp - -(defpackage #:abnf-example - (:use #:cl #:abnf #:cl-ppcre)) - -(in-package #:abnf-example) - -(defvar *timestamp-abnf* - " TIMESTAMP = NILVALUE / FULL-DATE \"T\" FULL-TIME - FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY - DATE-FULLYEAR = 4DIGIT - DATE-MONTH = 2DIGIT ; 01-12 - DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on - ; month/year - FULL-TIME = PARTIAL-TIME TIME-OFFSET - PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND - [TIME-SECFRAC] - TIME-HOUR = 2DIGIT ; 00-23 - TIME-MINUTE = 2DIGIT ; 00-59 - TIME-SECOND = 2DIGIT ; 00-59 - TIME-SECFRAC = \".\" 1*6DIGIT - TIME-OFFSET = \"Z\" / TIME-NUMOFFSET - TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE - - NILVALUE = \"-\" " - "A timestamp ABNF grammar.") - -(let ((scanner (parse-abnf-grammar *timestamp-abnf* - :timestamp - :registering-rules '(:full-date)))) - (register-groups-bind (date) - (scanner "2013-09-08T00:02:03.123456Z+02:00") - date)) - -(let ((scanner (parse-abnf-grammar *timestamp-abnf* - :timestamp - :registering-rules '(:full-date)))) - (scan-to-strings scanner "2013-09-08T00:02:03.123456Z+02:00")) diff --git a/lib/abnf/package.lisp b/lib/abnf/package.lisp deleted file mode 100644 index 1bea5e6..0000000 --- a/lib/abnf/package.lisp +++ /dev/null @@ -1,8 +0,0 @@ -;;;; package.lisp - -(defpackage #:abnf - (:use #:cl #:esrap) - (:export #:*abnf-rfc-syslog-draft-15* - #:*abnf-rsyslog* - #:*abnf-rfc5424-syslog-protocol* - #:parse-abnf-grammar)) diff --git a/lib/db3/README.md b/lib/db3/README.md deleted file mode 100644 index 91a1266..0000000 --- a/lib/db3/README.md +++ /dev/null @@ -1,77 +0,0 @@ -# DB3 - -A lib to read dbf files version 3. - -## Database file structure - -The structure of a dBASE III database file is composed of a header -and data records. The layout is given below. - -### dBASE III DATABASE FILE HEADER: - - +---------+-------------------+---------------------------------+ - | BYTE | CONTENTS | MEANING | - +---------+-------------------+---------------------------------+ - | 0 | 1 byte | dBASE III version number | - | | | (03H without a .DBT file) | - | | | (83H with a .DBT file) | - +---------+-------------------+---------------------------------+ - | 1-3 | 3 bytes | date of last update | - | | | (YY MM DD) in binary format | - +---------+-------------------+---------------------------------+ - | 4-7 | 32 bit number | number of records in data file | - +---------+-------------------+---------------------------------+ - | 8-9 | 16 bit number | length of header structure | - +---------+-------------------+---------------------------------+ - | 10-11 | 16 bit number | length of the record | - +---------+-------------------+---------------------------------+ - | 12-31 | 20 bytes | reserved bytes (version 1.00) | - +---------+-------------------+---------------------------------+ - | 32-n | 32 bytes each | field descriptor array | - | | | (see below) | --+ - +---------+-------------------+---------------------------------+ | - | n+1 | 1 byte | 0DH as the field terminator | | - +---------+-------------------+---------------------------------+ | - | - | - A FIELD DESCRIPTOR: <------------------------------------------+ - - +---------+-------------------+---------------------------------+ - | BYTE | CONTENTS | MEANING | - +---------+-------------------+---------------------------------+ - | 0-10 | 11 bytes | field name in ASCII zero-filled | - +---------+-------------------+---------------------------------+ - | 11 | 1 byte | field type in ASCII | - | | | (C N L D or M) | - +---------+-------------------+---------------------------------+ - | 12-15 | 32 bit number | field data address | - | | | (address is set in memory) | - +---------+-------------------+---------------------------------+ - | 16 | 1 byte | field length in binary | - +---------+-------------------+---------------------------------+ - | 17 | 1 byte | field decimal count in binary | - +---------+-------------------+---------------------------------+ - | 18-31 | 14 bytes | reserved bytes (version 1.00) | - +---------+-------------------+---------------------------------+ - -### The data records are layed out as follows: - - 1. Data records are preceeded by one byte that is a space (20H) if the - record is not deleted and an asterisk (2AH) if it is deleted. - - 2. Data fields are packed into records with no field separators or record - terminators. - - 3. Data types are stored in ASCII format as follows: - - DATA TYPE DATA RECORD STORAGE - --------- -------------------------------------------- - Character (ASCII characters) - Numeric - . 0 1 2 3 4 5 6 7 8 9 - Logical ? Y y N n T t F f (? when not initialized) - Memo (10 digits representing a .DBT block number) - Date (8 digits in YYYYMMDD format, such as - 19840704 for July 4, 1984) - ----------------------------------------------------------- - - diff --git a/lib/db3/db3.asd b/lib/db3/db3.asd deleted file mode 100644 index 2ca239d..0000000 --- a/lib/db3/db3.asd +++ /dev/null @@ -1,12 +0,0 @@ -;;;; abnf.asd - -(asdf:defsystem #:db3 - :serial t - :description "DB3 file reader" - :author "Xach" - :license "WTFPL" - :depends-on () - :components ((:file "package") - (:file "db3" :depends-on ("package")))) - - diff --git a/lib/db3/db3.lisp b/lib/db3/db3.lisp deleted file mode 100644 index bd4bfce..0000000 --- a/lib/db3/db3.lisp +++ /dev/null @@ -1,218 +0,0 @@ -;;;; http://xach.com/lisp/db3.lisp -;; -;; db3.lisp -#| - -Database file structure - -The structure of a dBASE III database file is composed of a header -and data records. The layout is given below. - - -dBASE III DATABASE FILE HEADER: - -+---------+-------------------+---------------------------------+ -| BYTE | CONTENTS | MEANING | -+---------+-------------------+---------------------------------+ -| 0 | 1 byte | dBASE III version number | -| | | (03H without a .DBT file) | -| | | (83H with a .DBT file) | -+---------+-------------------+---------------------------------+ -| 1-3 | 3 bytes | date of last update | -| | | (YY MM DD) in binary format | -+---------+-------------------+---------------------------------+ -| 4-7 | 32 bit number | number of records in data file | -+---------+-------------------+---------------------------------+ -| 8-9 | 16 bit number | length of header structure | -+---------+-------------------+---------------------------------+ -| 10-11 | 16 bit number | length of the record | -+---------+-------------------+---------------------------------+ -| 12-31 | 20 bytes | reserved bytes (version 1.00) | -+---------+-------------------+---------------------------------+ -| 32-n | 32 bytes each | field descriptor array | -| | | (see below) | --+ -+---------+-------------------+---------------------------------+ | -| n+1 | 1 byte | 0DH as the field terminator | | -+---------+-------------------+---------------------------------+ | - | - | -A FIELD DESCRIPTOR: <------------------------------------------+ - -+---------+-------------------+---------------------------------+ -| BYTE | CONTENTS | MEANING | -+---------+-------------------+---------------------------------+ -| 0-10 | 11 bytes | field name in ASCII zero-filled | -+---------+-------------------+---------------------------------+ -| 11 | 1 byte | field type in ASCII | -| | | (C N L D or M) | -+---------+-------------------+---------------------------------+ -| 12-15 | 32 bit number | field data address | -| | | (address is set in memory) | -+---------+-------------------+---------------------------------+ -| 16 | 1 byte | field length in binary | -+---------+-------------------+---------------------------------+ -| 17 | 1 byte | field decimal count in binary | -+---------+-------------------+---------------------------------+ -| 18-31 | 14 bytes | reserved bytes (version 1.00) | -+---------+-------------------+---------------------------------+ - - -The data records are layed out as follows: - - 1. Data records are preceeded by one byte that is a space (20H) if the - record is not deleted and an asterisk (2AH) if it is deleted. - - 2. Data fields are packed into records with no field separators or - record terminators. - - 3. Data types are stored in ASCII format as follows: - - DATA TYPE DATA RECORD STORAGE - --------- -------------------------------------------- - Character (ASCII characters) - Numeric - . 0 1 2 3 4 5 6 7 8 9 - Logical ? Y y N n T t F f (? when not initialized) - Memo (10 digits representing a .DBT block number) - Date (8 digits in YYYYMMDD format, such as - 19840704 for July 4, 1984) - -|# -(in-package :db3) - - -;;; reading binary stuff -(defun read-uint32 (stream) - (loop repeat 4 - for offset from 0 by 8 - for value = (read-byte stream) - then (logior (ash (read-byte stream) offset) value) - finally (return value))) - -(defun read-uint16 (stream) - (loop repeat 2 - for offset from 0 by 8 - for value = (read-byte stream) - then (logior (ash (read-byte stream) offset) value) - finally (return value))) - - - -;;; objects - -(defclass db3 () - ((version-number :accessor version-number) - (last-update :accessor last-update) - (record-count :accessor record-count) - (header-length :accessor header-length) - (record-length :accessor record-length) - (fields :accessor fields))) - - -(defclass db3-field () - ((name :accessor field-name) - (type :accessor field-type) - (data-address :accessor data-address) - (field-length :accessor field-length) - (field-count :accessor field-count))) - - -(defun asciiz->string (array) - (let* ((string-length (or (position 0 array) - (length array))) - (string (make-string string-length))) - (loop for i below string-length - do (setf (schar string i) (code-char (aref array i)))) - string)) - -(defun ascii->string (array) - (map 'string #'code-char array)) - - -(defun load-field-descriptor (stream) - (let ((field (make-instance 'db3-field)) - (name (make-array 11 :element-type '(unsigned-byte 8)))) - (read-sequence name stream) - (setf (field-name field) (asciiz->string name) - (field-type field) (code-char (read-byte stream)) - (data-address field) (read-uint32 stream) - (field-length field) (read-byte stream) - (field-count field) (read-byte stream)) - (loop repeat 14 do (read-byte stream)) - field)) - - -(defmethod field-count ((db3 db3)) - (1- (/ (1- (header-length db3)) 32))) - - -(defmethod load-header ((db3 db3) stream) - (let ((version (read-byte stream))) - (unless (= version #x03) - (error "Can't handle this file")) - (let ((year (read-byte stream)) - (month (read-byte stream)) - (day (read-byte stream))) - (setf (version-number db3) version - (last-update db3) (list year month day) - (record-count db3) (read-uint32 stream) - (header-length db3) (read-uint16 stream) - (record-length db3) (read-uint16 stream)) - (file-position stream 32) - (setf (fields db3) (loop repeat (field-count db3) - collect (load-field-descriptor stream))) - (assert (= (read-byte stream) #x0D)) - db3))) - - -(defmethod convert-field (type data) - (ascii->string data)) - -(defmethod convert-field ((type (eql #\C)) data) - (ascii->string data)) - - -(defmethod load-field (type length stream) - (let ((field (make-array length))) - (read-sequence field stream) - (convert-field type field))) - -(defmethod load-record ((db3 db3) stream) - (read-byte stream) - (loop with record = (make-array (field-count db3)) - for i below (field-count db3) - for field in (fields db3) - do (setf (svref record i) - (load-field (field-type field) (field-length field) stream)) - finally (return record))) - - -(defun write-record (record stream) - (loop for field across record - do - (write-char #\" stream) - (write-string field stream) - (write-string "\"," stream)) - (terpri stream)) - - -(defun dump-db3 (input output) - (with-open-file (stream input :direction :input - :element-type '(unsigned-byte 8)) - (with-open-file (ostream output :direction :output - :element-type 'character) - (let ((db3 (make-instance 'db3))) - (load-header db3 stream) - (loop repeat (record-count db3) - do (write-record (load-record db3 stream) ostream)) - db3)))) - -(defun sample-db3 (input ostream &key (sample-size 10)) - (with-open-file (stream input :direction :input - :element-type '(unsigned-byte 8)) - (let ((db3 (make-instance 'db3))) - (load-header db3 stream) - (loop - :repeat sample-size - :do (format ostream "~s~%" (load-record db3 stream))) - db3))) - diff --git a/lib/db3/package.lisp b/lib/db3/package.lisp deleted file mode 100644 index c1e824d..0000000 --- a/lib/db3/package.lisp +++ /dev/null @@ -1,11 +0,0 @@ -;;;; package.lisp - -(defpackage #:db3 - (:use #:cl) - (:export #:db3 ; the main class - #:load-header - #:record-count - #:load-record - #:write-record - #:dump-db3 - #:sample-db3))