Remove "internal" libs that are now published separately at github.

This commit is contained in:
Dimitri Fontaine 2013-10-22 21:14:05 +02:00
parent 02914741c1
commit 0fd3e1877f
10 changed files with 10 additions and 1164 deletions

View File

@ -14,16 +14,23 @@ docs:
pandoc pgloader.1.md -o pgloader.html
pandoc pgloader.1.md -o pgloader.pdf
~/quicklisp/local-projects/cl-abnf:
git clone https://github.com/dimitri/cl-abnf.git $@
~/quicklisp/local-projects/cl-db3:
git clone https://github.com/dimitri/cl-db3.git $@
~/quicklisp/local-projects/Postmodern:
git clone https://github.com/marijnh/Postmodern.git $@
cd ~/quicklisp/local-projects/Postmodern/ && patch -p1 < $(POMO_PATCH)
postmodern: ~/quicklisp/local-projects/Postmodern ;
~/quicklisp/local-projects/cl-csv:
git clone -b empty-strings-and-nil https://github.com/dimitri/cl-csv.git $@
cl-abnf: ~/quicklisp/local-projects/cl-abnf ;
cl-db3: ~/quicklisp/local-projects/cl-db3 ;
cl-csv: ~/quicklisp/local-projects/cl-csv ;
postmodern: ~/quicklisp/local-projects/Postmodern ;
~/quicklisp/setup.lisp:
curl -o ~/quicklisp.lisp http://beta.quicklisp.org/quicklisp.lisp
@ -39,7 +46,7 @@ $(ASDF_CONF):
asdf-config: $(ASDF_CONF) ;
$(LIBS): quicklisp $(ASDF_CONF) postmodern cl-csv
$(LIBS): quicklisp $(ASDF_CONF) cl-abnf cl-db3 postmodern cl-csv
sbcl --load ~/quicklisp/setup.lisp \
--eval '(ql:quickload "pgloader")' \
--eval '(quit)'

View File

@ -1,197 +0,0 @@
# ABNF DEFINITION OF ABNF
This Common Lisp librairie implements a parser generator for the ABNF
grammar format as described in [http://tools.ietf.org/html/rfc2234](RFC
2234).
The generated parser is a regular expression scanner provided by the
[http://weitz.de/cl-ppcre/](cl-ppcre) lib, which means that we can't parse
recursive grammar definition. One such definition is the ABNF definition as
given by the RFC. Fortunately, as you have this lib, you most probably don't
need to generate another parser to handle that particular ABNF grammar.
## Installation
The system has been made Quicklisp ready.
$ cd ~/quicklisp/local-projects/
$ git clone git://git.tapoueh.org/pgloader.git
* (ql:quickload "abnf")
Currently the ABNF system is maintained as part of the `pgloader` tool as a
central piece of its syslog message parser facility.
## Usage
The `parse-abnf-grammar` function expects the grammar to be parsed as a
string, and also needs the top level rule name of the grammar you're
interested into, as a symbol or a string. You can also give a list of rule
names that you want to capture, they will be capture in the order in which
they are needed to expand the given top-level rule.
The `parse-abnf-grammar` function returns a `cl-ppcre` scanner.
~~~ {#example.lisp .commonlisp .numberLines}
(defvar *timestamp-abnf*
" TIMESTAMP = NILVALUE / FULL-DATE \"T\" FULL-TIME
FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY
DATE-FULLYEAR = 4DIGIT
DATE-MONTH = 2DIGIT ; 01-12
DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
; month/year
FULL-TIME = PARTIAL-TIME TIME-OFFSET
PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND
[TIME-SECFRAC]
TIME-HOUR = 2DIGIT ; 00-23
TIME-MINUTE = 2DIGIT ; 00-59
TIME-SECOND = 2DIGIT ; 00-59
TIME-SECFRAC = \".\" 1*6DIGIT
TIME-OFFSET = \"Z\" / TIME-NUMOFFSET
TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE
NILVALUE = \"-\" "
"A timestamp ABNF grammar.")
(let ((scanner (abnf:parse-abnf-grammar *timestamp-abnf*
:timestamp
:registering-rules '(:full-date))))
(cl-ppcre:register-groups-bind (date)
(scanner "2013-09-08T00:02:03.123456Z+02:00")
date))
~~~
In the previous usage example the `let` block returns `"2013-09-08"`.
## ABNF grammar
This library supports the ABNF grammar as given in RFC 2234, with additional
support for plain regular expressions.
### Parsed grammar
Here's the RFC syntax:
rulelist = 1*( rule / (*c-wsp c-nl) )
rule = rulename defined-as elements c-nl
; continues if next line starts
; with white space
rulename = ALPHA *(ALPHA / DIGIT / "-")
defined-as = *c-wsp ("=" / "=/") *c-wsp
; basic rules definition and
; incremental alternatives
elements = alternation *c-wsp
c-wsp = WSP / (c-nl WSP)
c-nl = comment / CRLF
; comment or newline
comment = ";" *(WSP / VCHAR) CRLF
alternation = concatenation
*(*c-wsp "/" *c-wsp concatenation)
concatenation = repetition *(1*c-wsp repetition)
repetition = [repeat] element
repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
element = rulename / group / option /
char-val / num-val / prose-val / regex
; regex is an addition of this lib, see above
group = "(" *c-wsp alternation *c-wsp ")"
option = "[" *c-wsp alternation *c-wsp "]"
char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
; quoted string of SP and VCHAR
; without DQUOTE
num-val = "%" (bin-val / dec-val / hex-val)
bin-val = "b" 1*BIT
[ 1*("." 1*BIT) / ("-" 1*BIT) ]
; series of concatenated bit values
; or single ONEOF range
dec-val = "d" 1*DIGIT
[ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
hex-val = "x" 1*HEXDIG
[ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
prose-val = "<" *(%x20-3D / %x3F-7E) ">"
; bracketed string of SP and VCHAR
; without angles
; prose description, to be used as
; last resort
### Core rules
Those parts of the grammar are always provided, they are the *defaults*
rules of the ABNF definition.
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
BIT = "0" / "1"
CHAR = %x01-7F
; any 7-bit US-ASCII character, excluding NUL
CR = %x0D
; carriage return
CRLF = CR LF
; Internet standard newline
CTL = %x00-1F / %x7F
; controls
DIGIT = %x30-39
; 0-9
DQUOTE = %x22
; " (Double Quote)
HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
HTAB = %x09
; horizontal tab
LF = %x0A
; linefeed
LWSP = *(WSP / CRLF WSP)
; linear white space (past newline)
OCTET = %x00-FF
; 8 bits of data
SP = %x20
### Regex Support
We add support for plain regexp in the `element` rule. A regexp is expected
to follow the form:
regex = "~" delimiter expression delimiter
The *expression* shouldn't contain the *delimiter* of course, and the
allowed delimiters are `~//`, `~[]`, `~{}`, `~()`, `~<>`, `~""`, `~''`,
`~||` and `~##`. If you have to build a regexp with more than one of those
delimiters in it, you can just concatenate multiple parts together like in
this example:
complex-regex = ~/foo{bar}/ ~{baz/quux}
That will be used in exactly the same way as the following example:
complex-regex = ~<foo{bar}baz/quux>

View File

@ -1,14 +0,0 @@
;;;; abnf.asd
(asdf:defsystem #:abnf
:serial t
:description "ABNF Parser Generator, per RFC2234"
:author "Dimitri Fontaine <dim@tapoueh.org>"
:license "WTFPL"
:depends-on (#:esrap ; parser generator
#:cl-ppcre ; regular expression
)
:components ((:file "package")
(:file "abnf" :depends-on ("package"))))

View File

@ -1,586 +0,0 @@
;;;
;;; Augmented BNF for Syntax Specifications: ABNF
;;;
;;; Parsing ABNF syntaxes so that we can offer users to edit them
;;;
;;; see http://tools.ietf.org/html/draft-ietf-syslog-protocol-15#page-10
;;; and http://tools.ietf.org/html/rfc2234
;;;
(in-package #:abnf)
(defvar *abnf-rfc2234-abnf-definition*
" rulelist = 1*( rule / (*c-wsp c-nl) )
rule = rulename defined-as elements c-nl
; continues if next line starts
; with white space
rulename = ALPHA *(ALPHA / DIGIT / \"-\")
defined-as = *c-wsp (\"=\" / \"=/\") *c-wsp
; basic rules definition and
; incremental alternatives
elements = alternation *c-wsp
c-wsp = WSP / (c-nl WSP)
c-nl = comment / CRLF
; comment or newline
comment = \";\" *(WSP / VCHAR) CRLF
alternation = concatenation
*(*c-wsp \"/\" *c-wsp concatenation)
concatenation = repetition *(1*c-wsp repetition)
repetition = [repeat] element
repeat = 1*DIGIT / (*DIGIT \"*\" *DIGIT)
element = rulename / group / option /
char-val / num-val / prose-val
group = \"(\" *c-wsp alternation *c-wsp \")\"
option = \"[\" *c-wsp alternation *c-wsp \"]\"
char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
; quoted string of SP and VCHAR without DQUOTE
num-val = \"%\" (bin-val / dec-val / hex-val)
bin-val = \"b\" 1*BIT
[ 1*(\".\" 1*BIT) / (\"-\" 1*BIT) ]
; series of concatenated bit values
; or single ONEOF range
dec-val = \"d\" 1*DIGIT
[ 1*(\".\" 1*DIGIT) / (\"-\" 1*DIGIT) ]
hex-val = \"x\" 1*HEXDIG
[ 1*(\".\" 1*HEXDIG) / (\"-\" 1*HEXDIG) ]
prose-val = \"<\" *(%x20-3D / %x3F-7E) \">\"
; bracketed string of SP and VCHAR without angles
; prose description, to be used as last resort
"
"See http://tools.ietf.org/html/rfc2234#section-4")
(defvar *abnf-rfc-syslog-draft-15*
"SYSLOG-MSG = HEADER SP STRUCTURED-DATA [SP MSG]
HEADER = VERSION SP FACILITY SP SEVERITY SP
TRUNCATE SP TIMESTAMP SP HOSTNAME
SP APP-NAME SP PROCID SP MSGID
VERSION = NONZERO-DIGIT 0*2DIGIT
FACILITY = \"0\" / (NONZERO-DIGIT 0*9DIGIT)
; range 0..2147483647 ;
SEVERITY = \"0\" / \"1\" / \"2\" / \"3\" / \"4\" / \"5\" /
\"6\" / \"7\"
TRUNCATE = 2DIGIT
HOSTNAME = 1*255PRINTUSASCII
APP-NAME = 1*48PRINTUSASCII
PROCID = \"-\" / 1*128PRINTUSASCII
MSGID = \"-\" / 1*32PRINTUSASCII
TIMESTAMP = FULL-DATE \"T\" FULL-TIME
FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY
DATE-FULLYEAR = 4DIGIT
DATE-MONTH = 2DIGIT ; 01-12
DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
; month/year ;
FULL-TIME = PARTIAL-TIME TIME-OFFSET
PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND
[TIME-SECFRAC]
TIME-HOUR = 2DIGIT ; 00-23
TIME-MINUTE = 2DIGIT ; 00-59
TIME-SECOND = 2DIGIT ; 00-58, 00-59, 00-60 based on leap
; second rules ;
TIME-SECFRAC = \".\" 1*6DIGIT
TIME-OFFSET = \"Z\" / TIME-NUMOFFSET
TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE
STRUCTURED-DATA = 1*SD-ELEMENT / \"-\"
SD-ELEMENT = \"[\" SD-ID *(SP SD-PARAM) \"]\"
SD-PARAM = PARAM-NAME \"=\" %d34 PARAM-VALUE %d34
SD-ID = SD-NAME
PARAM-NAME = SD-NAME
PARAM-VALUE = UTF-8-STRING ; characters '\"', '\' and
; ']' MUST be escaped. ;
SD-NAME = 1*32PRINTUSASCII
; except '=', SP, ']', %d34 (\") ;
MSG = UTF-8-STRING
UTF-8-STRING = *OCTET ; Any VALID UTF-8 String
; \"shortest form\" MUST be used ;
OCTET = %d00-255
SP = %d32
PRINTUSASCII = %d33-126
NONZERO-DIGIT = \"1\" / \"2\" / \"3\" / \"4\" / \"5\" /
\"6\" / \"7\" / \"8\" / \"9\"
DIGIT = \"0\" / NONZERO-DIGIT"
"See http://tools.ietf.org/html/draft-ietf-syslog-protocol-15#page-10")
(defvar *abnf-rsyslog*
(concatenate 'string
"RSYSLOG-MSG = \"<\" PRIVAL \">\" VERSION SP TIMESTAMP
SP HOSTNAME SP APP-NAME SP PROCID SP MSGID
SP [SD-ID SP] DATA
DATA = ~/.*/
PRIVAL = 1*3DIGIT ; range 0 .. 191"
'(#\Newline #\Newline)
*abnf-rfc-syslog-draft-15*)
"See http://www.rsyslog.com/doc/syslog_protocol.html")
(defvar *abnf-rfc5424-syslog-protocol*
" SYSLOG-MSG = HEADER SP STRUCTURED-DATA [SP MSG]
HEADER = PRI VERSION SP TIMESTAMP SP HOSTNAME
SP APP-NAME SP PROCID SP MSGID
PRI = \"<\" PRIVAL \">\"
PRIVAL = 1*3DIGIT ; range 0 .. 191
VERSION = NONZERO-DIGIT 0*2DIGIT
HOSTNAME = NILVALUE / 1*255PRINTUSASCII
APP-NAME = NILVALUE / 1*48PRINTUSASCII
PROCID = NILVALUE / 1*128PRINTUSASCII
MSGID = NILVALUE / 1*32PRINTUSASCII
TIMESTAMP = NILVALUE / FULL-DATE \"T\" FULL-TIME
FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY
DATE-FULLYEAR = 4DIGIT
DATE-MONTH = 2DIGIT ; 01-12
DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
; month/year
FULL-TIME = PARTIAL-TIME TIME-OFFSET
PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND
[TIME-SECFRAC]
TIME-HOUR = 2DIGIT ; 00-23
TIME-MINUTE = 2DIGIT ; 00-59
TIME-SECOND = 2DIGIT ; 00-59
TIME-SECFRAC = \".\" 1*6DIGIT
TIME-OFFSET = \"Z\" / TIME-NUMOFFSET
TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE
STRUCTURED-DATA = NILVALUE / 1*SD-ELEMENT
SD-ELEMENT = \"[\" SD-ID *(SP SD-PARAM) \"]\"
SD-PARAM = PARAM-NAME \"=\" %d34 PARAM-VALUE %d34
SD-ID = SD-NAME
PARAM-NAME = SD-NAME
PARAM-VALUE = UTF-8-STRING ; characters '\"', '\' and
; ']' MUST be escaped.
SD-NAME = 1*32PRINTUSASCII
; except '=', SP, ']', %d34 (\")
MSG = MSG-ANY / MSG-UTF8
MSG-ANY = *OCTET ; not starting with BOM
MSG-UTF8 = BOM UTF-8-STRING
BOM = %xEF.BB.BF
UTF-8-STRING = *OCTET ; UTF-8 string as specified
; in RFC 3629
OCTET = %d00-255
SP = %d32
PRINTUSASCII = %d33-126
NONZERO-DIGIT = %d49-57
DIGIT = %d48 / NONZERO-DIGIT
NILVALUE = \"-\""
"See http://tools.ietf.org/html/rfc5424#section-6")
#|
This table comes from http://tools.ietf.org/html/rfc2234#page-11 and 12.
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
BIT = "0" / "1"
CHAR = %x01-7F
CR = %x0D
CRLF = CR LF
CTL = %x00-1F / %x7F
DIGIT = %x30-39
DQUOTE = %x22
HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
HTAB = %x09
LF = %x0A
LWSP = *(WSP / CRLF WSP)
OCTET = %x00-FF
SP = %x20
VCHAR = %x21-7E
WSP = SP / HTAB
|#
(defvar *abnf-default-rules*
`((:abnf-alpha (:char-class (:range #\A #\Z) (:range #\a #\z)))
(:abnf-bit (:char-class #\0 #\1))
(:abnf-char (:char-class (:range ,(code-char #x1) ,(code-char #x7f))))
(:abnf-cr #\Newline)
(:abnf-crlf (:sequence #\Newline #\Return))
(:abnf-ctl (:char-class (:range ,(code-char #x0) ,(code-char #x1f))
,(code-char #x7f)))
(:abnf-digit (:char-class (:range #\0 #\9)))
(:abnf-dquote #\")
(:abnf-hexdig (:char-class (:range #\0 #\9) (:range #\A #\F)))
(:abnf-htab #\Tab)
(:abnf-lf #\Newline)
(:abnf-lwsp (:regex "\s+"))
(:abnf-octet (:char-class (:range ,(code-char #x0) ,(code-char #xff))))
(:abnf-sp #\Space)
(:abnf-vchar (:char-class (:range ,(code-char #x21) ,(code-char #x7e))))
(:abnf-wsp (:char-class #\Space #\Tab)))
"An alist of the usual rules needed for ABNF grammars")
(defun rule-name-character-p (character)
(or (alphanumericp character)
(char= character #\-)))
(defun vcharp (character)
(<= #x21 (char-code character) #x7E))
(defrule vchar (+ (vcharp character)) (:text t))
(defrule wsp (or #\Space #\Tab) (:constant :wsp))
(defrule comment (and ";" (* (or wsp vchar)) #\Newline) (:constant :comment))
(defrule c-nl (or comment #\Newline) (:constant :c-nl))
(defrule c-wsp (or wsp c-nl) (:constant :c-wsp))
(defrule n-wsp (* c-wsp) (:constant :c-wsp))
(defun rule-name-symbol (rule-name &key find-symbol)
"Turn the string we read in the ABNF into internal symbol."
(let ((symbol-fun (if find-symbol #'find-symbol #'intern))
(symbol-name (string-upcase (format nil "abnf-~a" rule-name))))
(funcall symbol-fun symbol-name :keyword)))
(defrule rule-name (and (alpha-char-p character)
(+ (rule-name-character-p character)))
(:lambda (name)
(rule-name-symbol (text name))))
(defrule equal (and n-wsp #\= n-wsp) (:constant :equal))
(defrule end-of-rule n-wsp (:constant :eor))
(defrule digit (digit-char-p character)
(:lambda (digit)
(parse-integer (text digit))))
(defrule digits (+ (digit-char-p character))
(:lambda (digits)
(code-char (parse-integer (text digits)))))
(defun char-val-char-p (character)
(let ((code (char-code character)))
(or (<= #x20 code #x21)
(<= #x23 code #x7E))))
(defrule char-val (and #\" (* (char-val-char-p character)) #\")
(:lambda (char)
(destructuring-bind (open val close) char
(declare (ignore open close))
(text val))))
(defrule dec-string (and digits (+ (and "." digits)))
(:lambda (string)
(destructuring-bind (first rest) string
`(:sequence ,first ,@(mapcar #'cadr rest)))))
(defrule dec-range (and digits "-" digits)
(:lambda (range)
(destructuring-bind (min sep max) range
(declare (ignore sep))
`(:char-class (:range ,min ,max)))))
(defrule dec-val (and "d" (or dec-string dec-range digits))
(:lambda (dv)
(destructuring-bind (d val) dv
(declare (ignore d))
val)))
(defun hexadecimal-char-p (character)
(member character #. (quote (coerce "0123456789abcdefABCDEF" 'list))))
(defrule hexdigits (+ (hexadecimal-char-p character))
(:lambda (hx)
(code-char (parse-integer (text hx) :radix 16))))
(defrule hex-string (and hexdigits (+ (and "." hexdigits)))
(:lambda (string)
(destructuring-bind (first rest) string
`(:sequence ,first ,@(mapcar #'cadr rest)))))
(defrule hex-range (and hexdigits range-sep hexdigits)
(:lambda (range)
(destructuring-bind (min sep max) range
(declare (ignore sep))
`(:char-class (:range ,min ,max)))))
(defrule hex-val (and "x" (or hex-string hex-range hexdigits))
(:lambda (dv)
(destructuring-bind (d val) dv
(declare (ignore d))
val)))
(defrule num-val (and "%" (or dec-val hex-val))
(:lambda (nv)
(destructuring-bind (percent val) nv
(declare (ignore percent))
val)))
;;; allow to parse rule definitions without a separating blank line
(defrule rule-name-reference (and rule-name (! equal))
(:lambda (ref)
(destructuring-bind (rule-name nil) ref
rule-name)))
;;; what about allowing regular expressions directly?
(defun process-quoted-regex (pr)
"Helper function to process different kinds of quotes for regexps"
(destructuring-bind (open regex close) pr
(declare (ignore open close))
`(:regex ,(text regex))))
(defrule single-quoted-regex (and #\' (+ (not #\')) #\')
(:function process-quoted-regex))
(defrule double-quoted-regex (and #\" (+ (not #\")) #\")
(:function process-quoted-regex))
(defrule parens-quoted-regex (and #\( (+ (not #\))) #\))
(:function process-quoted-regex))
(defrule braces-quoted-regex (and #\{ (+ (not #\})) #\})
(:function process-quoted-regex))
(defrule chevron-quoted-regex (and #\< (+ (not #\>)) #\>)
(:function process-quoted-regex))
(defrule brackets-quoted-regex (and #\[ (+ (not #\])) #\])
(:function process-quoted-regex))
(defrule slash-quoted-regex (and #\/ (+ (not #\/)) #\/)
(:function process-quoted-regex))
(defrule pipe-quoted-regex (and #\| (+ (not #\|)) #\|)
(:function process-quoted-regex))
(defrule sharp-quoted-regex (and #\# (+ (not #\#)) #\#)
(:function process-quoted-regex))
(defrule quoted-regex (and "~" (or single-quoted-regex
double-quoted-regex
parens-quoted-regex
braces-quoted-regex
chevron-quoted-regex
brackets-quoted-regex
slash-quoted-regex
pipe-quoted-regex
sharp-quoted-regex))
(:lambda (qr)
(destructuring-bind (tilde regex) qr
(declare (ignore tilde))
regex)))
(defrule element (or rule-name-reference char-val num-val quoted-regex))
(defrule number (+ (digit-char-p character))
(:lambda (number)
(parse-integer (text number))))
(defrule repeat-var (and (? number) "*" (? number))
(:lambda (rv)
(destructuring-bind (min star max) rv
(declare (ignore star))
(cons (or min 0) max))))
(defrule repeat-specific number
(:lambda (number)
(cons number number)))
(defrule repeat (or repeat-var repeat-specific))
(defrule repetition (and (? repeat) toplevel-element)
(:lambda (repetition)
(destructuring-bind (repeat element) repetition
(if repeat
(destructuring-bind (min . max) repeat
`(:non-greedy-repetition ,min ,max ,element))
;; no repeat clause
element))))
(defrule concatenation-element (and n-wsp repetition)
(:lambda (ce)
(destructuring-bind (n-wsp rep) ce
(declare (ignore n-wsp))
rep)))
(defrule concatenation (and repetition (* concatenation-element))
(:lambda (concat)
(destructuring-bind (rep1 rlist) concat
(if rlist
`(:sequence ,@(list* rep1 rlist))
;; concatenation of a single element
rep1))))
(defrule alternation-element (and n-wsp "/" n-wsp concatenation)
(:lambda (ae)
(destructuring-bind (ws1 sl ws2 concatenation) ae
(declare (ignore ws1 sl ws2))
concatenation)))
(defrule alternation (and concatenation (* alternation-element))
(:lambda (alternation)
(destructuring-bind (c1 clist) alternation
(if clist
`(:alternation ,@(list* c1 clist))
c1))))
(defrule group (and "(" n-wsp alternation n-wsp ")")
(:lambda (group)
(destructuring-bind (open ws1 a ws2 close) group
(declare (ignore open close ws1 ws2))
;; we need the grouping when parsing the ABNF syntax, but once this
;; parsing is done there's no ambiguity possible left and we don't
;; need the grouping anymore in the resulting regular-expression parse
;; tree.
a)))
(defrule option (and "[" n-wsp alternation n-wsp "]")
(:lambda (option)
(destructuring-bind (open ws1 a ws2 close) option
(declare (ignore open close ws1 ws2))
`(:non-greedy-repetition 0 1 ,a))))
(defrule toplevel-element (or group option element))
(defrule alternations (and n-wsp alternation)
(:lambda (alts)
(destructuring-bind (n-wsp alt) alts
(declare (ignore n-wsp))
alt)))
(defrule elements (and (+ alternations) end-of-rule)
(:lambda (alist)
(destructuring-bind (alts eor) alist
(declare (ignore eor))
alts)))
(defrule rule (and n-wsp rule-name equal elements)
(:lambda (rule)
(destructuring-bind (n-wsp rule-name eq definition) rule
(declare (ignore n-wsp eq))
(cons rule-name definition))))
(defrule rule-list (+ rule))
;;;
;;; Now that we are able to transform ABNF rule set into an alist of
;;; cl-ppcre parse trees and references to other rules in the set, we need
;;; to expand each symbol's definition to get a real cl-ppcre scanner parse
;;; tree.
;;;
(defun expand-rule-definition (definition
rule-set
registering-rules
already-expanded-rules)
"Expand given rule DEFINITION within given RULE-SET"
(typecase definition
(list
;; walk the definition and expand its elements
(loop
for element in definition
collect (expand-rule-definition element
rule-set
registering-rules
already-expanded-rules)))
(symbol
(if (member definition '(:sequence
:alternation
:regex
:char-class
:range
:non-greedy-repetition))
;; that's a cl-ppcre scanner parse-tree symbol
;; only put in that list those cl-ppcre symbols we actually produce
definition
;; here we have to actually expand the symbol
(progn
;; first protect against infinite recursion
(when (member definition already-expanded-rules)
(error "Can not expand recursive rule: ~S." definition))
(destructuring-bind (rule-name rule-definition)
(or (assoc definition rule-set)
(assoc definition *abnf-default-rules*))
(let* ((already-expanded-rules
(cons definition already-expanded-rules))
(expanded-definition
(expand-rule-definition rule-definition
rule-set
registering-rules
already-expanded-rules)))
(if (member rule-name registering-rules)
`(:register ,expanded-definition)
expanded-definition))))))
;; all other types of data are "constants" in our parse-tree
(t definition)))
(defun expand-rule (rule-name rule-set &optional registering-rules)
"Given a rule, expand it completely removing references to other parsed
rules"
(let ((rule (rule-name-symbol rule-name :find-symbol t)))
(destructuring-bind (rule-name definition)
(assoc rule rule-set)
`(:sequence
:start-anchor
,(expand-rule-definition definition
rule-set
(loop
for rr in registering-rules
collect (rule-name-symbol rr :find-symbol t))
(list rule-name))))))
(defun parse-abnf-grammar (abnf-string top-level-rule
&key registering-rules junk-allowed)
"Parse STRING as an ABNF grammar as defined in RFC 2234. Returns a cl-ppcre
scanner that will only match strings conforming to given grammar.
See http://tools.ietf.org/html/rfc2234 for details about the ABNF specs.
Added to that grammar is support for regular expression, that are
expected in the ELEMENT production and spelled ~/regex/. The allowed
delimiters are: ~// ~[] ~{} ~() ~<> ~\"\" ~'' ~|| and ~##."
(let ((rule-set
(parse 'rule-list abnf-string :junk-allowed junk-allowed)))
(cl-ppcre:create-scanner
(expand-rule top-level-rule
;; in case of duplicates only keep the latest addition
(remove-duplicates rule-set :key #'car)
registering-rules))))
(defun test (&key (times 10000))
"This serves as a test and an example: if you're going to use the same
scanner more than one, be sure to compute it only once."
(let* ((cl-ppcre:*use-bmh-matchers* t)
(cl-ppcre:*optimize-char-classes* t)
(scanner
(parse-abnf-grammar *abnf-rfc-syslog-draft-15*
:timestamp
:registering-rules '(:full-date
:partial-time
:time-offset))))
(loop
repeat times
do (cl-ppcre:register-groups-bind
(date time zone)
(scanner "2013-09-08T00:02:03.123456Z+02:00")
(list date time zone)))))

View File

@ -1,38 +0,0 @@
;;;; abnf-example.lisp
(defpackage #:abnf-example
(:use #:cl #:abnf #:cl-ppcre))
(in-package #:abnf-example)
(defvar *timestamp-abnf*
" TIMESTAMP = NILVALUE / FULL-DATE \"T\" FULL-TIME
FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY
DATE-FULLYEAR = 4DIGIT
DATE-MONTH = 2DIGIT ; 01-12
DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
; month/year
FULL-TIME = PARTIAL-TIME TIME-OFFSET
PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND
[TIME-SECFRAC]
TIME-HOUR = 2DIGIT ; 00-23
TIME-MINUTE = 2DIGIT ; 00-59
TIME-SECOND = 2DIGIT ; 00-59
TIME-SECFRAC = \".\" 1*6DIGIT
TIME-OFFSET = \"Z\" / TIME-NUMOFFSET
TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE
NILVALUE = \"-\" "
"A timestamp ABNF grammar.")
(let ((scanner (parse-abnf-grammar *timestamp-abnf*
:timestamp
:registering-rules '(:full-date))))
(register-groups-bind (date)
(scanner "2013-09-08T00:02:03.123456Z+02:00")
date))
(let ((scanner (parse-abnf-grammar *timestamp-abnf*
:timestamp
:registering-rules '(:full-date))))
(scan-to-strings scanner "2013-09-08T00:02:03.123456Z+02:00"))

View File

@ -1,8 +0,0 @@
;;;; package.lisp
(defpackage #:abnf
(:use #:cl #:esrap)
(:export #:*abnf-rfc-syslog-draft-15*
#:*abnf-rsyslog*
#:*abnf-rfc5424-syslog-protocol*
#:parse-abnf-grammar))

View File

@ -1,77 +0,0 @@
# DB3
A lib to read dbf files version 3.
## Database file structure
The structure of a dBASE III database file is composed of a header
and data records. The layout is given below.
### dBASE III DATABASE FILE HEADER:
+---------+-------------------+---------------------------------+
| BYTE | CONTENTS | MEANING |
+---------+-------------------+---------------------------------+
| 0 | 1 byte | dBASE III version number |
| | | (03H without a .DBT file) |
| | | (83H with a .DBT file) |
+---------+-------------------+---------------------------------+
| 1-3 | 3 bytes | date of last update |
| | | (YY MM DD) in binary format |
+---------+-------------------+---------------------------------+
| 4-7 | 32 bit number | number of records in data file |
+---------+-------------------+---------------------------------+
| 8-9 | 16 bit number | length of header structure |
+---------+-------------------+---------------------------------+
| 10-11 | 16 bit number | length of the record |
+---------+-------------------+---------------------------------+
| 12-31 | 20 bytes | reserved bytes (version 1.00) |
+---------+-------------------+---------------------------------+
| 32-n | 32 bytes each | field descriptor array |
| | | (see below) | --+
+---------+-------------------+---------------------------------+ |
| n+1 | 1 byte | 0DH as the field terminator | |
+---------+-------------------+---------------------------------+ |
|
|
A FIELD DESCRIPTOR: <------------------------------------------+
+---------+-------------------+---------------------------------+
| BYTE | CONTENTS | MEANING |
+---------+-------------------+---------------------------------+
| 0-10 | 11 bytes | field name in ASCII zero-filled |
+---------+-------------------+---------------------------------+
| 11 | 1 byte | field type in ASCII |
| | | (C N L D or M) |
+---------+-------------------+---------------------------------+
| 12-15 | 32 bit number | field data address |
| | | (address is set in memory) |
+---------+-------------------+---------------------------------+
| 16 | 1 byte | field length in binary |
+---------+-------------------+---------------------------------+
| 17 | 1 byte | field decimal count in binary |
+---------+-------------------+---------------------------------+
| 18-31 | 14 bytes | reserved bytes (version 1.00) |
+---------+-------------------+---------------------------------+
### The data records are layed out as follows:
1. Data records are preceeded by one byte that is a space (20H) if the
record is not deleted and an asterisk (2AH) if it is deleted.
2. Data fields are packed into records with no field separators or record
terminators.
3. Data types are stored in ASCII format as follows:
DATA TYPE DATA RECORD STORAGE
--------- --------------------------------------------
Character (ASCII characters)
Numeric - . 0 1 2 3 4 5 6 7 8 9
Logical ? Y y N n T t F f (? when not initialized)
Memo (10 digits representing a .DBT block number)
Date (8 digits in YYYYMMDD format, such as
19840704 for July 4, 1984)
-----------------------------------------------------------

View File

@ -1,12 +0,0 @@
;;;; abnf.asd
(asdf:defsystem #:db3
:serial t
:description "DB3 file reader"
:author "Xach"
:license "WTFPL"
:depends-on ()
:components ((:file "package")
(:file "db3" :depends-on ("package"))))

View File

@ -1,218 +0,0 @@
;;;; http://xach.com/lisp/db3.lisp
;;
;; db3.lisp
#|
Database file structure
The structure of a dBASE III database file is composed of a header
and data records. The layout is given below.
dBASE III DATABASE FILE HEADER:
+---------+-------------------+---------------------------------+
| BYTE | CONTENTS | MEANING |
+---------+-------------------+---------------------------------+
| 0 | 1 byte | dBASE III version number |
| | | (03H without a .DBT file) |
| | | (83H with a .DBT file) |
+---------+-------------------+---------------------------------+
| 1-3 | 3 bytes | date of last update |
| | | (YY MM DD) in binary format |
+---------+-------------------+---------------------------------+
| 4-7 | 32 bit number | number of records in data file |
+---------+-------------------+---------------------------------+
| 8-9 | 16 bit number | length of header structure |
+---------+-------------------+---------------------------------+
| 10-11 | 16 bit number | length of the record |
+---------+-------------------+---------------------------------+
| 12-31 | 20 bytes | reserved bytes (version 1.00) |
+---------+-------------------+---------------------------------+
| 32-n | 32 bytes each | field descriptor array |
| | | (see below) | --+
+---------+-------------------+---------------------------------+ |
| n+1 | 1 byte | 0DH as the field terminator | |
+---------+-------------------+---------------------------------+ |
|
|
A FIELD DESCRIPTOR: <------------------------------------------+
+---------+-------------------+---------------------------------+
| BYTE | CONTENTS | MEANING |
+---------+-------------------+---------------------------------+
| 0-10 | 11 bytes | field name in ASCII zero-filled |
+---------+-------------------+---------------------------------+
| 11 | 1 byte | field type in ASCII |
| | | (C N L D or M) |
+---------+-------------------+---------------------------------+
| 12-15 | 32 bit number | field data address |
| | | (address is set in memory) |
+---------+-------------------+---------------------------------+
| 16 | 1 byte | field length in binary |
+---------+-------------------+---------------------------------+
| 17 | 1 byte | field decimal count in binary |
+---------+-------------------+---------------------------------+
| 18-31 | 14 bytes | reserved bytes (version 1.00) |
+---------+-------------------+---------------------------------+
The data records are layed out as follows:
1. Data records are preceeded by one byte that is a space (20H) if the
record is not deleted and an asterisk (2AH) if it is deleted.
2. Data fields are packed into records with no field separators or
record terminators.
3. Data types are stored in ASCII format as follows:
DATA TYPE DATA RECORD STORAGE
--------- --------------------------------------------
Character (ASCII characters)
Numeric - . 0 1 2 3 4 5 6 7 8 9
Logical ? Y y N n T t F f (? when not initialized)
Memo (10 digits representing a .DBT block number)
Date (8 digits in YYYYMMDD format, such as
19840704 for July 4, 1984)
|#
(in-package :db3)
;;; reading binary stuff
(defun read-uint32 (stream)
(loop repeat 4
for offset from 0 by 8
for value = (read-byte stream)
then (logior (ash (read-byte stream) offset) value)
finally (return value)))
(defun read-uint16 (stream)
(loop repeat 2
for offset from 0 by 8
for value = (read-byte stream)
then (logior (ash (read-byte stream) offset) value)
finally (return value)))
;;; objects
(defclass db3 ()
((version-number :accessor version-number)
(last-update :accessor last-update)
(record-count :accessor record-count)
(header-length :accessor header-length)
(record-length :accessor record-length)
(fields :accessor fields)))
(defclass db3-field ()
((name :accessor field-name)
(type :accessor field-type)
(data-address :accessor data-address)
(field-length :accessor field-length)
(field-count :accessor field-count)))
(defun asciiz->string (array)
(let* ((string-length (or (position 0 array)
(length array)))
(string (make-string string-length)))
(loop for i below string-length
do (setf (schar string i) (code-char (aref array i))))
string))
(defun ascii->string (array)
(map 'string #'code-char array))
(defun load-field-descriptor (stream)
(let ((field (make-instance 'db3-field))
(name (make-array 11 :element-type '(unsigned-byte 8))))
(read-sequence name stream)
(setf (field-name field) (asciiz->string name)
(field-type field) (code-char (read-byte stream))
(data-address field) (read-uint32 stream)
(field-length field) (read-byte stream)
(field-count field) (read-byte stream))
(loop repeat 14 do (read-byte stream))
field))
(defmethod field-count ((db3 db3))
(1- (/ (1- (header-length db3)) 32)))
(defmethod load-header ((db3 db3) stream)
(let ((version (read-byte stream)))
(unless (= version #x03)
(error "Can't handle this file"))
(let ((year (read-byte stream))
(month (read-byte stream))
(day (read-byte stream)))
(setf (version-number db3) version
(last-update db3) (list year month day)
(record-count db3) (read-uint32 stream)
(header-length db3) (read-uint16 stream)
(record-length db3) (read-uint16 stream))
(file-position stream 32)
(setf (fields db3) (loop repeat (field-count db3)
collect (load-field-descriptor stream)))
(assert (= (read-byte stream) #x0D))
db3)))
(defmethod convert-field (type data)
(ascii->string data))
(defmethod convert-field ((type (eql #\C)) data)
(ascii->string data))
(defmethod load-field (type length stream)
(let ((field (make-array length)))
(read-sequence field stream)
(convert-field type field)))
(defmethod load-record ((db3 db3) stream)
(read-byte stream)
(loop with record = (make-array (field-count db3))
for i below (field-count db3)
for field in (fields db3)
do (setf (svref record i)
(load-field (field-type field) (field-length field) stream))
finally (return record)))
(defun write-record (record stream)
(loop for field across record
do
(write-char #\" stream)
(write-string field stream)
(write-string "\"," stream))
(terpri stream))
(defun dump-db3 (input output)
(with-open-file (stream input :direction :input
:element-type '(unsigned-byte 8))
(with-open-file (ostream output :direction :output
:element-type 'character)
(let ((db3 (make-instance 'db3)))
(load-header db3 stream)
(loop repeat (record-count db3)
do (write-record (load-record db3 stream) ostream))
db3))))
(defun sample-db3 (input ostream &key (sample-size 10))
(with-open-file (stream input :direction :input
:element-type '(unsigned-byte 8))
(let ((db3 (make-instance 'db3)))
(load-header db3 stream)
(loop
:repeat sample-size
:do (format ostream "~s~%" (load-record db3 stream)))
db3)))

View File

@ -1,11 +0,0 @@
;;;; package.lisp
(defpackage #:db3
(:use #:cl)
(:export #:db3 ; the main class
#:load-header
#:record-count
#:load-record
#:write-record
#:dump-db3
#:sample-db3))