mirror of
https://github.com/dimitri/pgloader.git
synced 2026-02-06 23:11:01 +01:00
Remove "internal" libs that are now published separately at github.
This commit is contained in:
parent
02914741c1
commit
0fd3e1877f
13
Makefile
13
Makefile
@ -14,16 +14,23 @@ docs:
|
||||
pandoc pgloader.1.md -o pgloader.html
|
||||
pandoc pgloader.1.md -o pgloader.pdf
|
||||
|
||||
~/quicklisp/local-projects/cl-abnf:
|
||||
git clone https://github.com/dimitri/cl-abnf.git $@
|
||||
|
||||
~/quicklisp/local-projects/cl-db3:
|
||||
git clone https://github.com/dimitri/cl-db3.git $@
|
||||
|
||||
~/quicklisp/local-projects/Postmodern:
|
||||
git clone https://github.com/marijnh/Postmodern.git $@
|
||||
cd ~/quicklisp/local-projects/Postmodern/ && patch -p1 < $(POMO_PATCH)
|
||||
|
||||
postmodern: ~/quicklisp/local-projects/Postmodern ;
|
||||
|
||||
~/quicklisp/local-projects/cl-csv:
|
||||
git clone -b empty-strings-and-nil https://github.com/dimitri/cl-csv.git $@
|
||||
|
||||
cl-abnf: ~/quicklisp/local-projects/cl-abnf ;
|
||||
cl-db3: ~/quicklisp/local-projects/cl-db3 ;
|
||||
cl-csv: ~/quicklisp/local-projects/cl-csv ;
|
||||
postmodern: ~/quicklisp/local-projects/Postmodern ;
|
||||
|
||||
~/quicklisp/setup.lisp:
|
||||
curl -o ~/quicklisp.lisp http://beta.quicklisp.org/quicklisp.lisp
|
||||
@ -39,7 +46,7 @@ $(ASDF_CONF):
|
||||
|
||||
asdf-config: $(ASDF_CONF) ;
|
||||
|
||||
$(LIBS): quicklisp $(ASDF_CONF) postmodern cl-csv
|
||||
$(LIBS): quicklisp $(ASDF_CONF) cl-abnf cl-db3 postmodern cl-csv
|
||||
sbcl --load ~/quicklisp/setup.lisp \
|
||||
--eval '(ql:quickload "pgloader")' \
|
||||
--eval '(quit)'
|
||||
|
||||
@ -1,197 +0,0 @@
|
||||
# ABNF DEFINITION OF ABNF
|
||||
|
||||
This Common Lisp librairie implements a parser generator for the ABNF
|
||||
grammar format as described in [http://tools.ietf.org/html/rfc2234](RFC
|
||||
2234).
|
||||
|
||||
The generated parser is a regular expression scanner provided by the
|
||||
[http://weitz.de/cl-ppcre/](cl-ppcre) lib, which means that we can't parse
|
||||
recursive grammar definition. One such definition is the ABNF definition as
|
||||
given by the RFC. Fortunately, as you have this lib, you most probably don't
|
||||
need to generate another parser to handle that particular ABNF grammar.
|
||||
|
||||
## Installation
|
||||
|
||||
The system has been made Quicklisp ready.
|
||||
|
||||
$ cd ~/quicklisp/local-projects/
|
||||
$ git clone git://git.tapoueh.org/pgloader.git
|
||||
* (ql:quickload "abnf")
|
||||
|
||||
Currently the ABNF system is maintained as part of the `pgloader` tool as a
|
||||
central piece of its syslog message parser facility.
|
||||
|
||||
## Usage
|
||||
|
||||
The `parse-abnf-grammar` function expects the grammar to be parsed as a
|
||||
string, and also needs the top level rule name of the grammar you're
|
||||
interested into, as a symbol or a string. You can also give a list of rule
|
||||
names that you want to capture, they will be capture in the order in which
|
||||
they are needed to expand the given top-level rule.
|
||||
|
||||
The `parse-abnf-grammar` function returns a `cl-ppcre` scanner.
|
||||
|
||||
~~~ {#example.lisp .commonlisp .numberLines}
|
||||
(defvar *timestamp-abnf*
|
||||
" TIMESTAMP = NILVALUE / FULL-DATE \"T\" FULL-TIME
|
||||
FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY
|
||||
DATE-FULLYEAR = 4DIGIT
|
||||
DATE-MONTH = 2DIGIT ; 01-12
|
||||
DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
|
||||
; month/year
|
||||
FULL-TIME = PARTIAL-TIME TIME-OFFSET
|
||||
PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND
|
||||
[TIME-SECFRAC]
|
||||
TIME-HOUR = 2DIGIT ; 00-23
|
||||
TIME-MINUTE = 2DIGIT ; 00-59
|
||||
TIME-SECOND = 2DIGIT ; 00-59
|
||||
TIME-SECFRAC = \".\" 1*6DIGIT
|
||||
TIME-OFFSET = \"Z\" / TIME-NUMOFFSET
|
||||
TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE
|
||||
|
||||
NILVALUE = \"-\" "
|
||||
"A timestamp ABNF grammar.")
|
||||
|
||||
(let ((scanner (abnf:parse-abnf-grammar *timestamp-abnf*
|
||||
:timestamp
|
||||
:registering-rules '(:full-date))))
|
||||
(cl-ppcre:register-groups-bind (date)
|
||||
(scanner "2013-09-08T00:02:03.123456Z+02:00")
|
||||
date))
|
||||
~~~
|
||||
|
||||
In the previous usage example the `let` block returns `"2013-09-08"`.
|
||||
|
||||
## ABNF grammar
|
||||
|
||||
This library supports the ABNF grammar as given in RFC 2234, with additional
|
||||
support for plain regular expressions.
|
||||
|
||||
### Parsed grammar
|
||||
|
||||
Here's the RFC syntax:
|
||||
|
||||
rulelist = 1*( rule / (*c-wsp c-nl) )
|
||||
|
||||
rule = rulename defined-as elements c-nl
|
||||
; continues if next line starts
|
||||
; with white space
|
||||
|
||||
rulename = ALPHA *(ALPHA / DIGIT / "-")
|
||||
|
||||
defined-as = *c-wsp ("=" / "=/") *c-wsp
|
||||
; basic rules definition and
|
||||
; incremental alternatives
|
||||
|
||||
elements = alternation *c-wsp
|
||||
|
||||
c-wsp = WSP / (c-nl WSP)
|
||||
|
||||
c-nl = comment / CRLF
|
||||
; comment or newline
|
||||
|
||||
comment = ";" *(WSP / VCHAR) CRLF
|
||||
|
||||
alternation = concatenation
|
||||
*(*c-wsp "/" *c-wsp concatenation)
|
||||
|
||||
concatenation = repetition *(1*c-wsp repetition)
|
||||
|
||||
repetition = [repeat] element
|
||||
|
||||
repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
|
||||
|
||||
element = rulename / group / option /
|
||||
char-val / num-val / prose-val / regex
|
||||
; regex is an addition of this lib, see above
|
||||
|
||||
group = "(" *c-wsp alternation *c-wsp ")"
|
||||
|
||||
option = "[" *c-wsp alternation *c-wsp "]"
|
||||
|
||||
char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
|
||||
; quoted string of SP and VCHAR
|
||||
; without DQUOTE
|
||||
|
||||
num-val = "%" (bin-val / dec-val / hex-val)
|
||||
|
||||
bin-val = "b" 1*BIT
|
||||
[ 1*("." 1*BIT) / ("-" 1*BIT) ]
|
||||
; series of concatenated bit values
|
||||
; or single ONEOF range
|
||||
|
||||
dec-val = "d" 1*DIGIT
|
||||
[ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
|
||||
|
||||
hex-val = "x" 1*HEXDIG
|
||||
[ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
|
||||
|
||||
prose-val = "<" *(%x20-3D / %x3F-7E) ">"
|
||||
; bracketed string of SP and VCHAR
|
||||
; without angles
|
||||
; prose description, to be used as
|
||||
; last resort
|
||||
|
||||
### Core rules
|
||||
|
||||
Those parts of the grammar are always provided, they are the *defaults*
|
||||
rules of the ABNF definition.
|
||||
|
||||
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||
|
||||
BIT = "0" / "1"
|
||||
|
||||
CHAR = %x01-7F
|
||||
; any 7-bit US-ASCII character, excluding NUL
|
||||
|
||||
CR = %x0D
|
||||
; carriage return
|
||||
|
||||
CRLF = CR LF
|
||||
; Internet standard newline
|
||||
|
||||
CTL = %x00-1F / %x7F
|
||||
; controls
|
||||
|
||||
DIGIT = %x30-39
|
||||
; 0-9
|
||||
|
||||
DQUOTE = %x22
|
||||
; " (Double Quote)
|
||||
|
||||
HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
|
||||
|
||||
HTAB = %x09
|
||||
; horizontal tab
|
||||
|
||||
LF = %x0A
|
||||
; linefeed
|
||||
|
||||
LWSP = *(WSP / CRLF WSP)
|
||||
; linear white space (past newline)
|
||||
|
||||
OCTET = %x00-FF
|
||||
; 8 bits of data
|
||||
|
||||
SP = %x20
|
||||
|
||||
### Regex Support
|
||||
|
||||
We add support for plain regexp in the `element` rule. A regexp is expected
|
||||
to follow the form:
|
||||
|
||||
regex = "~" delimiter expression delimiter
|
||||
|
||||
The *expression* shouldn't contain the *delimiter* of course, and the
|
||||
allowed delimiters are `~//`, `~[]`, `~{}`, `~()`, `~<>`, `~""`, `~''`,
|
||||
`~||` and `~##`. If you have to build a regexp with more than one of those
|
||||
delimiters in it, you can just concatenate multiple parts together like in
|
||||
this example:
|
||||
|
||||
complex-regex = ~/foo{bar}/ ~{baz/quux}
|
||||
|
||||
That will be used in exactly the same way as the following example:
|
||||
|
||||
complex-regex = ~<foo{bar}baz/quux>
|
||||
|
||||
|
||||
@ -1,14 +0,0 @@
|
||||
;;;; abnf.asd
|
||||
|
||||
(asdf:defsystem #:abnf
|
||||
:serial t
|
||||
:description "ABNF Parser Generator, per RFC2234"
|
||||
:author "Dimitri Fontaine <dim@tapoueh.org>"
|
||||
:license "WTFPL"
|
||||
:depends-on (#:esrap ; parser generator
|
||||
#:cl-ppcre ; regular expression
|
||||
)
|
||||
:components ((:file "package")
|
||||
(:file "abnf" :depends-on ("package"))))
|
||||
|
||||
|
||||
@ -1,586 +0,0 @@
|
||||
;;;
|
||||
;;; Augmented BNF for Syntax Specifications: ABNF
|
||||
;;;
|
||||
;;; Parsing ABNF syntaxes so that we can offer users to edit them
|
||||
;;;
|
||||
;;; see http://tools.ietf.org/html/draft-ietf-syslog-protocol-15#page-10
|
||||
;;; and http://tools.ietf.org/html/rfc2234
|
||||
;;;
|
||||
(in-package #:abnf)
|
||||
|
||||
(defvar *abnf-rfc2234-abnf-definition*
|
||||
" rulelist = 1*( rule / (*c-wsp c-nl) )
|
||||
|
||||
rule = rulename defined-as elements c-nl
|
||||
; continues if next line starts
|
||||
; with white space
|
||||
|
||||
rulename = ALPHA *(ALPHA / DIGIT / \"-\")
|
||||
|
||||
defined-as = *c-wsp (\"=\" / \"=/\") *c-wsp
|
||||
; basic rules definition and
|
||||
; incremental alternatives
|
||||
|
||||
elements = alternation *c-wsp
|
||||
|
||||
c-wsp = WSP / (c-nl WSP)
|
||||
|
||||
c-nl = comment / CRLF
|
||||
; comment or newline
|
||||
|
||||
comment = \";\" *(WSP / VCHAR) CRLF
|
||||
|
||||
alternation = concatenation
|
||||
*(*c-wsp \"/\" *c-wsp concatenation)
|
||||
|
||||
concatenation = repetition *(1*c-wsp repetition)
|
||||
|
||||
repetition = [repeat] element
|
||||
|
||||
repeat = 1*DIGIT / (*DIGIT \"*\" *DIGIT)
|
||||
|
||||
element = rulename / group / option /
|
||||
char-val / num-val / prose-val
|
||||
|
||||
group = \"(\" *c-wsp alternation *c-wsp \")\"
|
||||
|
||||
option = \"[\" *c-wsp alternation *c-wsp \"]\"
|
||||
|
||||
char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
|
||||
; quoted string of SP and VCHAR without DQUOTE
|
||||
|
||||
num-val = \"%\" (bin-val / dec-val / hex-val)
|
||||
|
||||
bin-val = \"b\" 1*BIT
|
||||
[ 1*(\".\" 1*BIT) / (\"-\" 1*BIT) ]
|
||||
; series of concatenated bit values
|
||||
; or single ONEOF range
|
||||
|
||||
dec-val = \"d\" 1*DIGIT
|
||||
[ 1*(\".\" 1*DIGIT) / (\"-\" 1*DIGIT) ]
|
||||
|
||||
hex-val = \"x\" 1*HEXDIG
|
||||
[ 1*(\".\" 1*HEXDIG) / (\"-\" 1*HEXDIG) ]
|
||||
|
||||
prose-val = \"<\" *(%x20-3D / %x3F-7E) \">\"
|
||||
; bracketed string of SP and VCHAR without angles
|
||||
; prose description, to be used as last resort
|
||||
"
|
||||
"See http://tools.ietf.org/html/rfc2234#section-4")
|
||||
|
||||
(defvar *abnf-rfc-syslog-draft-15*
|
||||
"SYSLOG-MSG = HEADER SP STRUCTURED-DATA [SP MSG]
|
||||
|
||||
HEADER = VERSION SP FACILITY SP SEVERITY SP
|
||||
TRUNCATE SP TIMESTAMP SP HOSTNAME
|
||||
SP APP-NAME SP PROCID SP MSGID
|
||||
VERSION = NONZERO-DIGIT 0*2DIGIT
|
||||
FACILITY = \"0\" / (NONZERO-DIGIT 0*9DIGIT)
|
||||
; range 0..2147483647 ;
|
||||
SEVERITY = \"0\" / \"1\" / \"2\" / \"3\" / \"4\" / \"5\" /
|
||||
\"6\" / \"7\"
|
||||
TRUNCATE = 2DIGIT
|
||||
HOSTNAME = 1*255PRINTUSASCII
|
||||
|
||||
APP-NAME = 1*48PRINTUSASCII
|
||||
PROCID = \"-\" / 1*128PRINTUSASCII
|
||||
MSGID = \"-\" / 1*32PRINTUSASCII
|
||||
|
||||
TIMESTAMP = FULL-DATE \"T\" FULL-TIME
|
||||
FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY
|
||||
DATE-FULLYEAR = 4DIGIT
|
||||
DATE-MONTH = 2DIGIT ; 01-12
|
||||
DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
|
||||
; month/year ;
|
||||
FULL-TIME = PARTIAL-TIME TIME-OFFSET
|
||||
PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND
|
||||
[TIME-SECFRAC]
|
||||
TIME-HOUR = 2DIGIT ; 00-23
|
||||
TIME-MINUTE = 2DIGIT ; 00-59
|
||||
TIME-SECOND = 2DIGIT ; 00-58, 00-59, 00-60 based on leap
|
||||
; second rules ;
|
||||
TIME-SECFRAC = \".\" 1*6DIGIT
|
||||
TIME-OFFSET = \"Z\" / TIME-NUMOFFSET
|
||||
TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE
|
||||
|
||||
|
||||
STRUCTURED-DATA = 1*SD-ELEMENT / \"-\"
|
||||
SD-ELEMENT = \"[\" SD-ID *(SP SD-PARAM) \"]\"
|
||||
SD-PARAM = PARAM-NAME \"=\" %d34 PARAM-VALUE %d34
|
||||
SD-ID = SD-NAME
|
||||
PARAM-NAME = SD-NAME
|
||||
PARAM-VALUE = UTF-8-STRING ; characters '\"', '\' and
|
||||
; ']' MUST be escaped. ;
|
||||
SD-NAME = 1*32PRINTUSASCII
|
||||
; except '=', SP, ']', %d34 (\") ;
|
||||
|
||||
MSG = UTF-8-STRING
|
||||
UTF-8-STRING = *OCTET ; Any VALID UTF-8 String
|
||||
; \"shortest form\" MUST be used ;
|
||||
|
||||
OCTET = %d00-255
|
||||
SP = %d32
|
||||
PRINTUSASCII = %d33-126
|
||||
NONZERO-DIGIT = \"1\" / \"2\" / \"3\" / \"4\" / \"5\" /
|
||||
\"6\" / \"7\" / \"8\" / \"9\"
|
||||
DIGIT = \"0\" / NONZERO-DIGIT"
|
||||
"See http://tools.ietf.org/html/draft-ietf-syslog-protocol-15#page-10")
|
||||
|
||||
(defvar *abnf-rsyslog*
|
||||
(concatenate 'string
|
||||
"RSYSLOG-MSG = \"<\" PRIVAL \">\" VERSION SP TIMESTAMP
|
||||
SP HOSTNAME SP APP-NAME SP PROCID SP MSGID
|
||||
SP [SD-ID SP] DATA
|
||||
|
||||
DATA = ~/.*/
|
||||
|
||||
PRIVAL = 1*3DIGIT ; range 0 .. 191"
|
||||
'(#\Newline #\Newline)
|
||||
*abnf-rfc-syslog-draft-15*)
|
||||
"See http://www.rsyslog.com/doc/syslog_protocol.html")
|
||||
|
||||
(defvar *abnf-rfc5424-syslog-protocol*
|
||||
" SYSLOG-MSG = HEADER SP STRUCTURED-DATA [SP MSG]
|
||||
|
||||
HEADER = PRI VERSION SP TIMESTAMP SP HOSTNAME
|
||||
SP APP-NAME SP PROCID SP MSGID
|
||||
PRI = \"<\" PRIVAL \">\"
|
||||
PRIVAL = 1*3DIGIT ; range 0 .. 191
|
||||
VERSION = NONZERO-DIGIT 0*2DIGIT
|
||||
HOSTNAME = NILVALUE / 1*255PRINTUSASCII
|
||||
|
||||
APP-NAME = NILVALUE / 1*48PRINTUSASCII
|
||||
PROCID = NILVALUE / 1*128PRINTUSASCII
|
||||
MSGID = NILVALUE / 1*32PRINTUSASCII
|
||||
|
||||
TIMESTAMP = NILVALUE / FULL-DATE \"T\" FULL-TIME
|
||||
FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY
|
||||
DATE-FULLYEAR = 4DIGIT
|
||||
DATE-MONTH = 2DIGIT ; 01-12
|
||||
DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
|
||||
; month/year
|
||||
FULL-TIME = PARTIAL-TIME TIME-OFFSET
|
||||
PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND
|
||||
[TIME-SECFRAC]
|
||||
TIME-HOUR = 2DIGIT ; 00-23
|
||||
TIME-MINUTE = 2DIGIT ; 00-59
|
||||
TIME-SECOND = 2DIGIT ; 00-59
|
||||
TIME-SECFRAC = \".\" 1*6DIGIT
|
||||
TIME-OFFSET = \"Z\" / TIME-NUMOFFSET
|
||||
TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE
|
||||
|
||||
|
||||
STRUCTURED-DATA = NILVALUE / 1*SD-ELEMENT
|
||||
SD-ELEMENT = \"[\" SD-ID *(SP SD-PARAM) \"]\"
|
||||
SD-PARAM = PARAM-NAME \"=\" %d34 PARAM-VALUE %d34
|
||||
SD-ID = SD-NAME
|
||||
PARAM-NAME = SD-NAME
|
||||
PARAM-VALUE = UTF-8-STRING ; characters '\"', '\' and
|
||||
; ']' MUST be escaped.
|
||||
SD-NAME = 1*32PRINTUSASCII
|
||||
; except '=', SP, ']', %d34 (\")
|
||||
|
||||
MSG = MSG-ANY / MSG-UTF8
|
||||
MSG-ANY = *OCTET ; not starting with BOM
|
||||
MSG-UTF8 = BOM UTF-8-STRING
|
||||
BOM = %xEF.BB.BF
|
||||
|
||||
UTF-8-STRING = *OCTET ; UTF-8 string as specified
|
||||
; in RFC 3629
|
||||
|
||||
OCTET = %d00-255
|
||||
SP = %d32
|
||||
PRINTUSASCII = %d33-126
|
||||
NONZERO-DIGIT = %d49-57
|
||||
DIGIT = %d48 / NONZERO-DIGIT
|
||||
NILVALUE = \"-\""
|
||||
"See http://tools.ietf.org/html/rfc5424#section-6")
|
||||
|
||||
#|
|
||||
|
||||
This table comes from http://tools.ietf.org/html/rfc2234#page-11 and 12.
|
||||
|
||||
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||
BIT = "0" / "1"
|
||||
CHAR = %x01-7F
|
||||
CR = %x0D
|
||||
CRLF = CR LF
|
||||
CTL = %x00-1F / %x7F
|
||||
DIGIT = %x30-39
|
||||
DQUOTE = %x22
|
||||
HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
|
||||
HTAB = %x09
|
||||
LF = %x0A
|
||||
LWSP = *(WSP / CRLF WSP)
|
||||
OCTET = %x00-FF
|
||||
SP = %x20
|
||||
VCHAR = %x21-7E
|
||||
WSP = SP / HTAB
|
||||
|#
|
||||
|
||||
(defvar *abnf-default-rules*
|
||||
`((:abnf-alpha (:char-class (:range #\A #\Z) (:range #\a #\z)))
|
||||
(:abnf-bit (:char-class #\0 #\1))
|
||||
(:abnf-char (:char-class (:range ,(code-char #x1) ,(code-char #x7f))))
|
||||
(:abnf-cr #\Newline)
|
||||
(:abnf-crlf (:sequence #\Newline #\Return))
|
||||
(:abnf-ctl (:char-class (:range ,(code-char #x0) ,(code-char #x1f))
|
||||
,(code-char #x7f)))
|
||||
(:abnf-digit (:char-class (:range #\0 #\9)))
|
||||
(:abnf-dquote #\")
|
||||
(:abnf-hexdig (:char-class (:range #\0 #\9) (:range #\A #\F)))
|
||||
(:abnf-htab #\Tab)
|
||||
(:abnf-lf #\Newline)
|
||||
(:abnf-lwsp (:regex "\s+"))
|
||||
(:abnf-octet (:char-class (:range ,(code-char #x0) ,(code-char #xff))))
|
||||
(:abnf-sp #\Space)
|
||||
(:abnf-vchar (:char-class (:range ,(code-char #x21) ,(code-char #x7e))))
|
||||
(:abnf-wsp (:char-class #\Space #\Tab)))
|
||||
"An alist of the usual rules needed for ABNF grammars")
|
||||
|
||||
(defun rule-name-character-p (character)
|
||||
(or (alphanumericp character)
|
||||
(char= character #\-)))
|
||||
|
||||
(defun vcharp (character)
|
||||
(<= #x21 (char-code character) #x7E))
|
||||
|
||||
(defrule vchar (+ (vcharp character)) (:text t))
|
||||
(defrule wsp (or #\Space #\Tab) (:constant :wsp))
|
||||
|
||||
(defrule comment (and ";" (* (or wsp vchar)) #\Newline) (:constant :comment))
|
||||
(defrule c-nl (or comment #\Newline) (:constant :c-nl))
|
||||
(defrule c-wsp (or wsp c-nl) (:constant :c-wsp))
|
||||
(defrule n-wsp (* c-wsp) (:constant :c-wsp))
|
||||
|
||||
(defun rule-name-symbol (rule-name &key find-symbol)
|
||||
"Turn the string we read in the ABNF into internal symbol."
|
||||
(let ((symbol-fun (if find-symbol #'find-symbol #'intern))
|
||||
(symbol-name (string-upcase (format nil "abnf-~a" rule-name))))
|
||||
(funcall symbol-fun symbol-name :keyword)))
|
||||
|
||||
(defrule rule-name (and (alpha-char-p character)
|
||||
(+ (rule-name-character-p character)))
|
||||
(:lambda (name)
|
||||
(rule-name-symbol (text name))))
|
||||
|
||||
(defrule equal (and n-wsp #\= n-wsp) (:constant :equal))
|
||||
(defrule end-of-rule n-wsp (:constant :eor))
|
||||
|
||||
(defrule digit (digit-char-p character)
|
||||
(:lambda (digit)
|
||||
(parse-integer (text digit))))
|
||||
|
||||
(defrule digits (+ (digit-char-p character))
|
||||
(:lambda (digits)
|
||||
(code-char (parse-integer (text digits)))))
|
||||
|
||||
(defun char-val-char-p (character)
|
||||
(let ((code (char-code character)))
|
||||
(or (<= #x20 code #x21)
|
||||
(<= #x23 code #x7E))))
|
||||
|
||||
(defrule char-val (and #\" (* (char-val-char-p character)) #\")
|
||||
(:lambda (char)
|
||||
(destructuring-bind (open val close) char
|
||||
(declare (ignore open close))
|
||||
(text val))))
|
||||
|
||||
(defrule dec-string (and digits (+ (and "." digits)))
|
||||
(:lambda (string)
|
||||
(destructuring-bind (first rest) string
|
||||
`(:sequence ,first ,@(mapcar #'cadr rest)))))
|
||||
|
||||
(defrule dec-range (and digits "-" digits)
|
||||
(:lambda (range)
|
||||
(destructuring-bind (min sep max) range
|
||||
(declare (ignore sep))
|
||||
`(:char-class (:range ,min ,max)))))
|
||||
|
||||
(defrule dec-val (and "d" (or dec-string dec-range digits))
|
||||
(:lambda (dv)
|
||||
(destructuring-bind (d val) dv
|
||||
(declare (ignore d))
|
||||
val)))
|
||||
|
||||
(defun hexadecimal-char-p (character)
|
||||
(member character #. (quote (coerce "0123456789abcdefABCDEF" 'list))))
|
||||
|
||||
(defrule hexdigits (+ (hexadecimal-char-p character))
|
||||
(:lambda (hx)
|
||||
(code-char (parse-integer (text hx) :radix 16))))
|
||||
|
||||
(defrule hex-string (and hexdigits (+ (and "." hexdigits)))
|
||||
(:lambda (string)
|
||||
(destructuring-bind (first rest) string
|
||||
`(:sequence ,first ,@(mapcar #'cadr rest)))))
|
||||
|
||||
(defrule hex-range (and hexdigits range-sep hexdigits)
|
||||
(:lambda (range)
|
||||
(destructuring-bind (min sep max) range
|
||||
(declare (ignore sep))
|
||||
`(:char-class (:range ,min ,max)))))
|
||||
|
||||
(defrule hex-val (and "x" (or hex-string hex-range hexdigits))
|
||||
(:lambda (dv)
|
||||
(destructuring-bind (d val) dv
|
||||
(declare (ignore d))
|
||||
val)))
|
||||
|
||||
(defrule num-val (and "%" (or dec-val hex-val))
|
||||
(:lambda (nv)
|
||||
(destructuring-bind (percent val) nv
|
||||
(declare (ignore percent))
|
||||
val)))
|
||||
|
||||
;;; allow to parse rule definitions without a separating blank line
|
||||
(defrule rule-name-reference (and rule-name (! equal))
|
||||
(:lambda (ref)
|
||||
(destructuring-bind (rule-name nil) ref
|
||||
rule-name)))
|
||||
|
||||
;;; what about allowing regular expressions directly?
|
||||
(defun process-quoted-regex (pr)
|
||||
"Helper function to process different kinds of quotes for regexps"
|
||||
(destructuring-bind (open regex close) pr
|
||||
(declare (ignore open close))
|
||||
`(:regex ,(text regex))))
|
||||
|
||||
(defrule single-quoted-regex (and #\' (+ (not #\')) #\')
|
||||
(:function process-quoted-regex))
|
||||
|
||||
(defrule double-quoted-regex (and #\" (+ (not #\")) #\")
|
||||
(:function process-quoted-regex))
|
||||
|
||||
(defrule parens-quoted-regex (and #\( (+ (not #\))) #\))
|
||||
(:function process-quoted-regex))
|
||||
|
||||
(defrule braces-quoted-regex (and #\{ (+ (not #\})) #\})
|
||||
(:function process-quoted-regex))
|
||||
|
||||
(defrule chevron-quoted-regex (and #\< (+ (not #\>)) #\>)
|
||||
(:function process-quoted-regex))
|
||||
|
||||
(defrule brackets-quoted-regex (and #\[ (+ (not #\])) #\])
|
||||
(:function process-quoted-regex))
|
||||
|
||||
(defrule slash-quoted-regex (and #\/ (+ (not #\/)) #\/)
|
||||
(:function process-quoted-regex))
|
||||
|
||||
(defrule pipe-quoted-regex (and #\| (+ (not #\|)) #\|)
|
||||
(:function process-quoted-regex))
|
||||
|
||||
(defrule sharp-quoted-regex (and #\# (+ (not #\#)) #\#)
|
||||
(:function process-quoted-regex))
|
||||
|
||||
(defrule quoted-regex (and "~" (or single-quoted-regex
|
||||
double-quoted-regex
|
||||
parens-quoted-regex
|
||||
braces-quoted-regex
|
||||
chevron-quoted-regex
|
||||
brackets-quoted-regex
|
||||
slash-quoted-regex
|
||||
pipe-quoted-regex
|
||||
sharp-quoted-regex))
|
||||
(:lambda (qr)
|
||||
(destructuring-bind (tilde regex) qr
|
||||
(declare (ignore tilde))
|
||||
regex)))
|
||||
|
||||
(defrule element (or rule-name-reference char-val num-val quoted-regex))
|
||||
|
||||
(defrule number (+ (digit-char-p character))
|
||||
(:lambda (number)
|
||||
(parse-integer (text number))))
|
||||
|
||||
(defrule repeat-var (and (? number) "*" (? number))
|
||||
(:lambda (rv)
|
||||
(destructuring-bind (min star max) rv
|
||||
(declare (ignore star))
|
||||
(cons (or min 0) max))))
|
||||
|
||||
(defrule repeat-specific number
|
||||
(:lambda (number)
|
||||
(cons number number)))
|
||||
|
||||
(defrule repeat (or repeat-var repeat-specific))
|
||||
|
||||
(defrule repetition (and (? repeat) toplevel-element)
|
||||
(:lambda (repetition)
|
||||
(destructuring-bind (repeat element) repetition
|
||||
(if repeat
|
||||
(destructuring-bind (min . max) repeat
|
||||
`(:non-greedy-repetition ,min ,max ,element))
|
||||
;; no repeat clause
|
||||
element))))
|
||||
|
||||
(defrule concatenation-element (and n-wsp repetition)
|
||||
(:lambda (ce)
|
||||
(destructuring-bind (n-wsp rep) ce
|
||||
(declare (ignore n-wsp))
|
||||
rep)))
|
||||
|
||||
(defrule concatenation (and repetition (* concatenation-element))
|
||||
(:lambda (concat)
|
||||
(destructuring-bind (rep1 rlist) concat
|
||||
(if rlist
|
||||
`(:sequence ,@(list* rep1 rlist))
|
||||
;; concatenation of a single element
|
||||
rep1))))
|
||||
|
||||
(defrule alternation-element (and n-wsp "/" n-wsp concatenation)
|
||||
(:lambda (ae)
|
||||
(destructuring-bind (ws1 sl ws2 concatenation) ae
|
||||
(declare (ignore ws1 sl ws2))
|
||||
concatenation)))
|
||||
|
||||
(defrule alternation (and concatenation (* alternation-element))
|
||||
(:lambda (alternation)
|
||||
(destructuring-bind (c1 clist) alternation
|
||||
(if clist
|
||||
`(:alternation ,@(list* c1 clist))
|
||||
c1))))
|
||||
|
||||
(defrule group (and "(" n-wsp alternation n-wsp ")")
|
||||
(:lambda (group)
|
||||
(destructuring-bind (open ws1 a ws2 close) group
|
||||
(declare (ignore open close ws1 ws2))
|
||||
;; we need the grouping when parsing the ABNF syntax, but once this
|
||||
;; parsing is done there's no ambiguity possible left and we don't
|
||||
;; need the grouping anymore in the resulting regular-expression parse
|
||||
;; tree.
|
||||
a)))
|
||||
|
||||
(defrule option (and "[" n-wsp alternation n-wsp "]")
|
||||
(:lambda (option)
|
||||
(destructuring-bind (open ws1 a ws2 close) option
|
||||
(declare (ignore open close ws1 ws2))
|
||||
`(:non-greedy-repetition 0 1 ,a))))
|
||||
|
||||
(defrule toplevel-element (or group option element))
|
||||
|
||||
(defrule alternations (and n-wsp alternation)
|
||||
(:lambda (alts)
|
||||
(destructuring-bind (n-wsp alt) alts
|
||||
(declare (ignore n-wsp))
|
||||
alt)))
|
||||
|
||||
(defrule elements (and (+ alternations) end-of-rule)
|
||||
(:lambda (alist)
|
||||
(destructuring-bind (alts eor) alist
|
||||
(declare (ignore eor))
|
||||
alts)))
|
||||
|
||||
(defrule rule (and n-wsp rule-name equal elements)
|
||||
(:lambda (rule)
|
||||
(destructuring-bind (n-wsp rule-name eq definition) rule
|
||||
(declare (ignore n-wsp eq))
|
||||
(cons rule-name definition))))
|
||||
|
||||
(defrule rule-list (+ rule))
|
||||
|
||||
;;;
|
||||
;;; Now that we are able to transform ABNF rule set into an alist of
|
||||
;;; cl-ppcre parse trees and references to other rules in the set, we need
|
||||
;;; to expand each symbol's definition to get a real cl-ppcre scanner parse
|
||||
;;; tree.
|
||||
;;;
|
||||
(defun expand-rule-definition (definition
|
||||
rule-set
|
||||
registering-rules
|
||||
already-expanded-rules)
|
||||
"Expand given rule DEFINITION within given RULE-SET"
|
||||
(typecase definition
|
||||
(list
|
||||
;; walk the definition and expand its elements
|
||||
(loop
|
||||
for element in definition
|
||||
collect (expand-rule-definition element
|
||||
rule-set
|
||||
registering-rules
|
||||
already-expanded-rules)))
|
||||
|
||||
(symbol
|
||||
(if (member definition '(:sequence
|
||||
:alternation
|
||||
:regex
|
||||
:char-class
|
||||
:range
|
||||
:non-greedy-repetition))
|
||||
;; that's a cl-ppcre scanner parse-tree symbol
|
||||
;; only put in that list those cl-ppcre symbols we actually produce
|
||||
definition
|
||||
|
||||
;; here we have to actually expand the symbol
|
||||
(progn
|
||||
;; first protect against infinite recursion
|
||||
(when (member definition already-expanded-rules)
|
||||
(error "Can not expand recursive rule: ~S." definition))
|
||||
|
||||
(destructuring-bind (rule-name rule-definition)
|
||||
(or (assoc definition rule-set)
|
||||
(assoc definition *abnf-default-rules*))
|
||||
(let* ((already-expanded-rules
|
||||
(cons definition already-expanded-rules))
|
||||
|
||||
(expanded-definition
|
||||
(expand-rule-definition rule-definition
|
||||
rule-set
|
||||
registering-rules
|
||||
already-expanded-rules)))
|
||||
(if (member rule-name registering-rules)
|
||||
`(:register ,expanded-definition)
|
||||
expanded-definition))))))
|
||||
|
||||
;; all other types of data are "constants" in our parse-tree
|
||||
(t definition)))
|
||||
|
||||
(defun expand-rule (rule-name rule-set &optional registering-rules)
|
||||
"Given a rule, expand it completely removing references to other parsed
|
||||
rules"
|
||||
(let ((rule (rule-name-symbol rule-name :find-symbol t)))
|
||||
(destructuring-bind (rule-name definition)
|
||||
(assoc rule rule-set)
|
||||
`(:sequence
|
||||
:start-anchor
|
||||
,(expand-rule-definition definition
|
||||
rule-set
|
||||
(loop
|
||||
for rr in registering-rules
|
||||
collect (rule-name-symbol rr :find-symbol t))
|
||||
(list rule-name))))))
|
||||
|
||||
(defun parse-abnf-grammar (abnf-string top-level-rule
|
||||
&key registering-rules junk-allowed)
|
||||
"Parse STRING as an ABNF grammar as defined in RFC 2234. Returns a cl-ppcre
|
||||
scanner that will only match strings conforming to given grammar.
|
||||
|
||||
See http://tools.ietf.org/html/rfc2234 for details about the ABNF specs.
|
||||
Added to that grammar is support for regular expression, that are
|
||||
expected in the ELEMENT production and spelled ~/regex/. The allowed
|
||||
delimiters are: ~// ~[] ~{} ~() ~<> ~\"\" ~'' ~|| and ~##."
|
||||
(let ((rule-set
|
||||
(parse 'rule-list abnf-string :junk-allowed junk-allowed)))
|
||||
(cl-ppcre:create-scanner
|
||||
(expand-rule top-level-rule
|
||||
;; in case of duplicates only keep the latest addition
|
||||
(remove-duplicates rule-set :key #'car)
|
||||
registering-rules))))
|
||||
|
||||
(defun test (&key (times 10000))
|
||||
"This serves as a test and an example: if you're going to use the same
|
||||
scanner more than one, be sure to compute it only once."
|
||||
(let* ((cl-ppcre:*use-bmh-matchers* t)
|
||||
(cl-ppcre:*optimize-char-classes* t)
|
||||
(scanner
|
||||
(parse-abnf-grammar *abnf-rfc-syslog-draft-15*
|
||||
:timestamp
|
||||
:registering-rules '(:full-date
|
||||
:partial-time
|
||||
:time-offset))))
|
||||
(loop
|
||||
repeat times
|
||||
do (cl-ppcre:register-groups-bind
|
||||
(date time zone)
|
||||
(scanner "2013-09-08T00:02:03.123456Z+02:00")
|
||||
(list date time zone)))))
|
||||
@ -1,38 +0,0 @@
|
||||
;;;; abnf-example.lisp
|
||||
|
||||
(defpackage #:abnf-example
|
||||
(:use #:cl #:abnf #:cl-ppcre))
|
||||
|
||||
(in-package #:abnf-example)
|
||||
|
||||
(defvar *timestamp-abnf*
|
||||
" TIMESTAMP = NILVALUE / FULL-DATE \"T\" FULL-TIME
|
||||
FULL-DATE = DATE-FULLYEAR \"-\" DATE-MONTH \"-\" DATE-MDAY
|
||||
DATE-FULLYEAR = 4DIGIT
|
||||
DATE-MONTH = 2DIGIT ; 01-12
|
||||
DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
|
||||
; month/year
|
||||
FULL-TIME = PARTIAL-TIME TIME-OFFSET
|
||||
PARTIAL-TIME = TIME-HOUR \":\" TIME-MINUTE \":\" TIME-SECOND
|
||||
[TIME-SECFRAC]
|
||||
TIME-HOUR = 2DIGIT ; 00-23
|
||||
TIME-MINUTE = 2DIGIT ; 00-59
|
||||
TIME-SECOND = 2DIGIT ; 00-59
|
||||
TIME-SECFRAC = \".\" 1*6DIGIT
|
||||
TIME-OFFSET = \"Z\" / TIME-NUMOFFSET
|
||||
TIME-NUMOFFSET = (\"+\" / \"-\") TIME-HOUR \":\" TIME-MINUTE
|
||||
|
||||
NILVALUE = \"-\" "
|
||||
"A timestamp ABNF grammar.")
|
||||
|
||||
(let ((scanner (parse-abnf-grammar *timestamp-abnf*
|
||||
:timestamp
|
||||
:registering-rules '(:full-date))))
|
||||
(register-groups-bind (date)
|
||||
(scanner "2013-09-08T00:02:03.123456Z+02:00")
|
||||
date))
|
||||
|
||||
(let ((scanner (parse-abnf-grammar *timestamp-abnf*
|
||||
:timestamp
|
||||
:registering-rules '(:full-date))))
|
||||
(scan-to-strings scanner "2013-09-08T00:02:03.123456Z+02:00"))
|
||||
@ -1,8 +0,0 @@
|
||||
;;;; package.lisp
|
||||
|
||||
(defpackage #:abnf
|
||||
(:use #:cl #:esrap)
|
||||
(:export #:*abnf-rfc-syslog-draft-15*
|
||||
#:*abnf-rsyslog*
|
||||
#:*abnf-rfc5424-syslog-protocol*
|
||||
#:parse-abnf-grammar))
|
||||
@ -1,77 +0,0 @@
|
||||
# DB3
|
||||
|
||||
A lib to read dbf files version 3.
|
||||
|
||||
## Database file structure
|
||||
|
||||
The structure of a dBASE III database file is composed of a header
|
||||
and data records. The layout is given below.
|
||||
|
||||
### dBASE III DATABASE FILE HEADER:
|
||||
|
||||
+---------+-------------------+---------------------------------+
|
||||
| BYTE | CONTENTS | MEANING |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 0 | 1 byte | dBASE III version number |
|
||||
| | | (03H without a .DBT file) |
|
||||
| | | (83H with a .DBT file) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 1-3 | 3 bytes | date of last update |
|
||||
| | | (YY MM DD) in binary format |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 4-7 | 32 bit number | number of records in data file |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 8-9 | 16 bit number | length of header structure |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 10-11 | 16 bit number | length of the record |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 12-31 | 20 bytes | reserved bytes (version 1.00) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 32-n | 32 bytes each | field descriptor array |
|
||||
| | | (see below) | --+
|
||||
+---------+-------------------+---------------------------------+ |
|
||||
| n+1 | 1 byte | 0DH as the field terminator | |
|
||||
+---------+-------------------+---------------------------------+ |
|
||||
|
|
||||
|
|
||||
A FIELD DESCRIPTOR: <------------------------------------------+
|
||||
|
||||
+---------+-------------------+---------------------------------+
|
||||
| BYTE | CONTENTS | MEANING |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 0-10 | 11 bytes | field name in ASCII zero-filled |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 11 | 1 byte | field type in ASCII |
|
||||
| | | (C N L D or M) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 12-15 | 32 bit number | field data address |
|
||||
| | | (address is set in memory) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 16 | 1 byte | field length in binary |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 17 | 1 byte | field decimal count in binary |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 18-31 | 14 bytes | reserved bytes (version 1.00) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
|
||||
### The data records are layed out as follows:
|
||||
|
||||
1. Data records are preceeded by one byte that is a space (20H) if the
|
||||
record is not deleted and an asterisk (2AH) if it is deleted.
|
||||
|
||||
2. Data fields are packed into records with no field separators or record
|
||||
terminators.
|
||||
|
||||
3. Data types are stored in ASCII format as follows:
|
||||
|
||||
DATA TYPE DATA RECORD STORAGE
|
||||
--------- --------------------------------------------
|
||||
Character (ASCII characters)
|
||||
Numeric - . 0 1 2 3 4 5 6 7 8 9
|
||||
Logical ? Y y N n T t F f (? when not initialized)
|
||||
Memo (10 digits representing a .DBT block number)
|
||||
Date (8 digits in YYYYMMDD format, such as
|
||||
19840704 for July 4, 1984)
|
||||
-----------------------------------------------------------
|
||||
|
||||
|
||||
@ -1,12 +0,0 @@
|
||||
;;;; abnf.asd
|
||||
|
||||
(asdf:defsystem #:db3
|
||||
:serial t
|
||||
:description "DB3 file reader"
|
||||
:author "Xach"
|
||||
:license "WTFPL"
|
||||
:depends-on ()
|
||||
:components ((:file "package")
|
||||
(:file "db3" :depends-on ("package"))))
|
||||
|
||||
|
||||
218
lib/db3/db3.lisp
218
lib/db3/db3.lisp
@ -1,218 +0,0 @@
|
||||
;;;; http://xach.com/lisp/db3.lisp
|
||||
;;
|
||||
;; db3.lisp
|
||||
#|
|
||||
|
||||
Database file structure
|
||||
|
||||
The structure of a dBASE III database file is composed of a header
|
||||
and data records. The layout is given below.
|
||||
|
||||
|
||||
dBASE III DATABASE FILE HEADER:
|
||||
|
||||
+---------+-------------------+---------------------------------+
|
||||
| BYTE | CONTENTS | MEANING |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 0 | 1 byte | dBASE III version number |
|
||||
| | | (03H without a .DBT file) |
|
||||
| | | (83H with a .DBT file) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 1-3 | 3 bytes | date of last update |
|
||||
| | | (YY MM DD) in binary format |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 4-7 | 32 bit number | number of records in data file |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 8-9 | 16 bit number | length of header structure |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 10-11 | 16 bit number | length of the record |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 12-31 | 20 bytes | reserved bytes (version 1.00) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 32-n | 32 bytes each | field descriptor array |
|
||||
| | | (see below) | --+
|
||||
+---------+-------------------+---------------------------------+ |
|
||||
| n+1 | 1 byte | 0DH as the field terminator | |
|
||||
+---------+-------------------+---------------------------------+ |
|
||||
|
|
||||
|
|
||||
A FIELD DESCRIPTOR: <------------------------------------------+
|
||||
|
||||
+---------+-------------------+---------------------------------+
|
||||
| BYTE | CONTENTS | MEANING |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 0-10 | 11 bytes | field name in ASCII zero-filled |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 11 | 1 byte | field type in ASCII |
|
||||
| | | (C N L D or M) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 12-15 | 32 bit number | field data address |
|
||||
| | | (address is set in memory) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 16 | 1 byte | field length in binary |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 17 | 1 byte | field decimal count in binary |
|
||||
+---------+-------------------+---------------------------------+
|
||||
| 18-31 | 14 bytes | reserved bytes (version 1.00) |
|
||||
+---------+-------------------+---------------------------------+
|
||||
|
||||
|
||||
The data records are layed out as follows:
|
||||
|
||||
1. Data records are preceeded by one byte that is a space (20H) if the
|
||||
record is not deleted and an asterisk (2AH) if it is deleted.
|
||||
|
||||
2. Data fields are packed into records with no field separators or
|
||||
record terminators.
|
||||
|
||||
3. Data types are stored in ASCII format as follows:
|
||||
|
||||
DATA TYPE DATA RECORD STORAGE
|
||||
--------- --------------------------------------------
|
||||
Character (ASCII characters)
|
||||
Numeric - . 0 1 2 3 4 5 6 7 8 9
|
||||
Logical ? Y y N n T t F f (? when not initialized)
|
||||
Memo (10 digits representing a .DBT block number)
|
||||
Date (8 digits in YYYYMMDD format, such as
|
||||
19840704 for July 4, 1984)
|
||||
|
||||
|#
|
||||
(in-package :db3)
|
||||
|
||||
|
||||
;;; reading binary stuff
|
||||
(defun read-uint32 (stream)
|
||||
(loop repeat 4
|
||||
for offset from 0 by 8
|
||||
for value = (read-byte stream)
|
||||
then (logior (ash (read-byte stream) offset) value)
|
||||
finally (return value)))
|
||||
|
||||
(defun read-uint16 (stream)
|
||||
(loop repeat 2
|
||||
for offset from 0 by 8
|
||||
for value = (read-byte stream)
|
||||
then (logior (ash (read-byte stream) offset) value)
|
||||
finally (return value)))
|
||||
|
||||
|
||||
|
||||
;;; objects
|
||||
|
||||
(defclass db3 ()
|
||||
((version-number :accessor version-number)
|
||||
(last-update :accessor last-update)
|
||||
(record-count :accessor record-count)
|
||||
(header-length :accessor header-length)
|
||||
(record-length :accessor record-length)
|
||||
(fields :accessor fields)))
|
||||
|
||||
|
||||
(defclass db3-field ()
|
||||
((name :accessor field-name)
|
||||
(type :accessor field-type)
|
||||
(data-address :accessor data-address)
|
||||
(field-length :accessor field-length)
|
||||
(field-count :accessor field-count)))
|
||||
|
||||
|
||||
(defun asciiz->string (array)
|
||||
(let* ((string-length (or (position 0 array)
|
||||
(length array)))
|
||||
(string (make-string string-length)))
|
||||
(loop for i below string-length
|
||||
do (setf (schar string i) (code-char (aref array i))))
|
||||
string))
|
||||
|
||||
(defun ascii->string (array)
|
||||
(map 'string #'code-char array))
|
||||
|
||||
|
||||
(defun load-field-descriptor (stream)
|
||||
(let ((field (make-instance 'db3-field))
|
||||
(name (make-array 11 :element-type '(unsigned-byte 8))))
|
||||
(read-sequence name stream)
|
||||
(setf (field-name field) (asciiz->string name)
|
||||
(field-type field) (code-char (read-byte stream))
|
||||
(data-address field) (read-uint32 stream)
|
||||
(field-length field) (read-byte stream)
|
||||
(field-count field) (read-byte stream))
|
||||
(loop repeat 14 do (read-byte stream))
|
||||
field))
|
||||
|
||||
|
||||
(defmethod field-count ((db3 db3))
|
||||
(1- (/ (1- (header-length db3)) 32)))
|
||||
|
||||
|
||||
(defmethod load-header ((db3 db3) stream)
|
||||
(let ((version (read-byte stream)))
|
||||
(unless (= version #x03)
|
||||
(error "Can't handle this file"))
|
||||
(let ((year (read-byte stream))
|
||||
(month (read-byte stream))
|
||||
(day (read-byte stream)))
|
||||
(setf (version-number db3) version
|
||||
(last-update db3) (list year month day)
|
||||
(record-count db3) (read-uint32 stream)
|
||||
(header-length db3) (read-uint16 stream)
|
||||
(record-length db3) (read-uint16 stream))
|
||||
(file-position stream 32)
|
||||
(setf (fields db3) (loop repeat (field-count db3)
|
||||
collect (load-field-descriptor stream)))
|
||||
(assert (= (read-byte stream) #x0D))
|
||||
db3)))
|
||||
|
||||
|
||||
(defmethod convert-field (type data)
|
||||
(ascii->string data))
|
||||
|
||||
(defmethod convert-field ((type (eql #\C)) data)
|
||||
(ascii->string data))
|
||||
|
||||
|
||||
(defmethod load-field (type length stream)
|
||||
(let ((field (make-array length)))
|
||||
(read-sequence field stream)
|
||||
(convert-field type field)))
|
||||
|
||||
(defmethod load-record ((db3 db3) stream)
|
||||
(read-byte stream)
|
||||
(loop with record = (make-array (field-count db3))
|
||||
for i below (field-count db3)
|
||||
for field in (fields db3)
|
||||
do (setf (svref record i)
|
||||
(load-field (field-type field) (field-length field) stream))
|
||||
finally (return record)))
|
||||
|
||||
|
||||
(defun write-record (record stream)
|
||||
(loop for field across record
|
||||
do
|
||||
(write-char #\" stream)
|
||||
(write-string field stream)
|
||||
(write-string "\"," stream))
|
||||
(terpri stream))
|
||||
|
||||
|
||||
(defun dump-db3 (input output)
|
||||
(with-open-file (stream input :direction :input
|
||||
:element-type '(unsigned-byte 8))
|
||||
(with-open-file (ostream output :direction :output
|
||||
:element-type 'character)
|
||||
(let ((db3 (make-instance 'db3)))
|
||||
(load-header db3 stream)
|
||||
(loop repeat (record-count db3)
|
||||
do (write-record (load-record db3 stream) ostream))
|
||||
db3))))
|
||||
|
||||
(defun sample-db3 (input ostream &key (sample-size 10))
|
||||
(with-open-file (stream input :direction :input
|
||||
:element-type '(unsigned-byte 8))
|
||||
(let ((db3 (make-instance 'db3)))
|
||||
(load-header db3 stream)
|
||||
(loop
|
||||
:repeat sample-size
|
||||
:do (format ostream "~s~%" (load-record db3 stream)))
|
||||
db3)))
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
;;;; package.lisp
|
||||
|
||||
(defpackage #:db3
|
||||
(:use #:cl)
|
||||
(:export #:db3 ; the main class
|
||||
#:load-header
|
||||
#:record-count
|
||||
#:load-record
|
||||
#:write-record
|
||||
#:dump-db3
|
||||
#:sample-db3))
|
||||
Loading…
x
Reference in New Issue
Block a user