From 2bb1c1f082120f2c4e2026a492685d27cb1572e3 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 17 Mar 2014 13:23:33 -0700 Subject: New file: awkreg-grammar.txt. --- ChangeLog | 5 ++++ awkreg-grammar.txt | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 awkreg-grammar.txt diff --git a/ChangeLog b/ChangeLog index 7f4faff..1261b64 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2014-03-17 Kaz Kylheku + + * awkreg-grammar.txt: New file. Taken from my original Usenet + posting in comp.lang.awk. + 2014-03-17 Kaz Kylheku Fix in {m,n} syntax. diff --git a/awkreg-grammar.txt b/awkreg-grammar.txt new file mode 100644 index 0000000..af6984b --- /dev/null +++ b/awkreg-grammar.txt @@ -0,0 +1,69 @@ +####################### +# Original LR grammar # +####################### + +S -> <^> R <$> + +R -> R|R + -> R+ + -> R? + -> R* + -> R R + -> R{num<,>} + -> R{<,num>} + -> (R) + -> bracket + -> rchar + +bracket -> [<^> <bchar / range / class>*] + +bchar -> any character but [ or - + -> \] + -> \- + -> \^ + -> \\ + +range -> bchar - bchar + +class -> [:alpha:] / [:digit:] / ... et cetera + +rchar -> any character but ( ) [ ] { } * ? + + -> \ char + +char -> any character + +################# +# Left-factored # +################# + +R -> T # regex is a single term + -> T R # a term followed by a regex + -> T | R # a term or regex + -> # empty + +T -> F # a regex term is a factor + -> F * + -> F ? + -> F + + -> F {num<,>} + +F -> rchar # a factor is a regex char + -> bracket # [...] expression + -> (R) # parenthesized regex + +bracket -> [<^> <bchar / range / class>*] + +bchar -> any character but [ or - + -> \] + -> \- + -> \^ + -> \\ + +range -> bchar - bchar + +class -> [:alpha:] / [:digit:] / ... et cetera + +rchar -> any character but ( ) [ ] { } * ? + + -> \ char + +char -> any character -- cgit v1.2.3