diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | awkreg-grammar.txt | 69 |
2 files changed, 74 insertions, 0 deletions
@@ -1,5 +1,10 @@
 2014-03-17 Kaz Kylheku <kaz@kylheku.com>
 
+ * awkreg-grammar.txt: New file. Taken from my original Usenet
+ posting in comp.lang.awk.
+
+2014-03-17 Kaz Kylheku <kaz@kylheku.com>
+
 Fix in {m,n} syntax.
 
 The issue is that the parser partially consumes broken {m,n}
diff --git a/awkreg-grammar.txt b/awkreg-grammar.txt
new file mode 100644
index 0000000..af6984b
--- /dev/null
+++ b/awkreg-grammar.txt
@@ -0,0 +1,69 @@
+#######################
+# Original LR grammar #
+#######################
+
+S -> <^> R <$>
+
+R -> R|R
+  -> R+
+  -> R?
+  -> R*
+  -> R R
+  -> R{num<,<num>>}
+  -> R{<,num>}
+  -> (R)
+  -> bracket
+  -> rchar
+
+bracket -> [<^> <bchar / range / class >*]
+
+bchar -> any character but [ or -
+      -> \]
+      -> \-
+      -> \^
+      -> \\
+
+range -> bchar - bchar
+
+class -> [:alpha:] / [:digit:] / ... et cetera
+
+rchar -> any character but ( ) [ ] { } * ? +
+      -> \ char
+
+char -> any character
+
+#################
+# Left-factored #
+#################
+
+R -> T # regex is a single term
+  -> T R # a term followed by a regex
+  -> T | R # a term or regex
+  -> # empty
+
+T -> F # a regex term is a factor
+  -> F *
+  -> F ?
+  -> F +
+  -> F {num<,<num>>}
+
+F -> rchar # a factor is a regex char
+  -> bracket # [...] expression
+  -> (R) # parenthesized regex
+
+bracket -> [<^> <bchar / range / class >*]
+
+bchar -> any character but [ or -
+      -> \]
+      -> \-
+      -> \^
+      -> \\
+
+range -> bchar - bchar
+
+class -> [:alpha:] / [:digit:] / ... et cetera
+
+rchar -> any character but ( ) [ ] { } * ? +
+      -> \ char
+
+char -> any character