summaryrefslogtreecommitdiffstats
path: root/awkreg-grammar.txt
blob: af6984bc5472212273c56a0e0b41849a3da0ca66 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#######################
# Original LR grammar #
#######################

S -> <^> R <$>

R -> R|R
  -> R+
  -> R?
  -> R*
  -> R R
  -> R{num<,<num>>}
  -> R{<,num>}
  -> (R)
  -> bracket
  -> rchar

bracket -> [<^> <bchar / range / class >*]

bchar -> any character but [ ] - ^ \
      -> \]
      -> \-
      -> \^
      -> \\

range -> bchar - bchar

class -> [:alpha:] / [:digit:] / ... (any other POSIX character class)

rchar -> any character but ( ) [ ] { } * ? + | \
      -> \ char

char  -> any character

#################
# Left-factored #
#################

R -> T                  # regex is a single term
  -> T R                # a term followed by a regex
  -> T | R              # alternation: a term, a literal '|', then a regex
  ->                    # empty

T -> F                  # a regex term is a factor
  -> F *
  -> F ?
  -> F +
  -> F {num<,<num>>}
  -> F {<,num>}

F -> rchar              # a factor is a regex char
  -> bracket            # [...] expression
  -> (R)                # parenthesized regex

bracket -> [<^> <bchar / range / class >*]

bchar -> any character but [ ] - ^ \
      -> \]
      -> \-
      -> \^
      -> \\

range -> bchar - bchar

class -> [:alpha:] / [:digit:] / ... (any other POSIX character class)

rchar -> any character but ( ) [ ] { } * ? + | \
      -> \ char

char  -> any character