diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2014-03-17 11:00:28 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2014-03-17 11:00:28 -0700 |
commit | b9f7be76a0d7fd986603ae24ff820547d0f78716 (patch) | |
tree | 3f4adf9114b004bfb31ffb0b6ab1208e8bcc58f3 | |
download | unix-cruft-b9f7be76a0d7fd986603ae24ff820547d0f78716.tar.gz unix-cruft-b9f7be76a0d7fd986603ae24ff820547d0f78716.tar.bz2 unix-cruft-b9f7be76a0d7fd986603ae24ff820547d0f78716.zip |
Initial version.
-rw-r--r-- | awkreg.awk | 205 |
1 files changed, 205 insertions, 0 deletions
# awkreg.awk -- for every input line, report whether the line is accepted
# by a small recursive-descent recognizer for regular expressions.
# (Reconstructed from the "Initial version" diff of awkreg.awk.)
#
# Convention shared by all eat_* functions: the argument is the text that
# still has to be parsed, and the return value is the text left over after
# one construct has been consumed.  Returning the argument unchanged means
# that nothing could be consumed.

# Return true when s is the empty string.
function empty(s)
{
    return s == ""
}

# Return s without its first character.
function eat_char(s)
{
    return substr(s, 2)
}

# Return s without its first n characters.
function eat_chars(s, n)
{
    return substr(s, n + 1)
}

# Return true when s begins with the literal string pfx.
function matches(s, pfx)
{
    return substr(s, 1, length(pfx)) == pfx
}

# If s begins with pfx, return s with pfx removed; otherwise return s.
function match_and_eat(s, pfx)
{
    if (matches(s, pfx))
        return eat_chars(s, length(pfx))
    return s
}

# Consume one ordinary character, or a backslash followed by any character.
# The characters [ * + ? { } ( ) | are not consumed here, and a string that
# is exactly "$" is left alone so that is_regex can treat it as the
# trailing anchor.
function eat_rchar(c)
{
    if (c ~ /^\\./)
        return eat_chars(c, 2)

    if (c == "$")
        return c

    if (c !~ /^[\[\*\+\?{}\(\)|]/)
        return eat_char(c)

    return c
}

# Consume one bracket-expression character: either one of the escapes
# \]  \-  \\  or a single character other than "-" and "[".
function eat_bchar(c)
{
    # The alternatives are grouped so that "^" anchors all of them; without
    # the group only the first alternative is tied to the start of c.
    if (c ~ /^(\\]|\\-|\\\\)/)
        return eat_chars(c, 2)

    if (c !~ /^[\-\[]/)
        return eat_char(c)

    return c
}

# Consume a leading character-class name such as "[:alpha:]".  Each known
# name is tried in turn; c is returned unchanged when none of them matches.
function eat_class(c)
{
    c = match_and_eat(c, "[:alnum:]")
    c = match_and_eat(c, "[:alpha:]")
    c = match_and_eat(c, "[:blank:]")
    c = match_and_eat(c, "[:cntrl:]")
    c = match_and_eat(c, "[:digit:]")
    c = match_and_eat(c, "[:graph:]")
    c = match_and_eat(c, "[:lower:]")
    c = match_and_eat(c, "[:print:]")
    c = match_and_eat(c, "[:punct:]")
    c = match_and_eat(c, "[:space:]")
    c = match_and_eat(c, "[:upper:]")
    return match_and_eat(c, "[:xdigit:]")
}

# Consume a bracket expression.  e must start at the opening "[".
# Returns the text after the closing "]", or the original argument when
# the bracket expression is malformed or unterminated.
function eat_bracket_exp(e,
                         #local
                         f, o)
{
    o = e               # remembered so that failure can return the input
    e = eat_char(e)     # skip the opening "["

    for (;;) {
        if (matches(e, "]")) {
            return eat_char(e)
        }

        if (matches(e, "[")) {
            # Inside brackets a "[" may only introduce a class name.
            f = eat_class(e)
            if (f == e)
                return o
            e = f
            continue;
        }

        f = eat_bchar(e)

        if (f == e)     # no progress: bad character or end of text
            return o
        e = f

        if (matches(e, "-")) {
            # Range: a "-" must be followed by another bracket character.
            e = eat_char(e)
            f = eat_bchar(e)
            if (f == e)
                return o
            e = f
        }
    }
}

# Consume an interval of the form {m}, {m,} or {m,n}.  n must start at the
# opening "{".  Returns the text after the closing "}", or the original
# argument when the interval is malformed or truncated, so that the caller
# sees an unconsumed "{" instead of silently accepting it.
function eat_rep_notation(n,
                          #local
                          o)
{
    o = n
    n = eat_char(n)     # skip the opening "{"

    if (n !~ /^[0-9]/)
        return o

    while (n ~ /^[0-9]/)
        n = eat_char(n)

    if (matches(n, "}"))
        return eat_char(n)

    if (!matches(n, ","))
        return o

    n = eat_char(n)

    if (matches(n, "}"))
        return eat_char(n)

    if (n !~ /^[0-9]/)
        return o

    while (n ~ /^[0-9]/)
        n = eat_char(n)

    if (matches(n, "}"))
        return eat_char(n)

    return o
}

# Consume one factor: a parenthesized sub-expression, a bracket expression
# or a single character.  Returns f unchanged when no factor can be read,
# which includes a group whose closing ")" is missing.
function eat_factor(f,
                    #local
                    rest)
{
    if (matches(f, "(")) {
        rest = eat_regex(eat_char(f))
        if (!matches(rest, ")"))
            return f
        return eat_char(rest)
    }

    if (matches(f, "["))
        return eat_bracket_exp(f)

    return eat_rchar(f)
}

# Consume one factor together with an optional repetition suffix:
# "?", "+", "*" or a brace interval.
function eat_term(t,
                  #local
                  s)
{
    s = eat_factor(t)

    if (empty(s) || s == t)
        return s

    t = s

    if (t ~ /^[?+*]/)
        return eat_char(t)

    if (matches(t, "{"))
        return eat_rep_notation(t)

    return t
}

# Consume a sequence of terms, optionally separated by "|".  Stops at the
# first position where no term can be read and returns the remaining text.
function eat_regex(r,
                   #locals
                   s)
{
    if (empty(r))
        return r

    s = eat_term(r)

    if (empty(s) || s == r)
        return s

    r = s;

    if (matches(r, "|"))
        r = eat_char(r)

    return eat_regex(r)
}


# Return 1 when the whole of r is accepted as a regular expression, with an
# optional leading "^" and an optional trailing "$"; return 0 otherwise.
function is_regex(r)
{
    if (matches(r, "^"))
        r = eat_char(r)

    if (empty(r))
        return 1

    r = eat_regex(r)

    if (r == "$")
        r = ""

    return empty(r);
}

# Main rule: classify every input line.
{
    printf("is_regex(%s)\n", is_regex($0) ? "yes" : "no")
}