From 29b5d37a8577d83ae0c88b2e894287b990e6849e Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 26 Sep 2011 23:21:58 -0700 Subject: Support &#xNNNN; hex escapes in html. Bugfix in field formatting. chr function inlined. * filter.c (trie_value_at, trie_lookup_feed_char): Handle function case. (build_filter): New parameter, compress_p. (html_hex_continue, html_hex_handler): New functions. (filter_init): Add a function-based node to the from_html trie. * lib.c (chr): Function removed. (functionp): New function. * lib.h (chr): Declaration replaced with inline function. (functionp): Declared. * match.c (format_field): Bugfix: failed to apply filter that came in as an argument. --- ChangeLog | 25 ++++++++++++++++++++++--- filter.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- lib.c | 14 +++++++++----- lib.h | 7 ++++++- match.c | 3 ++- 5 files changed, 85 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 408e463f..37570e42 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,9 +6,8 @@ Obsolete forms of @(next) and @(output) syntax are gone. - New filtering feature for substitutions in output. - Filtering to and from HTML built in, plus user-defined - filtering with deffilter. + New filtering feature for substitutions in output. Filtering to and + from HTML built in, plus user-defined filtering with deffilter. Bugfixes: wrong error message in throw; lack of support for escaping backslashes in literals and regexes. @@ -19,6 +18,26 @@ * configure: Bumped txr_ver to 037. +2011-09-26 Kaz Kylheku + + Support &#xNNNN; hex escapes in html. Bugfix in field formatting. + chr function inlined. + + * filter.c (trie_value_at, trie_lookup_feed_char): Handle function + case. + (build_filter): New parameter, compress_p. + (html_hex_continue, html_hex_handler): New functions. + (filter_init): Add a function-based node to the from_html trie. + + * lib.c (chr): Function removed. + (functionp): New function. 
+
+	* lib.h (chr): Declaration replaced with inline function.
+	(functionp): Declared.
+
+	* match.c (format_field): Bugfix: failed to apply filter
+	that came in as an argument.
+
 2011-09-26 Kaz Kylheku
 
 	Bugfixes: Consistent escaping in various literals. Double
diff --git a/filter.c b/filter.c
index 78373cf2..370df293 100644
--- a/filter.c
+++ b/filter.c
@@ -26,6 +26,8 @@
 
 #include <stddef.h>
 #include <setjmp.h>
+#include <wctype.h>
+#include <wchar.h>
 #include "config.h"
 #include "lib.h"
 #include "hash.h"
@@ -103,6 +105,8 @@ val trie_value_at(val node)
     return get_hash_userdata(node);
   if (consp(node))
     return nil;
+  if (functionp(node))
+    return nil;
   return node;
 }
 
@@ -110,6 +114,8 @@ val trie_lookup_feed_char(val node, val ch)
 {
   if (hashp(node))
     return gethash(node, ch);
+  if (functionp(node))
+    return funcall1(node, ch);
   if (consp(node) && eq(ch,car(node)))
     return cdr(node);
   return nil;
@@ -124,7 +130,7 @@ struct filter_pair {
   wchar_t *key, *value;
 };
 
-static val build_filter(struct filter_pair *pair)
+static val build_filter(struct filter_pair *pair, val compress_p)
 {
   int i;
   val trie = make_trie();
@@ -132,7 +138,8 @@ static val build_filter(struct filter_pair *pair)
   for (i = 0; pair[i].key; i++)
     trie_add(trie, static_str(pair[i].key), static_str(pair[i].value));
 
-  trie_compress(&trie);
+  if (compress_p)
+    trie_compress(&trie);
   return trie;
 }
 
@@ -474,6 +481,36 @@ static struct filter_pair from_html_table[] = {
   { 0, 0 }
 };
 
+static val html_hex_continue(val hexlist, val ch)
+{
+  static wchar_t *hexdigs = L"0123456789ABCDEF"; /* index == digit value */
+
+  if (iswxdigit(c_chr(ch))) { /* another hex digit: keep accumulating */
+    return func_f1(cons(ch, hexlist), html_hex_continue);
+  } else if (eq(ch, chr(';'))) { /* terminator: decode collected digits */
+    wchar_t out[2] = { 0 };
+    val iter;
+
+    for (iter = nreverse(hexlist); iter; iter = cdr(iter)) {
+      wchar_t *pos = wcschr(hexdigs, towupper(c_chr(car(iter))));
+      if (!pos) return nil; /* digit not in 0-9A-F: no match, avoid NULL math */
+      out[0] <<= 4;
+      out[0] |= pos - hexdigs;
+    }
+
+    return string(out);
+  } else {
+    return nil;
+  }
+}
+
+static val html_hex_handler(val ch)
+{
+  if (!iswxdigit(c_chr(ch))) /* "&#x" must be followed by a hex digit */
+    return nil;
+  return func_f1(cons(ch, nil), html_hex_continue);
+}
+
 val filters;
 val filter_k, to_html_k, from_html_k;
 
@@ -483,6 +520,11 @@ void filter_init(void)
   filter_k = intern(lit("filter"), keyword_package);
   to_html_k = intern(lit("to_html"), keyword_package);
   from_html_k = intern(lit("from_html"), keyword_package);
-  sethash(filters, to_html_k, build_filter(to_html_table));
-  sethash(filters, from_html_k, build_filter(from_html_table));
+  sethash(filters, to_html_k, build_filter(to_html_table, t));
+  {
+    val trie = build_filter(from_html_table, nil);
+    trie_add(trie, lit("&#x"), func_n1(html_hex_handler));
+    trie_compress(&trie);
+    sethash(filters, from_html_k, trie);
+  }
 }
diff --git a/lib.c b/lib.c
index 77c01623..a1b74624 100644
--- a/lib.c
+++ b/lib.c
@@ -1136,11 +1136,6 @@ val string_lt(val astr, val bstr)
   return cmp == -1 ? t : nil;
 }
 
-val chr(wchar_t ch)
-{
-  return (val) (((cnum) ch << TAG_SHIFT) | TAG_CHR);
-}
-
 val chrp(val chr)
 {
   return (is_chr(chr)) ? t : nil;
@@ -1358,6 +1353,15 @@ val func_n4(val (*fun)(val, val, val, val))
   return obj;
 }
 
+val functionp(val obj)
+{
+  if (!obj) {
+    return nil;
+  } else {
+    type_t ty = type(obj);
+    return (ty == FUN) ? t : nil;
+  }
+}
 
 val apply(val fun, val arglist)
 {
diff --git a/lib.h b/lib.h
index 4ad81d7b..6c538aeb 100644
--- a/lib.h
+++ b/lib.h
@@ -208,6 +208,11 @@ INLINE val num_fast(cnum n)
   return (val) ((n << TAG_SHIFT) | TAG_NUM);
 }
 
+INLINE val chr(wchar_t ch)
+{
+  return (val) (((cnum) ch << TAG_SHIFT) | TAG_CHR);
+}
+
 #define lit_noex(strlit) ((obj_t *) ((cnum) (L ## strlit) | TAG_LIT))
 #define lit(strlit) lit_noex(strlit)
 
@@ -319,7 +324,6 @@ val split_str(val str, val sep);
 val split_str_set(val str, val set);
 val trim_str(val str);
 val string_lt(val astr, val bstr);
-val chr(wchar_t ch);
 val chrp(val chr);
 wchar_t c_chr(val chr);
 val chr_str(val str, val index);
@@ -343,6 +347,7 @@ val func_n1(val (*fun)(val));
 val func_n2(val (*fun)(val, val));
 val func_n3(val (*fun)(val, val, val));
 val func_n4(val (*fun)(val, val, val, val));
+val functionp(val);
 val apply(val fun, val arglist);
 val funcall(val fun);
 val funcall1(val fun, val arg);
diff --git a/match.c b/match.c
index fb533c50..a7284f19 100644
--- a/match.c
+++ b/match.c
@@ -557,10 +557,11 @@ static val format_field(val string_or_list, val modifier, val filter)
         uw_throwf(query_error_s, lit("format_field: filter ~s not known"),
                   filter_sym, nao);
       }
+    }
 
+    if (filter)
       string_or_list = filter_string(filter, cat_str(list(string_or_list, nao),
                                                      nil));
-    }
   }
 
   {
-- 
cgit v1.2.3