diff options
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | eval.c | 1 | ||||
-rw-r--r-- | lib.c | 25 | ||||
-rw-r--r-- | lib.h | 1 | ||||
-rw-r--r-- | txr.1 | 22 |
5 files changed, 59 insertions, 0 deletions
@@ -1,5 +1,15 @@ 2013-05-20 Kaz Kylheku <kaz@kylheku.com> + * eval.c (eval_init): Register tok_str as intrinsic. + + * lib.c (tok_str): New function. + + * lib.h (tok_str): Declared. + + * txr.1: Documented. + +2013-05-20 Kaz Kylheku <kaz@kylheku.com> + * txr.1: Corrected wrong function name in documentation for split-str-set. @@ -2368,6 +2368,7 @@ void eval_init(void) reg_fun(intern(lit("cat-str"), user_package), func_n2o(cat_str, 1)); reg_fun(intern(lit("split-str"), user_package), func_n2(split_str)); reg_fun(intern(lit("split-str-set"), user_package), func_n2(split_str_set)); + reg_fun(intern(lit("tok-str"), user_package), func_n2(tok_str)); reg_fun(intern(lit("list-str"), user_package), func_n1(list_str)); reg_fun(intern(lit("trim-str"), user_package), func_n1(trim_str)); reg_fun(intern(lit("string-lt"), user_package), func_n2(string_lt)); @@ -2060,6 +2060,31 @@ val split_str_set(val str, val set) return out; } +val tok_str(val str, val tok_regex) +{ + list_collect_decl (out, iter); + val pos = zero; + + for (;;) { + cons_bind (new_pos, len, search_regex(str, tok_regex, pos, nil)); + val end; + + if (!len) + break; + + end = plus(new_pos, len); + + list_collect(iter, sub_str(str, new_pos, end)); + + pos = end; + + if (len == zero) + pos = plus(pos, one); + } + + return out; +} + val list_str(val str) { const wchar_t *cstr = c_str(str); @@ -501,6 +501,7 @@ val sub_str(val str_in, val from_num, val to_num); val cat_str(val list, val sep); val split_str(val str, val sep); val split_str_set(val str, val set); +val tok_str(val str, val tok_regex); val list_str(val str); val trim_str(val str); val string_lt(val astr, val bstr); @@ -7556,6 +7556,28 @@ be separate gaps which come between empty strings. This operation is nondestructive: <string> is not modified in any way. 
+.SS Function tok-str + +.TP +Syntax: + + (tok-str <string> <regex>) + +.TP +Description: + +The tok-str function searches <string> for tokens, which are defined as +substrings of <string> which match the regular expression <regex> in the +longest possible way, and do not overlap. These tokens are extracted from the +string and returned as a list. + +Whenever <regex> matches an empty string, then an empty token is returned, and +the search for another token within <string> resumes after advancing by one +character position. So for instance, (tok-str "abc" #/a?/) returns the +list ("a" "" "" ""). After the token "a" is extracted from a non-empty match +for the regex, the regex is considered to match three more times: before the +"b", between "b" and "c", and after the "c". + .SS Function list-str .TP |