From aa5aed38f16aa15dc3c3d2fe4547fb86e26cd6e6 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 20 May 2013 08:36:10 -0700 Subject: * eval.c (eval_init): Register tok_str as intrinsic. * lib.c (tok_str): New function. * lib.h (tok_str): Declared. * txr.1: Documented. --- ChangeLog | 10 ++++++++++ eval.c | 1 + lib.c | 25 +++++++++++++++++++++++++ lib.h | 1 + txr.1 | 22 ++++++++++++++++++++++ 5 files changed, 59 insertions(+) diff --git a/ChangeLog b/ChangeLog index 30112d78..2c37d00a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2013-05-20 Kaz Kylheku + + * eval.c (eval_init): Register tok_str as intrinsic. + + * lib.c (tok_str): New function. + + * lib.h (tok_str): Declared. + + * txr.1: Documented. + 2013-05-20 Kaz Kylheku * txr.1: Corrected wrong function name in documentation diff --git a/eval.c b/eval.c index 7d1f1758..64f8de49 100644 --- a/eval.c +++ b/eval.c @@ -2368,6 +2368,7 @@ void eval_init(void) reg_fun(intern(lit("cat-str"), user_package), func_n2o(cat_str, 1)); reg_fun(intern(lit("split-str"), user_package), func_n2(split_str)); reg_fun(intern(lit("split-str-set"), user_package), func_n2(split_str_set)); + reg_fun(intern(lit("tok-str"), user_package), func_n2(tok_str)); reg_fun(intern(lit("list-str"), user_package), func_n1(list_str)); reg_fun(intern(lit("trim-str"), user_package), func_n1(trim_str)); reg_fun(intern(lit("string-lt"), user_package), func_n2(string_lt)); diff --git a/lib.c b/lib.c index eb34fc62..a1e505c8 100644 --- a/lib.c +++ b/lib.c @@ -2060,6 +2060,31 @@ val split_str_set(val str, val set) return out; } +val tok_str(val str, val tok_regex) +{ + list_collect_decl (out, iter); + val pos = zero; + + for (;;) { + cons_bind (new_pos, len, search_regex(str, tok_regex, pos, nil)); + val end; + + if (!len) + break; + + end = plus(new_pos, len); + + list_collect(iter, sub_str(str, new_pos, end)); + + pos = end; + + if (len == zero) + pos = plus(pos, one); + } + + return out; +} + val list_str(val str) { const wchar_t *cstr = 
c_str(str); diff --git a/lib.h b/lib.h index 67a35d26..5cecbad1 100644 --- a/lib.h +++ b/lib.h @@ -501,6 +501,7 @@ val sub_str(val str_in, val from_num, val to_num); val cat_str(val list, val sep); val split_str(val str, val sep); val split_str_set(val str, val set); +val tok_str(val str, val tok_regex); val list_str(val str); val trim_str(val str); val string_lt(val astr, val bstr); diff --git a/txr.1 b/txr.1 index 799fb0d1..af8fb604 100644 --- a/txr.1 +++ b/txr.1 @@ -7556,6 +7556,28 @@ be separate gaps which come between empty strings. This operation is nondestructive: <string> is not modified in any way. +.SS Function tok-str + +.TP +Syntax: + + (tok-str <string> <regex>) + +.TP +Description: + +The tok-str function searches <string> for tokens, which are defined as +substrings of <string> which match the regular expression <regex> in the +longest possible way, and do not overlap. These tokens are extracted from the +string and returned as a list. + +Whenever <regex> matches an empty string, then an empty token is returned, and +the search for another token within <string> resumes after advancing by one +character position. So for instance, (tok-str "abc" #/a?/) returns the +list ("a" "" "" ""). After the token "a" is extracted from a non-empty match +for the regex, the regex is considered to match three more times: before the +"b", between "b" and "c", and after the "c". + .SS Function list-str .TP -- cgit v1.2.3