summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2013-05-20 08:36:10 -0700
committerKaz Kylheku <kaz@kylheku.com>2013-05-20 08:36:10 -0700
commitaa5aed38f16aa15dc3c3d2fe4547fb86e26cd6e6 (patch)
tree65cba72084061c9ec1ea861c042fd24426f7fdb8
parent0641abc494c3588d9201d354c92bc6901aa2ef5a (diff)
downloadtxr-aa5aed38f16aa15dc3c3d2fe4547fb86e26cd6e6.tar.gz
txr-aa5aed38f16aa15dc3c3d2fe4547fb86e26cd6e6.tar.bz2
txr-aa5aed38f16aa15dc3c3d2fe4547fb86e26cd6e6.zip
* eval.c (eval_init): Register tok_str as intrinsic.
* lib.c (tok_str): New function. * lib.h (tok_str): Declared. * txr.1: Documented.
-rw-r--r--ChangeLog10
-rw-r--r--eval.c1
-rw-r--r--lib.c25
-rw-r--r--lib.h1
-rw-r--r--txr.122
5 files changed, 59 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 30112d78..2c37d00a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2013-05-20 Kaz Kylheku <kaz@kylheku.com>
+ * eval.c (eval_init): Register tok_str as intrinsic.
+
+ * lib.c (tok_str): New function.
+
+ * lib.h (tok_str): Declared.
+
+ * txr.1: Documented.
+
+2013-05-20 Kaz Kylheku <kaz@kylheku.com>
+
* txr.1: Corrected wrong function name in documentation
for split-str-set.
diff --git a/eval.c b/eval.c
index 7d1f1758..64f8de49 100644
--- a/eval.c
+++ b/eval.c
@@ -2368,6 +2368,7 @@ void eval_init(void)
reg_fun(intern(lit("cat-str"), user_package), func_n2o(cat_str, 1));
reg_fun(intern(lit("split-str"), user_package), func_n2(split_str));
reg_fun(intern(lit("split-str-set"), user_package), func_n2(split_str_set));
+ reg_fun(intern(lit("tok-str"), user_package), func_n2(tok_str));
reg_fun(intern(lit("list-str"), user_package), func_n1(list_str));
reg_fun(intern(lit("trim-str"), user_package), func_n1(trim_str));
reg_fun(intern(lit("string-lt"), user_package), func_n2(string_lt));
diff --git a/lib.c b/lib.c
index eb34fc62..a1e505c8 100644
--- a/lib.c
+++ b/lib.c
@@ -2060,6 +2060,31 @@ val split_str_set(val str, val set)
return out;
}
+val tok_str(val str, val tok_regex) /* Collect the successive matches of tok_regex found in str and return them as a list. */
+{
+ list_collect_decl (out, iter);
+ val pos = zero; /* position at which the next search begins */
+
+ for (;;) {
+ cons_bind (new_pos, len, search_regex(str, tok_regex, pos, nil)); /* new_pos, len: taken from the search_regex result; presumably match start and match length -- confirm against search_regex */
+ val end;
+
+ if (!len) /* no length came back: stop collecting */
+ break;
+
+ end = plus(new_pos, len);
+
+ list_collect(iter, sub_str(str, new_pos, end)); /* append the token spanning new_pos..end */
+
+ pos = end; /* resume searching after this token */
+
+ if (len == zero) /* empty token: advance by one so the same position is not searched again */
+ pos = plus(pos, one);
+ }
+
+ return out;
+}
+
val list_str(val str)
{
const wchar_t *cstr = c_str(str);
diff --git a/lib.h b/lib.h
index 67a35d26..5cecbad1 100644
--- a/lib.h
+++ b/lib.h
@@ -501,6 +501,7 @@ val sub_str(val str_in, val from_num, val to_num);
val cat_str(val list, val sep);
val split_str(val str, val sep);
val split_str_set(val str, val set);
+val tok_str(val str, val tok_regex);
val list_str(val str);
val trim_str(val str);
val string_lt(val astr, val bstr);
diff --git a/txr.1 b/txr.1
index 799fb0d1..af8fb604 100644
--- a/txr.1
+++ b/txr.1
@@ -7556,6 +7556,28 @@ be separate gaps which come between empty strings.
This operation is nondestructive: <string> is not modified in any way.
+.SS Function tok-str
+
+.TP
+Syntax:
+
+ (tok-str <string> <regex>)
+
+.TP
+Description:
+
+The tok-str function searches <string> for tokens, which are defined as
+substrings of <string> which match the regular expression <regex> in the
+longest possible way, and do not overlap. These tokens are extracted from the
+string and returned as a list.
+
+Whenever <regex> matches an empty string, then an empty token is returned, and
+the search for another token within <string> resumes after advancing by one
+character position. So for instance, (tok-str "abc" #/a?/) returns the
+list ("a" "" "" ""). After the token "a" is extracted from a non-empty match
+for the regex, the regex is considered to match three more times: before the
+"b", between "b" and "c", and after the "c".
+
.SS Function list-str
.TP