summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2016-09-16 20:14:29 -0700
committerKaz Kylheku <kaz@kylheku.com>2016-09-16 20:14:29 -0700
commitf326cd09b24c2cd30794330ee374eef0c83b3cb5 (patch)
treeb93343892e949822d54f609ce7b9f6ff35c23d30
parent6d800734b64c92b5bb9979c06c84ed4d29098933 (diff)
downloadtxr-f326cd09b24c2cd30794330ee374eef0c83b3cb5.tar.gz
txr-f326cd09b24c2cd30794330ee374eef0c83b3cb5.tar.bz2
txr-f326cd09b24c2cd30794330ee374eef0c83b3cb5.zip
Bugfix in split-str: empty-match regexes.
* lib.c (split_str_keep): In the regex case, changing to an infinite loop. The do/while is no longer needed because the if statement includes a test of the position having reached the end of the string. This is done before it is incremented by len, so we avoid wrongly keeping a separator. * txr.1: Clarified that an empty regex match behaves like a sep which is an empty string.
-rw-r--r--lib.c9
-rw-r--r--txr.19
2 files changed, 12 insertions, 6 deletions
diff --git a/lib.c b/lib.c
index 13982b69..56d4cf88 100644
--- a/lib.c
+++ b/lib.c
@@ -3814,24 +3814,25 @@ val split_str_keep(val str, val sep, val keep_sep)
if (regexp(sep)) {
list_collect_decl (out, iter);
val pos = zero;
+ val slen = length(str);
- do {
+ for (;;) {
cons_bind (new_pos, len, search_regex(str, sep, pos, nil));
- if (eql(pos, new_pos) && len == zero)
+ if (len == zero && new_pos != slen)
new_pos = plus(new_pos, one);
iter = list_collect(iter, sub_str(str, pos, new_pos));
pos = new_pos;
- if (len) {
+ if (len && pos != slen) {
pos = plus(pos, len);
if (keep_sep)
iter = list_collect(iter, sub_str(str, new_pos, pos));
continue;
}
break;
- } while (le(pos, length_str(str)));
+ }
return out;
} else {
diff --git a/txr.1 b/txr.1
index 1cbaff45..e0f0b17c 100644
--- a/txr.1
+++ b/txr.1
@@ -18469,11 +18469,16 @@ characters. In this case, if
.meta string
is of length one or zero, then it is considered to have no such pieces, and a
list of one element is returned containing the original string.
+These remarks also apply to the situation when
+.meta sep
+is a regular expression which matches only an empty
+substring of
+.metn string .
If a match for
.meta sep
-is not found in the string at all, then the string is not
-split at all: a list of one element is returned containing the original
+is not found in the string at all (not even an empty match), then the string is
+not split at all: a list of one element is returned containing the original
string.
If