From f326cd09b24c2cd30794330ee374eef0c83b3cb5 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Fri, 16 Sep 2016 20:14:29 -0700 Subject: Bugfix in split-str: empty-match regexes. * lib.c (split_str_keep): In the regex case, changing to an infinite loop. The do/while is no longer needed because the if statement includes a test of the position having reached the end of the string. This is done before it is incremented by len, so we avoid wrongly keeping a separator. * txr.1: Clarified that an empty regex match behaves like a sep which is an empty string. --- lib.c | 9 +++++---- txr.1 | 9 +++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lib.c b/lib.c index 13982b69..56d4cf88 100644 --- a/lib.c +++ b/lib.c @@ -3814,24 +3814,25 @@ val split_str_keep(val str, val sep, val keep_sep) if (regexp(sep)) { list_collect_decl (out, iter); val pos = zero; + val slen = length(str); - do { + for (;;) { cons_bind (new_pos, len, search_regex(str, sep, pos, nil)); - if (eql(pos, new_pos) && len == zero) + if (len == zero && new_pos != slen) new_pos = plus(new_pos, one); iter = list_collect(iter, sub_str(str, pos, new_pos)); pos = new_pos; - if (len) { + if (len && pos != slen) { pos = plus(pos, len); if (keep_sep) iter = list_collect(iter, sub_str(str, new_pos, pos)); continue; } break; - } while (le(pos, length_str(str))); + } return out; } else { diff --git a/txr.1 b/txr.1 index 1cbaff45..e0f0b17c 100644 --- a/txr.1 +++ b/txr.1 @@ -18469,11 +18469,16 @@ characters. In this case, if .meta string is of length one or zero, then it is considered to have no such pieces, and a list of one element is returned containing the original string. +These remarks also apply to the situation when +.meta sep +is a regular expression which matches only an empty +substring of +.metn string .
If a match for .meta sep -is not found in the string at all, then the string is not -split at all: a list of one element is returned containing the original +is not found in the string at all (not even an empty match), then the string is +not split at all: a list of one element is returned containing the original string. If -- cgit v1.2.3