From 596c0007d83c67ce8878f4fb822e45434946d370 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Fri, 16 Sep 2016 20:14:29 -0700 Subject: Bugfix in tok-str: empty-match regexes. * lib.c (tok_str): Only continue the loop if the new position isn't past the end of the string. This fixes the problem of recognizing an empty token past the last character in the string. Also, advance new_pos by one if there is a zero length match. Then don't advance pos by one later in that case. This fixes the bug that we collect empty separator pieces *and* empty tokens, and also prevents empty matches before the first character of the string. Logic in tok_str is now very similar to that in split_str_keep. --- lib.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/lib.c b/lib.c index 56d4cf88..20ffd069 100644 --- a/lib.c +++ b/lib.c @@ -3926,30 +3926,27 @@ val tok_str(val str, val tok_regex, val keep_sep) { list_collect_decl (out, iter); val pos = zero; + val slen = length(str); keep_sep = default_bool_arg(keep_sep); for (;;) { cons_bind (new_pos, len, search_regex(str, tok_regex, pos, nil)); - val end; - if (!len) { + if (len == zero && new_pos != slen) + new_pos = plus(new_pos, one); + + if (new_pos == slen || !len) { if (keep_sep) iter = list_collect(iter, sub_str(str, pos, t)); break; } - end = plus(new_pos, len); - if (keep_sep) iter = list_collect(iter, sub_str(str, pos, new_pos)); - iter = list_collect(iter, sub_str(str, new_pos, end)); - - pos = end; - - if (len == zero) - pos = plus(pos, one); + pos = plus(new_pos, len); + iter = list_collect(iter, sub_str(str, new_pos, pos)); } return out; -- cgit v1.2.3