From 34d7df624c34f7b909da7db08dbcbf7d05de84af Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 17 Nov 2014 07:13:18 -0800 Subject: * lib.c (split_str): If the separator string is empty, then unless opt_compat is 100 or less, provide a more consistent behavior, rather than splitting the string into characters. This latter behavior was never documented. * txr.1: Documented. * dep.mk: Updated. --- ChangeLog | 11 +++++++++++ dep.mk | 2 +- lib.c | 25 ++++++++++++++++++++++++- txr.1 | 31 +++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 33590a1b..80681bd7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2014-11-17 Kaz Kylheku + + * lib.c (split_str): If the separator string is empty, + then unless opt_compat is 100 or less, provide a more + consistent behavior, rather than splitting the string + into characters. This latter behavior was never documented. + + * txr.1: Documented. + + * dep.mk: Updated. + 2014-11-15 Kaz Kylheku * lib.c (max2, min2): Use the less comparison function diff --git a/dep.mk b/dep.mk index bc7458e0..4fa08832 100644 --- a/dep.mk +++ b/dep.mk @@ -2,7 +2,7 @@ ./lex.yy.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./stream.h $(top_srcdir)/./utf8.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./hash.h $(top_srcdir)/./parser.h $(top_srcdir)/./eval.h y.tab.h ./y.tab.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./regex.h $(top_srcdir)/./utf8.h $(top_srcdir)/./match.h $(top_srcdir)/./hash.h $(top_srcdir)/./eval.h $(top_srcdir)/./stream.h $(top_srcdir)/./parser.h ./match.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./regex.h $(top_srcdir)/./stream.h $(top_srcdir)/./parser.h $(top_srcdir)/./txr.h $(top_srcdir)/./utf8.h $(top_srcdir)/./filter.h $(top_srcdir)/./hash.h $(top_srcdir)/./debug.h $(top_srcdir)/./eval.h $(top_srcdir)/./match.h -./lib.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./arith.h $(top_srcdir)/./rand.h $(top_srcdir)/./hash.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./stream.h $(top_srcdir)/./utf8.h $(top_srcdir)/./filter.h $(top_srcdir)/./eval.h $(top_srcdir)/./sysif.h $(top_srcdir)/./regex.h +./lib.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./arith.h $(top_srcdir)/./rand.h $(top_srcdir)/./hash.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./stream.h $(top_srcdir)/./utf8.h $(top_srcdir)/./filter.h $(top_srcdir)/./eval.h $(top_srcdir)/./sysif.h $(top_srcdir)/./regex.h $(top_srcdir)/./txr.h ./regex.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./parser.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./stream.h $(top_srcdir)/./gc.h $(top_srcdir)/./regex.h $(top_srcdir)/./txr.h ./gc.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./stream.h $(top_srcdir)/./hash.h $(top_srcdir)/./txr.h $(top_srcdir)/./eval.h $(top_srcdir)/./gc.h $(top_srcdir)/./signal.h ./unwind.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./stream.h $(top_srcdir)/./txr.h $(top_srcdir)/./signal.h $(top_srcdir)/./eval.h $(top_srcdir)/./parser.h $(top_srcdir)/./unwind.h diff --git a/lib.c b/lib.c index 62d43b67..796a126c 100644 --- a/lib.c +++ b/lib.c @@ -57,6 +57,7 @@ #include "eval.h" #include "sysif.h" #include "regex.h" +#include "txr.h" #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) @@ -2778,7 +2779,29 @@ val split_str(val str, val sep) size_t len_sep = c_num(length_str(sep)); if (len_sep == 0) { - return list_str(str); + if (opt_compat && opt_compat <= 100) { + return list_str(str); + } else { + const wchar_t *cstr = c_str(str); + + if (*cstr) { + list_collect_decl (out, iter); + + prot1(&str); + + for (; *cstr; cstr++) { + val piece = mkustring(one); + init_str(piece, cstr); + iter = list_collect(iter, piece); + } + + rel1(&str); + + return out; + } else { + return cons(str, nil); + } + } } else { const wchar_t *cstr = c_str(str); const wchar_t *csep = c_str(sep); diff --git a/txr.1 b/txr.1 index 409fe8a3..c4ae19d8 100644 --- a/txr.1 +++ b/txr.1 @@ -15874,6 +15874,15 @@ The string is broken into pieces according to the gaps left behind by the removed separators, and a list of the remaining pieces is returned. +If +.meta sep +is the empty string, then the separator pieces removed from the +string are considered to be the empty strings between its +characters. In this case, if +.meta string +is of length one or zero, then it is considered to have no such pieces, and a +list of one element is returned containing the original string. + If a match for .meta sep is not found in the string at all, then the string is not @@ -15895,6 +15904,17 @@ This operation is nondestructive: .meta string is not modified in any way. +Note: To split a string into pieces of length one such that an empty string +produces +.code nil +rather than +.codn ("") , +use the +.cblk +.meti (tok-str < string #/./) +.cble +pattern. + .coNP Function @ split-str-set .synb .mets (split-str-set < string << set ) @@ -26036,6 +26056,17 @@ can be emulated is \*(TX 97. Here are values which have a special meaning as arguments to the .code -C option, along with a description of what behaviors are affected: +.IP 100 +Up to \*(TX 100, the +.code split-str +function had an undocumented behavior. When the +.code sep +argument was an empty string, it split the string into +individual characters as if by calling +.codn list-str . +This behavior changed to the currently +documented behavior starting in \*(TX 101. + .IP 99 Up to \*(TX 99, the substitution of TXR Lisp expressions in .code @(output) -- cgit v1.2.3