From 6cb3edd9d01fe4187d1aa627c937b7f7dffcab7b Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Tue, 18 Jul 2023 23:13:06 -0700 Subject: del/replace with index-list: fix semantics. This commit does two things. The replace function, implemented under the hood by four specializations: replace-list, replace-vec, replace-str and replace-buf, will handle the index-list case a little differently. This is needed to fix the ability of the del macro work on place designated by an index list, such as: (del [sequence '(1 3 5 6)] which now deletes elements 1, 3, 5 and 6 from the sequence, and returns a sequence of those items. The underlying implementation uses replace with an index-list, which is now capable of deleting items. Previously, replace would stop processing the index list when the replacement-sequence corresponding to the index list ran out of items. Now, when the replacement-sequence runs out of items, the remaining index-list sequence elements specify items to be deleted. For instance if str holds "abcdefg" then: (set [str '(1 3 5)] "xy") will change str to "axcyeg". Elements 1 and 3 are replaced by x and y, respectively. Element 5, the letter f, is deleted, because the replacement "xy" has no element corresponding to 5. * lib.c (replace_list, replace_str, replace_vec): Implement new deleteion semantics for the case when the replacement sequence runs out of items. * buf.c (replace_buf): Likewise. * tests/010/seq.txr: Some new test cases here for deletion. * tests/010/seq.expected: Updated. * txr.1: Documented new semantics of replace, including a new restriction that if elements are being deleted, the indices should be monotonically increasing regardless of the type of the sequence (not only list). A value of 289 for the -C option documented, which restores the previous behavior of replace (breaking deletion by index-list, unfortunately: you don't always get to simulate an old version of TXR while using new features.) --- buf.c | 32 +++++++++++++++- lib.c | 99 ++++++++++++++++++++++++++++++++++++++++++++------ tests/010/seq.expected | 36 ++++++++++++++++++ tests/010/seq.txr | 57 ++++++++++++++++++++++++++++- txr.1 | 55 +++++++++++++++++++++++----- 5 files changed, 257 insertions(+), 22 deletions(-) diff --git a/buf.c b/buf.c index 8437a41a..e4dec8aa 100644 --- a/buf.c +++ b/buf.c @@ -47,6 +47,7 @@ #include "stream.h" #include "arith.h" #include "utf8.h" +#include "txr.h" #include "buf.h" static cnum buf_check_len(val len, val self) @@ -290,6 +291,8 @@ val replace_buf(val buf, val items, val from, val to) from = len; } else if (!integerp(from)) { seq_iter_t wh_iter, item_iter; + cnum offs = 0; + cnum l = c_num(len, self), ol = l; val wh, item; seq_iter_init(self, &wh_iter, from); seq_iter_init(self, &item_iter, items); @@ -299,12 +302,39 @@ val replace_buf(val buf, val items, val from, val to) lit("~a: to-arg not applicable when from-arg is a list"), self, nao); - while (seq_get(&wh_iter, &wh) && seq_get(&item_iter, &item)) { + while (seq_get(&item_iter, &item) && seq_get(&wh_iter, &wh)) { if (ge(wh, len)) break; buf_put_uchar(buf, wh, item); } + if (!opt_compat || opt_compat > 289) { + while (seq_get(&wh_iter, &wh)) { + cnum w = c_num(wh, self); + + if (w < 0) + w += ol; + + if (w < 0) + break; + + w -= offs; + + if (w >= l) + break; + + memmove(buf->b.data + w, + buf->b.data + w + 1, + l - w - 1); + l--; + offs++; + + } + + if (offs > 0) + buf_set_length(buf, num_fast(l), zero); + } + return buf; } else if (minusp(from)) { from = plus(from, len); diff --git a/lib.c b/lib.c index cf887d4e..1ce03dc2 100644 --- a/lib.c +++ b/lib.c @@ -2603,8 +2603,11 @@ val replace_list(val list, val items, val from, val to) from = nil; } else if (!integerp(from)) { seq_iter_t wh_iter; - val iter = list, idx = zero, item, wh; + cnum ndel = 0; + loc iter = mkcloc(list); + val cons, idx = zero, item, wh; seq_iter_t item_iter; + int compat = opt_compat && opt_compat <= 289; seq_iter_init(self, &item_iter, items); seq_iter_init(self, &wh_iter, from); @@ -2613,20 +2616,30 @@ val replace_list(val list, val items, val from, val to) lit("~a: to-arg not applicable when from-arg is a list"), self, nao); - while (iter && seq_peek(&item_iter, &item) && seq_peek(&wh_iter, &wh)) { + while ((cons = deref(iter)) && seq_peek(&wh_iter, &wh)) { + int have_item = seq_peek(&item_iter, &item); + if (!have_item && compat) + break; if (minusp(wh)) - wh = plus(wh, len ? len : (len = length(list))); + wh = plus(wh, len ? len : (len = plus(length(list), num_fast(ndel)))); if (lt(wh, idx)) { seq_geti(&wh_iter); seq_geti(&item_iter); continue; } else if (eql(wh, idx)) { - rplaca(iter, item); seq_geti(&wh_iter); - seq_geti(&item_iter); + if (have_item) { + rplaca(cons, item); + seq_geti(&item_iter); + } else { + deref(iter) = cdr(cons); + idx = plus(idx, one); + ndel++; + continue; + } } - iter = cdr(iter); + iter = cdr_l(cons); idx = plus(idx, one); } @@ -5688,6 +5701,8 @@ val replace_str(val str_in, val items, val from, val to) from = len; } else if (!integerp(from)) { val wh, item; + cnum offs = 0; + cnum l = c_num(len, self), ol = l; seq_iter_t wh_iter, item_iter; seq_iter_init(self, &item_iter, items); seq_iter_init(self, &wh_iter, from); @@ -5697,12 +5712,37 @@ val replace_str(val str_in, val items, val from, val to) lit("~a: to-arg not applicable when from-arg is a list"), self, nao); - while (seq_get(&wh_iter, &wh) && seq_get(&item_iter, &item)) { + while (seq_get(&item_iter, &item) && seq_get(&wh_iter, &wh)) { if (ge(wh, len)) break; chr_str_set(str_in, wh, item); } + if (!opt_compat || opt_compat > 289) { + while (seq_get(&wh_iter, &wh)) { + cnum w = c_num(wh, self); + + if (w < 0) + w += ol; + + if (w < 0) + break; + + w -= offs; + + if (w >= l) + break; + + wmemmove(str_in->st.str + w, + str_in->st.str + w + 1, + l - w); + l--; + offs++; + + } + if (offs > 0) + set(mkloc(str_in->st.len, str_in), num_fast(l)); + } return str_in; } else if (minusp(from)) { from = plus(from, len); @@ -9636,6 +9676,8 @@ val replace_vec(val vec_in, val items, val from, val to) from = len; } else if (!integerp(from)) { seq_iter_t wh_iter, item_iter; + cnum offs = 0; + cnum l = c_num(len, self), ol = l; val wh, item; seq_iter_init(self, &wh_iter, from); seq_iter_init(self, &item_iter, items); @@ -9645,12 +9687,38 @@ val replace_vec(val vec_in, val items, val from, val to) lit("~a: to-arg not applicable when from-arg is a list"), self, nao); - while (seq_get(&wh_iter, &wh) && seq_get(&item_iter, &item)) { + while (seq_get(&item_iter, &item) && seq_get(&wh_iter, &wh)) { if (ge(wh, len)) break; set(vecref_l(vec_in, wh), item); } + if (!opt_compat || opt_compat > 289) { + while (seq_get(&wh_iter, &wh)) { + cnum w = c_num(wh, self); + + if (w < 0) + w += ol; + + if (w < 0) + break; + + w -= offs; + + if (w >= l) + break; + + memmove(vec_in->v.vec + w, + vec_in->v.vec + w + 1, + (l - w - 1) * sizeof vec_in->v.vec); + l--; + offs++; + } + + if (offs > 0) + vec_set_length(vec_in, num_fast(l)); + } + return vec_in; } else if (minusp(from)) { from = plus(from, len); @@ -13257,9 +13325,18 @@ val dwim_del(val place_p, val seq, val ind_range) break; } - if (rangep(ind_range)) { - return replace(seq, nil, from(ind_range), to(ind_range)); - } else { + switch (type(ind_range)) { + case NIL: + case CONS: + case LCONS: + case VEC: + return replace(seq, nil, ind_range, colon_k); + case RNG: + { + range_bind (x, y, ind_range); + return replace(seq, nil, x, y); + } + default: return replace(seq, nil, ind_range, succ(ind_range)); } } diff --git a/tests/010/seq.expected b/tests/010/seq.expected index 9c4d860d..691e6ac4 100644 --- a/tests/010/seq.expected +++ b/tests/010/seq.expected @@ -14,3 +14,39 @@ exception! 38 37 36 35 34 33 32 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1) #((8 . #\g) (6 . #\f)) #((7 . #\h) (5 . #\e) (4 . #\d) (3 . #\c) (2 . #\b) (1 . #\a)) +"bdf" +"aceg" +"g" +"abcdef" +"abcdefg" +"" +"abcdefg" +"" +"aceg" +"bdf" +(1 3 5) +(0 2 4 6) +(0 1 2 3 4 5 6) +nil +(0 1 2 3 4 5 6) +nil +(0 2 4 6) +(1 3 5) +#(1 3 5) +#(0 2 4 6) +#(0 1 2 3 4 5 6) +#() +#(0 1 2 3 4 5 6) +#() +#(0 2 4 6) +#(1 3 5) +#b'bbddff' +#b'aaccee99' +#b'99' +#b'aabbccddeeff' +#b'aabbccddeeff99' +#b'' +#b'aabbccddeeff99' +#b'' +#b'aaccee99' +#b'bbddff' diff --git a/tests/010/seq.txr b/tests/010/seq.txr index 9b3edf28..18f5c198 100644 --- a/tests/010/seq.txr +++ b/tests/010/seq.txr @@ -17,4 +17,59 @@ (pr [nsort *v* >]) (pr [nsort *v2* > cdr]) (pr [nsort (range 1 100) >]) - (pr2 (del [*v2* 1..3]) *v2*)) + (pr2 (del [*v2* 1..3]) *v2*) + (let ((s (copy "abcdefg"))) + (pr (del [s '(1 3 5)])) + (pr s)) + (let ((s (copy "abcdefg"))) + (pr (del [s '(6)])) + (pr s)) + (let ((s (copy "abcdefg"))) + (pr (del [s '(0 1 2 3 4 5 6)])) + (pr s)) + (let ((s (copy "abcdefg"))) + (pr (del [s '(-7 -6 -5 -4 -3 -2 -1)])) + (pr s)) + (let ((s (copy "abcdefg"))) + (pr (del [s '(-7 -5 -3 -1)])) + (pr s)) + (let ((s (list 0 1 2 3 4 5 6))) + (pr (del [s '(1 3 5)])) + (pr s)) + (let ((s (list 0 1 2 3 4 5 6))) + (pr (del [s '(0 1 2 3 4 5 6)])) + (pr s)) + (let ((s (list 0 1 2 3 4 5 6))) + (pr (del [s '(-7 -6 -5 -4 -3 -2 -1)])) + (pr s)) + (let ((s (list 0 1 2 3 4 5 6))) + (pr (del [s '(-7 -5 -3 -1)])) + (pr s)) + (let ((s (vec 0 1 2 3 4 5 6))) + (pr (del [s '(1 3 5)])) + (pr s)) + (let ((s (vec 0 1 2 3 4 5 6))) + (pr (del [s '(0 1 2 3 4 5 6)])) + (pr s)) + (let ((s (vec 0 1 2 3 4 5 6))) + (pr (del [s '(-7 -6 -5 -4 -3 -2 -1)])) + (pr s)) + (let ((s (vec 0 1 2 3 4 5 6))) + (pr (del [s '(-7 -5 -3 -1)])) + (pr s)) + (let ((s (copy #b'aabbccddeeff99'))) + (pr (del [s '(1 3 5)])) + (pr s)) + (let ((s (copy #b'aabbccddeeff99'))) + (pr (del [s '(6)])) + (pr s)) + (let ((s (copy #b'aabbccddeeff99'))) + (pr (del [s '(0 1 2 3 4 5 6)])) + (pr s)) + (let ((s (copy #b'aabbccddeeff99'))) + (pr (del [s '(-7 -6 -5 -4 -3 -2 -1)])) + (pr s)) + (let ((s (copy #b'aabbccddeeff99'))) + (pr (del [s '(-7 -5 -3 -1)])) + (pr s)) + ) diff --git a/txr.1 b/txr.1 index 7ff8f608..ede8d78e 100644 --- a/txr.1 +++ b/txr.1 @@ -34283,18 +34283,28 @@ given by with their counterparts from .metn replacement-sequence . -This form of the replace function does not insert -or delete; it simply overwrites elements. If +If .meta replacement-sequence -and +has at least as many elements as are indicated in +.metn index-list , +then the indicated elements of +.meta sequence +are overwritten with successive elements from +.metn replacement-sequence . +If +.meta replacement-sequence +contains fewer elements than +.metn index-list , +then the excess elements indicated in .meta index-list -are of different lengths, then the shorter of the two determines -the maximum number of elements which are overwritten. +which have no counterparts in the +.meta replacement-sequence +are deleted. Whenever a negative value occurs in .meta index-list -the length of +the original length of .meta sequence -is added to that value. +(before any deletions) is added to that value. Furthermore, similar restrictions apply on .meta index-list as under the @@ -34305,11 +34315,12 @@ is encountered which is out of range for .metn sequence . furthermore, if .meta sequence -is a list, then +is a list, or if any deletions take place, then .meta index-list must be monotonically increasing, after consideration of the -displacement of negative values. +displacement of negative values, or else the behavior +is unspecified. If .meta replacement-sequence @@ -91822,6 +91833,32 @@ of these version values, the described behaviors are provided if is given an argument which is equal or lower. For instance .code "-C 103" selects the behaviors described below for version 105, but not those for 102. +.IP 289 +Until \*(TX 289, the +.code replace +function had different semantics in the handling of the +.meta index-list +and +.metn replacement-sequence . +When the +.meta index-list +contained more indices than elements of +.meta replacement-sequence +then the replacement of elemenets in the main sequence would stop. +No deletion of elements was performed. This behavior is restored by +selecting 289 or lower compatibility. Note, however, that this breaks +the ability of the +.code del +macro to delete items from a sequence by +.metn index-list . +The +.code del +macro could do that in version 289 or older, and the behavior +That behavior didn't work in version 289 or older, and is supported +by the new semantics of +.metn replace , +which is capable of deleting items specified by +.metn index-list . .IP 288 Integers and ranges callable like functions are a new feature introduced after \*(TX 288. The latter, callable ranges, are a breaking change; -- cgit v1.2.3