summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2025-05-29 06:37:10 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2025-05-29 06:37:10 -0700
commit: b01dcd8533183810adec7bd52edab4868a521436 (patch)
tree: 4714cece7e69bb09f3fa94279f460f190ed46a7e
parent: 0ee7c2b049f3e2764f9c27dcb4f849617ae52d73 (diff)
downloadtxr-b01dcd8533183810adec7bd52edab4868a521436.tar.gz
txr-b01dcd8533183810adec7bd52edab4868a521436.tar.bz2
txr-b01dcd8533183810adec7bd52edab4868a521436.zip
buf: stream: switch approach for unget_char.
We switch to the method used in string streams for ungetting characters, whereby we UTF-8 encode the pushed back character and push back the resulting bytes, thereby unifying character and byte pushback. * buf.c (struct buf_strm): Remove member unget_c. (buf_strm_mark): Remove reference to unget_c. (buf_strm_get_char): Remove code for obtaining previously pushed back character from s->unget_c stack. (buf_strm_unget_char): Rewrite to use utf8_encode to write the multi-byte representation of the character into a utf8_tiny_buf, and then push back the bytes. (make_buf_stream): Don't initialize removed unget_c. * tests/018/streams.tl: New tests.
-rw-r--r--buf.c48
-rw-r--r--tests/018/streams.tl17
2 files changed, 47 insertions, 18 deletions
diff --git a/buf.c b/buf.c
index 360d5bec..d5782dc2 100644
--- a/buf.c
+++ b/buf.c
@@ -1086,7 +1086,6 @@ struct buf_strm {
int is_byte_oriented;
val buf;
val pos;
- val unget_c;
};
static void buf_strm_mark(val stream)
@@ -1095,7 +1094,6 @@ static void buf_strm_mark(val stream)
strm_base_mark(&b->a);
gc_mark(b->buf);
gc_mark(b->pos);
- gc_mark(b->unget_c);
}
static int buf_strm_put_byte_callback(int b, mem_t *ctx)
@@ -1147,23 +1145,18 @@ static int buf_strm_get_byte_callback(mem_t *ctx)
static val buf_strm_get_char(val stream)
{
struct buf_strm *s = coerce(struct buf_strm *, stream->co.handle);
+ wint_t ch;
- if (s->unget_c) {
- return rcyc_pop(&s->unget_c);
+ if (s->is_byte_oriented) {
+ ch = buf_strm_get_byte_callback(coerce(mem_t *, s));
+ if (ch == 0)
+ ch = 0xDC00;
} else {
- wint_t ch;
-
- if (s->is_byte_oriented) {
- ch = buf_strm_get_byte_callback(coerce(mem_t *, s));
- if (ch == 0)
- ch = 0xDC00;
- } else {
- ch = utf8_decode(&s->ud, buf_strm_get_byte_callback,
- coerce(mem_t *, s));
- }
-
- return (ch != WEOF) ? chr(ch) : nil;
+ ch = utf8_decode(&s->ud, buf_strm_get_byte_callback,
+ coerce(mem_t *, s));
}
+
+ return (ch != WEOF) ? chr(ch) : nil;
}
static val buf_strm_get_byte(val stream)
@@ -1175,8 +1168,28 @@ static val buf_strm_get_byte(val stream)
static val buf_strm_unget_char(val stream, val ch)
{
+ val self = lit("unget-char");
struct buf_strm *s = coerce(struct buf_strm *, stream->co.handle);
- mpush(ch, mkloc(s->unget_c, stream));
+ struct buf *b = us_buf_handle(s->buf);
+ struct utf8_tiny_buf bu;
+ unsigned char *bend = bu.buf + sizeof bu.buf;
+ ucnum index = c_unum(s->pos, self);
+
+ bu.ptr = bend;
+
+ (void) utf8_encode(c_chr(ch), utf8_tiny_buf_putc, coerce(mem_t *, &bu));
+
+ if (convert(size_t, bend - bu.ptr) > index) {
+ uw_throwf(file_error_s,
+ lit("~a: cannot push back past start of stream ~s"),
+ stream, self, nao);
+ }
+
+ while (bu.ptr < bend)
+ b->data[--index] = *bu.ptr++;
+
+ s->pos = unum(index);
+
return ch;
}
@@ -1367,7 +1380,6 @@ val make_buf_stream(val buf_opt)
s->buf = nil;
s->pos = zero;
s->is_byte_oriented = 0;
- s->unget_c = nil;
stream = cobj(coerce(mem_t *, s), stream_cls, &buf_strm_ops.cobj_ops);
s->buf = buf;
diff --git a/tests/018/streams.tl b/tests/018/streams.tl
index f82bc06e..84f1b89b 100644
--- a/tests/018/streams.tl
+++ b/tests/018/streams.tl
@@ -249,3 +249,20 @@
(seek-stream s -3 :from-end) t
(fill-buf-adjust b 0 s) 3
b #b'DDEEFF')))
+
+(with-in-buf-stream (s (buf-str "ABCD"))
+ (mtest
+ (seek-stream s 0 :from-start) t
+ (unget-byte 3 s) :error
+ (get-char s) #\A
+ (get-byte s) 66
+ (get-char s) #\C
+ (seek-stream s 0 :from-current) 3
+ (unget-char #\x3042 s) #\x3042
+ (seek-stream s 0 :from-current) 0
+ (get-char s) #\x3042
+ (unget-char #\x3042 s) #\x3042
+ (get-byte s) #xe3
+ (get-char s) #\xdc81
+ (seek-stream s 0 :from-current) 2
+ (unget-char #\x3042 s) :error))