summaryrefslogtreecommitdiffstats
path: root/buf.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2025-05-29 06:37:10 -0700
committerKaz Kylheku <kaz@kylheku.com>2025-05-29 06:37:10 -0700
commitb01dcd8533183810adec7bd52edab4868a521436 (patch)
tree4714cece7e69bb09f3fa94279f460f190ed46a7e /buf.c
parent0ee7c2b049f3e2764f9c27dcb4f849617ae52d73 (diff)
downloadtxr-b01dcd8533183810adec7bd52edab4868a521436.tar.gz
txr-b01dcd8533183810adec7bd52edab4868a521436.tar.bz2
txr-b01dcd8533183810adec7bd52edab4868a521436.zip
buf: stream: switch approach for unget_char.
We switch to the method used in string streams for ungetting characters, whereby we UTF-8 encode the pushed-back character and push back the resulting bytes, thereby unifying character and byte pushback. * buf.c (struct buf_strm): Remove member unget_c. (buf_strm_mark): Remove reference to unget_c. (buf_strm_get_char): Remove code for obtaining a previously pushed-back character from the s->unget_c stack. (buf_strm_unget_char): Rewrite to use utf8_encode to write the multi-byte representation of the character into a utf8_tiny_buf, and then push back the bytes. (make_buf_stream): Don't initialize removed unget_c. * tests/018/streams.tl: New tests.
Diffstat (limited to 'buf.c')
-rw-r--r--buf.c48
1 files changed, 30 insertions, 18 deletions
diff --git a/buf.c b/buf.c
index 360d5bec..d5782dc2 100644
--- a/buf.c
+++ b/buf.c
@@ -1086,7 +1086,6 @@ struct buf_strm {
int is_byte_oriented;
val buf;
val pos;
- val unget_c;
};
static void buf_strm_mark(val stream)
@@ -1095,7 +1094,6 @@ static void buf_strm_mark(val stream)
strm_base_mark(&b->a);
gc_mark(b->buf);
gc_mark(b->pos);
- gc_mark(b->unget_c);
}
static int buf_strm_put_byte_callback(int b, mem_t *ctx)
@@ -1147,23 +1145,18 @@ static int buf_strm_get_byte_callback(mem_t *ctx)
static val buf_strm_get_char(val stream)
{
struct buf_strm *s = coerce(struct buf_strm *, stream->co.handle);
+ wint_t ch;
- if (s->unget_c) {
- return rcyc_pop(&s->unget_c);
+ if (s->is_byte_oriented) { /* byte-oriented: one byte per character, no UTF-8 decoding */
+ ch = buf_strm_get_byte_callback(coerce(mem_t *, s));
+ if (ch == 0)
+ ch = 0xDC00; /* a zero byte is reported as code 0xDC00 instead of NUL */
} else {
- wint_t ch;
-
- if (s->is_byte_oriented) {
- ch = buf_strm_get_byte_callback(coerce(mem_t *, s));
- if (ch == 0)
- ch = 0xDC00;
- } else {
- ch = utf8_decode(&s->ud, buf_strm_get_byte_callback,
- coerce(mem_t *, s));
- }
-
- return (ch != WEOF) ? chr(ch) : nil;
+ ch = utf8_decode(&s->ud, buf_strm_get_byte_callback, /* decoder pulls bytes through the callback */
+ coerce(mem_t *, s));
}
+
+ return (ch != WEOF) ? chr(ch) : nil; /* WEOF is returned to the caller as nil */
}
static val buf_strm_get_byte(val stream)
@@ -1175,8 +1168,28 @@ static val buf_strm_get_byte(val stream)
static val buf_strm_unget_char(val stream, val ch)
{
+ val self = lit("unget-char");
struct buf_strm *s = coerce(struct buf_strm *, stream->co.handle);
- mpush(ch, mkloc(s->unget_c, stream));
+ struct buf *b = us_buf_handle(s->buf);
+ struct utf8_tiny_buf bu;
+ unsigned char *bend = bu.buf + sizeof bu.buf;
+ ucnum index = c_unum(s->pos, self);
+
+ bu.ptr = bend; /* encoded bytes end up in [bu.ptr, bend) */
+
+ (void) utf8_encode(c_chr(ch), utf8_tiny_buf_putc, coerce(mem_t *, &bu));
+
+ if (convert(size_t, bend - bu.ptr) > index) { /* more encoded bytes than bytes before pos */
+ uw_throwf(file_error_s,
+ lit("~a: cannot push back past start of stream ~s"),
+ self, stream, nao); /* ~a takes the function name, ~s the stream */
+ }
+
+ while (bu.ptr < bend)
+ b->data[--index] = *bu.ptr++; /* overwrite the buffer bytes just before pos */
+
+ s->pos = unum(index); /* move the read position back over the pushed-back bytes */
+
return ch;
}
@@ -1367,7 +1380,6 @@ val make_buf_stream(val buf_opt)
s->buf = nil;
s->pos = zero;
s->is_byte_oriented = 0;
- s->unget_c = nil;
stream = cobj(coerce(mem_t *, s), stream_cls, &buf_strm_ops.cobj_ops);
s->buf = buf;