diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2025-05-29 06:37:10 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2025-05-29 06:37:10 -0700 |
commit | b01dcd8533183810adec7bd52edab4868a521436 (patch) | |
tree | 4714cece7e69bb09f3fa94279f460f190ed46a7e /buf.c | |
parent | 0ee7c2b049f3e2764f9c27dcb4f849617ae52d73 (diff) | |
download | txr-b01dcd8533183810adec7bd52edab4868a521436.tar.gz txr-b01dcd8533183810adec7bd52edab4868a521436.tar.bz2 txr-b01dcd8533183810adec7bd52edab4868a521436.zip |
buf: stream: switch approach for unget_char.
We switch to the method used in string streams for
ungetting characters, whereby we UTF-8 encode the pushed
back character and push back the subsequent bytes,
thereby unifying character and byte pushback.
* buf.c (struct buf_strm): Remove member unget_c.
(buf_strm_mark): Remove reference to unget_c.
(buf_strm_get_char): Remove code for obtaining previously
pushed back character from s->unget_c stack.
(buf_strm_unget_char): Rewrite using the approach
of using utf8_encode to write the multi-byte representation
of the character into utf8_tiny_buf, and then
pushing back the bytes.
(make_buf_stream): Don't initialize removed unget_c.
* tests/018/streams.tl: New tests.
Diffstat (limited to 'buf.c')
-rw-r--r-- | buf.c | 48 |
1 files changed, 30 insertions, 18 deletions
@@ -1086,7 +1086,6 @@ struct buf_strm { int is_byte_oriented; val buf; val pos; - val unget_c; }; static void buf_strm_mark(val stream) @@ -1095,7 +1094,6 @@ static void buf_strm_mark(val stream) strm_base_mark(&b->a); gc_mark(b->buf); gc_mark(b->pos); - gc_mark(b->unget_c); } static int buf_strm_put_byte_callback(int b, mem_t *ctx) @@ -1147,23 +1145,18 @@ static int buf_strm_get_byte_callback(mem_t *ctx) static val buf_strm_get_char(val stream) { struct buf_strm *s = coerce(struct buf_strm *, stream->co.handle); + wint_t ch; - if (s->unget_c) { - return rcyc_pop(&s->unget_c); + if (s->is_byte_oriented) { + ch = buf_strm_get_byte_callback(coerce(mem_t *, s)); + if (ch == 0) + ch = 0xDC00; } else { - wint_t ch; - - if (s->is_byte_oriented) { - ch = buf_strm_get_byte_callback(coerce(mem_t *, s)); - if (ch == 0) - ch = 0xDC00; - } else { - ch = utf8_decode(&s->ud, buf_strm_get_byte_callback, - coerce(mem_t *, s)); - } - - return (ch != WEOF) ? chr(ch) : nil; + ch = utf8_decode(&s->ud, buf_strm_get_byte_callback, + coerce(mem_t *, s)); } + + return (ch != WEOF) ? 
chr(ch) : nil; } static val buf_strm_get_byte(val stream) @@ -1175,8 +1168,28 @@ static val buf_strm_get_byte(val stream) static val buf_strm_unget_char(val stream, val ch) { + val self = lit("unget-char"); struct buf_strm *s = coerce(struct buf_strm *, stream->co.handle); - mpush(ch, mkloc(s->unget_c, stream)); + struct buf *b = us_buf_handle(s->buf); + struct utf8_tiny_buf bu; + unsigned char *bend = bu.buf + sizeof bu.buf; + ucnum index = c_unum(s->pos, self); + + bu.ptr = bend; + + (void) utf8_encode(c_chr(ch), utf8_tiny_buf_putc, coerce(mem_t *, &bu)); + + if (convert(size_t, bend - bu.ptr) > index) { + uw_throwf(file_error_s, + lit("~a: cannot push back past start of stream ~s"), + stream, self, nao); + } + + while (bu.ptr < bend) + b->data[--index] = *bu.ptr++; + + s->pos = unum(index); + return ch; } @@ -1367,7 +1380,6 @@ val make_buf_stream(val buf_opt) s->buf = nil; s->pos = zero; s->is_byte_oriented = 0; - s->unget_c = nil; stream = cobj(coerce(mem_t *, s), stream_cls, &buf_strm_ops.cobj_ops); s->buf = buf; |