summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--stream.c61
-rw-r--r--tests/018/streams.tl13
-rw-r--r--txr.141
3 files changed, 109 insertions, 6 deletions
diff --git a/stream.c b/stream.c
index 9f358a3c..6459ffdf 100644
--- a/stream.c
+++ b/stream.c
@@ -2081,6 +2081,12 @@ struct byte_input {
unsigned char *buf;
size_t size;
size_t index;
+ utf8_decoder_t ud;
+};
+
+struct byte_input_ungetch { /* Scratch area in which byte_in_unget_char collects the UTF-8 bytes of one character. */
+ unsigned char buf[8]; /* filled from the end toward the start by byte_in_unget_char_callback */
+ unsigned char *ptr; /* lowest filled slot; starts at buf + sizeof buf, meaning nothing stored yet */
};
static void byte_in_stream_destroy(val stream)
@@ -2092,6 +2098,53 @@ static void byte_in_stream_destroy(val stream)
free(bi);
}
+static int byte_in_get_char_callback(mem_t *ctx) /* Byte-fetch callback that byte_in_get_char hands to utf8_decode. */
+{
+ struct byte_input *bi = coerce(struct byte_input *, ctx); /* ctx is the struct byte_input passed by byte_in_get_char */
+ return (bi->index < bi->size) ? bi->buf[bi->index++] : EOF; /* next byte, advancing index; EOF once index reaches size */
+}
+
+static val byte_in_get_char(val stream) /* get_char operation: decodes one character from the stream's bytes via utf8_decode. */
+{
+ struct byte_input *bi = coerce(struct byte_input *, stream->co.handle);
+ wint_t wch = utf8_decode(&bi->ud, byte_in_get_char_callback, /* decoder state lives in the stream (bi->ud); bytes are pulled through the callback */
+ coerce(mem_t *, bi));
+ int ch;
+
+ while ((ch = utf8_getc(&bi->ud)) != EOF) /* drain every byte the decoder still yields. NOTE(review): presumably bytes it read ahead and backed out of -- confirm against utf8_decode */
+ if (bi->index > 0) /* a drained byte is silently discarded when index is already 0 */
+ bi->buf[--bi->index] = ch; /* store it just before the read position so the next read sees it */
+
+ return (wch != WEOF) ? chr(wch) : nil; /* nil when the decoder reports WEOF, otherwise the character object */
+}
+
+static int byte_in_unget_char_callback(int ch, mem_t *ctx) /* Byte-sink callback that byte_in_unget_char hands to utf8_encode. */
+{
+ struct byte_input_ungetch *bu = coerce(struct byte_input_ungetch *, ctx); /* ctx is the caller's scratch struct */
+ return (bu->ptr > bu->buf) ? *--bu->ptr = ch, 1 : 0; /* room left: store ch one slot below ptr and return 1; scratch full: store nothing, return 0 */
+}
+
+static val byte_in_unget_char(val stream, val ch) /* unget_char operation: pushes ch back as its UTF-8 bytes; returns ch, or throws file_error_s if too few bytes have been consumed. */
+{
+ struct byte_input *bi = coerce(struct byte_input *, stream->co.handle);
+ struct byte_input_ungetch bu;
+ unsigned char *bend = bu.buf + sizeof bu.buf; /* one past the end of the scratch buffer */
+
+ bu.ptr = bend; /* scratch starts empty; the callback fills it downward from here */
+
+ (void) utf8_encode(c_chr(ch), byte_in_unget_char_callback, coerce(mem_t *, &bu)); /* encoder's return value is deliberately ignored */
+
+ if (convert(size_t, bend - bu.ptr) > bi->index) /* more encoded bytes than bytes consumed so far: they cannot fit before the read position */
+ uw_throwf(file_error_s,
+ lit("unget-char: cannot push past beginning of byte stream"),
+ nao);
+
+ while (bu.ptr < bend) /* walk the scratch upward while index steps downward; since the callback stored bytes downward, they land in buf in encoding order */
+ bi->buf[--bi->index] = *bu.ptr++;
+
+ return ch;
+}
+
static val byte_in_get_byte(val stream)
{
struct byte_input *bi = coerce(struct byte_input *, stream->co.handle);
@@ -2133,10 +2186,13 @@ static struct strm_ops byte_in_ops =
cobj_eq_hash_op,
0),
wli("byte-input-stream"),
- 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ byte_in_get_char,
byte_in_get_byte,
+ byte_in_unget_char,
+ byte_in_unget_byte,
0,
- byte_in_unget_byte, 0, 0,
+ generic_fill_buf,
0, 0, 0, 0, 0, 0,
byte_in_get_error,
byte_in_get_error_str,
@@ -2153,6 +2209,7 @@ val make_string_byte_input_stream(val string)
strm_base_init(&bi->a);
bi->buf = utf8_dup_to_buf(wstring, &bi->size, 0);
bi->index = 0;
+ utf8_decoder_init(&bi->ud);
return cobj(coerce(mem_t *, bi), stream_cls, &byte_in_ops.cobj_ops);
}
}
diff --git a/tests/018/streams.tl b/tests/018/streams.tl
index 12157e48..1350131f 100644
--- a/tests/018/streams.tl
+++ b/tests/018/streams.tl
@@ -59,3 +59,16 @@
(unget-byte #x81) #x81
(unget-byte #xe3) #xe3
(get-char) #\x3042)
+
+(with-in-string-byte-stream (s "ABCD") ; mixes byte and character reads/ungets on one four-byte stream
+ (mtest
+ (unget-byte 3 s) :error ; nothing has been read yet, so there is nothing to push back over
+ (get-char s) #\A
+ (get-byte s) 66 ; the byte for B
+ (get-char s) #\C
+ (unget-char #\x3042 s) #\x3042 ; needs three bytes (see #xe3 #x81 #x82 in the previous test); exactly three have been consumed
+ (get-char s) #\x3042
+ (unget-char #\x3042 s) #\x3042
+ (get-byte s) #xe3 ; consume only the lead byte of the pushed-back character
+ (get-char s) #\xdc81 ; expected result of decoding from the leftover #x81 byte, which no longer begins a valid sequence
+ (unget-char #\x3042 s) :error)) ; presumably only two bytes are consumed at this point, one short of the three required
diff --git a/txr.1 b/txr.1
index e1cab5a0..a58a28b2 100644
--- a/txr.1
+++ b/txr.1
@@ -66882,11 +66882,20 @@ Output operations and byte operations are not supported.
.desc
The
.code make-string-byte-input-stream
-function produces an input stream object. Byte read operations on
-this stream object read successive byte values obtained by encoding
+function produces an input stream object. The
+.code get-byte
+operation on this stream type reads successive byte values obtained by
+encoding
.meta string
-into UTF-8. Character read operations are not supported, and neither
-are output operations.
+into UTF-8. The
+.code get-char
+operation is also supported, via decoding UTF-8 characters
+from the byte sequence. Both
+.code unget-byte
+and
+.code unget-char
+are supported.
+Output and positioning operations are not supported.
.coNP Function @ make-strlist-input-stream
.synb
@@ -67490,6 +67499,30 @@ as UTF-8. After pushed back bytes are exhausted, bytes are
read from the buffered stream in the usual way.
.RE
+String byte input streams, created by
+.codn make-string-byte-input-stream ,
+make the following guarantees:
+.RS
+.IP 1.
+Only as many bytes may be pushed back as have been read
+from the stream.
+.IP 2.
+When a character is pushed back, it is converted to a UTF-8
+byte sequence. The byte sequence is then pushed back, in
+reverse order, so that a subsequent
+.code get-char
+operation can recover the same character via UTF-8 decoding.
+If there isn't room to push back all of the bytes,
+an exception of type
+.code file-error
+is thrown.
+.IP 3.
+Since push-back is byte-based, rather than a separate mechanism
+for characters and bytes, as in the case of file streams,
+mixtures of getting and ungetting bytes and characters
+are supported.
+.RE
+
.coNP Functions @, put-string @, put-line @ put-char and @ put-byte
.synb
.mets (put-string < string <> [ stream ])