summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2025-05-25 20:36:04 -0700
committerKaz Kylheku <kaz@kylheku.com>2025-05-25 20:36:04 -0700
commitb0318e9910c09086a6249fdaf9251f8587a8ecd1 (patch)
tree9f2f0286ec2ded72eb0b43bd137ca90ac1ef429e
parent401d37621c19f6848ece1531db687c0ef1770deb (diff)
downloadtxr-b0318e9910c09086a6249fdaf9251f8587a8ecd1.tar.gz
txr-b0318e9910c09086a6249fdaf9251f8587a8ecd1.tar.bz2
txr-b0318e9910c09086a6249fdaf9251f8587a8ecd1.zip
streams: seek and truncate ops for string streams.
This patch makes seek-stream and truncate-stream work for string output streams, and seek-stream for string input streams. * stream.c (string_in_seek): New static function. (string_in_ops): Wire seek operation to string_in_seek. (struct string out): New member, len. Keeps track of the length of the string, so that fill can be freely positioned. (string_out_put_string): We can no longer add the string with a null terminator, because the put operation could be happening at any position. We only add the null terminator when we are writing the data at the end. This function also now supports buffer extension: the seek operation can seek beyond the current string. The seek operation then calls string_out_put_string with a null string. This function then grows th buffer as needed. In that case there is a need to fill the space with space characters. (string_out_truncate, string_out_seek): New static functions. (string_out_ops): Wire in string_out_seek and string_out_truncate. (make_string_output_stream); Initialize new so->len member to zero. (get_string_from_stream_common): New function, renamed from get_string_from_stream, and taking a parameter to optionally request non-destructive readout. (stream_init): Update registration of get-string-from-stream to get_string_from_stream_common. * stream.h (get_string_from_stream_common): Declared. (get_string_from_stream): Becomes inline function which calls get_string_from_stream, defaulting the argument. Why I didn't add the argument to get_string_from_stream is not to have to edit numerous calls to get_string_to_stream throughout the code base. * tests/018/streams.tl: New tests. * txr.1: Documentation updated to correct text claiming that string streams don't support truncate-stream and seek-stream, and describe the support in detal.
-rw-r--r--stream.c155
-rw-r--r--stream.h6
-rw-r--r--tests/018/streams.tl55
-rw-r--r--txr.172
4 files changed, 260 insertions, 28 deletions
diff --git a/stream.c b/stream.c
index cdcf35b1..03269dd6 100644
--- a/stream.c
+++ b/stream.c
@@ -2067,6 +2067,40 @@ static val string_in_unget_char(val stream, val ch)
return ch;
}
+static val string_in_seek(val stream, val off, enum strm_whence whence)
+{
+ struct string_in *s = coerce(struct string_in *, stream->co.handle);
+ val self = lit("seek-stream");
+ val pos, len = nil;
+
+ if (off == zero && whence == strm_cur) {
+ return s->pos;
+ } else switch (whence) {
+ case strm_start: default:
+ pos = off;
+ break;
+ case strm_cur:
+ pos = plus(s->pos, off);
+ break;
+ case strm_end:
+ pos = plus(len = length_str(s->string), off);
+ break;
+ }
+
+ if (minusp(pos))
+ uw_throwf(file_error_s,
+ lit("~a: ~s: negative position ~s disallowed"),
+ self, stream, pos, nao);
+
+ if (gt(pos, len ? len : length_str(s->string)))
+ uw_throwf(file_error_s,
+ lit("~a: ~s: position ~s lies beyond string length ~s"),
+ self, stream, pos, len, nao);
+
+ s->pos = pos;
+ return t;
+}
+
static val string_in_get_prop(val stream, val ind)
{
if (ind == name_k) {
@@ -2104,8 +2138,8 @@ static struct strm_ops string_in_ops =
0,
string_in_unget_char,
0, 0, 0, 0, 0,
- 0, /* TODO: seek */
- 0, /* TODO: truncate */
+ string_in_seek,
+ 0,
string_in_get_prop,
0,
string_in_get_error,
@@ -2419,8 +2453,7 @@ val make_strlist_input_stream(val list)
struct string_out {
struct strm_base a;
wchar_t *buf;
- size_t size;
- size_t fill;
+ size_t size, len, fill;
unsigned char byte_buf[4];
int head, tail;
};
@@ -2501,8 +2534,14 @@ static val string_out_put_string(val stream, val str)
so->buf = coerce(wchar_t *, chk_grow_vec(coerce(mem_t *, so->buf),
old_size, so->size,
sizeof *so->buf));
- wmemcpy(so->buf + so->fill, s, len + 1);
- so->fill += len;
+ wmemcpy(so->buf + so->fill, s, len);
+
+ if (so->fill > so->len)
+ wmemset(so->buf + so->len, ' ', so->fill - so->len);
+
+ if ((so->fill += len) > so->len)
+ so->buf[so->len = so->fill] = 0;
+
return t;
oflow:
uw_throwf(error_s, lit("~a: string output stream overflow"), self, nao);
@@ -2532,6 +2571,76 @@ static val string_out_put_byte(val stream, int ch)
return t;
}
+static val string_out_truncate(val stream, val len)
+{
+ struct string_out *so = coerce(struct string_out *, stream->co.handle);
+ val self = lit("truncate-stream");
+
+ if (so->buf == 0)
+ string_out_extracted_error(stream);
+
+ if (minusp(len))
+ uw_throwf(file_error_s,
+ lit("~a: ~s: negative length ~s specified"),
+ self, stream, len, nao);
+
+ while (so->head != so->tail)
+ string_out_byte_flush(so, stream);
+
+ {
+ size_t newlen = c_unum(len, self);
+
+ if (newlen > so->len) {
+ size_t fs = so->fill;
+ so->fill = newlen;
+ string_out_put_string(stream, null_string);
+ so->fill = fs;
+ } else {
+ so->len = newlen;
+ so->buf[newlen] = 0;
+ if (so->fill > newlen)
+ so->fill = newlen;
+ }
+ }
+
+ return t;
+}
+
+static val string_out_seek(val stream, val off, enum strm_whence whence)
+{
+ struct string_out *so = coerce(struct string_out *, stream->co.handle);
+ val self = lit("seek-stream");
+ val pos, len = nil;
+
+ while (so->head != so->tail)
+ string_out_byte_flush(so, stream);
+
+ pos = unum(so->fill);
+
+ if (off == zero && whence == strm_cur) {
+ return pos;
+ } else switch (whence) {
+ case strm_start:
+ pos = off;
+ break;
+ case strm_cur:
+ pos = plus(pos, off);
+ break;
+ case strm_end:
+ pos = plus(len = unum(so->len), off);
+ break;
+ }
+
+ if (minusp(pos))
+ uw_throwf(file_error_s,
+ lit("~a: ~s: negative position ~s disallowed"),
+ self, stream, pos, nao);
+
+ so->fill = c_unum(pos, self);
+ string_out_put_string(stream, null_string);
+ return t;
+}
+
static struct strm_ops string_out_ops =
strm_ops_init(cobj_ops_init(eq,
stream_print_op,
@@ -2544,8 +2653,8 @@ static struct strm_ops string_out_ops =
string_out_put_char,
string_out_put_byte,
0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, /* TODO: seek; fill-with-spaces semantics if past end. */
- 0,
+ string_out_seek,
+ string_out_truncate,
0, 0, 0, 0, 0, 0);
val make_string_output_stream(void)
@@ -2554,33 +2663,43 @@ val make_string_output_stream(void)
strm_base_init(&so->a);
so->size = 128;
so->buf = chk_wmalloc(so->size);
- so->fill = 0;
+ so->fill = so->len = 0;
so->buf[0] = 0;
so->head = so->tail = 0;
return cobj(coerce(mem_t *, so), stream_cls, &string_out_ops.cobj_ops);
}
-val get_string_from_stream(val stream)
+val get_string_from_stream_common(val stream, val copy_p_in)
{
val self = lit("get-string-from-stream");
+ val copy_p = default_null_arg(copy_p_in);
struct string_out *so = coerce(struct string_out *,
cobj_handle(self, stream, stream_cls));
if (stream->co.ops == &string_out_ops.cobj_ops) {
val out = nil;
- wchar_t *buf;
+ wchar_t *buf = so->buf;
+ size_t waste = so->size - so->len;
- if (!so->buf)
+ if (!buf)
return out;
while (so->head != so->tail)
out = string_out_byte_flush(so, stream);
- /* Trim to actual size */
- buf = coerce(wchar_t *, chk_realloc(coerce(mem_t *, so->buf),
- (so->fill + 1) * sizeof *so->buf));
- so->buf = 0;
- out = string_own(buf);
+ if (copy_p) {
+ out = string(buf);
+ } else {
+ /* Trim to actual size if waste is more than 25% of the
+ * actual size, and at least 128 chars
+ */
+ if (waste >= 128 && so->size - so->len > so->len / 4)
+ buf = coerce(wchar_t *, chk_realloc(coerce(mem_t *, so->buf),
+ (so->len + 1) * sizeof *so->buf));
+ out = string_own(buf);
+ so->buf = 0;
+ }
+
return out;
} else {
type_assert (stream->co.ops == &string_in_ops.cobj_ops,
@@ -5897,7 +6016,7 @@ void stream_init(void)
reg_fun(intern(lit("make-string-byte-input-stream"), user_package), func_n1(make_string_byte_input_stream));
reg_fun(intern(lit("make-strlist-input-stream"), user_package), func_n1(make_strlist_input_stream));
reg_fun(intern(lit("make-string-output-stream"), user_package), func_n0(make_string_output_stream));
- reg_fun(intern(lit("get-string-from-stream"), user_package), func_n1(get_string_from_stream));
+ reg_fun(intern(lit("get-string-from-stream"), user_package), func_n2o(get_string_from_stream_common, 1));
reg_fun(intern(lit("make-strlist-output-stream"), user_package), func_n0(make_strlist_output_stream));
reg_fun(intern(lit("get-list-from-stream"), user_package), func_n1(get_list_from_stream));
reg_fun(intern(lit("make-byte-input-stream"), user_package), func_n1(make_byte_input_stream));
diff --git a/stream.h b/stream.h
index 0b413776..d8cb61f1 100644
--- a/stream.h
+++ b/stream.h
@@ -220,7 +220,11 @@ val make_string_input_stream(val);
val make_string_byte_input_stream(val);
val make_strlist_input_stream(val);
val make_string_output_stream(void);
-val get_string_from_stream(val);
+val get_string_from_stream_common(val, val copy_p_in);
+INLINE val get_string_from_stream(val stream)
+{
+ return get_string_from_stream_common(stream, nil);
+}
val make_strlist_output_stream(void);
val get_list_from_stream(val);
val record_adapter(val regex, val stream, val include_match);
diff --git a/tests/018/streams.tl b/tests/018/streams.tl
index 1abde546..9475d4f7 100644
--- a/tests/018/streams.tl
+++ b/tests/018/streams.tl
@@ -139,3 +139,58 @@
(mtest
(get-delimited-string s : #\a) ""
(get-string s) "bcd:"))
+
+(with-in-string-stream (s "abcde")
+ (mtest
+ (truncate-stream s 3) :error
+ (seek-stream s 10 :from-start) :error
+ (seek-stream s 6 :from-start) :error
+ (seek-stream s 5 :from-start) t
+ (get-char s) nil
+ (seek-stream s 3 :from-start) t
+ (get-char s) #\d
+ (seek-stream s 0 :from-start) t
+ (seek-stream s 0 :from-current) 0
+ (get-char s) #\a
+ (seek-stream s 0 :from-current) 1
+ (seek-stream s -1 :from-start) :error
+ (get-char s) #\b
+ (seek-stream s 0 :from-current) 2
+ (seek-stream s -2 :from-current) t
+ (seek-stream s 0 :from-current) 0
+ (get-char s) #\a
+ (seek-stream s -2 :from-current) :error
+ (seek-stream s 6 :from-current) :error
+ (seek-stream s -1 :from-end) t
+ (seek-stream s 6 :from-end) :error
+ (seek-stream s 1 :from-end) :error
+ (get-char s) #\e))
+
+(with-out-string-stream (s)
+ (mtest
+ (truncate-stream s 8) t
+ (get-string-from-stream s t) " "
+ (truncate-stream s 5) t
+ (get-string-from-stream s t) " "
+ (seek-stream s 8 :from-start) t
+ (get-string-from-stream s t) " "
+ (seek-stream s -1 :from-start) :error
+ (seek-stream s -10 :from-end) :error
+ (seek-stream s 6 :from-start) t
+ (put-char #\6 s) t
+ (get-string-from-stream s t) " 6 "
+ (seek-stream s -8 :from-end) t
+ (put-char #\0 s) t
+ (get-string-from-stream s t) "0 6 "
+ (seek-stream s 0 :from-current) 1
+ (seek-stream s -1 :from-current) t
+ (put-char #\A s) t
+ (get-string-from-stream s t) "A 6 "
+ (put-string "BCDEF" s) t
+ (get-string-from-stream s t) "ABCDEF6 "
+ (put-string "GHI" s) t
+ (get-string-from-stream s) "ABCDEFGHI"
+ (get-string-from-stream s) nil
+ (truncate-stream s 2) :error
+ (seek-stream s 0 :from-current) :error
+ (put-string "X" s) :error))
diff --git a/txr.1 b/txr.1
index 9e079280..0f9ff70b 100644
--- a/txr.1
+++ b/txr.1
@@ -66873,7 +66873,17 @@ The
function produces an input stream object. Character read operations on the
stream object read successive characters from
.metn string .
-Output operations and byte operations are not supported.
+
+Output operations and byte input operations are not supported.
+The
+.code truncate-stream
+operation is likewise not supported by string input streams.
+
+String input streams support the
+.code seek-stream
+operation. Seeking to a position beyond the length of
+.metn string ,
+or to a negative position, is not permitted and throws an exception.
.coNP Function @ make-string-byte-input-stream
.synb
@@ -66922,14 +66932,28 @@ String output streams support both character and byte output operations.
Bytes are assumed to represent a UTF-8 encoding, and are decoded in order
to form characters which are stored into the string.
+String output streams support the
+.code seek-stream
+and
+.code truncate-stream
+operations. Seeking to a position beyond the current length of
+the accumulated string causes the string to be extended with
+space characters. Likewise, truncating to a length beyond the
+current length causes the same extension.
+Truncation to a length which lies below the current position causes the
+position to be moved to the end of the shortened string, so that subsequent
+output will add characters to the string.
+
If an incomplete UTF-8 code is output, and a character output operation then
-takes place, that code is assumed to be terminated and is decoded as invalid
-bytes. The UTF-8 decoding machine is reset and ready for the start of a new
-code.
+takes place, or a truncation or repositioning operation takes place, that code
+is assumed to be terminated and is decoded as invalid bytes. The UTF-8
+decoding machine is reset and ready for the start of a new code.
The
.code get-string-from-stream
-function is used to retrieve the accumulated string.
+function is used to retrieve the accumulated string. Depending on
+how that function is invoked, the accumulated string may be removed,
+such that the stream can no longer be manipulated.
If the null character is written to a string output stream, the behavior
is unspecified. \*(TX strings cannot contain null bytes. The pseudo-null
@@ -66942,13 +66966,14 @@ effective internal representation of the null character in external data.
.coNP Function @ get-string-from-stream
.synb
-.mets (get-string-from-stream << stream )
+.mets (get-string-from-stream < stream <> [ copy-p ])
.syne
.desc
The
.meta stream
-argument must be a string output stream. This function finalizes
-the data sent to the stream and retrieves the accumulated character string.
+argument must be a string output stream. The function retrieves
+the data accumulated in the string output stream, returning it
+as a string object.
If a partial UTF-8 code has been written to
.metn stream ,
@@ -66956,7 +66981,36 @@ and then this
function is called, the byte stream is considered complete and the partial
code is decoded as invalid bytes.
-After this function is called, further output on the stream is not possible.
+If the
+.meta copy-p
+is specified as true, then the function returns a copy of the data,
+without changing the state of
+.metn string .
+
+If
+.meta copy-p
+is is
+.codn nil ,
+which is the parameter's default value when an argument is omitted,
+then
+.code get-string-from-stream
+removes the data from the
+.meta stream
+object. After this removal, further manipulation of the stream, including
+output, truncation and repositioning operations, is not possible.
+Subsequent calls to
+.code get-string-from-stream
+on
+.meta stream
+return
+.codn nil .
+
+Implementation note: the destructive read-out of the string when
+.meta copy-p
+is given as
+.code nil
+allows the function to directly pass ownership of the data to the
+returned string object, without extra memory allocation or copying.
.coNP Function @ make-strlist-output-stream
.synb