summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2025-05-23 20:07:45 -0700
committerKaz Kylheku <kaz@kylheku.com>2025-05-23 20:07:45 -0700
commit37d391a97d2efc8e3f2e84fe6accd8825faed03e (patch)
tree65bf4a6a0340ed7a9ed1c3f652bd5475c6c2bb2d
parenta619222ee1eb25d48516f6cfcb848a17eab052fb (diff)
downloadtxr-37d391a97d2efc8e3f2e84fe6accd8825faed03e.tar.gz
txr-37d391a97d2efc8e3f2e84fe6accd8825faed03e.tar.bz2
txr-37d391a97d2efc8e3f2e84fe6accd8825faed03e.zip
streams: move utf8 decoder into strm_base.
This refactoring is required because some upcoming hack work is going to depend on the assumption that every stream object has a utf8_decoder_t. * utf8.h (utf8_decoder_initializer): New macro, used when initializing strm_base. * stream.h: Some of the content is now hidden unless the preprocessor symbol STREAM_IMPL is defined. This is motivated by the fact that stream.h now depends on utf8.h. When STREAM_IMPL is not defined, the bits that depend on utf8.h are disabled and we don't have to touch files which include "stream.h" which don't refer to those bits. (struct strm_base): New member ud, of type utf8_decoder_t. * stream.c: include "utf8.h" before "stream.h" and ensure STREAM_IMPL is defined. (strm_base_init): Initialize ud member of struct strm_base using utf8_decoder_initializer. (struct stdio_handle, struct byte_input, struct string_out): Remove member ud. (stdio_switch, stdio_seek, stdio_get_char, stdio_get_byte, stdio_unget_byte, stdio_fill_buf, tail_strategy, byte_in_get_char, string_out_byte_flush): Refer to new ud member in base. (make_stdio_stream_common, make_string_byte_input_stream, make_string_output_stream): No need to call utf8_decoder_init since strm_base_init takes care of it. * buf.c, gzio.c, hash.c, lib.c, parser.c, regex.c, socket.c, struct.c, strudel.c, syslog.c, tree.c: Move #include "utf8.h" above "stream.h", or in some cases add it. Define STREAM_IMPL before #include "stream.h".
-rw-r--r--buf.c4
-rw-r--r--gzio.c3
-rw-r--r--hash.c3
-rw-r--r--lib.c3
-rw-r--r--parser.c1
-rw-r--r--regex.c2
-rw-r--r--socket.c3
-rw-r--r--stream.c35
-rw-r--r--stream.h22
-rw-r--r--struct.c3
-rw-r--r--strudel.c2
-rw-r--r--syslog.c3
-rw-r--r--tree.c3
-rw-r--r--utf8.h2
14 files changed, 59 insertions, 30 deletions
diff --git a/buf.c b/buf.c
index 98e3863a..94472e46 100644
--- a/buf.c
+++ b/buf.c
@@ -34,6 +34,7 @@
#include <stdarg.h>
#include <signal.h>
#include <stdio.h>
+#include <wchar.h>
#include "config.h"
#if HAVE_ZLIB
#include <zlib.h>
@@ -44,9 +45,10 @@
#include "signal.h"
#include "unwind.h"
#include "eval.h"
+#include "utf8.h"
+#define STREAM_IMPL
#include "stream.h"
#include "arith.h"
-#include "utf8.h"
#include "txr.h"
#include "buf.h"
diff --git a/gzio.c b/gzio.c
index 724d6b56..bda0bd04 100644
--- a/gzio.c
+++ b/gzio.c
@@ -40,10 +40,11 @@
#endif
#include "alloca.h"
#include "lib.h"
+#include "utf8.h"
+#define STREAM_IMPL
#include "stream.h"
#include "gc.h"
#include "args.h"
-#include "utf8.h"
#include "eval.h"
#include "signal.h"
#include "unwind.h"
diff --git a/hash.c b/hash.c
index c0188a38..9751ab1d 100644
--- a/hash.c
+++ b/hash.c
@@ -33,6 +33,7 @@
#include <string.h>
#include <limits.h>
#include <signal.h>
+#include <wchar.h>
#include "config.h"
#include "alloca.h"
#if HAVE_UNISTD_H
@@ -44,6 +45,8 @@
#include "txr.h"
#include "signal.h"
#include "unwind.h"
+#include "utf8.h"
+#define STREAM_IMPL
#include "stream.h"
#include "eval.h"
#include "itypes.h"
diff --git a/lib.c b/lib.c
index 15639e44..cc7c389d 100644
--- a/lib.c
+++ b/lib.c
@@ -57,9 +57,10 @@
#include "signal.h"
#include "unwind.h"
#include "args.h"
+#define STREAM_IMPL
+#include "utf8.h"
#include "stream.h"
#include "strudel.h"
-#include "utf8.h"
#include "filter.h"
#include "eval.h"
#include "vm.h"
diff --git a/parser.c b/parser.c
index d83fbd97..adc67af7 100644
--- a/parser.c
+++ b/parser.c
@@ -55,6 +55,7 @@
#include "utf8.h"
#include "hash.h"
#include "eval.h"
+#define STREAM_IMPL
#include "stream.h"
#if HAVE_ZLIB
#include "gzio.h"
diff --git a/regex.c b/regex.c
index ebd7b2f4..fb5352cc 100644
--- a/regex.c
+++ b/regex.c
@@ -40,6 +40,8 @@
#include "parser.h"
#include "signal.h"
#include "unwind.h"
+#include "utf8.h"
+#define STREAM_IMPL
#include "stream.h"
#include "gc.h"
#include "eval.h"
diff --git a/socket.c b/socket.c
index 338679fe..495b6768 100644
--- a/socket.c
+++ b/socket.c
@@ -52,9 +52,10 @@
#include <netinet/in.h>
#include <netinet/tcp.h>
#include "lib.h"
+#include "utf8.h"
+#define STREAM_IMPL
#include "stream.h"
#include "signal.h"
-#include "utf8.h"
#include "unwind.h"
#include "gc.h"
#include "eval.h"
diff --git a/stream.c b/stream.c
index 6459ffdf..18b13b3c 100644
--- a/stream.c
+++ b/stream.c
@@ -68,8 +68,9 @@
#include "unwind.h"
#include "args.h"
#include "sysif.h"
-#include "stream.h"
#include "utf8.h"
+#define STREAM_IMPL
+#include "stream.h"
#include "eval.h"
#include "regex.h"
#include "txr.h"
@@ -119,7 +120,9 @@ static val shell, shell_arg;
void strm_base_init(struct strm_base *s)
{
- static struct strm_base init = { indent_off, 60, 10, 0, 0, 0, 0, 0, nil, 0 };
+ static struct strm_base init = {
+ indent_off, 60, 10, 0, 0, 0, 0, 0, nil, 0, utf8_decoder_initializer
+ };
*s = init;
}
@@ -518,7 +521,6 @@ struct stdio_handle {
FILE *f;
val descr;
val unget_c;
- utf8_decoder_t ud;
val err;
char *buf;
#if HAVE_FORK_STUFF
@@ -672,7 +674,7 @@ static void stdio_switch(struct stdio_handle *h, enum stdio_op op)
}
#endif
if (h->last_op != stdio_none)
- utf8_decoder_init(&h->ud);
+ utf8_decoder_init(&h->a.ud);
h->last_op = op;
}
}
@@ -751,7 +753,7 @@ static val stdio_seek(val stream, val offset, enum strm_whence whence)
return stdio_ftell(h->f);
} else {
if (stdio_fseek(h->f, offset, whence)) {
- utf8_decoder_init(&h->ud);
+ utf8_decoder_init(&h->a.ud);
h->unget_c = nil;
return t;
}
@@ -897,7 +899,7 @@ static val stdio_get_char(val stream)
if (ch == 0)
ch = 0xDC00;
} else {
- ch = utf8_decode(&h->ud, stdio_get_char_callback,
+ ch = utf8_decode(&h->a.ud, stdio_get_char_callback,
coerce(mem_t *, h->f));
}
@@ -915,7 +917,7 @@ static val stdio_get_byte(val stream)
lit("get-byte: ~s: pushed-back characters prevent byte reads"),
stream, nao);
} else {
- int ch = utf8_getc(&h->ud);
+ int ch = utf8_getc(&h->a.ud);
if (ch != EOF)
return num_fast(ch);
@@ -945,7 +947,7 @@ static val stdio_unget_byte(val stream, int byte)
lit("unget-byte: ~s: previously pushed chars are in the way"),
stream, nao);
} else {
- int uch = utf8_ungetc(&h->ud, byte);
+ int uch = utf8_ungetc(&h->a.ud, byte);
return (uch == EOF)
? uw_throwf(file_error_s,
lit("unget-byte: ~s: out of space pushing ~s"),
@@ -982,7 +984,7 @@ static ucnum stdio_fill_buf(val stream, mem_t *ptr, ucnum len, ucnum pos)
if (pos >= len)
return len;
- while (pos < len && (ch = utf8_getc(&h->ud)) != EOF)
+ while (pos < len && (ch = utf8_getc(&h->a.ud)) != EOF)
ptr[pos++] = ch;
if (pos < len) {
@@ -1312,7 +1314,7 @@ static void tail_strategy(val stream, unsigned long *state)
return;
}
- utf8_decoder_init(&h->ud);
+ utf8_decoder_init(&h->a.ud);
}
}
@@ -1750,7 +1752,6 @@ static val make_stdio_stream_common(FILE *f, val descr, struct cobj_ops *ops)
h->f = f;
h->descr = descr;
h->unget_c = nil;
- utf8_decoder_init(&h->ud);
h->err = nil;
h->buf = 0;
h->pid = 0;
@@ -2081,7 +2082,6 @@ struct byte_input {
unsigned char *buf;
size_t size;
size_t index;
- utf8_decoder_t ud;
};
struct byte_input_ungetch {
@@ -2107,11 +2107,11 @@ static int byte_in_get_char_callback(mem_t *ctx)
static val byte_in_get_char(val stream)
{
struct byte_input *bi = coerce(struct byte_input *, stream->co.handle);
- wint_t wch = utf8_decode(&bi->ud, byte_in_get_char_callback,
+ wint_t wch = utf8_decode(&bi->a.ud, byte_in_get_char_callback,
coerce(mem_t *, bi));
int ch;
- while ((ch = utf8_getc(&bi->ud)) != EOF)
+ while ((ch = utf8_getc(&bi->a.ud)) != EOF)
if (bi->index > 0)
bi->buf[--bi->index] = ch;
@@ -2209,7 +2209,6 @@ val make_string_byte_input_stream(val string)
strm_base_init(&bi->a);
bi->buf = utf8_dup_to_buf(wstring, &bi->size, 0);
bi->index = 0;
- utf8_decoder_init(&bi->ud);
return cobj(coerce(mem_t *, bi), stream_cls, &byte_in_ops.cobj_ops);
}
}
@@ -2356,7 +2355,6 @@ struct string_out {
wchar_t *buf;
size_t size;
size_t fill;
- utf8_decoder_t ud;
unsigned char byte_buf[4];
int head, tail;
};
@@ -2392,13 +2390,13 @@ static val string_out_byte_flush(struct string_out *so, val stream)
val result = nil;
if (so->tail < so->head) {
- wint_t ch = utf8_decode(&so->ud, string_out_byte_callback,
+ wint_t ch = utf8_decode(&so->a.ud, string_out_byte_callback,
coerce(mem_t *, so));
int remaining = so->head - so->tail;
if (remaining != 0)
memmove(so->byte_buf, so->byte_buf + so->tail, remaining);
so->head = so->tail = remaining;
- utf8_decoder_init(&so->ud);
+ utf8_decoder_init(&so->a.ud);
if (ch == WEOF)
internal_error("unexpected WEOF from utf8_decode");
result = string_out_put_char(stream, chr(ch));
@@ -2492,7 +2490,6 @@ val make_string_output_stream(void)
so->buf = chk_wmalloc(so->size);
so->fill = 0;
so->buf[0] = 0;
- utf8_decoder_init(&so->ud);
so->head = so->tail = 0;
return cobj(coerce(mem_t *, so), stream_cls, &string_out_ops.cobj_ops);
}
diff --git a/stream.h b/stream.h
index 9adebf2e..65f0a336 100644
--- a/stream.h
+++ b/stream.h
@@ -26,6 +26,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#ifdef STREAM_IMPL
+
enum strm_whence {
strm_start = SEEK_SET,
strm_cur = SEEK_CUR,
@@ -58,6 +60,7 @@ struct strm_base {
cnum max_depth;
val close_result;
struct strm_ctx *ctx;
+ utf8_decoder_t ud;
};
struct strm_ops {
@@ -128,12 +131,6 @@ struct stdio_mode {
#define stdio_mode_init_r { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, { { 0 } }, -1 }
#define stdio_mode_init_rpb { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, { { 0 } }, -1 }
-#define std_input (deref(lookup_var_l(nil, stdin_s)))
-#define std_output (deref(lookup_var_l(nil, stdout_s)))
-#define std_debug (deref(lookup_var_l(nil, stddebug_s)))
-#define std_error (deref(lookup_var_l(nil, stderr_s)))
-#define std_null (deref(lookup_var_l(nil, stdnull_s)))
-
enum json_fmt {
json_fmt_default,
json_fmt_standard
@@ -144,6 +141,14 @@ struct json_opts {
unsigned type : 1;
};
+#endif
+
+#define std_input (deref(lookup_var_l(nil, stdin_s)))
+#define std_output (deref(lookup_var_l(nil, stdout_s)))
+#define std_debug (deref(lookup_var_l(nil, stddebug_s)))
+#define std_error (deref(lookup_var_l(nil, stderr_s)))
+#define std_null (deref(lookup_var_l(nil, stdnull_s)))
+
loc lookup_var_l(val env, val sym);
extern val from_start_k, from_current_k, from_end_k;
@@ -173,6 +178,8 @@ extern wchar_t path_var_sep_char;
extern val top_stderr;
+#ifdef STREAM_IMPL
+
extern struct cobj_class *stream_cls, *stdio_stream_cls;
void strm_base_init(struct strm_base *s);
@@ -196,6 +203,9 @@ val make_stdio_stream(FILE *, val descr);
val make_tail_stream(FILE *, val descr);
val pipe_close_status_helper(val stream, val throw_on_error,
int status, val self);
+
+#endif
+
val stream_fd(val stream);
#if HAVE_SOCKETS
val make_sock_stream(FILE *f, val family, val type);
diff --git a/struct.c b/struct.c
index 8122cf4e..ec47a98d 100644
--- a/struct.c
+++ b/struct.c
@@ -34,6 +34,7 @@
#include <limits.h>
#include <signal.h>
#include <assert.h>
+#include <wchar.h>
#include "config.h"
#include "alloca.h"
#include "lib.h"
@@ -41,6 +42,8 @@
#include "eval.h"
#include "signal.h"
#include "unwind.h"
+#include "utf8.h"
+#define STREAM_IMPL
#include "stream.h"
#include "gc.h"
#include "args.h"
diff --git a/strudel.c b/strudel.c
index 406beae3..0ddb33ff 100644
--- a/strudel.c
+++ b/strudel.c
@@ -31,6 +31,8 @@
#include <wchar.h>
#include "config.h"
#include "lib.h"
+#include "utf8.h"
+#define STREAM_IMPL
#include "stream.h"
#include "gc.h"
#include "eval.h"
diff --git a/syslog.c b/syslog.c
index 5866ca0d..ae639510 100644
--- a/syslog.c
+++ b/syslog.c
@@ -35,10 +35,11 @@
#include "config.h"
#include "alloca.h"
#include "lib.h"
+#include "utf8.h"
+#define STREAM_IMPL
#include "stream.h"
#include "gc.h"
#include "args.h"
-#include "utf8.h"
#include "eval.h"
#include "syslog.h"
diff --git a/tree.c b/tree.c
index 37075031..092c9042 100644
--- a/tree.c
+++ b/tree.c
@@ -33,12 +33,15 @@
#include <string.h>
#include <limits.h>
#include <signal.h>
+#include <wchar.h>
#include "config.h"
#include "alloca.h"
#include "lib.h"
#include "gc.h"
#include "signal.h"
#include "unwind.h"
+#include "utf8.h"
+#define STREAM_IMPL
#include "stream.h"
#include "eval.h"
#include "hash.h"
diff --git a/utf8.h b/utf8.h
index f861bd39..b0985613 100644
--- a/utf8.h
+++ b/utf8.h
@@ -47,6 +47,8 @@ typedef struct utf8_decoder {
int buf[8];
} utf8_decoder_t;
+#define utf8_decoder_initializer { utf8_init, 0, 0, 0, 0, 0, 0, { 0 } }
+
int utf8_encode(wchar_t, int (*put)(int ch, mem_t *ctx), mem_t *ctx);
void utf8_decoder_init(utf8_decoder_t *);
wint_t utf8_decode(utf8_decoder_t *,int (*get)(mem_t *ctx), mem_t *ctx);