diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2025-05-23 20:07:45 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2025-05-23 20:07:45 -0700 |
commit | 37d391a97d2efc8e3f2e84fe6accd8825faed03e (patch) | |
tree | 65bf4a6a0340ed7a9ed1c3f652bd5475c6c2bb2d | |
parent | a619222ee1eb25d48516f6cfcb848a17eab052fb (diff) | |
download | txr-37d391a97d2efc8e3f2e84fe6accd8825faed03e.tar.gz txr-37d391a97d2efc8e3f2e84fe6accd8825faed03e.tar.bz2 txr-37d391a97d2efc8e3f2e84fe6accd8825faed03e.zip |
streams: move utf8 decoder into strm_base.
This refactoring is required because some upcoming
hack work is going to depend on the assumption that
every stream object has a utf8_decoder_t.
* utf8.h (utf8_decoder_initializer): New macro, used when
initializing strm_base.
* stream.h: Some of the content is now hidden unless the
preprocessor symbol STREAM_IMPL is defined. This is motivated
by the fact that stream.h now depends on utf8.h.
When STREAM_IMPL is not defined, the bits that depend on
utf8.h are disabled and we don't have to touch files which
include "stream.h" which don't refer to those bits.
(struct strm_base): New member ud, of type utf8_decoder_t.
* stream.c: include "utf8.h" before "stream.h" and ensure
STREAM_IMPL is defined.
(strm_base_init): Initialize ud member of struct strm_base
using utf8_decoder_initializer.
(struct stdio_handle, struct byte_input, struct string_out):
Remove member ud.
(stdio_switch, stdio_seek, stdio_get_char, stdio_get_byte,
stdio_unget_byte, stdio_fill_buf, tail_strategy, byte_in_get_char,
string_out_byte_flush): Refer to new ud member in base.
(make_stdio_stream_common, make_string_byte_input_stream,
make_string_output_stream): No need to call utf8_decoder_init
since strm_base_init takes care of it.
* buf.c, gzio.c, hash.c, lib.c, parser.c, regex.c, socket.c,
struct.c, strudel.c, syslog.c, tree.c: Move #include "utf8.h"
above "stream.h", or in some cases add it. Define STREAM_IMPL
before #include "stream.h".
-rw-r--r-- | buf.c | 4 | ||||
-rw-r--r-- | gzio.c | 3 | ||||
-rw-r--r-- | hash.c | 3 | ||||
-rw-r--r-- | lib.c | 3 | ||||
-rw-r--r-- | parser.c | 1 | ||||
-rw-r--r-- | regex.c | 2 | ||||
-rw-r--r-- | socket.c | 3 | ||||
-rw-r--r-- | stream.c | 35 | ||||
-rw-r--r-- | stream.h | 22 | ||||
-rw-r--r-- | struct.c | 3 | ||||
-rw-r--r-- | strudel.c | 2 | ||||
-rw-r--r-- | syslog.c | 3 | ||||
-rw-r--r-- | tree.c | 3 | ||||
-rw-r--r-- | utf8.h | 2 |
14 files changed, 59 insertions, 30 deletions
@@ -34,6 +34,7 @@ #include <stdarg.h> #include <signal.h> #include <stdio.h> +#include <wchar.h> #include "config.h" #if HAVE_ZLIB #include <zlib.h> @@ -44,9 +45,10 @@ #include "signal.h" #include "unwind.h" #include "eval.h" +#include "utf8.h" +#define STREAM_IMPL #include "stream.h" #include "arith.h" -#include "utf8.h" #include "txr.h" #include "buf.h" @@ -40,10 +40,11 @@ #endif #include "alloca.h" #include "lib.h" +#include "utf8.h" +#define STREAM_IMPL #include "stream.h" #include "gc.h" #include "args.h" -#include "utf8.h" #include "eval.h" #include "signal.h" #include "unwind.h" @@ -33,6 +33,7 @@ #include <string.h> #include <limits.h> #include <signal.h> +#include <wchar.h> #include "config.h" #include "alloca.h" #if HAVE_UNISTD_H @@ -44,6 +45,8 @@ #include "txr.h" #include "signal.h" #include "unwind.h" +#include "utf8.h" +#define STREAM_IMPL #include "stream.h" #include "eval.h" #include "itypes.h" @@ -57,9 +57,10 @@ #include "signal.h" #include "unwind.h" #include "args.h" +#define STREAM_IMPL +#include "utf8.h" #include "stream.h" #include "strudel.h" -#include "utf8.h" #include "filter.h" #include "eval.h" #include "vm.h" @@ -55,6 +55,7 @@ #include "utf8.h" #include "hash.h" #include "eval.h" +#define STREAM_IMPL #include "stream.h" #if HAVE_ZLIB #include "gzio.h" @@ -40,6 +40,8 @@ #include "parser.h" #include "signal.h" #include "unwind.h" +#include "utf8.h" +#define STREAM_IMPL #include "stream.h" #include "gc.h" #include "eval.h" @@ -52,9 +52,10 @@ #include <netinet/in.h> #include <netinet/tcp.h> #include "lib.h" +#include "utf8.h" +#define STREAM_IMPL #include "stream.h" #include "signal.h" -#include "utf8.h" #include "unwind.h" #include "gc.h" #include "eval.h" @@ -68,8 +68,9 @@ #include "unwind.h" #include "args.h" #include "sysif.h" -#include "stream.h" #include "utf8.h" +#define STREAM_IMPL +#include "stream.h" #include "eval.h" #include "regex.h" #include "txr.h" @@ -119,7 +120,9 @@ static val shell, shell_arg; void strm_base_init(struct 
strm_base *s) { - static struct strm_base init = { indent_off, 60, 10, 0, 0, 0, 0, 0, nil, 0 }; + static struct strm_base init = { + indent_off, 60, 10, 0, 0, 0, 0, 0, nil, 0, utf8_decoder_initializer + }; *s = init; } @@ -518,7 +521,6 @@ struct stdio_handle { FILE *f; val descr; val unget_c; - utf8_decoder_t ud; val err; char *buf; #if HAVE_FORK_STUFF @@ -672,7 +674,7 @@ static void stdio_switch(struct stdio_handle *h, enum stdio_op op) } #endif if (h->last_op != stdio_none) - utf8_decoder_init(&h->ud); + utf8_decoder_init(&h->a.ud); h->last_op = op; } } @@ -751,7 +753,7 @@ static val stdio_seek(val stream, val offset, enum strm_whence whence) return stdio_ftell(h->f); } else { if (stdio_fseek(h->f, offset, whence)) { - utf8_decoder_init(&h->ud); + utf8_decoder_init(&h->a.ud); h->unget_c = nil; return t; } @@ -897,7 +899,7 @@ static val stdio_get_char(val stream) if (ch == 0) ch = 0xDC00; } else { - ch = utf8_decode(&h->ud, stdio_get_char_callback, + ch = utf8_decode(&h->a.ud, stdio_get_char_callback, coerce(mem_t *, h->f)); } @@ -915,7 +917,7 @@ static val stdio_get_byte(val stream) lit("get-byte: ~s: pushed-back characters prevent byte reads"), stream, nao); } else { - int ch = utf8_getc(&h->ud); + int ch = utf8_getc(&h->a.ud); if (ch != EOF) return num_fast(ch); @@ -945,7 +947,7 @@ static val stdio_unget_byte(val stream, int byte) lit("unget-byte: ~s: previously pushed chars are in the way"), stream, nao); } else { - int uch = utf8_ungetc(&h->ud, byte); + int uch = utf8_ungetc(&h->a.ud, byte); return (uch == EOF) ? 
uw_throwf(file_error_s, lit("unget-byte: ~s: out of space pushing ~s"), @@ -982,7 +984,7 @@ static ucnum stdio_fill_buf(val stream, mem_t *ptr, ucnum len, ucnum pos) if (pos >= len) return len; - while (pos < len && (ch = utf8_getc(&h->ud)) != EOF) + while (pos < len && (ch = utf8_getc(&h->a.ud)) != EOF) ptr[pos++] = ch; if (pos < len) { @@ -1312,7 +1314,7 @@ static void tail_strategy(val stream, unsigned long *state) return; } - utf8_decoder_init(&h->ud); + utf8_decoder_init(&h->a.ud); } } @@ -1750,7 +1752,6 @@ static val make_stdio_stream_common(FILE *f, val descr, struct cobj_ops *ops) h->f = f; h->descr = descr; h->unget_c = nil; - utf8_decoder_init(&h->ud); h->err = nil; h->buf = 0; h->pid = 0; @@ -2081,7 +2082,6 @@ struct byte_input { unsigned char *buf; size_t size; size_t index; - utf8_decoder_t ud; }; struct byte_input_ungetch { @@ -2107,11 +2107,11 @@ static int byte_in_get_char_callback(mem_t *ctx) static val byte_in_get_char(val stream) { struct byte_input *bi = coerce(struct byte_input *, stream->co.handle); - wint_t wch = utf8_decode(&bi->ud, byte_in_get_char_callback, + wint_t wch = utf8_decode(&bi->a.ud, byte_in_get_char_callback, coerce(mem_t *, bi)); int ch; - while ((ch = utf8_getc(&bi->ud)) != EOF) + while ((ch = utf8_getc(&bi->a.ud)) != EOF) if (bi->index > 0) bi->buf[--bi->index] = ch; @@ -2209,7 +2209,6 @@ val make_string_byte_input_stream(val string) strm_base_init(&bi->a); bi->buf = utf8_dup_to_buf(wstring, &bi->size, 0); bi->index = 0; - utf8_decoder_init(&bi->ud); return cobj(coerce(mem_t *, bi), stream_cls, &byte_in_ops.cobj_ops); } } @@ -2356,7 +2355,6 @@ struct string_out { wchar_t *buf; size_t size; size_t fill; - utf8_decoder_t ud; unsigned char byte_buf[4]; int head, tail; }; @@ -2392,13 +2390,13 @@ static val string_out_byte_flush(struct string_out *so, val stream) val result = nil; if (so->tail < so->head) { - wint_t ch = utf8_decode(&so->ud, string_out_byte_callback, + wint_t ch = utf8_decode(&so->a.ud, string_out_byte_callback, 
coerce(mem_t *, so)); int remaining = so->head - so->tail; if (remaining != 0) memmove(so->byte_buf, so->byte_buf + so->tail, remaining); so->head = so->tail = remaining; - utf8_decoder_init(&so->ud); + utf8_decoder_init(&so->a.ud); if (ch == WEOF) internal_error("unexpected WEOF from utf8_decode"); result = string_out_put_char(stream, chr(ch)); @@ -2492,7 +2490,6 @@ val make_string_output_stream(void) so->buf = chk_wmalloc(so->size); so->fill = 0; so->buf[0] = 0; - utf8_decoder_init(&so->ud); so->head = so->tail = 0; return cobj(coerce(mem_t *, so), stream_cls, &string_out_ops.cobj_ops); } @@ -26,6 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#ifdef STREAM_IMPL + enum strm_whence { strm_start = SEEK_SET, strm_cur = SEEK_CUR, @@ -58,6 +60,7 @@ struct strm_base { cnum max_depth; val close_result; struct strm_ctx *ctx; + utf8_decoder_t ud; }; struct strm_ops { @@ -128,12 +131,6 @@ struct stdio_mode { #define stdio_mode_init_r { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, { { 0 } }, -1 } #define stdio_mode_init_rpb { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, { { 0 } }, -1 } -#define std_input (deref(lookup_var_l(nil, stdin_s))) -#define std_output (deref(lookup_var_l(nil, stdout_s))) -#define std_debug (deref(lookup_var_l(nil, stddebug_s))) -#define std_error (deref(lookup_var_l(nil, stderr_s))) -#define std_null (deref(lookup_var_l(nil, stdnull_s))) - enum json_fmt { json_fmt_default, json_fmt_standard @@ -144,6 +141,14 @@ struct json_opts { unsigned type : 1; }; +#endif + +#define std_input (deref(lookup_var_l(nil, stdin_s))) +#define std_output (deref(lookup_var_l(nil, stdout_s))) +#define std_debug (deref(lookup_var_l(nil, stddebug_s))) +#define std_error (deref(lookup_var_l(nil, stderr_s))) +#define std_null (deref(lookup_var_l(nil, stdnull_s))) + loc lookup_var_l(val env, val sym); extern val from_start_k, from_current_k, from_end_k; @@ -173,6 +178,8 @@ extern wchar_t path_var_sep_char; extern val top_stderr; +#ifdef STREAM_IMPL + extern struct 
cobj_class *stream_cls, *stdio_stream_cls; void strm_base_init(struct strm_base *s); @@ -196,6 +203,9 @@ val make_stdio_stream(FILE *, val descr); val make_tail_stream(FILE *, val descr); val pipe_close_status_helper(val stream, val throw_on_error, int status, val self); + +#endif + val stream_fd(val stream); #if HAVE_SOCKETS val make_sock_stream(FILE *f, val family, val type); @@ -34,6 +34,7 @@ #include <limits.h> #include <signal.h> #include <assert.h> +#include <wchar.h> #include "config.h" #include "alloca.h" #include "lib.h" @@ -41,6 +42,8 @@ #include "eval.h" #include "signal.h" #include "unwind.h" +#include "utf8.h" +#define STREAM_IMPL #include "stream.h" #include "gc.h" #include "args.h" @@ -31,6 +31,8 @@ #include <wchar.h> #include "config.h" #include "lib.h" +#include "utf8.h" +#define STREAM_IMPL #include "stream.h" #include "gc.h" #include "eval.h" @@ -35,10 +35,11 @@ #include "config.h" #include "alloca.h" #include "lib.h" +#include "utf8.h" +#define STREAM_IMPL #include "stream.h" #include "gc.h" #include "args.h" -#include "utf8.h" #include "eval.h" #include "syslog.h" @@ -33,12 +33,15 @@ #include <string.h> #include <limits.h> #include <signal.h> +#include <wchar.h> #include "config.h" #include "alloca.h" #include "lib.h" #include "gc.h" #include "signal.h" #include "unwind.h" +#include "utf8.h" +#define STREAM_IMPL #include "stream.h" #include "eval.h" #include "hash.h" @@ -47,6 +47,8 @@ typedef struct utf8_decoder { int buf[8]; } utf8_decoder_t; +#define utf8_decoder_initializer { utf8_init, 0, 0, 0, 0, 0, 0, { 0 } } + int utf8_encode(wchar_t, int (*put)(int ch, mem_t *ctx), mem_t *ctx); void utf8_decoder_init(utf8_decoder_t *); wint_t utf8_decode(utf8_decoder_t *,int (*get)(mem_t *ctx), mem_t *ctx); |