From 679975d45faa07632f063ec01a5c963ffdcc7449 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Sat, 29 May 2021 13:03:47 -0700 Subject: json: functions put-json and put-jsonl. * eval.c (eval_init): Register put-json and put-jsonl intrinsics. * lib.c (out_json_str): Do not output the U+DC01 to U+DCFF code points by masking them and using put_byte. This is unnecessary; if we just send them as-is to the text stream, the UTF-8 encoder does that for us. (put_json, put_jsonl): New functions. * lib.h (put_json, put_jsonl): Declared. * txr.1: Documented. The bulk of tojson is moved under the descriptions of these new functions, and elsewhere where the document pointed to tojson for more information, it now points to put-json. More detailed description of character treatment is given. * share/txr/stdlib/doc-syms.tl: Updated. --- eval.c | 2 + lib.c | 24 ++++++++- lib.h | 2 + share/txr/stdlib/doc-syms.tl | 2 + txr.1 | 114 ++++++++++++++++++++++++++++++++++--------- 5 files changed, 118 insertions(+), 26 deletions(-) diff --git a/eval.c b/eval.c index ca7f09f5..b9055404 100644 --- a/eval.c +++ b/eval.c @@ -6875,6 +6875,8 @@ void eval_init(void) reg_fun(intern(lit("pprinl"), user_package), func_n2o(pprinl, 1)); reg_fun(intern(lit("tprint"), user_package), func_n2o(tprint, 1)); reg_fun(intern(lit("tojson"), user_package), func_n2o(tojson, 1)); + reg_fun(intern(lit("put-json"), user_package), func_n3o(put_json, 1)); + reg_fun(intern(lit("put-jsonl"), user_package), func_n3o(put_jsonl, 1)); reg_fun(intern(lit("display-width"), user_package), func_n1(display_width)); reg_fun(intern(lit("fmt-simple"), system_package), func_n5o(fmt_simple, 1)); diff --git a/lib.c b/lib.c index 1d2c65b7..26966cbb 100644 --- a/lib.c +++ b/lib.c @@ -12609,8 +12609,6 @@ static void out_json_str(val str, val out) ch == 0xFFFE || ch == 0xFFFF) { format(out, lit("\\u~,04X"), chr(ch), nao); - } else if (ch >= 0xDC01 && ch < 0xDD00) { - put_byte(num_fast(ch & 0xFF), out); } else if (ch >= 0xFFFF) { wchar_t 
c20 = ch - 0x10000; wchar_t sg0 = 0xD800 + ((c20 >> 10) & 0x3FF); @@ -13459,6 +13457,28 @@ val tostringp(val obj) return get_string_from_stream(ss); } +val put_json(val obj, val stream_in, val flat) +{ + val stream = default_arg(stream_in, std_output); + + if (default_null_arg(flat)) { + out_json_rec(obj, stream, 0); + } else { + val imode = set_indent_mode(stream, num_fast(indent_foff)); + out_json_rec(obj, stream, 0); + set_indent_mode(stream, imode); + } + + return t; +} + +val put_jsonl(val obj, val stream, val flat) +{ + put_json(obj, stream, flat); + put_char(chr('\n'), stream); + return t; +} + val tojson(val obj, val flat) { val ss = make_string_output_stream(); diff --git a/lib.h b/lib.h index 1d1ee673..a3fe1dd4 100644 --- a/lib.h +++ b/lib.h @@ -1206,6 +1206,8 @@ val print(val obj, val stream, val pretty); val pprint(val obj, val stream); val tostring(val obj); val tostringp(val obj); +val put_json(val obj, val stream, val flat); +val put_jsonl(val obj, val stream, val flat); val tojson(val obj, val flat); val display_width(val obj); #if !HAVE_SETENV diff --git a/share/txr/stdlib/doc-syms.tl b/share/txr/stdlib/doc-syms.tl index 3f77d0d7..7f3dda77 100644 --- a/share/txr/stdlib/doc-syms.tl +++ b/share/txr/stdlib/doc-syms.tl @@ -1419,6 +1419,8 @@ ("put-byte" "D-002E") ("put-carray" "N-00737951") ("put-char" "D-0003") + ("put-json" "N-009C27EF") + ("put-jsonl" "N-009C27EF") ("put-line" "N-012163C3") ("put-lines" "N-0367B282") ("put-obj" "N-025DB229") diff --git a/txr.1 b/txr.1 index cf692fd5..1f764f6b 100644 --- a/txr.1 +++ b/txr.1 @@ -12420,7 +12420,7 @@ expression is evaluated. The following remarks indicate special treatment and extensions in the processing of JSON. Similar remarks regarding the production of JSON are given under the -.code tojson +.code put-json function. When an invalid UTF-8 byte is encountered inside a JSON string, its value is @@ -71843,16 +71843,25 @@ etc. 
.SS* Data Interchange Support -.coNP Function @ tojson +.coNP Functions @ put-json and @ put-jsonl .synb -.mets (tojson < obj <> [ flat-p ]) +.mets (put-json < obj >> [ stream <> [ flat-p ]]) +.mets (put-jsonl < obj >> [ stream <> [ flat-p ]]) .syne .desc The -.code tojson +.code put-json function converts .meta obj -into JSON notation, returned as a character string. +into JSON notation, and writes that notation into +.meta stream +as a sequence of characters. + +If +.meta stream +is an external stream such as a file stream, then the JSON is +rendered by conversion of the characters into UTF-8, in the usual +manner characteristic of those streams. The behavior is unspecified if .meta obj @@ -71891,37 +71900,94 @@ is produced, since RFC 8259 requires JSON object keys to be strings. If the .code flat-p argument is present and has a true value, then the JSON is generated -without any line breaks or indentation. +without any line breaks or indentation. Otherwise, the JSON output is subject +to such formatting. -Otherwise, the JSON is potentially subject to such formatting. - -Even if the JSON data contains line breaks, it does not end in a line break. +The difference between +.code put-json +and +.code put-jsonl +is that the latter emits a newline character after the JSON output. -When a JSON string is output, any code points U+DC01 through U+DCFF occurring -in that string are assumed to denote raw bytes to be output, without -escaping. The code point U+DC00 produces the -.code "\eu0000" -escape syntax. This behavior is different from \*(TL literals, which, on -output, simply render these code points using -.code "\ex" -escape sequences. Rationale: this is because JSON is considered an external format. -The requirements are intended to reproduce the original byte sequence, if -possible, rather than JSON syntax which will produce the same \*(TX object -if read back by \*(TX. +When a string object is output as JSON string syntax, the following rules apply: +.RS +.IP 1.
+The characters +.code \e +(backslash, reverse solidus) and +.code \(dq +(double quote) +are preceded by a backslash escape. +.IP 2. +The characters U+0008 (BS), U+0009 (TAB), U+000A (LF), U+000C (FF) and +U+000D (CR) are rendered as, respectively, +.codn \eb , +.codn \et , +.codn \en , +.code \ef +and +.codn \er . +.IP 3. +If the character sequence +.code "</" +.\" [NOTE: the remainder of rule 3, any subsequent rules and the closing .RE of this list were lost when this patch text was extracted; consult the original commit for the missing lines] +.coNP Function @ tojson +.synb +.mets (tojson < obj <> [ flat-p ]) +.syne +.desc +The +.code tojson +function converts +.meta obj +into JSON notation, returned as a character string. + +The function can be understood as constructing a string output stream, +calling the +.code put-json +function to write the object into that stream, +and then retrieving and returning the constructed string. + +The +.meta flat-p +argument is passed to +.codn put-json . .coNP Function @ get-json .synb -- cgit v1.2.3