diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2025-01-24 20:49:52 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2025-01-24 20:49:52 -0800 |
commit | 31540ce793f1aa551e1df3054f37fb3221618361 (patch) | |
tree | cecace480fad4edf6c08e8028bf8043c61103ed9 | |
parent | af458b10e3bc6709e68ef7d0ca1d2aabbeada414 (diff) | |
download | txr-31540ce793f1aa551e1df3054f37fb3221618361.tar.gz txr-31540ce793f1aa551e1df3054f37fb3221618361.tar.bz2 txr-31540ce793f1aa551e1df3054f37fb3221618361.zip |
New functions for producing CSV.
* stream.c (put_csv, tocsv): New functions.
(stream_init): put-csv and tocsv intrinsics registered.
* stream.h (put_csv, tocsv): Declared.
* tests/010/csv.tl (mtest-pcsv): New macro.
New test cases.
* txr.1: Documented.
-rw-r--r-- | stream.c | 42 | ||||
-rw-r--r-- | stream.h | 2 | ||||
-rw-r--r-- | tests/010/csv.tl | 19 | ||||
-rw-r--r-- | txr.1 | 48 |
4 files changed, 111 insertions, 0 deletions
@@ -5506,6 +5506,46 @@ val get_csv(val source_opt) return record; } +val put_csv(val record, val stream_in) +{ + val self = lit("put-csv"); + val dest = if3(missingp(stream_in), std_output, stream_in); + seq_iter_t rec_iter; + val field; + int comma = 0; + + seq_iter_init(self, &rec_iter, record); + + while (seq_get(&rec_iter, &field)) { + val str = tostringp(field); + if (comma) + put_char(chr(','), dest); + comma = 1; + if (find(chr('"'), str, nil, nil)) { + put_char(chr('"'), dest); + put_string(str_esc(lit("\""), chr('"'), str), dest); + put_char(chr('"'), dest); + } else if (break_str(str, lit(",\n\r"))) { + put_char(chr('"'), dest); + put_string(str, dest); + put_char(chr('"'), dest); + } else { + put_string(str, dest); + } + } + + put_char(chr('\n'), dest); + + return nil; +} + +val tocsv(val record) +{ + val ss = make_string_output_stream(); + put_csv(record, ss); + return get_string_from_stream(ss); +} + val tmpfile_wrap(void) { val self = lit("tmpfile"); @@ -5782,6 +5822,8 @@ void stream_init(void) reg_varl(intern(lit("indent-code"), user_package), num_fast(indent_code)); reg_varl(intern(lit("indent-foff"), user_package), num_fast(indent_foff)); reg_fun(intern(lit("get-csv"), user_package), func_n1o(get_csv, 0)); + reg_fun(intern(lit("put-csv"), user_package), func_n2o(put_csv, 1)); + reg_fun(intern(lit("tocsv"), user_package), func_n1(tocsv)); reg_fun(intern(lit("tmpfile"), user_package), func_n0(tmpfile_wrap)); #if HAVE_MKDTEMP reg_fun(intern(lit("mkdtemp"), user_package), func_n1(mkdtemp_wrap)); @@ -286,6 +286,8 @@ val path_cat(val dir_name, val base_name); val add_suffix(val name, val suffix); val make_byte_input_stream(val obj); val get_csv(val source_opt); +val put_csv(val record, val stream_in); +val tocsv(val record); val iobuf_get(void); void iobuf_put(val buf); void iobuf_list_empty(void); diff --git a/tests/010/csv.tl b/tests/010/csv.tl index fffd8d0d..6c93b6c8 100644 --- a/tests/010/csv.tl +++ b/tests/010/csv.tl @@ -6,6 +6,12 @@ ,(mapcar (op 
regsub "#" "\"") @2))) data expected)))) +(defmacro mtest-pcsv (. pairs) + (tree-bind (data expected) (flow pairs (tuples 2) transpose) + ^(mtest ,*(mappend (ret ^((tocsv ,(mapcar [iffi stringp (op regsub "#" "\"")] @1)) + ,(regsub "#" "\"" @2))) + data expected)))) + (mtest-csv "" #("") "," #("" "") @@ -164,3 +170,16 @@ "#Hello, ##Bob##!#\r\n" #("Hello, #Bob#!") "#Hello, ##Bob##!#\n" #("Hello, #Bob#!") "#Hello, ##Bob##!#\r" #("Hello, #Bob#!\r")) + +(mtest-pcsv + #() "\n" + #("") "\n" + #(hello) "hello\n" + #(3.14) "3.14\n" + #(1.0 2 #\c) "1,2,c\n" + #(hello world) "hello,world\n" + #("foo" "bar") "foo,bar\n" + #(",") "#,#\n" + #("a\nb" "c#d" "e,f") "#a\nb#,#c##d#,#e,f#\n" + #("a\n#,b") "#a\n##,b#\n" + #("a#\n,b\n") "#a##\n,b\n#\n") @@ -85151,6 +85151,54 @@ for the same input stream given as .meta source extract consecutive CSV records. +.coNP Functions @ put-csv and @ tocsv +.synb +.mets (put-csv < sequence <> [ stream ]) +.mets (tocsv << sequence ) +.syne +.desc +The +.code put-csv +and +.code tocsv +functions convert a sequence of values into a CSV ("comma-separated values") +record, terminated by a line break. The CSV format mostly adheres to RFC 4180, +with the following differences: + +Line breaks are represented as single NL characters, not CR-NL pairs. + +The character set is not restricted to ASCII; if the values contain characters +beyond U+007E, or if control characters appear, they shall appear in the CSV +output. + +Each value of +.meta sequence +is converted to a string as if by the +.code tostringp +function. + +If the resulting string contains ASCII double quote characters, each of those +characters is replaced by a pair of double quote characters. + +If the resulting string contains ASCII double quotes, ASCII commas, newlines or carriage returns, +then a leading and trailing double quote is added to it. + +The thus transformed strings are then combined into a CSV record +by interposing ASCII commas between them, and appending a newline +character. 
+ +The +.code put-csv +function writes the resulting string to the destination stream which +defaults to +.codn *stdout* , +and returns +.codn nil . + +The +.code tocsv +function returns the string. + .SH* FOREIGN FUNCTION INTERFACE On platforms where it is supported, \*(TX provides a feature called the |