summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2025-01-24 20:49:52 -0800
committerKaz Kylheku <kaz@kylheku.com>2025-01-24 20:49:52 -0800
commit31540ce793f1aa551e1df3054f37fb3221618361 (patch)
treececace480fad4edf6c08e8028bf8043c61103ed9
parentaf458b10e3bc6709e68ef7d0ca1d2aabbeada414 (diff)
downloadtxr-31540ce793f1aa551e1df3054f37fb3221618361.tar.gz
txr-31540ce793f1aa551e1df3054f37fb3221618361.tar.bz2
txr-31540ce793f1aa551e1df3054f37fb3221618361.zip
New functions for producing CSV.
* stream.c (put_csv, tocsv): New functions. (stream_init): put-csv and tocsv intrinsics registered. * stream.h (put_csv, tocsv): Declared. * tests/010/csv.tl (mtest-pcsv): New macro. New test cases. * txr.1: Documented.
-rw-r--r--stream.c42
-rw-r--r--stream.h2
-rw-r--r--tests/010/csv.tl19
-rw-r--r--txr.148
4 files changed, 111 insertions, 0 deletions
diff --git a/stream.c b/stream.c
index 421b70eb..7701ba39 100644
--- a/stream.c
+++ b/stream.c
@@ -5506,6 +5506,46 @@ val get_csv(val source_opt)
return record;
}
+val put_csv(val record, val stream_in) /* Write the fields of record to a stream as a single CSV line; always returns nil. */
+{
+ val self = lit("put-csv"); /* function name handed to seq_iter_init below */
+ val dest = if3(missingp(stream_in), std_output, stream_in); /* stream_in is optional: fall back to std_output when it is missing */
+ seq_iter_t rec_iter;
+ val field;
+ int comma = 0; /* set to 1 once the first field has been processed */
+
+ seq_iter_init(self, &rec_iter, record);
+
+ while (seq_get(&rec_iter, &field)) {
+ val str = tostringp(field); /* each field is written in its tostringp form */
+ if (comma)
+ put_char(chr(','), dest); /* separator goes before every field except the first */
+ comma = 1;
+ if (find(chr('"'), str, nil, nil)) { /* field contains a double quote: wrap it and escape the quotes */
+ put_char(chr('"'), dest);
+ put_string(str_esc(lit("\""), chr('"'), str), dest); /* double quote is both the escaped set and the escape character, so embedded quotes come out doubled */
+ put_char(chr('"'), dest);
+ } else if (break_str(str, lit(",\n\r"))) { /* no double quote, but a comma, newline or carriage return: wrap unchanged */
+ put_char(chr('"'), dest);
+ put_string(str, dest);
+ put_char(chr('"'), dest);
+ } else {
+ put_string(str, dest); /* nothing needs protecting: written bare */
+ }
+ }
+
+ put_char(chr('\n'), dest); /* record terminator, emitted unconditionally (also for an empty record) */
+
+ return nil;
+}
+
+val tocsv(val record) /* Return, as a string, the CSV line that put_csv writes for record. */
+{
+ val ss = make_string_output_stream(); /* string output stream that collects what put_csv writes */
+ put_csv(record, ss);
+ return get_string_from_stream(ss);
+}
+
val tmpfile_wrap(void)
{
val self = lit("tmpfile");
@@ -5782,6 +5822,8 @@ void stream_init(void)
reg_varl(intern(lit("indent-code"), user_package), num_fast(indent_code));
reg_varl(intern(lit("indent-foff"), user_package), num_fast(indent_foff));
reg_fun(intern(lit("get-csv"), user_package), func_n1o(get_csv, 0));
+ reg_fun(intern(lit("put-csv"), user_package), func_n2o(put_csv, 1));
+ reg_fun(intern(lit("tocsv"), user_package), func_n1(tocsv));
reg_fun(intern(lit("tmpfile"), user_package), func_n0(tmpfile_wrap));
#if HAVE_MKDTEMP
reg_fun(intern(lit("mkdtemp"), user_package), func_n1(mkdtemp_wrap));
diff --git a/stream.h b/stream.h
index 23ca546d..04de8138 100644
--- a/stream.h
+++ b/stream.h
@@ -286,6 +286,8 @@ val path_cat(val dir_name, val base_name);
val add_suffix(val name, val suffix);
val make_byte_input_stream(val obj);
val get_csv(val source_opt);
+val put_csv(val record, val stream_in);
+val tocsv(val record);
val iobuf_get(void);
void iobuf_put(val buf);
void iobuf_list_empty(void);
diff --git a/tests/010/csv.tl b/tests/010/csv.tl
index fffd8d0d..6c93b6c8 100644
--- a/tests/010/csv.tl
+++ b/tests/010/csv.tl
@@ -6,6 +6,12 @@
,(mapcar (op regsub "#" "\"") @2)))
data expected))))
+(defmacro mtest-pcsv (. pairs) ; arguments alternate: a record, then the expected tocsv output; # stands in for a double quote
+ (tree-bind (data expected) (flow pairs (tuples 2) transpose) ; regroup the flat argument list into a list of records and a list of expected strings
+ ^(mtest ,*(mappend (ret ^((tocsv ,(mapcar [iffi stringp (op regsub "#" "\"")] @1)) ; only string elements of a record get the # substitution
+ ,(regsub "#" "\"" @2)))
+ data expected))))
+
(mtest-csv
"" #("")
"," #("" "")
@@ -164,3 +170,16 @@
"#Hello, ##Bob##!#\r\n" #("Hello, #Bob#!")
"#Hello, ##Bob##!#\n" #("Hello, #Bob#!")
"#Hello, ##Bob##!#\r" #("Hello, #Bob#!\r"))
+
+(mtest-pcsv ; each line: input record, then the expected tocsv string; # is replaced by a double quote by the macro
+ #() "\n"
+ #("") "\n"
+ #(hello) "hello\n"
+ #(3.14) "3.14\n"
+ #(1.0 2 #\c) "1,2,c\n"
+ #(hello world) "hello,world\n"
+ #("foo" "bar") "foo,bar\n"
+ #(",") "#,#\n"
+ #("a\nb" "c#d" "e,f") "#a\nb#,#c##d#,#e,f#\n"
+ #("a\n#,b") "#a\n##,b#\n"
+ #("a#\n,b\n") "#a##\n,b\n#\n")
diff --git a/txr.1 b/txr.1
index 33e7dbf0..90beb5bf 100644
--- a/txr.1
+++ b/txr.1
@@ -85151,6 +85151,54 @@ for the same input stream given as
.meta source
extract consecutive CSV records.
+.coNP Functions @ put-csv and @ tocsv
+.synb
+.mets (put-csv < sequence <> [ stream ])
+.mets (tocsv << sequence )
+.syne
+.desc
+The
+.code put-csv
+and
+.code tocsv
+functions convert a sequence of values into a CSV ("comma-separated values")
+record, terminated by a line break. The CSV format mostly adheres to RFC 4180,
+with the following differences:
+
+Line breaks are represented as single NL characters, not CR-NL pairs.
+
+The character set is not restricted to ASCII; if the values contain characters
+beyond U+007E, or if control characters appear, they shall appear in the CSV
+output.
+
+Each value of
+.meta sequence
+is converted to a string as if by the
+.code tostringp
+function.
+
+If the resulting string contains ASCII double quote characters, each of those
+characters is replaced by a pair of double quote characters.
+
+If the resulting string contains ASCII double quotes, ASCII commas, newlines
+or carriage returns, then a leading and trailing double quote is added to it.
+
+The thus transformed strings are then combined into a CSV record
+by interposing ASCII commas between them, and appending a newline
+character.
+
+The
+.code put-csv
+function writes the resulting string to the destination stream which
+defaults to
+.codn *stdout* ,
+and returns
+.codn nil .
+
+The
+.code tocsv
+function returns the string.
+
.SH* FOREIGN FUNCTION INTERFACE
On platforms where it is supported, \*(TX provides a feature called the