summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2025-01-24 21:14:59 -0800
committerKaz Kylheku <kaz@kylheku.com>2025-01-24 21:14:59 -0800
commit17cfe8242b4c920f713edf460d44939da69e05cd (patch)
tree02222bb0947b5bc5b615fc0c7088d82239663176
parent31540ce793f1aa551e1df3054f37fb3221618361 (diff)
downloadtxr-17cfe8242b4c920f713edf460d44939da69e05cd.tar.gz
txr-17cfe8242b4c920f713edf460d44939da69e05cd.tar.bz2
txr-17cfe8242b4c920f713edf460d44939da69e05cd.zip
get-csv: bugfix: return nil on EOF.
* stream.c (get_csv): Let's add a new state init. If get_char returns nil and we are in the init state, let's bail to a nil return. While we are at it, let's not allocate the record or string until we read at least one character. If we read a character in the init state, let's allocate those two objects, and then change to the rfield state and fall through to it to handle the character. * tests/010/csv.tl: Fix one incorrect test: (tocsv "") now returns nil, as it should. Add tests for multiple record extraction, also covering missing line termination on the last record as well as CR-LF termination. * txr.1: Documented nil return conditions.
-rw-r--r--stream.c12
-rw-r--r--tests/010/csv.tl24
-rw-r--r--txr.19
3 files changed, 40 insertions, 5 deletions
diff --git a/stream.c b/stream.c
index 7701ba39..20e5c933 100644
--- a/stream.c
+++ b/stream.c
@@ -5429,9 +5429,8 @@ val get_csv(val source_opt)
if3(stringp(source_opt),
make_string_input_stream(source_opt),
source_opt));
- val record = vector(zero, nil);
- val field = mkstring(zero, chr(' '));
- enum { rfield, qfield, quot } state = rfield;
+ val record = nil, field = nil;
+ enum { init, rfield, qfield, quot } state = init;
int done = 0;
while (!done) {
@@ -5444,11 +5443,18 @@ val get_csv(val source_opt)
else if (ch2)
unget_char(ch2, source);
} else if (ch == nil) {
+ if (state == init)
+ return nil;
vec_push(record, field);
break;
}
switch (state) {
+ case init:
+ record = vector(zero, nil);
+ field = mkstring(zero, chr(' '));
+ state = rfield;
+ /* fallthrough */
case rfield:
switch (c_chr(ch)) {
case '\n':
diff --git a/tests/010/csv.tl b/tests/010/csv.tl
index 6c93b6c8..d8561c8c 100644
--- a/tests/010/csv.tl
+++ b/tests/010/csv.tl
@@ -13,7 +13,7 @@
data expected))))
(mtest-csv
- "" #("")
+ "" nil
"," #("" "")
",," #("" "" ""))
@@ -183,3 +183,25 @@
#("a\nb" "c#d" "e,f") "#a\nb#,#c##d#,#e,f#\n"
#("a\n#,b") "#a\n##,b#\n"
#("a#\n,b\n") "#a##\n,b\n#\n")
+
+(mtest
+ (with-in-string-stream (s "a,b,c\nd,e,f\r\n") (list (get-csv s) (get-csv s) (get-csv s)))
+ (#("a" "b" "c") #("d" "e" "f") nil)
+ (with-in-string-stream (s "a,b,c\nd,e,f\n") (list (get-csv s) (get-csv s) (get-csv s)))
+ (#("a" "b" "c") #("d" "e" "f") nil)
+ (with-in-string-stream (s "a,b,c\nd,e,f") (list (get-csv s) (get-csv s) (get-csv s)))
+ (#("a" "b" "c") #("d" "e" "f") nil)
+ (with-in-string-stream (s "a,b,c\r\n") (list (get-csv s) (get-csv s) (get-csv s)))
+ (#("a" "b" "c") nil nil)
+ (with-in-string-stream (s "a,b,c\n") (list (get-csv s) (get-csv s) (get-csv s)))
+ (#("a" "b" "c") nil nil)
+ (with-in-string-stream (s "a,b,c") (list (get-csv s) (get-csv s) (get-csv s)))
+ (#("a" "b" "c") nil nil)
+ (with-in-string-stream (s "") (list (get-csv s) (get-csv s) (get-csv s)))
+ (nil nil nil)
+ (with-in-string-stream (s "\r") (list (get-csv s) (get-csv s) (get-csv s)))
+ (#("\r") nil nil)
+ (with-in-string-stream (s "\r\n") (list (get-csv s) (get-csv s) (get-csv s)))
+ (#("") nil nil)
+ (with-in-string-stream (s "\n") (list (get-csv s) (get-csv s) (get-csv s)))
+ (#("") nil nil))
diff --git a/txr.1 b/txr.1
index 90beb5bf..68b31ff1 100644
--- a/txr.1
+++ b/txr.1
@@ -85142,7 +85142,14 @@ followed by a comma.
The
.code get-csv
-function does not recognize or diagnose any errors; it extracts the
+function reads characters from the source stream as if using the
+.code get-char
+function.
+If at least one character is read from the input source, a record shall be returned.
+If no characters can be read due to end-of-file, it returns
+.codn nil .
+
+The function does not recognize or diagnose any errors; it extracts the
maximal prefix of the input source which constitutes a valid CSV record.
Characters not belonging to the CSV record remain in the stream.
Multiple calls to