diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2025-01-24 21:14:59 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2025-01-24 21:14:59 -0800 |
commit | 17cfe8242b4c920f713edf460d44939da69e05cd (patch) | |
tree | 02222bb0947b5bc5b615fc0c7088d82239663176 | |
parent | 31540ce793f1aa551e1df3054f37fb3221618361 (diff) | |
download | txr-17cfe8242b4c920f713edf460d44939da69e05cd.tar.gz txr-17cfe8242b4c920f713edf460d44939da69e05cd.tar.bz2 txr-17cfe8242b4c920f713edf460d44939da69e05cd.zip |
get-csv: bugfix: return nil on EOF.
* stream.c (get_csv): Let's add a new state init. If get_char
returns nil and we are in the init state, let's bail to a
nil return. While we are at it, let's not allocate the record
or string until we read at least one character. If we read
a character in the init state, let's allocate those two
objects, and then change to the rfield state and fall through
to it to handle the character.
* tests/010/csv.tl: Fix one incorrect test: (tocsv "") now
returns nil, as it should. Add tests for multiple record
extraction, also covering missing line termination on the last
record as well as CR-LF termination.
* txr.1: Documented nil return conditions.
-rw-r--r-- | stream.c | 12 | ||||
-rw-r--r-- | tests/010/csv.tl | 24 | ||||
-rw-r--r-- | txr.1 | 9 |
3 files changed, 40 insertions, 5 deletions
@@ -5429,9 +5429,8 @@ val get_csv(val source_opt) if3(stringp(source_opt), make_string_input_stream(source_opt), source_opt)); - val record = vector(zero, nil); - val field = mkstring(zero, chr(' ')); - enum { rfield, qfield, quot } state = rfield; + val record = nil, field = nil; + enum { init, rfield, qfield, quot } state = init; int done = 0; while (!done) { @@ -5444,11 +5443,18 @@ val get_csv(val source_opt) else if (ch2) unget_char(ch2, source); } else if (ch == nil) { + if (state == init) + return nil; vec_push(record, field); break; } switch (state) { + case init: + record = vector(zero, nil); + field = mkstring(zero, chr(' ')); + state = rfield; + /* fallthrough */ case rfield: switch (c_chr(ch)) { case '\n': diff --git a/tests/010/csv.tl b/tests/010/csv.tl index 6c93b6c8..d8561c8c 100644 --- a/tests/010/csv.tl +++ b/tests/010/csv.tl @@ -13,7 +13,7 @@ data expected)))) (mtest-csv - "" #("") + "" nil "," #("" "") ",," #("" "" "")) @@ -183,3 +183,25 @@ #("a\nb" "c#d" "e,f") "#a\nb#,#c##d#,#e,f#\n" #("a\n#,b") "#a\n##,b#\n" #("a#\n,b\n") "#a##\n,b\n#\n") + +(mtest + (with-in-string-stream (s "a,b,c\nd,e,f\r\n") (list (get-csv s) (get-csv s) (get-csv s))) + (#("a" "b" "c") #("d" "e" "f") nil) + (with-in-string-stream (s "a,b,c\nd,e,f\n") (list (get-csv s) (get-csv s) (get-csv s))) + (#("a" "b" "c") #("d" "e" "f") nil) + (with-in-string-stream (s "a,b,c\nd,e,f") (list (get-csv s) (get-csv s) (get-csv s))) + (#("a" "b" "c") #("d" "e" "f") nil) + (with-in-string-stream (s "a,b,c\r\n") (list (get-csv s) (get-csv s) (get-csv s))) + (#("a" "b" "c") nil nil) + (with-in-string-stream (s "a,b,c\n") (list (get-csv s) (get-csv s) (get-csv s))) + (#("a" "b" "c") nil nil) + (with-in-string-stream (s "a,b,c") (list (get-csv s) (get-csv s) (get-csv s))) + (#("a" "b" "c") nil nil) + (with-in-string-stream (s "") (list (get-csv s) (get-csv s) (get-csv s))) + (nil nil nil) + (with-in-string-stream (s "\r") (list (get-csv s) (get-csv s) (get-csv s))) + (#("\r") nil nil) + 
(with-in-string-stream (s "\r\n") (list (get-csv s) (get-csv s) (get-csv s))) + (#("") nil nil) + (with-in-string-stream (s "\n") (list (get-csv s) (get-csv s) (get-csv s))) + (#("") nil nil)) @@ -85142,7 +85142,14 @@ followed by a comma. The .code get-csv -function does not recognize or diagnose any errors; it extracts the +function reads characters from the source stream as if using the +.code get-char +function. +If at least one character is read from the input source, a record shall be returned. +If no characters can be read due to end-of-file, it returns +.codn nil . + +The function does not recognize or diagnose any errors; it extracts the maximal prefix of the input source which constitutes a valid CSV record. Characters not belonging to the CSV record remain in the stream. Multiple calls to |