From b111bddfcc737b7f4ab854ce823094bcc8a9de48 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 4 Mar 2021 01:10:01 -0800 Subject: lib: defend against locale-specific wcstod. The wcstod function has locale-specific behavior. It uses a locale-specific decimal separator character, which may not be the period. Though in TXR we never call setlocale in order to activate localization, it is a good idea to put in code to defend against this. If locale is ever unleashed on the code, it really botches our floating-point handling. However, let's keep that defensive logic disabled for now using the preprocessor. The strategy is to inquire about the locale's decimal character at startup. Then, in the flo_str function, we preprocess the input by converting the decimal period to the locale-specific character before calling wcstod. On the output side, we also deal with it in the format function; we call sprintf, and then convert the locale-specific character to period. I tested all this by temporarily introducing the setlocale call, and switching to a locale with a comma as the separator, getting make tests to pass, and doing some interactive testing. This is not going to receive coverage in the test suite. * lib.c (dec_point): New global variable. (flo_str): If dec_point isn't '.', we must copy the string into a local buffer, which we get from alloca, and edit any '.' that it contains to dec_point. (locale_init): New function; initializes dec_point. (init): Call locale_init. * lib.h (dec_point): Declared. * stream.c (formatv): Don't look for a '.' in the result of printing a double using %e; look for dec_point. Post-process floating-point sprintf by substituting occurrences of dec_point with the period. 
--- lib.c | 34 ++++++++++++++++++++++++++++++++-- lib.h | 2 ++ stream.c | 21 ++++++++++++++++++++- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/lib.c b/lib.c index f6c57552..338ff9fc 100644 --- a/lib.c +++ b/lib.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "config.h" #include "alloca.h" #if HAVE_GETENVIRONMENTSTRINGS @@ -130,6 +131,10 @@ val list_f, less_f, greater_f; val prog_string; +#if CONFIG_LOCALE_TOLERANCE +char dec_point = '.'; +#endif + static val recycled_conses; const seq_kind_t seq_kind_tab[MAXTYPE+1] = { @@ -5402,9 +5407,21 @@ val flo_str(val str) { const wchar_t *wcs = c_str(str); wchar_t *ptr; + double value; + +#if CONFIG_LOCALE_TOLERANCE + if (dec_point != '.') { + size_t size = c_unum(length_str(str), lit("flot-str")) + 1; + wchar_t *wcopy = alloca(sizeof *wcopy * size), *dot = wcopy; + wmemcpy(wcopy, wcs, size); + wcs = wcopy; + while ((dot = wcschr(dot, '.')) != 0) + *dot++ = dec_point; + } +#endif + + value = wcstod(wcs, &ptr); - /* TODO: detect if we have wcstod */ - double value = wcstod(wcs, &ptr); if (value == 0 && ptr == wcs) return nil; if ((value == HUGE_VAL || value == -HUGE_VAL) && errno == ERANGE) @@ -11888,6 +11905,16 @@ val in_range_star(val range, val num) } } +#if CONFIG_LOCALE_TOLERANCE + +static void locale_init(void) +{ + struct lconv *lc = localeconv(); + dec_point = *lc->decimal_point; +} + +#endif + static void obj_init(void) { /* @@ -12970,6 +12997,9 @@ void init(val *stack_bottom) t = one; gc_init(stack_bottom); +#if CONFIG_LOCALE_TOLERANCE + locale_init(); +#endif obj_init(); uw_init(); eval_init(); diff --git a/lib.h b/lib.h index e9c1bd16..3a519905 100644 --- a/lib.h +++ b/lib.h @@ -538,6 +538,8 @@ extern val list_f, less_f, greater_f; extern val prog_string; +extern char dec_point; + #if HAVE_ULONGLONG_T typedef ulonglong_t alloc_bytes_t; #define SIZEOF_ALLOC_BYTES_T SIZEOF_LONGLONG_T diff --git a/stream.c b/stream.c index ef2c0a29..07a0060c 100644 --- a/stream.c +++ b/stream.c @@ 
-3494,7 +3494,11 @@ val formatv(val stream_in, val fmtstr, struct args *al) if (ch == 'e') { sprintf(num_buf, "%.*e", precision, n); { +#if CONFIG_LOCALE_TOLERANCE + char *dec = strchr(num_buf, dec_point); +#else char *dec = strchr(num_buf, '.'); +#endif char *exp = strchr(dec ? dec : num_buf, 'e'); if (exp) { @@ -3525,6 +3529,13 @@ val formatv(val stream_in, val fmtstr, struct args *al) continue; } precision = (width ? width - 1 : 0); +#if CONFIG_LOCALE_TOLERANCE + if (dec_point != '.') { + char *dot = num_buf; + while ((dot = strchr(dot, dec_point)) != 0) + *dot++ = '.'; + } +#endif goto output_num; } case 'd': @@ -3584,6 +3595,14 @@ val formatv(val stream_in, val fmtstr, struct args *al) sprintf(num_buf, "%.*g", precision, obj->fl.n); +#if CONFIG_LOCALE_TOLERANCE + if (dec_point != '.') { + char *dot = num_buf; + while ((dot = strchr(dot, dec_point)) != 0) + *dot++ = '.'; + } +#endif + { char *dec = strchr(num_buf, '.'); char *exp = strchr(dec ? dec : num_buf, 'e'); @@ -3606,7 +3625,7 @@ val formatv(val stream_in, val fmtstr, struct args *al) } if (ch == 's' && (!precision_p || precision > 0) && !dec && !exp) - strcat(num_buf, ".0"); + strcat(num_buf, ".0"); } if (!isdigit(num_buf[0]) && !isdigit(num_buf[1])) { -- cgit v1.2.3