summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2021-04-20 07:45:30 -0700
committerKaz Kylheku <kaz@kylheku.com>2021-04-20 07:45:30 -0700
commit222adfcb6c232f4b91260e9253ae70af74274371 (patch)
tree224f5617e347fa39b371eb7716567187b34b8480
parent48808c951895bcc48ddb0fc3f406993d69620470 (diff)
downloadtxr-222adfcb6c232f4b91260e9253ae70af74274371.tar.gz
txr-222adfcb6c232f4b91260e9253ae70af74274371.tar.bz2
txr-222adfcb6c232f4b91260e9253ae70af74274371.zip
utf8: decode: reduce strictness of full unicode check.
* utf8.c (utf8_from_buf, utf8_decode): On 16-bit wchar_t, we don't have to throw on every value in the range 0xF0-0xFF. Only the values 0xF0 through 0xF4 are potential UTF-8 bytes, so we only need to error out on those. 0xF5 through 0xFF are invalid bytes, which we can map into the 0xDCNN range.
-rw-r--r--utf8.c8
1 files changed, 4 insertions, 4 deletions
diff --git a/utf8.c b/utf8.c
index c23eefce..0d484f4f 100644
--- a/utf8.c
+++ b/utf8.c
@@ -84,16 +84,16 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes)
wch_min = 0x800;
break;
case 0xF:
-#ifdef FULL_UNICODE
if (ch < 0xF5) {
+#ifdef FULL_UNICODE
state = utf8_more3;
wch = (ch & 0x7);
wch_min = 0x10000;
break;
- }
#else
conversion_error();
#endif
+ }
/* fallthrough */
default:
if (wdst)
@@ -317,16 +317,16 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(mem_t *ctx), mem_t *ctx)
ud->wch_min = 0x800;
break;
case 0xF:
-#ifdef FULL_UNICODE
if (ch < 0xF5) {
+#ifdef FULL_UNICODE
ud->state = utf8_more3;
ud->wch = (ch & 0x7);
ud->wch_min = 0x10000;
break;
- }
#else
conversion_error();
#endif
+ }
/* fallthrough */
default:
ud->back = ud->tail;