utf8: decode: reduce strictness of full unicode check.

* utf8.c (utf8_from_buf, utf8_decode): On 16-bit wchar_t, we don't have to throw on every value in the range 0xF0-0xFF. Only the values 0xF0 through 0xF4 are potential UTF-8 bytes, so we only need to error out on those. 0xF5 through 0xFF are invalid bytes, which we can map into the 0xDCNN range.
author: Kaz Kylheku <kaz@kylheku.com> 2021-04-20 07:45:30 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2021-04-20 07:45:30 -0700
commit: 222adfcb6c232f4b91260e9253ae70af74274371 (patch)
tree: 224f5617e347fa39b371eb7716567187b34b8480
parent: 48808c951895bcc48ddb0fc3f406993d69620470 (diff)
download: txr-222adfcb6c232f4b91260e9253ae70af74274371.tar.gz
txr-222adfcb6c232f4b91260e9253ae70af74274371.tar.bz2
txr-222adfcb6c232f4b91260e9253ae70af74274371.zip
1 files changed, 4 insertions, 4 deletions
diff --git a/utf8.c b/utf8.c
index c23eefce..0d484f4f 100644
--- a/utf8.c
+++ b/utf8.c
@@ -84,16 +84,16 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes)
         wch_min = 0x800;
         break;
       case 0xF:
-#ifdef FULL_UNICODE
         if (ch < 0xF5) {
+#ifdef FULL_UNICODE
           state = utf8_more3;
           wch = (ch & 0x7);
           wch_min = 0x10000;
           break;
-        }
 #else
         conversion_error();
 #endif
+        }
         /* fallthrough */
       default:
         if (wdst)
@@ -317,16 +317,16 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(mem_t *ctx), mem_t *ctx)
         ud->wch_min = 0x800;
         break;
       case 0xF:
-#ifdef FULL_UNICODE
         if (ch < 0xF5) {
+#ifdef FULL_UNICODE
           ud->state = utf8_more3;
           ud->wch = (ch & 0x7);
           ud->wch_min = 0x10000;
           break;
-        }
 #else
         conversion_error();
 #endif
+        }
         /* fallthrough */
       default:
         ud->back = ud->tail;
author	Kaz Kylheku <kaz@kylheku.com>	2021-04-20 07:45:30 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2021-04-20 07:45:30 -0700
commit	222adfcb6c232f4b91260e9253ae70af74274371 (patch)
tree	224f5617e347fa39b371eb7716567187b34b8480
parent	48808c951895bcc48ddb0fc3f406993d69620470 (diff)
download	txr-222adfcb6c232f4b91260e9253ae70af74274371.tar.gz txr-222adfcb6c232f4b91260e9253ae70af74274371.tar.bz2 txr-222adfcb6c232f4b91260e9253ae70af74274371.zip