From c752742366d6ab0e69067243500a76ee7e9f16ae Mon Sep 17 00:00:00 2001
From: Kaz Kylheku <kaz@kylheku.com>
Date: Mon, 17 Dec 2018 09:47:01 -0800
Subject: UTF-8: fix incorrect decoding of four-byte sequences.

utf8.c (utf8_decode): The wch_min value is set incorrectly for
the four-byte case due to an extra zero; it should be 0x10000,
the lowest code point that is encoded as four UTF-8 bytes.
The consequence of this error is that UTF-8-encoded characters
from 0x10000 up to 0xFFFFF are treated as invalid bytes after
being decoded, because they fail the range test.
---
 utf8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utf8.c b/utf8.c
index 3ddc74a5..eaef3864 100644
--- a/utf8.c
+++ b/utf8.c
@@ -324,7 +324,7 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(mem_t *ctx), mem_t *ctx)
         if (ch < 0xF5) {
           ud->state = utf8_more3;
           ud->wch = (ch & 0x7);
-          ud->wch_min = 0x100000;
+          ud->wch_min = 0x10000;
           break;
         }
         /* fallthrough */
-- 
cgit v1.2.3