diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2018-12-17 09:47:01 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2018-12-17 09:47:01 -0800 |
commit | c752742366d6ab0e69067243500a76ee7e9f16ae (patch) | |
tree | 4534d351bd0b3ff06d7e2009b9381172cc51244d | |
parent | 2dfcdb848b2e9504227c6a86bc497a100808d125 (diff) | |
download | txr-c752742366d6ab0e69067243500a76ee7e9f16ae.tar.gz txr-c752742366d6ab0e69067243500a76ee7e9f16ae.tar.bz2 txr-c752742366d6ab0e69067243500a76ee7e9f16ae.zip |
UTF-8: fix incorrect decoding of four-byte sequences.
utf8.c (utf8_decode): The wch_min value is set incorrectly for
the four-byte case due to an extra zero; it should be 0x10000,
the first code point that is encoded as four UTF-8 bytes.
The consequence of this error is that UTF-8-encoded characters
from 0x10000 up to (but not including) 0x100000 are treated as
invalid bytes after being decoded, because they fail the range test.
-rw-r--r-- | utf8.c | 2 |
1 file changed, 1 insertion, 1 deletion
@@ -324,7 +324,7 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(mem_t *ctx), mem_t *ctx)
       if (ch < 0xF5) {
         ud->state = utf8_more3;
         ud->wch = (ch & 0x7);
-        ud->wch_min = 0x100000;
+        ud->wch_min = 0x10000;
         break;
       }
       /* fallthrough */