From 6949749e00019594d17a2dd7788dadd1663aff64 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Sat, 4 Feb 2012 22:22:57 +0100 Subject: * utf8.c (utf8_from_uc): Bugfix: incorrect condition in character range check (less than minimum *and* U+DCxx, rather than *or*). Also, we must check for out of range characters. UTF-8 sequences beginning with F4 can code beyond 0x10FFFF. (utf8_decode): Check for characters beyond 0x10FFFF. --- ChangeLog | 8 ++++++++ utf8.c | 8 +++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6840d7d7..2abc8c11 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2012-02-04 Kaz Kylheku + + * utf8.c (utf8_from_uc): Bugfix: incorrect condition in character + range check (less than minimum *and* U+DCxx, rather than *or*). + Also, we must check for out of range characters. UTF-8 sequences + beginning with F4 can code beyond 0x10FFFF. + (utf8_decode): Check for characters beyond 0x10FFFF. + 2012-02-03 Kaz Kylheku * eval.c (rest_s, op_s): New variables. diff --git a/utf8.c b/utf8.c index 0c9c109c..d4ca3513 100644 --- a/utf8.c +++ b/utf8.c @@ -104,8 +104,9 @@ size_t utf8_from_uc(wchar_t *wdst, const unsigned char *src) wch |= (ch & 0x3F); state = (enum utf8_state) (state - 1); if (state == utf8_init) { - if (wch < wch_min && - (wch <= 0xFFFF && (wch & 0xFF00) == 0xDC00)) + if (wch < wch_min || + (wch <= 0xFFFF && (wch & 0xFF00) == 0xDC00) || + (wch > 0x10FFFF)) { src = backtrack; if (wdst) @@ -311,7 +312,8 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(mem_t *ctx), mem_t *ctx) ud->state = (enum utf8_state) (ud->state - 1); if (ud->state == utf8_init) { if (ud->wch < ud->wch_min || - (ud->wch <= 0xFFFF && (ud->wch & 0xFF00) == 0xDC00)) + (ud->wch <= 0xFFFF && (ud->wch & 0xFF00) == 0xDC00) || + (ud->wch > 0x10FFFF)) { wchar_t wch = 0xDC00 | ud->buf[ud->back]; ud->tail = ud->back = (ud->back + 1) % 8; -- cgit v1.2.3