From 0218ea2a9b80dffebf691ef7a4ccf3f459a8357f Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Fri, 17 Apr 2020 06:42:40 -0700 Subject: unicode: character width upkeep. Updating the regex for matching code points corresponding to wide and full width characters, with regard to the old 1998 document: http://www.unicode.org/reports/tr11-2/ More to follow. I neglected to record where the original data came from, and neglected to comment the table in the code. In some cases it has more coverage than the 1998 document; in some cases less. * regex.c (create_wide_cs): Extending the 1100-115F range to 11F9 to cover all of Korean Hangeul. Replace two occurrences of 3000-303E with one 3000-303F. Merge 3250-32FE with 3300-4DB5, and extend to 4DBF. Add private use range E000-E757. --- regex.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/regex.c b/regex.c index a4dc69eb..2de3daa2 100644 --- a/regex.c +++ b/regex.c @@ -3226,14 +3226,13 @@ static char_set_t *create_wide_cs(void) char_set_t *cs = char_set_create(cst, 0, 1); - char_set_add_range(cs, 0x1100, 0x115F); + char_set_add_range(cs, 0x1100, 0x11F9); char_set_add_range(cs, 0x2329, 0x232A); char_set_add_range(cs, 0x2E80, 0x2E99); char_set_add_range(cs, 0x2E9B, 0x2EF3); char_set_add_range(cs, 0x2F00, 0x2FD5); char_set_add_range(cs, 0x2FF0, 0x2FFB); - char_set_add_range(cs, 0x3000, 0x303E); - char_set_add_range(cs, 0x3000, 0x303E); + char_set_add_range(cs, 0x3000, 0x303F); char_set_add_range(cs, 0x3041, 0x3096); char_set_add_range(cs, 0x3099, 0x30FF); char_set_add_range(cs, 0x3105, 0x312D); @@ -3242,13 +3241,13 @@ static char_set_t *create_wide_cs(void) char_set_add_range(cs, 0x31C0, 0x31E3); char_set_add_range(cs, 0x31F0, 0x321E); char_set_add_range(cs, 0x3220, 0x3247); - char_set_add_range(cs, 0x3250, 0x32FE); - char_set_add_range(cs, 0x3300, 0x4DB5); + char_set_add_range(cs, 0x3250, 0x4DBF); char_set_add_range(cs, 0x4E00, 0x9FFF); char_set_add_range(cs, 0xA000, 0xA48C); char_set_add_range(cs, 0xA490, 0xA4C6); 
char_set_add_range(cs, 0xA960, 0xA97C); char_set_add_range(cs, 0xAC00, 0xD7A3); + char_set_add_range(cs, 0xE000, 0xE757); char_set_add_range(cs, 0xF900, 0xFAFF); char_set_add_range(cs, 0xFE10, 0xFE19); char_set_add_range(cs, 0xFE30, 0xFE52); -- cgit v1.2.3