diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2010-02-12 17:46:39 +0000 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2010-02-12 17:46:39 +0000 |
commit | 7bd2296c8384f80585ace60b9d67cff4cab2cc6f (patch) | |
tree | 3d0527ee5a1c2c135d57d7c6151087d7c5bed5e6 /winsup/cygwin/regex/regexec.c | |
parent | e122c47112e7830e156fffb469ed389ecde0151a (diff) | |
download | cygnal-7bd2296c8384f80585ace60b9d67cff4cab2cc6f.tar.gz cygnal-7bd2296c8384f80585ace60b9d67cff4cab2cc6f.tar.bz2 cygnal-7bd2296c8384f80585ace60b9d67cff4cab2cc6f.zip |
* regex/regcomp.c (xwcrtomb): New function to convert wide chars
outside of the base plane to UTF-8. Call throughout instead of
wcrtomb.
(wgetnext): Handle surrogate pairs on UTF-16 systems.
* regex/regexec.c (xmbrtowc): Ditto.
Diffstat (limited to 'winsup/cygwin/regex/regexec.c')
-rw-r--r-- | winsup/cygwin/regex/regexec.c | 18 |
1 file changed, 17 insertions, 1 deletion
```diff
diff --git a/winsup/cygwin/regex/regexec.c b/winsup/cygwin/regex/regexec.c
index 6195e508c..788ef5eeb 100644
--- a/winsup/cygwin/regex/regexec.c
+++ b/winsup/cygwin/regex/regexec.c
@@ -84,8 +84,24 @@ xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
 		if (wi != NULL)
 			*wi = dummy;
 		return (1);
-	} else
+	} else {
+		if (sizeof (wchar_t) == 2 && wc >= 0xd800 && wc <= 0xdbff) {
+			/* UTF-16 surrogate pair.  Fetch second half and
+			   compute UTF-32 value */
+			int n2 = mbrtowc(&wc, s + nr, n - nr, mbs);
+			if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) {
+				memset(mbs, 0, sizeof(*mbs));
+				if (wi != NULL)
+					*wi = dummy;
+				return (1);
+			}
+			if (wi != NULL)
+				*wi = (((*wi & 0x3ff) << 10) | (wc & 0x3ff))
+				      + 0x10000;
+			nr += n2;
+		}
 		return (nr);
+	}
 }
 
 static __inline size_t
```