summaryrefslogtreecommitdiffstats
path: root/winsup/cygwin/regex/regexec.c
diff options
context:
space:
mode:
author Corinna Vinschen <corinna@vinschen.de> 2010-02-12 17:46:39 +0000
committer Corinna Vinschen <corinna@vinschen.de> 2010-02-12 17:46:39 +0000
commit 7bd2296c8384f80585ace60b9d67cff4cab2cc6f (patch)
tree 3d0527ee5a1c2c135d57d7c6151087d7c5bed5e6 /winsup/cygwin/regex/regexec.c
parent e122c47112e7830e156fffb469ed389ecde0151a (diff)
downloadcygnal-7bd2296c8384f80585ace60b9d67cff4cab2cc6f.tar.gz
cygnal-7bd2296c8384f80585ace60b9d67cff4cab2cc6f.tar.bz2
cygnal-7bd2296c8384f80585ace60b9d67cff4cab2cc6f.zip
* regex/regcomp.c (xwcrtomb): New function to convert wide chars
outside of the base plane to UTF-8.  Call throughout instead of
wcrtomb.
(wgetnext): Handle surrogate pairs on UTF-16 systems.
* regex/regexec.c (xmbrtowc): Ditto.
Diffstat (limited to 'winsup/cygwin/regex/regexec.c')
-rw-r--r-- winsup/cygwin/regex/regexec.c | 18
1 files changed, 17 insertions, 1 deletions
diff --git a/winsup/cygwin/regex/regexec.c b/winsup/cygwin/regex/regexec.c
index 6195e508c..788ef5eeb 100644
--- a/winsup/cygwin/regex/regexec.c
+++ b/winsup/cygwin/regex/regexec.c
@@ -84,8 +84,24 @@ xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
if (wi != NULL)
*wi = dummy;
return (1);
- } else
+ } else {
+ if (sizeof (wchar_t) == 2 && wc >= 0xd800 && wc <= 0xdbff) {
+ /* UTF-16 surrogate pair. Fetch second half and
+ compute UTF-32 value */
+ int n2 = mbrtowc(&wc, s + nr, n - nr, mbs);
+ if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) {
+ memset(mbs, 0, sizeof(*mbs));
+ if (wi != NULL)
+ *wi = dummy;
+ return (1);
+ }
+ if (wi != NULL)
+ *wi = (((*wi & 0x3ff) << 10) | (wc & 0x3ff))
+ + 0x10000;
+ nr += n2;
+ }
return (nr);
+ }
}
static __inline size_t