diff options
author | Thomas Fitzsimmons <fitzsim@redhat.com> | 2002-04-24 20:53:30 +0000 |
---|---|---|
committer | Thomas Fitzsimmons <fitzsim@redhat.com> | 2002-04-24 20:53:30 +0000 |
commit | eaa75b70e31aed8ba2823c7b37ff92230dd8c451 (patch) | |
tree | 255a744fbdffc3197c3f971ed1edae29c71a6134 /newlib/libc/stdlib | |
parent | b9f9f699372fae9f3b7926fa165d07fe68a949fb (diff) | |
download | cygnal-eaa75b70e31aed8ba2823c7b37ff92230dd8c451.tar.gz cygnal-eaa75b70e31aed8ba2823c7b37ff92230dd8c451.tar.bz2 cygnal-eaa75b70e31aed8ba2823c7b37ff92230dd8c451.zip |
* Makefile.am (check-DEJAGNU): New target.
(site.exp): Likewise.
* acinclude.m4 (NEWLIB_CONFIGURE): Replace AC_CANONICAL_HOST
with AC_CANONICAL_SYSTEM. Remove AC_CANONICAL_BUILD.
* libc/locale/locale.c (_setlocale_r): Add UTF-8 support.
* libc/stdlib/mbtowc_r.c (_mbtowc_r): Likewise.
* libc/stdlib/wctomb_r.c (_wctomb_r): Likewise.
* testsuite: New directory.
* testsuite/config: Likewise.
* testsuite/lib: Likewise.
* testsuite/newlib.locale: Likewise.
* testsuite/newlib.string: Likewise.
* testsuite/config/default.exp: New file.
* testsuite/lib/checkoutput.exp: New file.
* testsuite/lib/newlib.exp: New file.
* testsuite/lib/passfail.exp: New file.
* testsuite/newlib.locale/UTF-8.c: New file.
* testsuite/newlib.locale/UTF-8.exp: New file.
* testsuite/newlib.locale/locale.exp: New file.
* testsuite/newlib.string/string.exp: New file.
* testsuite/newlib.string/tstring.c: New file.
Diffstat (limited to 'newlib/libc/stdlib')
-rw-r--r-- | newlib/libc/stdlib/Makefile.in | 4 | ||||
-rw-r--r-- | newlib/libc/stdlib/mbtowc_r.c | 187 | ||||
-rw-r--r-- | newlib/libc/stdlib/wctomb_r.c | 57 |
3 files changed, 248 insertions, 0 deletions
diff --git a/newlib/libc/stdlib/Makefile.in b/newlib/libc/stdlib/Makefile.in index 8ae3d154d..b29bfe6f5 100644 --- a/newlib/libc/stdlib/Makefile.in +++ b/newlib/libc/stdlib/Makefile.in @@ -58,8 +58,12 @@ POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : +build_alias = @build_alias@ +build_triplet = @build@ host_alias = @host_alias@ host_triplet = @host@ +target_alias = @target_alias@ +target_triplet = @target@ AR = @AR@ AS = @AS@ CC = @CC@ diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c index 4bf302359..866789f16 100644 --- a/newlib/libc/stdlib/mbtowc_r.c +++ b/newlib/libc/stdlib/mbtowc_r.c @@ -66,6 +66,193 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state), if (r->_current_locale == NULL || (strlen (r->_current_locale) <= 1)) { /* fall-through */ } + else if (!strcmp (r->_current_locale, "UTF-8")) + { + wchar_t char1 = 0; + + if (s == NULL) + return 0; /* UTF-8 character encodings are not state-dependent */ + + /* we know n >= 1 if we get here */ + *pwc = 0; + char1 = (wchar_t)*t; + + if (char1 == '\0') + return 0; /* s points to the null character */ + + if (char1 >= 0x0 && char1 <= 0x7f) + { + /* single-byte sequence */ + *pwc = char1; + return 1; + } + else if (char1 >= 0xc0 && char1 <= 0xdf) + { + /* two-byte sequence */ + if (n >= 2) + { + wchar_t char2 = (wchar_t)*(t+1); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + + if (char1 < 0xc2) + /* overlong UTF-8 sequence */ + return -1; + + *pwc = ((char1 & 0x1f) << 6) + | (char2 & 0x3f); + return 2; + } + else + return -1; + } + else if (char1 >= 0xe0 && char1 <= 0xef) + { + /* three-byte sequence */ + if (n >= 3) + { + wchar_t char2 = (wchar_t)*(t+1); + wchar_t char3 = (wchar_t)*(t+2); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + if (char3 < 0x80 || char3 > 0xbf) + return -1; + + if (char1 == 0xe0) + { + if (char2 < 0xa0) + /* overlong UTF-8 sequence */ + return -1; + } + + *pwc = ((char1 & 0x0f) << 12) + | ((char2 & 0x3f) << 6) + | (char3 & 0x3f); + + if (*pwc >= 0xd800 && *pwc <= 0xdfff) + { + return -1; + } + else + return 3; + } + else + return -1; + } + else if (char1 >= 0xf0 && char1 <= 0xf7) + { + /* four-byte sequence */ + if (n >= 4) + { + wchar_t char2 = (wchar_t)*(t+1); + wchar_t char3 = (wchar_t)*(t+2); + wchar_t char4 = (wchar_t)*(t+3); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + if (char3 < 0x80 || char3 > 0xbf) + return -1; + if (char4 < 0x80 || char4 > 0xbf) + return -1; + + if (char1 == 0xf0) + { + if (char2 < 0x90) + /* overlong UTF-8 sequence */ + return -1; + } + + *pwc = ((char1 & 0x07) << 18) + | ((char2 & 0x3f) << 12) + | ((char3 & 0x3f) << 6) + | (char4 & 0x3f); + + return 4; + } + else + return -1; + } + else if (char1 >= 0xf8 && char1 <= 0xfb) + { + /* five-byte sequence */ + if (n >= 5) + { + wchar_t char2 = (wchar_t)*(t+1); + wchar_t char3 = (wchar_t)*(t+2); + wchar_t char4 = (wchar_t)*(t+3); + wchar_t char5 = (wchar_t)*(t+4); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + if (char3 < 0x80 || char3 > 0xbf) + return -1; + if (char4 < 0x80 || char4 > 0xbf) + return -1; + if (char5 < 0x80 || char5 > 0xbf) + return -1; + + if (char1 == 0xf8) + { + if (char2 < 0x88) + /* overlong UTF-8 sequence */ + return -1; + } + + *pwc = ((char1 & 0x03) << 24) + | ((char2 & 0x3f) << 18) + | ((char3 & 0x3f) << 12) + | ((char4 & 0x3f) << 6) + | (char5 & 0x3f); + return 5; + } + else + return -1; + } + else if (char1 >= 0xfc && char1 <= 0xfd) + { + /* six-byte sequence */ + if (n >= 6) + { + wchar_t char2 = (wchar_t)*(t+1); + wchar_t char3 = (wchar_t)*(t+2); + wchar_t char4 = (wchar_t)*(t+3); + wchar_t char5 = (wchar_t)*(t+4); + wchar_t char6 = (wchar_t)*(t+5); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + if (char3 < 0x80 || char3 > 0xbf) + return -1; + if (char4 < 0x80 || char4 > 0xbf) + return -1; + if (char5 < 0x80 || char5 > 0xbf) + return -1; + if (char6 < 0x80 || char6 > 0xbf) + return -1; + + if (char1 == 0xfc) + { + if (char2 < 0x84) + /* overlong UTF-8 sequence */ + return -1; + } + + *pwc = ((char1 & 0x01) << 30) + | ((char2 & 0x3f) << 24) + | ((char3 & 0x3f) << 18) + | ((char4 & 0x3f) << 12) + | ((char5 & 0x3f) << 6) + | (char6 & 0x3f); + return 6; + } + else + return -1; + } + else + return -1; + } else if (!strcmp (r->_current_locale, "C-SJIS")) { int char1; diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c index b382c995f..991e0a610 100644 --- a/newlib/libc/stdlib/wctomb_r.c +++ b/newlib/libc/stdlib/wctomb_r.c @@ -12,6 +12,63 @@ _DEFUN (_wctomb_r, (r, s, wchar, state), { if (strlen (r->_current_locale) <= 1) { /* fall-through */ } + else if (!strcmp (r->_current_locale, "UTF-8")) + { + if (s == NULL) + return 0; /* UTF-8 encoding is not state-dependent */ + + if (wchar <= 0x7f) + { + *s = wchar; + return 1; + } + else if (wchar >= 0x80 && wchar <= 0x7ff) + { + *s++ = 0xc0 | ((wchar & 0x7c0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 2; + } + else if (wchar >= 0x800 && wchar <= 0xffff) + { + /* UTF-16 surrogates -- must not occur in normal UCS-4 data */ + if (wchar >= 0xd800 && wchar <= 0xdfff) + return -1; + + *s++ = 0xe0 | ((wchar & 0xf000) >> 12); + *s++ = 0x80 | ((wchar & 0xfc0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 3; + } + else if (wchar >= 0x10000 && wchar <= 0x1fffff) + { + *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18); + *s++ = 0x80 | ((wchar & 0x3f000) >> 12); + *s++ = 0x80 | ((wchar & 0xfc0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 4; + } + else if (wchar >= 0x200000 && wchar <= 0x3ffffff) + { + *s++ = 0xf8 | ((wchar & 0x3000000) >> 24); + *s++ = 0x80 | ((wchar & 0xfc0000) >> 18); + *s++ = 0x80 | ((wchar & 0x3f000) >> 12); + *s++ = 0x80 | ((wchar & 0xfc0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 5; + } + else if (wchar >= 0x4000000 && wchar <= 0x7fffffff) + { + *s++ = 0xfc | ((wchar & 0x40000000) >> 30); + *s++ = 0x80 | ((wchar & 0x3f000000) >> 24); + *s++ = 0x80 | ((wchar & 0xfc0000) >> 18); + *s++ = 0x80 | ((wchar & 0x3f000) >> 12); + *s++ = 0x80 | ((wchar & 0xfc0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 6; + } + else + return -1; + } else if (!strcmp (r->_current_locale, "C-SJIS")) { unsigned char char2 = (unsigned char)wchar; |