diff options
Diffstat (limited to 'newlib/libc/stdlib/wctomb_r.c')
-rw-r--r-- | newlib/libc/stdlib/wctomb_r.c | 464 |
1 files changed, 301 insertions, 163 deletions
diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c index 8d6d3fc92..64210f232 100644 --- a/newlib/libc/stdlib/wctomb_r.c +++ b/newlib/libc/stdlib/wctomb_r.c @@ -4,11 +4,11 @@ #include <wchar.h> #include <locale.h> #include "mbctype.h" +#include "local.h" -extern char *__locale_charset (); - -/* for some conversions, we use the __count field as a place to store a state value */ -#define __state __count +int (*__wctomb) (struct _reent *, char *, wchar_t, const char *charset, + mbstate_t *) + = __ascii_wctomb; int _DEFUN (_wctomb_r, (r, s, wchar, state), @@ -17,196 +17,287 @@ _DEFUN (_wctomb_r, (r, s, wchar, state), wchar_t _wchar _AND mbstate_t *state) { + return __wctomb (r, s, _wchar, __locale_charset (), state); +} + +int +_DEFUN (__ascii_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ /* Avoids compiler warnings about comparisons that are always false due to limited range when sizeof(wchar_t) is 2 but sizeof(wint_t) is 4, as is the case on cygwin. */ wint_t wchar = _wchar; - if (strlen (__locale_charset ()) <= 1) - { /* fall-through */ } - else if (!strcmp (__locale_charset (), "UTF-8")) + if (s == NULL) + return 0; + + if ((size_t)wchar >= 0x100) { - if (s == NULL) - return 0; /* UTF-8 encoding is not state-dependent */ + r->_errno = EILSEQ; + return -1; + } + + *s = (char) wchar; + return 1; +} - if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff)) +#ifdef _MB_CAPABLE +/* for some conversions, we use the __count field as a place to store a state value */ +#define __state __count + +int +_DEFUN (__utf8_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + + if (s == NULL) + return 0; /* UTF-8 encoding is not state-dependent */ + + if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff)) + { + /* At this point only the second half of a surrogate pair is valid. */ + r->_errno = EILSEQ; + return -1; + } + if (wchar <= 0x7f) + { + *s = wchar; + return 1; + } + if (wchar >= 0x80 && wchar <= 0x7ff) + { + *s++ = 0xc0 | ((wchar & 0x7c0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 2; + } + if (wchar >= 0x800 && wchar <= 0xffff) + { + if (wchar >= 0xd800 && wchar <= 0xdfff) { - /* At this point only the second half of a surrogate pair is valid. */ - r->_errno = EILSEQ; - return -1; - } - if (wchar <= 0x7f) - { - *s = wchar; - return 1; - } - else if (wchar >= 0x80 && wchar <= 0x7ff) - { - *s++ = 0xc0 | ((wchar & 0x7c0) >> 6); - *s = 0x80 | (wchar & 0x3f); - return 2; - } - else if (wchar >= 0x800 && wchar <= 0xffff) - { - if (wchar >= 0xd800 && wchar <= 0xdfff) + wint_t tmp; + /* UTF-16 surrogates -- must not occur in normal UCS-4 data */ + if (sizeof (wchar_t) != 2) + { + r->_errno = EILSEQ; + return -1; + } + if (wchar >= 0xdc00) { - wint_t tmp; - /* UTF-16 surrogates -- must not occur in normal UCS-4 data */ - if (sizeof (wchar_t) != 2) + /* Second half of a surrogate pair. It's not valid if + we don't have already read a first half of a surrogate + before. */ + if (state->__count != -4) { r->_errno = EILSEQ; return -1; } - if (wchar >= 0xdc00) - { - /* Second half of a surrogate pair. It's not valid if - we don't have already read a first half of a surrogate - before. */ - if (state->__count != -4) - { - r->_errno = EILSEQ; - return -1; - } - /* If it's valid, reconstruct the full Unicode value and - return the trailing three bytes of the UTF-8 char. */ - tmp = (state->__value.__wchb[0] << 16) - | (state->__value.__wchb[1] << 8) - | (wchar & 0x3ff); - state->__count = 0; - *s++ = 0x80 | ((tmp & 0x3f000) >> 12); - *s++ = 0x80 | ((tmp & 0xfc0) >> 6); - *s = 0x80 | (tmp & 0x3f); - return 3; - } - /* First half of a surrogate pair. Store the state and return - the first byte of the UTF-8 char. */ - tmp = ((wchar & 0x3ff) << 10) + 0x10000; - state->__value.__wchb[0] = (tmp >> 16) & 0xff; - state->__value.__wchb[1] = (tmp >> 8) & 0xff; - state->__count = -4; - *s = (0xf0 | ((tmp & 0x1c0000) >> 18)); - return 1; + /* If it's valid, reconstruct the full Unicode value and + return the trailing three bytes of the UTF-8 char. */ + tmp = (state->__value.__wchb[0] << 16) + | (state->__value.__wchb[1] << 8) + | (wchar & 0x3ff); + state->__count = 0; + *s++ = 0x80 | ((tmp & 0x3f000) >> 12); + *s++ = 0x80 | ((tmp & 0xfc0) >> 6); + *s = 0x80 | (tmp & 0x3f); + return 3; } - *s++ = 0xe0 | ((wchar & 0xf000) >> 12); - *s++ = 0x80 | ((wchar & 0xfc0) >> 6); - *s = 0x80 | (wchar & 0x3f); - return 3; - } - else if (wchar >= 0x10000 && wchar <= 0x10ffff) - { - *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18); - *s++ = 0x80 | ((wchar & 0x3f000) >> 12); - *s++ = 0x80 | ((wchar & 0xfc0) >> 6); - *s = 0x80 | (wchar & 0x3f); - return 4; - } + /* First half of a surrogate pair. Store the state and return + the first byte of the UTF-8 char. */ + tmp = ((wchar & 0x3ff) << 10) + 0x10000; + state->__value.__wchb[0] = (tmp >> 16) & 0xff; + state->__value.__wchb[1] = (tmp >> 8) & 0xff; + state->__count = -4; + *s = (0xf0 | ((tmp & 0x1c0000) >> 18)); + return 1; + } + *s++ = 0xe0 | ((wchar & 0xf000) >> 12); + *s++ = 0x80 | ((wchar & 0xfc0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 3; + } + if (wchar >= 0x10000 && wchar <= 0x10ffff) + { + *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18); + *s++ = 0x80 | ((wchar & 0x3f000) >> 12); + *s++ = 0x80 | ((wchar & 0xfc0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 4; + } + + r->_errno = EILSEQ; + return -1; +} + +/* Cygwin defines its own doublebyte charset conversion functions + because the underlying OS requires wchar_t == UTF-16. */ +#ifndef __CYGWIN__ +int +_DEFUN (__sjis_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + + unsigned char char2 = (unsigned char)wchar; + unsigned char char1 = (unsigned char)(wchar >> 8); + + if (s == NULL) + return 0; /* not state-dependent */ + + if (char1 != 0x00) + { + /* first byte is non-zero..validate multi-byte char */ + if (_issjis1(char1) && _issjis2(char2)) + { + *s++ = (char)char1; + *s = (char)char2; + return 2; + } else { r->_errno = EILSEQ; return -1; } } - else if (!strcmp (__locale_charset (), "SJIS")) + *s = (char) wchar; + return 1; +} + +int +_DEFUN (__eucjp_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + unsigned char char2 = (unsigned char)wchar; + unsigned char char1 = (unsigned char)(wchar >> 8); + + if (s == NULL) + return 0; /* not state-dependent */ + + if (char1 != 0x00) { - unsigned char char2 = (unsigned char)wchar; - unsigned char char1 = (unsigned char)(wchar >> 8); - - if (s == NULL) - return 0; /* not state-dependent */ - - if (char1 != 0x00) - { - /* first byte is non-zero..validate multi-byte char */ - if (_issjis1(char1) && _issjis2(char2)) - { - *s++ = (char)char1; - *s = (char)char2; - return 2; - } - else - { - r->_errno = EILSEQ; - return -1; - } - } + /* first byte is non-zero..validate multi-byte char */ + if (_iseucjp (char1) && _iseucjp (char2)) + { + *s++ = (char)char1; + *s = (char)char2; + return 2; + } + else + { + r->_errno = EILSEQ; + return -1; + } } - else if (!strcmp (__locale_charset (), "EUCJP")) + *s = (char) wchar; + return 1; +} + +int +_DEFUN (__jis_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + int cnt = 0; + unsigned char char2 = (unsigned char)wchar; + unsigned char char1 = (unsigned char)(wchar >> 8); + + if (s == NULL) + return 1; /* state-dependent */ + + if (char1 != 0x00) { - unsigned char char2 = (unsigned char)wchar; - unsigned char char1 = (unsigned char)(wchar >> 8); - - if (s == NULL) - return 0; /* not state-dependent */ - - if (char1 != 0x00) - { - /* first byte is non-zero..validate multi-byte char */ - if (_iseucjp (char1) && _iseucjp (char2)) - { - *s++ = (char)char1; - *s = (char)char2; - return 2; - } - else + /* first byte is non-zero..validate multi-byte char */ + if (_isjis (char1) && _isjis (char2)) + { + if (state->__state == 0) { - r->_errno = EILSEQ; - return -1; + /* must switch from ASCII to JIS state */ + state->__state = 1; + *s++ = ESC_CHAR; + *s++ = '$'; + *s++ = 'B'; + cnt = 3; } - } + *s++ = (char)char1; + *s = (char)char2; + return cnt + 2; + } + r->_errno = EILSEQ; + return -1; } - else if (!strcmp (__locale_charset (), "JIS")) + if (state->__state != 0) { - int cnt = 0; - unsigned char char2 = (unsigned char)wchar; - unsigned char char1 = (unsigned char)(wchar >> 8); - - if (s == NULL) - return 1; /* state-dependent */ - - if (char1 != 0x00) - { - /* first byte is non-zero..validate multi-byte char */ - if (_isjis (char1) && _isjis (char2)) - { - if (state->__state == 0) - { - /* must switch from ASCII to JIS state */ - state->__state = 1; - *s++ = ESC_CHAR; - *s++ = '$'; - *s++ = 'B'; - cnt = 3; - } - *s++ = (char)char1; - *s = (char)char2; - return cnt + 2; - } - else - { - r->_errno = EILSEQ; - return -1; - } - } - else - { - if (state->__state != 0) - { - /* must switch from JIS to ASCII state */ - state->__state = 0; - *s++ = ESC_CHAR; - *s++ = '('; - *s++ = 'B'; - cnt = 3; - } - *s = (char)char2; - return cnt + 1; - } + /* must switch from JIS to ASCII state */ + state->__state = 0; + *s++ = ESC_CHAR; + *s++ = '('; + *s++ = 'B'; + cnt = 3; } + *s = (char)char2; + return cnt + 1; +} +#endif /* !__CYGWIN__ */ + +#ifdef _MB_EXTENDED_CHARSETS_ISO +int +_DEFUN (__iso_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; if (s == NULL) return 0; + + /* wchars <= 0x9f translate to all ISO charsets directly. */ + if (wchar >= 0xa0) + { + int iso_idx = __iso_8859_index (charset + 9); + if (iso_idx >= 0) + { + unsigned char mb; + + if (s == NULL) + return 0; + + for (mb = 0; mb < 0x60; ++mb) + if (__iso_8859_conv[iso_idx][mb] == wchar) + { + *s = (char) (mb + 0xa0); + return 1; + } + r->_errno = EILSEQ; + return -1; + } + } - /* otherwise we are dealing with a single byte character */ if ((size_t)wchar >= 0x100) { r->_errno = EILSEQ; @@ -216,4 +307,51 @@ _DEFUN (_wctomb_r, (r, s, wchar, state), *s = (char) wchar; return 1; } - +#endif /* _MB_EXTENDED_CHARSETS_ISO */ + +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS +int +_DEFUN (__cp_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + + if (s == NULL) + return 0; + + if (wchar >= 0x80) + { + int cp_idx = __cp_index (charset + 2); + if (cp_idx >= 0) + { + unsigned char mb; + + if (s == NULL) + return 0; + + for (mb = 0; mb < 0x80; ++mb) + if (__cp_conv[cp_idx][mb] == wchar) + { + *s = (char) (mb + 0x80); + return 1; + } + r->_errno = EILSEQ; + return -1; + } + } + + if ((size_t)wchar >= 0x100) + { + r->_errno = EILSEQ; + return -1; + } + + *s = (char) wchar; + return 1; +} +#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ +#endif /* _MB_CAPABLE */ |