diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2009-03-24 12:18:34 +0000 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2009-03-24 12:18:34 +0000 |
commit | 161211d186a16e4f090b8b3c63040f0b9aee25d4 (patch) | |
tree | 4ac0e1154417f3b0119ba79407a8c5687d96bf83 /winsup/cygwin/miscfuncs.cc | |
parent | 6a32d500a9d601b4f25cee0e1ec6b2ac5195a7e9 (diff) | |
download | cygnal-161211d186a16e4f090b8b3c63040f0b9aee25d4.tar.gz cygnal-161211d186a16e4f090b8b3c63040f0b9aee25d4.tar.bz2 cygnal-161211d186a16e4f090b8b3c63040f0b9aee25d4.zip |
* ctype.cc (_CTYPE_DATA_0_127): Add _B class to TAB character.
(__ctype_default): New character class array for default ASCII
character set.
(__ctype_iso): New array of character class arrays for ISO charsets.
(__ctype_cp): Ditto for singlebyte Windows codepages.
(tolower): Implement as distinct function to support any singlebyte
charset.
(toupper): Ditto.
(__set_ctype): New function to copy singlebyte character classes
corresponding to current charset to ctype_b array.
Align copyright text to upstream.
* dcrt0.cc (dll_crt0_1): Reset current locale to "C" per POSIX.
* environ.cc (set_file_api_mode): Remove.
(codepage_init): Remove.
(parse_thing): Remove "codepage" setting.
(environ_init): Set locale according to environment settings, or
to current codepage, before converting environment to multibyte.
* fhandler.h (fhandler_console::write_replacement_char): Drop argument.
* fhandler_console.cc (dev_console::str_to_con): Call sys_cp_mbstowcs
rather than MultiByteToWideChar.
(fhandler_console::write_replacement_char): Always print a funny
half filled square if a character isn't in the current charset.
(fhandler_console::write_normal): Convert to using __mbtowc
rather than next_char.
* fork.cc (frok::child): Drop call to set_file_api_mode.
* globals.cc (enum codepage_type): Remove.
(current_codepage): Remove.
* miscfuncs.cc (cygwin_wcslwr): Unused, dangerous. Remove.
(cygwin_wcsupr): Ditto.
(is_cp_multibyte): Remove.
(next_char): Remove.
* miscfuncs.h (is_cp_multibyte): Drop declaration.
(next_char): Ditto.
* strfuncs.cc (get_cp): Remove.
(__db_wctomb): New function to implement _wctomb_r functionality for
doublebyte charsets using WideCharToMultiByte.
(__sjis_wctomb): New function to replace unusable newlib function.
(__jis_wctomb): Ditto.
(__eucjp_wctomb): Ditto.
(__gbk_wctomb): New function.
(__kr_wctomb): Ditto.
(__big5_wctomb): Ditto.
(__db_mbtowc): New function to implement _mbtowc_r functionality for
doublebyte charsets using MultiByteToWideChar.
(__sjis_mbtowc): New function to replace unusable newlib function.
(__jis_mbtowc): Ditto.
(__eucjp_mbtowc): Ditto.
(__gbk_mbtowc): New function.
(__kr_mbtowc): New function.
(__big5_mbtowc): New function.
(__set_charset_from_codepage): New function.
(sys_wcstombs): Reimplement, basically using same wide char to multibyte
conversion as newlib's application level functions. Plus extras.
Add lengthy comment to explain. Change return type to size_t.
(sys_wcstombs_alloc): Just use sys_wcstombs. Change return type to
size_t.
(sys_cp_mbstowcs): Replace sys_mbstowcs, take additional codepage
argument. Explain why. Change return type to size_t.
(sys_mbstowcs_alloc): Just use sys_mbstowcs. Change return type to
size_t.
* wchar.h: Declare internal functions implemented in strfuncs.cc.
(wcscasecmp): Remove.
(wcsncasecmp): Remove.
(wcslwr): Remove.
(wcsupr): Remove.
* winsup.h (codepage_init): Remove declaration.
(get_cp): Ditto.
(sys_wcstombs): Align declaration to new implementation.
(sys_wcstombs_alloc): Ditto.
(sys_cp_mbstowcs): Add declaration.
(sys_mbstowcs): Define as inline function.
(sys_mbstowcs_alloc): Align declaration to new implementation.
(set_file_api_mode): Remove declaration.
* include/ctype.h (isblank): Redefine to use _B character class.
(toupper): Remove ASCII-only definition.
(tolower): Ditto.
Diffstat (limited to 'winsup/cygwin/miscfuncs.cc')
-rw-r--r-- | winsup/cygwin/miscfuncs.cc | 132 |
1 files changed, 0 insertions, 132 deletions
diff --git a/winsup/cygwin/miscfuncs.cc b/winsup/cygwin/miscfuncs.cc index 845575edf..29b8159ad 100644 --- a/winsup/cygwin/miscfuncs.cc +++ b/winsup/cygwin/miscfuncs.cc @@ -141,26 +141,6 @@ cygwin_strncasecmp (const char *cs, const char *ct, size_t n) return RtlCompareUnicodeString (&us, &ut, TRUE); } -extern "C" wchar_t * __stdcall -cygwin_wcslwr (wchar_t *string) -{ - UNICODE_STRING us; - - RtlInitUnicodeString (&us, string); - RtlDowncaseUnicodeString (&us, &us, FALSE); - return string; -} - -extern "C" wchar_t * __stdcall -cygwin_wcsupr (wchar_t *string) -{ - UNICODE_STRING us; - - RtlInitUnicodeString (&us, string); - RtlUpcaseUnicodeString (&us, &us, FALSE); - return string; -} - extern "C" char * __stdcall cygwin_strlwr (char *string) { @@ -189,118 +169,6 @@ cygwin_strupr (char *string) return string; } -/* FIXME? We only support standard ANSI/OEM codepages according to - http://www.microsoft.com/globaldev/reference/cphome.mspx as well - as UTF-8 and codepage 1361, which is also mentioned as valid - doublebyte codepage in MSDN man pages (e.g. IsDBCSLeadByteEx). - Everything else will be hosed. */ - -bool -is_cp_multibyte (UINT cp) -{ - switch (cp) - { - case 932: - case 936: - case 949: - case 950: - case 1361: - case 65001: - return true; - } - return false; -} - -/* OMYGOD! CharNextExA is not UTF-8 aware! It only works fine with - double byte charsets. So we have to do it ourselves for UTF-8. - - While being at it, we do more. If a double-byte or multibyte - sequence is truncated due to an early end, we need a way to recognize - it. The reason is that multiple buffered write statements might - accidentally stop and start in the middle of a single character byte - sequence. If we have to interpret the byte sequences (as in - fhandler_console), we would print wrong output in these cases. 
- - So we have four possible return values here: - - ret = end if str >= end - ret = NULL if we encounter an invalid byte sequence - ret = str if we encounter the start byte of a truncated byte sequence - ret = str + n if we encounter a vaild byte sequence -*/ - -const unsigned char * -next_char (UINT cp, const unsigned char *str, const unsigned char *end) -{ - const unsigned char *ret = NULL; - - if (str >= end) - return end; - - switch (cp) - { - case 932: - case 936: - case 949: - case 950: - case 1361: - if (*str <= 0x7f) - ret = str + 1; - else if (str == end - 1 && IsDBCSLeadByteEx (cp, *str)) - ret = str; - else - ret = (const unsigned char *) CharNextExA (cp, (const CHAR *) str, 0); - break; - case CP_UTF8: - switch (str[0] >> 4) - { - case 0x0 ... 0x7: /* One byte character. */ - ret = str + 1; - break; - case 0x8 ... 0xb: /* Followup byte. Invalid as first byte. */ - ret = NULL; - break; - case 0xc ... 0xd: /* Two byte character. */ - /* Check followup bytes for validity. */ - if (str >= end - 1) - ret = str; - else if (str[1] <= 0xbf) - ret = str + 2; - else - ret = NULL; - break; - case 0xe: /* Three byte character. */ - if (str >= end - 2) - ret = str; - else if ((str[1] & 0xc0) == 0x80 && (str[2] & 0xc0) == 0x80 - && (str[0] != 0xe0 || str[1] >= 0xa0) - && (str[0] != 0xed || str[1] <= 0x9f)) - ret = str + 3; - else - ret = NULL; - break; - case 0xf: /* Four byte character. */ - if (str[0] >= 0xf8) - ret = NULL; - else if (str >= end - 3) - ret = str; - else if ((str[1] & 0xc0) == 0x80 && (str[2] & 0xc0) == 0x80 - && (str[3] & 0xc0) == 0x80 - && (str[0] == 0xf0 || str[1] >= 0x90) - && (str[0] == 0xf4 || str[1] <= 0x8f)) - ret = str + 4; - else - ret = NULL; - break; - } - break; - default: - ret = str + 1; - break; - } - return ret; -} - int __stdcall check_invalid_virtual_addr (const void *s, unsigned sz) { |