diff options
Diffstat (limited to 'newlib/libc/sys/linux/iconv')
30 files changed, 9454 insertions, 0 deletions
diff --git a/newlib/libc/sys/linux/iconv/Makefile.am b/newlib/libc/sys/linux/iconv/Makefile.am new file mode 100644 index 000000000..732644062 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/Makefile.am @@ -0,0 +1,27 @@ +## Process this file with automake to generate Makefile.in + +AUTOMAKE_OPTIONS = cygnus + +INCLUDES = -DGCONV_DIR='"$(pkglibdir)"' -DGCONV_PATH='"$(pkglibdir)"' -I$(srcdir) -I$(srcdir)/.. $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) + +LIB_SOURCES = \ + iconv.h gconv.h \ + iconv_open.c iconv.c iconv_close.c \ + gconv_open.c gconv.c gconv_close.c \ + gconv_db.c gconv_dl.c gconv_conf.c gconv_builtin.c \ + gconv_simple.c gconv_trans.c gconv_cache.c + +libiconv_la_CFLAGS = +libiconv_la_LDFLAGS = -Xcompiler -nostdlib + +if USE_LIBTOOL +noinst_LTLIBRARIES = libiconv.la +libiconv_la_SOURCES = $(LIB_SOURCES) +noinst_DATA = objectlist.awk.in +else +noinst_LIBRARIES = lib.a +lib_a_SOURCES = $(LIB_SOURCES) +noinst_DATA = +endif # USE_LIBTOOL + +include $(srcdir)/../../../../Makefile.shared diff --git a/newlib/libc/sys/linux/iconv/Makefile.in b/newlib/libc/sys/linux/iconv/Makefile.in new file mode 100644 index 000000000..a20b37805 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/Makefile.in @@ -0,0 +1,375 @@ +# Makefile.in generated automatically by automake 1.4 from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ + + +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = .. + +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_alias = @build_alias@ +build_triplet = @build@ +host_alias = @host_alias@ +host_triplet = @host@ +target_alias = @target_alias@ +target_triplet = @target@ +AR = @AR@ +AS = @AS@ +AWK = @AWK@ +CC = @CC@ +CPP = @CPP@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +DLLTOOL = @DLLTOOL@ +EXEEXT = @EXEEXT@ +GCJ = @GCJ@ +GCJFLAGS = @GCJFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBTOOL = @LIBTOOL@ +LINUX_MACH_LIB = @LINUX_MACH_LIB@ +LN_S = @LN_S@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +NEWLIB_CFLAGS = @NEWLIB_CFLAGS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +STRIP = @STRIP@ +VERSION = @VERSION@ +aext = @aext@ +libm_machine_dir = @libm_machine_dir@ +machine_dir = @machine_dir@ +newlib_basedir = @newlib_basedir@ +oext = @oext@ +sys_dir = @sys_dir@ + +AUTOMAKE_OPTIONS = cygnus + +INCLUDES = -DGCONV_DIR='"$(pkglibdir)"' -DGCONV_PATH='"$(pkglibdir)"' -I$(srcdir) -I$(srcdir)/.. 
$(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) + +LIB_SOURCES = \ + iconv.h gconv.h \ + iconv_open.c iconv.c iconv_close.c \ + gconv_open.c gconv.c gconv_close.c \ + gconv_db.c gconv_dl.c gconv_conf.c gconv_builtin.c \ + gconv_simple.c gconv_trans.c gconv_cache.c + + +libiconv_la_CFLAGS = +libiconv_la_LDFLAGS = -Xcompiler -nostdlib + +@USE_LIBTOOL_TRUE@noinst_LTLIBRARIES = @USE_LIBTOOL_TRUE@libiconv.la +@USE_LIBTOOL_TRUE@libiconv_la_SOURCES = @USE_LIBTOOL_TRUE@$(LIB_SOURCES) +@USE_LIBTOOL_TRUE@noinst_DATA = @USE_LIBTOOL_TRUE@objectlist.awk.in +@USE_LIBTOOL_FALSE@noinst_DATA = +@USE_LIBTOOL_FALSE@noinst_LIBRARIES = @USE_LIBTOOL_FALSE@lib.a +@USE_LIBTOOL_FALSE@lib_a_SOURCES = @USE_LIBTOOL_FALSE@$(LIB_SOURCES) +mkinstalldirs = $(SHELL) $(top_srcdir)/../../../../mkinstalldirs +CONFIG_CLEAN_FILES = +LIBRARIES = $(noinst_LIBRARIES) + + +DEFS = @DEFS@ -I. -I$(srcdir) +CPPFLAGS = @CPPFLAGS@ +LIBS = @LIBS@ +lib_a_LIBADD = +@USE_LIBTOOL_FALSE@lib_a_OBJECTS = iconv_open.$(OBJEXT) iconv.$(OBJEXT) \ +@USE_LIBTOOL_FALSE@iconv_close.$(OBJEXT) gconv_open.$(OBJEXT) \ +@USE_LIBTOOL_FALSE@gconv.$(OBJEXT) gconv_close.$(OBJEXT) \ +@USE_LIBTOOL_FALSE@gconv_db.$(OBJEXT) gconv_dl.$(OBJEXT) \ +@USE_LIBTOOL_FALSE@gconv_conf.$(OBJEXT) gconv_builtin.$(OBJEXT) \ +@USE_LIBTOOL_FALSE@gconv_simple.$(OBJEXT) gconv_trans.$(OBJEXT) \ +@USE_LIBTOOL_FALSE@gconv_cache.$(OBJEXT) +LTLIBRARIES = $(noinst_LTLIBRARIES) + +libiconv_la_LIBADD = +@USE_LIBTOOL_TRUE@libiconv_la_OBJECTS = iconv_open.lo iconv.lo \ +@USE_LIBTOOL_TRUE@iconv_close.lo gconv_open.lo gconv.lo gconv_close.lo \ +@USE_LIBTOOL_TRUE@gconv_db.lo gconv_dl.lo gconv_conf.lo \ +@USE_LIBTOOL_TRUE@gconv_builtin.lo gconv_simple.lo gconv_trans.lo \ +@USE_LIBTOOL_TRUE@gconv_cache.lo +CFLAGS = @CFLAGS@ +COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link 
$(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ +DATA = $(noinst_DATA) + +DIST_COMMON = Makefile.am Makefile.in + + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) + +TAR = gtar +GZIP_ENV = --best +SOURCES = $(lib_a_SOURCES) $(libiconv_la_SOURCES) +OBJECTS = $(lib_a_OBJECTS) $(libiconv_la_OBJECTS) + +all: all-redirect +.SUFFIXES: +.SUFFIXES: .S .c .lo .o .obj .s +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) $(srcdir)/../../../../Makefile.shared + cd $(top_srcdir) && $(AUTOMAKE) --cygnus iconv/Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + + +mostlyclean-noinstLIBRARIES: + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) + +distclean-noinstLIBRARIES: + +maintainer-clean-noinstLIBRARIES: + +.c.o: + $(COMPILE) -c $< + +# FIXME: We should only use cygpath when building on Windows, +# and only if it is available. 
+.c.obj: + $(COMPILE) -c `cygpath -w $<` + +.s.o: + $(COMPILE) -c $< + +.S.o: + $(COMPILE) -c $< + +mostlyclean-compile: + -rm -f *.o core *.core + -rm -f *.$(OBJEXT) + +clean-compile: + +distclean-compile: + -rm -f *.tab.c + +maintainer-clean-compile: + +.c.lo: + $(LIBTOOL) --mode=compile $(COMPILE) -c $< + +.s.lo: + $(LIBTOOL) --mode=compile $(COMPILE) -c $< + +.S.lo: + $(LIBTOOL) --mode=compile $(COMPILE) -c $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + +maintainer-clean-libtool: + +lib.a: $(lib_a_OBJECTS) $(lib_a_DEPENDENCIES) + -rm -f lib.a + $(AR) cru lib.a $(lib_a_OBJECTS) $(lib_a_LIBADD) + $(RANLIB) lib.a + +mostlyclean-noinstLTLIBRARIES: + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + +distclean-noinstLTLIBRARIES: + +maintainer-clean-noinstLTLIBRARIES: + +libiconv.la: $(libiconv_la_OBJECTS) $(libiconv_la_DEPENDENCIES) + $(LINK) $(libiconv_la_LDFLAGS) $(libiconv_la_OBJECTS) $(libiconv_la_LIBADD) $(LIBS) + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + here=`pwd` && cd $(srcdir) \ + && mkid -f$$here/ID $$unique $(LISP) + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ + || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + -rm -f TAGS ID + +maintainer-clean-tags: + +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) + +subdir = iconv + +distdir: $(DISTFILES) + @for file in $(DISTFILES); do \ + if test -f $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then 
\ + cp -pr $$d/$$file $(distdir)/$$file; \ + else \ + test -f $(distdir)/$$file \ + || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done +info-am: +info: info-am +dvi-am: +dvi: dvi-am +check-am: +check: check-am +installcheck-am: +installcheck: installcheck-am +install-info-am: +install-info: install-info-am +install-exec-am: +install-exec: install-exec-am + +install-data-am: +install-data: install-data-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-am +uninstall-am: +uninstall: uninstall-am +all-am: Makefile $(LIBRARIES) $(LTLIBRARIES) $(DATA) +all-redirect: all-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install +installdirs: + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: +mostlyclean-am: mostlyclean-noinstLIBRARIES mostlyclean-compile \ + mostlyclean-libtool mostlyclean-noinstLTLIBRARIES \ + mostlyclean-tags mostlyclean-generic + +mostlyclean: mostlyclean-am + +clean-am: clean-noinstLIBRARIES clean-compile clean-libtool \ + clean-noinstLTLIBRARIES clean-tags clean-generic \ + mostlyclean-am + +clean: clean-am + +distclean-am: distclean-noinstLIBRARIES distclean-compile \ + distclean-libtool distclean-noinstLTLIBRARIES \ + distclean-tags distclean-generic clean-am + -rm -f libtool + +distclean: distclean-am + +maintainer-clean-am: maintainer-clean-noinstLIBRARIES \ + maintainer-clean-compile maintainer-clean-libtool \ + maintainer-clean-noinstLTLIBRARIES \ + maintainer-clean-tags maintainer-clean-generic \ + distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." 
+ +maintainer-clean: maintainer-clean-am + +.PHONY: mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \ +clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \ +mostlyclean-compile distclean-compile clean-compile \ +maintainer-clean-compile mostlyclean-libtool distclean-libtool \ +clean-libtool maintainer-clean-libtool mostlyclean-noinstLTLIBRARIES \ +distclean-noinstLTLIBRARIES clean-noinstLTLIBRARIES \ +maintainer-clean-noinstLTLIBRARIES tags mostlyclean-tags distclean-tags \ +clean-tags maintainer-clean-tags distdir info-am info dvi-am dvi check \ +check-am installcheck-am installcheck install-info-am install-info \ +install-exec-am install-exec install-data-am install-data install-am \ +install uninstall-am uninstall all-redirect all-am all installdirs \ +mostlyclean-generic distclean-generic clean-generic \ +maintainer-clean-generic clean mostlyclean distclean maintainer-clean + + +objectlist.awk.in: $(noinst_LTLIBRARIES) + -rm -f objectlist.awk.in + for i in `ls *.lo` ; \ + do \ + echo $$i `pwd`/$$i >> objectlist.awk.in ; \ + done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/newlib/libc/sys/linux/iconv/categories.def b/newlib/libc/sys/linux/iconv/categories.def new file mode 100644 index 000000000..257fac73a --- /dev/null +++ b/newlib/libc/sys/linux/iconv/categories.def @@ -0,0 +1,344 @@ +/* Definition of all available locale categories and their items. -*- C -*- + Copyright (C) 1995-2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* These definitions are used by the locale-related files in the C library + and the programs `localedef' and `locale'. + + The general format of the descriptions is like this: + + DEFINE_CATEGORY (ID, name, ( items ), setlocale-postload) + + where items itself is an array of entries in the form + + { ID, name, standard, value-type, min, max } + + The usage of the load, check, output functions depends on the individual + program code which loads this file. + + The various value types for the items are `string', `stringarray', `stringlist', + `byte', `bytearray', `word', `wordarray', `wstring', `wstringarray', and `wstringlist'. These cover all possible values in the current + locale definitions. `min' and `max' can be individually used again.
*/ + /* Fallback for the setlocale-postload argument: NO_POSTLOAD expands to NULL unless the including file defined it first. */ +#ifndef NO_POSTLOAD +#define NO_POSTLOAD NULL +#endif + /* LC_COLLATE items; every one is marked std and the category passes NO_POSTLOAD as its postload argument. */ +DEFINE_CATEGORY +( + LC_COLLATE, "LC_COLLATE", + ( + DEFINE_ELEMENT (_NL_COLLATE_NRULES, "collate-nrules", std, word) + DEFINE_ELEMENT (_NL_COLLATE_RULESETS, "collate-rulesets", std, string) + DEFINE_ELEMENT (_NL_COLLATE_TABLEMB, "collate-tablemb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB, "collate-weightmb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB, "collate-extramb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB, "collate-indirectmb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_TABLEWC, "collate-tablewc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_WEIGHTWC, "collate-weightwc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_EXTRAWC, "collate-extrawc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_INDIRECTWC, "collate-indirectwc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZEMB, "collate-symb-hash-sizemb", std, word) + DEFINE_ELEMENT (_NL_COLLATE_SYMB_TABLEMB, "collate-symb-tablemb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_SYMB_EXTRAMB, "collate-symb-extramb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_CODESET, "collate-codeset", std, string) + ), NO_POSTLOAD) + + +/* The actual definition of ctype is meaningless here. It is hard coded in + the code because it has to be handled very specially. Only the names of + the functions and the value types are important. 
*/ +DEFINE_CATEGORY /* LC_CTYPE items; the last argument of this category names _nl_postload_ctype instead of NO_POSTLOAD. */ +( + LC_CTYPE, "LC_CTYPE", + ( + DEFINE_ELEMENT (_NL_CTYPE_CLASS, "ctype-class", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TOUPPER, "ctype-toupper", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TOLOWER, "ctype-tolower", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_CLASS32, "ctype-class32", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_CLASS_NAMES, "ctype-class-names", std, stringlist, 10, 32) + DEFINE_ELEMENT (_NL_CTYPE_MAP_NAMES, "ctype-map-names", std, stringlist, 2, 32) + DEFINE_ELEMENT (_NL_CTYPE_WIDTH, "ctype-width", std, bytearray) + DEFINE_ELEMENT (_NL_CTYPE_MB_CUR_MAX, "ctype-mb-cur-max", std, word) + DEFINE_ELEMENT (_NL_CTYPE_CODESET_NAME, "charmap", std, string) + DEFINE_ELEMENT (_NL_CTYPE_TOUPPER32, "ctype-toupper32", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TOLOWER32, "ctype-tolower32", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_CLASS_OFFSET, "ctype-class-offset", std, word) + DEFINE_ELEMENT (_NL_CTYPE_MAP_OFFSET, "ctype-map-offset", std, word) /* Input digits follow: a length word, then ten items numbered 0-9, first as string (_MB) and then, after a second length word, as wstring (_WC). */ + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS_MB_LEN, "ctype-indigits_mb-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS0_MB, "ctype-indigits0_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS1_MB, "ctype-indigits1_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS2_MB, "ctype-indigits2_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS3_MB, "ctype-indigits3_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS4_MB, "ctype-indigits4_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS5_MB, "ctype-indigits5_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS6_MB, "ctype-indigits6_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS7_MB, "ctype-indigits7_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS8_MB, "ctype-indigits8_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS9_MB, "ctype-indigits9_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS_WC_LEN, "ctype-indigits_wc-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS0_WC, "ctype-indigits0_wc", std, wstring) +
DEFINE_ELEMENT (_NL_CTYPE_INDIGITS1_WC, "ctype-indigits1_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS2_WC, "ctype-indigits2_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS3_WC, "ctype-indigits3_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS4_WC, "ctype-indigits4_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS5_WC, "ctype-indigits5_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS6_WC, "ctype-indigits6_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS7_WC, "ctype-indigits7_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS8_WC, "ctype-indigits8_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS9_WC, "ctype-indigits9_wc", std, wstring) /* Output digits follow: ten string items (_MB), then ten word items (_WC); no length item precedes them. */ + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT0_MB, "ctype-outdigit0_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT1_MB, "ctype-outdigit1_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT2_MB, "ctype-outdigit2_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT3_MB, "ctype-outdigit3_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT4_MB, "ctype-outdigit4_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT5_MB, "ctype-outdigit5_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT6_MB, "ctype-outdigit6_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT7_MB, "ctype-outdigit7_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT8_MB, "ctype-outdigit8_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT9_MB, "ctype-outdigit9_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT0_WC, "ctype-outdigit0_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT1_WC, "ctype-outdigit1_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT2_WC, "ctype-outdigit2_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT3_WC, "ctype-outdigit3_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT4_WC, "ctype-outdigit4_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT5_WC, "ctype-outdigit5_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT6_WC, "ctype-outdigit6_wc", std, word) +
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT7_WC, "ctype-outdigit7_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT8_WC, "ctype-outdigit8_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT9_WC, "ctype-outdigit9_wc", std, word) /* ctype-translit-* items follow: a table-size word, four wstring index/table items, then default-missing and ignore entries each preceded by a length word. NOTE(review): ctype-translit-ignore is the only translit table typed string rather than wstring - confirm this is intended. */ + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TAB_SIZE, "ctype-translit-tab-size", std, word) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_IDX, "ctype-translit-from-idx", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_TBL, "ctype-translit-from-tbl", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_IDX, "ctype-translit-to-idx", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_TBL, "ctype-translit-to-tbl", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN, "ctype-translit-default-missing-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING, "ctype-translit-default-missing", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE_LEN, "ctype-translit-ignore-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE, "ctype-translit-ignore", std, string) + ), _nl_postload_ctype) + + /* LC_MONETARY items; most byte items carry a trailing min, max pair (0..1, 0..2 or 0..4). */ +DEFINE_CATEGORY +( + LC_MONETARY, "LC_MONETARY", + ( + DEFINE_ELEMENT (INT_CURR_SYMBOL, "int_curr_symbol", std, string) + DEFINE_ELEMENT (CURRENCY_SYMBOL, "currency_symbol", std, string) + DEFINE_ELEMENT (MON_DECIMAL_POINT, "mon_decimal_point", std, string) + DEFINE_ELEMENT (MON_THOUSANDS_SEP, "mon_thousands_sep", std, string) + DEFINE_ELEMENT (MON_GROUPING, "mon_grouping", std, bytearray) + DEFINE_ELEMENT (POSITIVE_SIGN, "positive_sign", std, string) + DEFINE_ELEMENT (NEGATIVE_SIGN, "negative_sign", std, string) + DEFINE_ELEMENT (INT_FRAC_DIGITS, "int_frac_digits", std, byte) + DEFINE_ELEMENT (FRAC_DIGITS, "frac_digits", std, byte) + DEFINE_ELEMENT (P_CS_PRECEDES, "p_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (P_SEP_BY_SPACE, "p_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (N_CS_PRECEDES, "n_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (N_SEP_BY_SPACE, "n_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT
(P_SIGN_POSN, "p_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (N_SIGN_POSN, "n_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (__INT_P_CS_PRECEDES, "int_p_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (__INT_P_SEP_BY_SPACE, "int_p_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (__INT_N_CS_PRECEDES, "int_n_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (__INT_N_SEP_BY_SPACE, "int_n_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (__INT_P_SIGN_POSN, "int_p_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (__INT_N_SIGN_POSN, "int_n_sign_posn", std, byte, 0, 4) /* duo_* items follow: each uses the same value type and min, max pair as the like-named LC_MONETARY item without the duo_ prefix. */ + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_CURR_SYMBOL, "duo_int_curr_symbol", std, string) + DEFINE_ELEMENT (_NL_MONETARY_DUO_CURRENCY_SYMBOL, "duo_currency_symbol", std, string) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_FRAC_DIGITS, "duo_int_frac_digits", std, byte) + DEFINE_ELEMENT (_NL_MONETARY_DUO_FRAC_DIGITS, "duo_frac_digits", std, byte) + DEFINE_ELEMENT (_NL_MONETARY_DUO_P_CS_PRECEDES, "duo_p_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (_NL_MONETARY_DUO_P_SEP_BY_SPACE, "duo_p_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (_NL_MONETARY_DUO_N_CS_PRECEDES, "duo_n_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (_NL_MONETARY_DUO_N_SEP_BY_SPACE, "duo_n_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_P_CS_PRECEDES, "duo_int_p_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_P_SEP_BY_SPACE, "duo_int_p_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_N_CS_PRECEDES, "duo_int_n_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_N_SEP_BY_SPACE, "duo_int_n_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (_NL_MONETARY_DUO_P_SIGN_POSN, "duo_p_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (_NL_MONETARY_DUO_N_SIGN_POSN, "duo_n_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_P_SIGN_POSN, "duo_int_p_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_N_SIGN_POSN, "duo_int_n_sign_posn", std, byte, 0,
4) + DEFINE_ELEMENT (_NL_MONETARY_UNO_VALID_FROM, "uno_valid_from", std, word) + DEFINE_ELEMENT (_NL_MONETARY_UNO_VALID_TO, "uno_valid_to", std, word) + DEFINE_ELEMENT (_NL_MONETARY_DUO_VALID_FROM, "duo_valid_from", std, word) + DEFINE_ELEMENT (_NL_MONETARY_DUO_VALID_TO, "duo_valid_to", std, word) + DEFINE_ELEMENT (_NL_MONETARY_CONVERSION_RATE, "conversion_rate", std, wordarray, 2, 2) + DEFINE_ELEMENT (_NL_MONETARY_DECIMAL_POINT_WC, "monetary-decimal-point-wc", std, word) + DEFINE_ELEMENT (_NL_MONETARY_THOUSANDS_SEP_WC, "monetary-thousands-sep-wc", std, word) + DEFINE_ELEMENT (_NL_MONETARY_CODESET, "monetary-codeset", std, string) + ), NO_POSTLOAD) + + /* LC_NUMERIC items; all std, NO_POSTLOAD as the postload argument. */ +DEFINE_CATEGORY +( + LC_NUMERIC, "LC_NUMERIC", + ( + DEFINE_ELEMENT (DECIMAL_POINT, "decimal_point", std, string) + DEFINE_ELEMENT (THOUSANDS_SEP, "thousands_sep", std, string) + DEFINE_ELEMENT (GROUPING, "grouping", std, bytearray) + DEFINE_ELEMENT (_NL_NUMERIC_DECIMAL_POINT_WC, "numeric-decimal-point-wc", std, word) + DEFINE_ELEMENT (_NL_NUMERIC_THOUSANDS_SEP_WC, "numeric-thousands-sep-wc", std, word) + DEFINE_ELEMENT (_NL_NUMERIC_CODESET, "numeric-codeset", std, string) + + ), NO_POSTLOAD) + + /* LC_TIME items; the era and alt_digits entries are marked opt rather than std. */ +DEFINE_CATEGORY +( + LC_TIME, "LC_TIME", + ( + DEFINE_ELEMENT (ABDAY_1, "abday", std, stringarray, 7, 7) + DEFINE_ELEMENT (DAY_1, "day", std, stringarray, 7, 7) + DEFINE_ELEMENT (ABMON_1, "abmon", std, stringarray, 12, 12) + DEFINE_ELEMENT (MON_1, "mon", std, stringarray, 12, 12) + DEFINE_ELEMENT (AM_STR, "am_pm", std, stringarray, 2, 2) + DEFINE_ELEMENT (D_T_FMT, "d_t_fmt", std, string) + DEFINE_ELEMENT (D_FMT, "d_fmt", std, string) + DEFINE_ELEMENT (T_FMT, "t_fmt", std, string) + DEFINE_ELEMENT (T_FMT_AMPM, "t_fmt_ampm", std, string) + DEFINE_ELEMENT (ERA, "era", opt, stringlist, 0, 100) + DEFINE_ELEMENT (ERA_YEAR, "era_year", opt, string) + DEFINE_ELEMENT (ERA_D_FMT, "era_d_fmt", opt, string) + DEFINE_ELEMENT (ALT_DIGITS, "alt_digits", opt, stringlist, 100, 100) + DEFINE_ELEMENT (ERA_D_T_FMT, "era_d_t_fmt", opt, string)
+ DEFINE_ELEMENT (ERA_T_FMT, "era_t_fmt", opt, string) + DEFINE_ELEMENT (_NL_TIME_ERA_NUM_ENTRIES, "time-era-num-entries", opt, word) + DEFINE_ELEMENT (_NL_TIME_ERA_ENTRIES, "time-era-entries", opt, string) /* wide-* items follow: wstring/wstringarray counterparts of the narrow LC_TIME items, with the same min, max pairs. */ + DEFINE_ELEMENT (_NL_WABDAY_1, "wide-abday", std, wstringarray, 7, 7) + DEFINE_ELEMENT (_NL_WDAY_1, "wide-day", std, wstringarray, 7, 7) + DEFINE_ELEMENT (_NL_WABMON_1, "wide-abmon", std, wstringarray, 12, 12) + DEFINE_ELEMENT (_NL_WMON_1, "wide-mon", std, wstringarray, 12, 12) + DEFINE_ELEMENT (_NL_WAM_STR, "wide-am_pm", std, wstringarray, 2, 2) + DEFINE_ELEMENT (_NL_WD_T_FMT, "wide-d_t_fmt", std, wstring) + DEFINE_ELEMENT (_NL_WD_FMT, "wide-d_fmt", std, wstring) + DEFINE_ELEMENT (_NL_WT_FMT, "wide-t_fmt", std, wstring) + DEFINE_ELEMENT (_NL_WT_FMT_AMPM, "wide-t_fmt_ampm", std, wstring) + DEFINE_ELEMENT (_NL_WERA_YEAR, "wide-era_year", opt, wstring) + DEFINE_ELEMENT (_NL_WERA_D_FMT, "wide-era_d_fmt", opt, wstring) + DEFINE_ELEMENT (_NL_WALT_DIGITS, "wide-alt_digits", opt, wstringlist, 100, 100) /* min, max now 100, 100 like the narrow alt_digits item; the previous min of 1000 exceeded the max of 100. */ + DEFINE_ELEMENT (_NL_WERA_D_T_FMT, "wide-era_d_t_fmt", opt, wstring) + DEFINE_ELEMENT (_NL_WERA_T_FMT, "wide-era_t_fmt", opt, wstring) + DEFINE_ELEMENT (_NL_TIME_WEEK_NDAYS, "week-ndays", std, byte) + DEFINE_ELEMENT (_NL_TIME_WEEK_1STDAY, "week-1stday", std, word) + DEFINE_ELEMENT (_NL_TIME_WEEK_1STWEEK, "week-1stweek", std, byte) + DEFINE_ELEMENT (_NL_TIME_FIRST_WEEKDAY, "first_weekday", std, byte) + DEFINE_ELEMENT (_NL_TIME_FIRST_WORKDAY, "first_workday", std, byte) + DEFINE_ELEMENT (_NL_TIME_CAL_DIRECTION, "cal_direction", std, byte) + DEFINE_ELEMENT (_NL_TIME_TIMEZONE, "timezone", std, string) + DEFINE_ELEMENT (_DATE_FMT, "date_fmt", opt, string) + DEFINE_ELEMENT (_NL_W_DATE_FMT, "wide-date_fmt", opt, wstring) + DEFINE_ELEMENT (_NL_TIME_CODESET, "time-codeset", std, string) + ), _nl_postload_time) + + /* LC_MESSAGES items; yesstr and nostr are marked opt. */ +DEFINE_CATEGORY +( + LC_MESSAGES, "LC_MESSAGES", + ( + DEFINE_ELEMENT (YESEXPR, "yesexpr", std, string) + DEFINE_ELEMENT (NOEXPR, "noexpr", std, string) +
DEFINE_ELEMENT (YESSTR, "yesstr", opt, string) + DEFINE_ELEMENT (NOSTR, "nostr", opt, string) + DEFINE_ELEMENT (_NL_MESSAGES_CODESET, "messages-codeset", std, string) + ), NO_POSTLOAD) + /* LC_PAPER items: two word values and the codeset string. */ +DEFINE_CATEGORY +( + LC_PAPER, "LC_PAPER", + ( + DEFINE_ELEMENT (_NL_PAPER_HEIGHT, "height", std, word) + DEFINE_ELEMENT (_NL_PAPER_WIDTH, "width", std, word) + DEFINE_ELEMENT (_NL_PAPER_CODESET, "paper-codeset", std, string) + ), NO_POSTLOAD) + /* LC_NAME items; all are std strings. */ +DEFINE_CATEGORY +( + LC_NAME, "LC_NAME", + ( + DEFINE_ELEMENT (_NL_NAME_NAME_FMT, "name_fmt", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_GEN, "name_gen", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_MR, "name_mr", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_MRS, "name_mrs", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_MISS, "name_miss", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_MS, "name_ms", std, string) + DEFINE_ELEMENT (_NL_NAME_CODESET, "name-codeset", std, string) + ), NO_POSTLOAD) + /* LC_ADDRESS items; all std strings except country_num, which is a word. */ +DEFINE_CATEGORY +( + LC_ADDRESS, "LC_ADDRESS", + ( + DEFINE_ELEMENT (_NL_ADDRESS_POSTAL_FMT, "postal_fmt", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_NAME, "country_name", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_POST, "country_post", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_AB2, "country_ab2", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_AB3, "country_ab3", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_CAR, "country_car", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_NUM, "country_num", std, word) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_ISBN, "country_isbn", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_LANG_NAME, "lang_name", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_LANG_AB, "lang_ab", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_LANG_TERM, "lang_term", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_LANG_LIB, "lang_lib", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_CODESET, "address-codeset", std, string) + ), NO_POSTLOAD) + /* LC_TELEPHONE items; all are std strings. */ +DEFINE_CATEGORY +( + LC_TELEPHONE, "LC_TELEPHONE", + ( + DEFINE_ELEMENT
(_NL_TELEPHONE_TEL_INT_FMT, "tel_int_fmt", std, string) + DEFINE_ELEMENT (_NL_TELEPHONE_TEL_DOM_FMT, "tel_dom_fmt", std, string) + DEFINE_ELEMENT (_NL_TELEPHONE_INT_SELECT, "int_select", std, string) + DEFINE_ELEMENT (_NL_TELEPHONE_INT_PREFIX, "int_prefix", std, string) + DEFINE_ELEMENT (_NL_TELEPHONE_CODESET, "telephone-codeset", std, string) + ), NO_POSTLOAD) + /* LC_MEASUREMENT items: one byte value and the codeset string. */ +DEFINE_CATEGORY +( + LC_MEASUREMENT, "LC_MEASUREMENT", + ( + DEFINE_ELEMENT (_NL_MEASUREMENT_MEASUREMENT, "measurement", std, byte) + DEFINE_ELEMENT (_NL_MEASUREMENT_CODESET, "measurement-codeset", std, string) + ), NO_POSTLOAD) + /* LC_IDENTIFICATION items; all std strings except category, a stringarray of exactly 13 entries. */ +DEFINE_CATEGORY +( + LC_IDENTIFICATION, "LC_IDENTIFICATION", + ( + DEFINE_ELEMENT (_NL_IDENTIFICATION_TITLE, "title", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_SOURCE, "source", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_ADDRESS, "address", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_CONTACT, "contact", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_EMAIL, "email", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_TEL, "tel", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_FAX, "fax", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_LANGUAGE, "language", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_TERRITORY, "territory", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_AUDIENCE, "audience", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_APPLICATION, "application", std, string) /* item name spelling corrected to match the identifier (it previously lacked the first i). */ + DEFINE_ELEMENT (_NL_IDENTIFICATION_ABBREVIATION, "abbreviation", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_REVISION, "revision", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_DATE, "date", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_CATEGORY, "category", std, stringarray, 13, 13) + DEFINE_ELEMENT (_NL_IDENTIFICATION_CODESET, "identification-codeset", std, string) + ), NO_POSTLOAD) diff --git a/newlib/libc/sys/linux/iconv/dummy-repertoire.c b/newlib/libc/sys/linux/iconv/dummy-repertoire.c new file mode 100644 index 000000000..a195460f9 --- /dev/null
+++ b/newlib/libc/sys/linux/iconv/dummy-repertoire.c @@ -0,0 +1,37 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* For iconv we don't have to handle repertoire maps. Provide dummy + definitions to allow the use of linereader.c unchanged. */ +#include <repertoire.h> + + /* Dummy lookup by name: REPERTOIRE, NAME and LEN are all ignored and ILLEGAL_CHAR_VALUE is returned unconditionally. */ +uint32_t +repertoire_find_value (const struct repertoire_t *repertoire, const char *name, + size_t len) +{ + return ILLEGAL_CHAR_VALUE; +} + + /* Dummy lookup by code point: REPERTOIRE and UCS are ignored and NULL is returned unconditionally. */ +const char * +repertoire_find_symbol (const struct repertoire_t *repertoire, uint32_t ucs) +{ + return NULL; +} diff --git a/newlib/libc/sys/linux/iconv/gconv.c b/newlib/libc/sys/linux/iconv/gconv.c new file mode 100644 index 000000000..b413e26c5 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv.c @@ -0,0 +1,73 @@ +/* Convert characters in input buffer using conversion descriptor to + output buffer. + Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <assert.h> +#include <gconv_int.h> +#include <sys/param.h> +#include <dlfcn.h> +#include <stddef.h> + +int +internal_function +__gconv (__gconv_t cd, const unsigned char **inbuf, + const unsigned char *inbufend, unsigned char **outbuf, + unsigned char *outbufend, size_t *irreversible) +{ + size_t last_step; + int result; + + if (cd == (__gconv_t) -1L) + return __GCONV_ILLEGAL_DESCRIPTOR; + + last_step = cd->__nsteps - 1; + + assert (irreversible != NULL); + *irreversible = 0; + + cd->__data[last_step].__outbuf = outbuf != NULL ? *outbuf : NULL; + cd->__data[last_step].__outbufend = outbufend; + + if (inbuf == NULL || *inbuf == NULL) + /* We just flush. */ + result = cd->__steps->__fct (cd->__steps, cd->__data, NULL, NULL, NULL, + irreversible, + cd->__data[last_step].__outbuf == NULL ? 
2 : 1, 0); + else + { + const unsigned char *last_start; + + assert (outbuf != NULL && *outbuf != NULL); + + do + { + last_start = *inbuf; + result = cd->__steps->__fct (cd->__steps, cd->__data, inbuf, inbufend, + NULL, irreversible, 0, 0); + } + while (result == __GCONV_EMPTY_INPUT && last_start != *inbuf + && *inbuf + cd->__steps->__min_needed_from <= inbufend); + } + + if (outbuf != NULL && *outbuf != NULL) + *outbuf = cd->__data[last_step].__outbuf; + + return result; +} diff --git a/newlib/libc/sys/linux/iconv/gconv_builtin.c b/newlib/libc/sys/linux/iconv/gconv_builtin.c new file mode 100644 index 000000000..45bd4e7e0 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv_builtin.c @@ -0,0 +1,84 @@ +/* Table for builtin transformation mapping. + Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include <endian.h> +#include <limits.h> +#include <string.h> + +#include <gconv_int.h> + +#include <assert.h> + + +static struct builtin_map +{ + const char *name; + __gconv_fct fct; + + int min_needed_from; + int max_needed_from; + int min_needed_to; + int max_needed_to; + +} map[] = +{ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) \ + { \ + .name = Name, \ + .fct = Fct, \ + \ + .min_needed_from = MinF, \ + .max_needed_from = MaxF, \ + .min_needed_to = MinT, \ + .max_needed_to = MaxT \ + }, +#define BUILTIN_ALIAS(From, To) + +#include <gconv_builtin.h> +}; + + +void +internal_function +__gconv_get_builtin_trans (const char *name, struct __gconv_step *step) +{ + size_t cnt; + + for (cnt = 0; cnt < sizeof (map) / sizeof (map[0]); ++cnt) + if (strcmp (name, map[cnt].name) == 0) + break; + + assert (cnt < sizeof (map) / sizeof (map[0])); + + step->__fct = map[cnt].fct; + step->__init_fct = NULL; + step->__end_fct = NULL; + step->__shlib_handle = NULL; + step->__modname = NULL; + + step->__min_needed_from = map[cnt].min_needed_from; + step->__max_needed_from = map[cnt].max_needed_from; + step->__min_needed_to = map[cnt].min_needed_to; + step->__max_needed_to = map[cnt].max_needed_to; + + /* None of the builtin converters handles stateful encoding. */ + step->__stateful = 0; +} diff --git a/newlib/libc/sys/linux/iconv/gconv_builtin.h b/newlib/libc/sys/linux/iconv/gconv_builtin.h new file mode 100644 index 000000000..1a9d8a80f --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv_builtin.h @@ -0,0 +1,115 @@ +/* Builtin transformations. + Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +BUILTIN_ALIAS ("UCS4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("UCS-4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("UCS-4BE//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("CSUCS4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("ISO-10646//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("10646-1:1993//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("10646-1:1993/UCS4/", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("OSF00010104//", "ISO-10646/UCS4/") /* level 1 */ +BUILTIN_ALIAS ("OSF00010105//", "ISO-10646/UCS4/") /* level 2 */ +BUILTIN_ALIAS ("OSF00010106//", "ISO-10646/UCS4/") /* level 3 */ + +BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4", + __gconv_transform_internal_ucs4, 4, 4, 4, 4) +BUILTIN_TRANSFORMATION ("ISO-10646/UCS4/", "INTERNAL", 1, "=ucs4->INTERNAL", + __gconv_transform_ucs4_internal, 4, 4, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "UCS-4LE//", 1, "=INTERNAL->ucs4le", + __gconv_transform_internal_ucs4le, 4, 4, 4, 4) +BUILTIN_TRANSFORMATION ("UCS-4LE//", "INTERNAL", 1, "=ucs4le->INTERNAL", + __gconv_transform_ucs4le_internal, 4, 4, 4, 4) + +BUILTIN_ALIAS ("WCHAR_T//", "INTERNAL") + +BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("ISO-IR-193//", "ISO-10646/UTF8/") +BUILTIN_ALIAS 
("OSF05010001//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("ISO-10646/UTF-8/", "ISO-10646/UTF8/") + +BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UTF8/", 1, "=INTERNAL->utf8", + __gconv_transform_internal_utf8, 4, 4, 1, 6) + +BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "INTERNAL", 1, "=utf8->INTERNAL", + __gconv_transform_utf8_internal, 1, 6, 4, 4) + +BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("OSF00010100//", "ISO-10646/UCS2/") /* level 1 */ +BUILTIN_ALIAS ("OSF00010101//", "ISO-10646/UCS2/") /* level 2 */ +BUILTIN_ALIAS ("OSF00010102//", "ISO-10646/UCS2/") /* level 3 */ + +BUILTIN_TRANSFORMATION ("ISO-10646/UCS2/", "INTERNAL", 1, "=ucs2->INTERNAL", + __gconv_transform_ucs2_internal, 2, 2, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS2/", 1, "=INTERNAL->ucs2", + __gconv_transform_internal_ucs2, 4, 4, 2, 2) + + +BUILTIN_ALIAS ("ANSI_X3.4//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ISO-IR-6//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ANSI_X3.4-1986//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ISO_646.IRV:1991//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ASCII//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ISO646-US//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("US-ASCII//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("US//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("IBM367//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("CP367//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("CSASCII//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("OSF00010020//", "ANSI_X3.4-1968//") + +BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "INTERNAL", 1, "=ascii->INTERNAL", + __gconv_transform_ascii_internal, 4, 4, 1, 1) + +BUILTIN_TRANSFORMATION ("INTERNAL", "ANSI_X3.4-1968//", 1, "=INTERNAL->ascii", + __gconv_transform_internal_ascii, 4, 4, 1, 1) + + +#if BYTE_ORDER == BIG_ENDIAN +BUILTIN_ALIAS ("UNICODEBIG//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("UCS-2BE//", "ISO-10646/UCS2/") + +BUILTIN_ALIAS ("UCS-2LE//", "UNICODELITTLE//") + +BUILTIN_TRANSFORMATION ("UNICODELITTLE//", "INTERNAL", 
1, + "=ucs2reverse->INTERNAL", + __gconv_transform_ucs2reverse_internal, 2, 2, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODELITTLE//", 1, + "=INTERNAL->ucs2reverse", + __gconv_transform_internal_ucs2reverse, 4, 4, 2, 2) +#else +BUILTIN_ALIAS ("UNICODELITTLE//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("UCS-2LE//", "ISO-10646/UCS2/") + +BUILTIN_ALIAS ("UCS-2BE//", "UNICODEBIG//") + +BUILTIN_TRANSFORMATION ("UNICODEBIG//", "INTERNAL", 1, + "=ucs2reverse->INTERNAL", + __gconv_transform_ucs2reverse_internal, 2, 2, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1, + "=INTERNAL->ucs2reverse", + __gconv_transform_internal_ucs2reverse, 4, 4, 2, 2) +#endif diff --git a/newlib/libc/sys/linux/iconv/gconv_cache.c b/newlib/libc/sys/linux/iconv/gconv_cache.c new file mode 100644 index 000000000..c887be970 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv_cache.c @@ -0,0 +1,459 @@ +/* Cache handling for iconv modules. + Copyright (C) 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include <dlfcn.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <gconv_int.h> +#include <iconvconfig.h> + +#include "hash-string.h" + +void *__gconv_cache; +static size_t cache_size; +static int cache_malloced; + + +int +internal_function +__gconv_load_cache (void) +{ + int fd; + struct stat64 st; + struct gconvcache_header *header; + + /* We cannot use the cache if the GCONV_PATH environment variable is + set. */ + __gconv_path_envvar = getenv ("GCONV_PATH"); + if (__gconv_path_envvar != NULL) + return -1; + + /* See whether the cache file exists. */ + fd = open (GCONV_MODULES_CACHE, O_RDONLY); + if (__builtin_expect (fd, 0) == -1) + /* Not available. */ + return -1; + +#ifdef _POSIX_ASYNC_IO + /* Get information about the file. */ + if (__builtin_expect (fstat64 (fd, &st), 0) < 0 + /* We do not have to start looking at the file if it cannot contain + at least the cache header. */ + || st.st_size < sizeof (struct gconvcache_header)) + { +#endif + close_and_exit: + close (fd); + return -1; +#ifdef _POSIX_ASYNC_IO + } +#endif + + /* Make the file content available. */ + cache_size = st.st_size; +#ifdef _POSIX_MAPPED_FILES + __gconv_cache = mmap (NULL, cache_size, PROT_READ, MAP_SHARED, fd, 0); + if (__builtin_expect (__gconv_cache == MAP_FAILED, 0)) +#endif + { + size_t already_read; + + __gconv_cache = malloc (cache_size); + if (__gconv_cache == NULL) + goto close_and_exit; + + already_read = 0; + do + { + ssize_t n = read (fd, (char *) __gconv_cache + already_read, + cache_size - already_read); + if (__builtin_expect (n, 0) == -1) + { + free (__gconv_cache); + __gconv_cache = NULL; + goto close_and_exit; + } + + already_read += n; + } + while (already_read < cache_size); + + cache_malloced = 1; + } + + /* We don't need the file descriptor anymore. */ + close (fd); + + /* Check the consistency. 
*/ + header = (struct gconvcache_header *) __gconv_cache; + if (__builtin_expect (header->magic, GCONVCACHE_MAGIC) != GCONVCACHE_MAGIC + || __builtin_expect (header->string_offset >= cache_size, 0) + || __builtin_expect (header->hash_offset >= cache_size, 0) + || __builtin_expect (header->hash_size == 0, 0) + || __builtin_expect ((header->hash_offset + + header->hash_size * sizeof (struct hash_entry)) + > cache_size, 0) + || __builtin_expect (header->module_offset >= cache_size, 0) + || __builtin_expect (header->otherconv_offset > cache_size, 0)) + { + if (cache_malloced) + { + free (__gconv_cache); + cache_malloced = 0; + } +#ifdef _POSIX_MAPPED_FILES + else + __munmap (__gconv_cache, cache_size); +#endif + __gconv_cache = NULL; + + return -1; + } + + /* That worked. */ + return 0; +} + + +static int +internal_function +find_module_idx (const char *str, size_t *idxp) +{ + unsigned int idx; + unsigned int hval; + unsigned int hval2; + const struct gconvcache_header *header; + const char *strtab; + const struct hash_entry *hashtab; + unsigned int limit; + + header = (const struct gconvcache_header *) __gconv_cache; + strtab = (char *) __gconv_cache + header->string_offset; + hashtab = (struct hash_entry *) ((char *) __gconv_cache + + header->hash_offset); + + hval = hash_string (str); + idx = hval % header->hash_size; + hval2 = 1 + hval % (header->hash_size - 2); + + limit = cache_size - header->string_offset; + while (hashtab[idx].string_offset != 0) + if (hashtab[idx].string_offset < limit + && strcmp (str, strtab + hashtab[idx].string_offset) == 0) + { + *idxp = hashtab[idx].module_idx; + return 0; + } + else + if ((idx += hval2) >= header->hash_size) + idx -= header->hash_size; + + /* Nothing found. 
*/ + return -1; +} + + +#ifndef STATIC_GCONV +static int +internal_function +find_module (const char *directory, const char *filename, + struct __gconv_step *result) +{ + size_t dirlen = strlen (directory); + size_t fnamelen = strlen (filename) + 1; + char fullname[dirlen + fnamelen]; + int status = __GCONV_NOCONV; + char *tmp; + + tmp = mempcpy (fullname, directory, dirlen); + tmp += dirlen; + memcpy (tmp, filename, fnamelen); + + result->__shlib_handle = __gconv_find_shlib (fullname); + if (result->__shlib_handle != NULL) + { + status = __GCONV_OK; + + result->__modname = NULL; + result->__fct = result->__shlib_handle->fct; + result->__init_fct = result->__shlib_handle->init_fct; + result->__end_fct = result->__shlib_handle->end_fct; + + result->__data = NULL; + if (result->__init_fct != NULL) + status = result->__init_fct (result); + } + + return status; +} +#endif + + +int +internal_function +__gconv_compare_alias_cache (const char *name1, const char *name2, int *result) +{ + size_t name1_idx; + size_t name2_idx; + + if (__gconv_cache == NULL) + return -1; + + if (find_module_idx (name1, &name1_idx) != 0 + || find_module_idx (name2, &name2_idx) != 0) + *result = strcmp (name1, name2); + else + *result = (int) (name1_idx - name2_idx); + + return 0; +} + + +int +internal_function +__gconv_lookup_cache (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, int flags) +{ + const struct gconvcache_header *header; + const char *strtab; + size_t fromidx; + size_t toidx; + const struct module_entry *modtab; + const struct module_entry *from_module; + const struct module_entry *to_module; + struct __gconv_step *result; + + if (__gconv_cache == NULL) + /* We have no cache available. 
*/ + return __GCONV_NODB; + + header = (const struct gconvcache_header *) __gconv_cache; + strtab = (char *) __gconv_cache + header->string_offset; + modtab = (const struct module_entry *) ((char *) __gconv_cache + + header->module_offset); + + if (find_module_idx (fromset, &fromidx) != 0 + || (header->module_offset + (fromidx + 1) * sizeof (struct module_entry) + > cache_size)) + return __GCONV_NOCONV; + from_module = &modtab[fromidx]; + + if (find_module_idx (toset, &toidx) != 0 + || (header->module_offset + (toidx + 1) * sizeof (struct module_entry) + > cache_size)) + return __GCONV_NOCONV; + to_module = &modtab[toidx]; + + /* Avoid copy-only transformations if the user requests. */ + if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0) && fromidx == toidx) + return __GCONV_NOCONV; + + /* If there are special conversions available examine them first. */ + if (fromidx != 0 && toidx != 0 + && __builtin_expect (from_module->extra_offset, 0) != 0) + { + /* Search through the list to see whether there is a module + matching the destination character set. */ + const struct extra_entry *extra; + + /* Note the -1. This is due to the offset added in iconvconfig. + See there for more explanations. */ + extra = (const struct extra_entry *) ((char *) __gconv_cache + + header->otherconv_offset + + from_module->extra_offset - 1); + while (extra->module_cnt != 0 + && extra->module[extra->module_cnt - 1].outname_offset != toidx) + extra = (const struct extra_entry *) ((char *) extra + + sizeof (struct extra_entry) + + (extra->module_cnt + * sizeof (struct extra_entry_module))); + + if (extra->module_cnt != 0) + { + /* Use the extra module. First determine how many steps. 
*/ + char *fromname; + int idx; + + *nsteps = extra->module_cnt; + *handle = result = + (struct __gconv_step *) malloc (extra->module_cnt + * sizeof (struct __gconv_step)); + if (result == NULL) + return __GCONV_NOMEM; + + fromname = (char *) strtab + from_module->canonname_offset; + idx = 0; + do + { + result[idx].__from_name = fromname; + fromname = result[idx].__to_name = + (char *) strtab + modtab[extra->module[idx].outname_offset].canonname_offset; + + result[idx].__counter = 1; + result[idx].__data = NULL; + +#ifndef STATIC_GCONV + if (strtab[extra->module[idx].dir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res; + + res = find_module (strtab + extra->module[idx].dir_offset, + strtab + extra->module[idx].name_offset, + &result[idx]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + free (result); + goto try_internal; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + + extra->module[idx].name_offset, + &result[idx]); + + } + while (++idx < extra->module_cnt); + + return __GCONV_OK; + } + } + + try_internal: + /* See whether we can convert via the INTERNAL charset. */ + if ((fromidx != 0 && __builtin_expect (from_module->fromname_offset, 1) == 0) + || (toidx != 0 && __builtin_expect (to_module->toname_offset, 1) == 0) + || (fromidx == 0 && toidx == 0)) + /* Not possible. Nothing we can do. */ + return __GCONV_NOCONV; + + /* We will use up to two modules. Always allocate room for two. */ + result = (struct __gconv_step *) malloc (2 * sizeof (struct __gconv_step)); + if (result == NULL) + return __GCONV_NOMEM; + + *handle = result; + *nsteps = 0; + + /* Generate data structure for conversion to INTERNAL. 
*/ + if (fromidx != 0) + { + result[0].__from_name = (char *) strtab + from_module->canonname_offset; + result[0].__to_name = (char *) "INTERNAL"; + + result[0].__counter = 1; + result[0].__data = NULL; + +#ifndef STATIC_GCONV + if (strtab[from_module->todir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res = find_module (strtab + from_module->todir_offset, + strtab + from_module->toname_offset, + &result[0]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + free (result); + return res; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + from_module->toname_offset, + &result[0]); + + ++*nsteps; + } + + /* Generate data structure for conversion from INTERNAL. */ + if (toidx != 0) + { + int idx = *nsteps; + + result[idx].__from_name = (char *) "INTERNAL"; + result[idx].__to_name = (char *) strtab + to_module->canonname_offset; + + result[idx].__counter = 1; + result[idx].__data = NULL; + +#ifndef STATIC_GCONV + if (strtab[to_module->fromdir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res = find_module (strtab + to_module->fromdir_offset, + strtab + to_module->fromname_offset, + &result[idx]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + if (idx != 0) + __gconv_release_step (&result[0]); + free (result); + return res; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + to_module->fromname_offset, + &result[idx]); + + ++*nsteps; + } + + return __GCONV_OK; +} + + +/* Free memory allocated for the transformation record. */ +void +internal_function +__gconv_release_cache (struct __gconv_step *steps, size_t nsteps) +{ + if (__gconv_cache != NULL) + /* The only thing we have to deallocate is the record with the + steps. */ + free (steps); +} + + +/* Free all resources if necessary. 
*/ +static void __attribute__ ((unused)) +free_mem (void) +{ + if (cache_malloced) + free (__gconv_cache); +#ifdef _POSIX_MAPPED_FILES + else + __munmap (__gconv_cache, cache_size); +#endif +} + +text_set_element (__libc_subfreeres, free_mem); diff --git a/newlib/libc/sys/linux/iconv/gconv_charset.h b/newlib/libc/sys/linux/iconv/gconv_charset.h new file mode 100644 index 000000000..706669825 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv_charset.h @@ -0,0 +1,73 @@ +/* Charset name normalization. + Copyright (C) 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <ctype.h> +#include <locale.h> + + +static inline void +strip (char *wp, const char *s) +{ + int slash_count = 0; + char old_locale[20], *old_locale_p; + + /* Set locale to default C locale. 
*/ + old_locale_p = setlocale(LC_ALL, "C"); + strncpy(old_locale, old_locale_p, 20); + while (*s != '\0') + { + if (isalnum (*s) + || *s == '_' || *s == '-' || *s == '.') + *wp++ = toupper (*s); + else if (*s == '/') + { + if (++slash_count == 3) + break; + *wp++ = '/'; + } + ++s; + } + + while (slash_count++ < 2) + *wp++ = '/'; + + *wp = '\0'; + setlocale(LC_ALL, old_locale); +} + + +static char * __attribute__ ((unused)) +upstr (char *dst, const char *str) +{ + char *cp = dst; + char old_locale[20], *old_locale_p; + /* Set locale to default C locale. */ + old_locale_p = setlocale(LC_ALL, "C"); + strncpy(old_locale, old_locale_p, 20); + while ((*cp++ = toupper (*str++)) != '\0') + /* nothing */; + setlocale(LC_ALL, old_locale); + return dst; +} + + +/* If NAME is an codeset alias expand it. */ +extern int __gconv_compare_alias (const char *name1, const char *name2) + internal_function; diff --git a/newlib/libc/sys/linux/iconv/gconv_close.c b/newlib/libc/sys/linux/iconv/gconv_close.c new file mode 100644 index 000000000..cc0ecd650 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv_close.c @@ -0,0 +1,65 @@ +/* Release any resource associated with given conversion descriptor. + Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <stdlib.h> + +#include <gconv_int.h> + + +int +internal_function +__gconv_close (__gconv_t cd) +{ + struct __gconv_step *srunp; + struct __gconv_step_data *drunp; + size_t nsteps; + + /* Free all resources by calling destructor functions and release + the implementations. */ + srunp = cd->__steps; + nsteps = cd->__nsteps; + drunp = cd->__data; + do + { + struct __gconv_trans_data *transp; + + transp = drunp->__trans; + while (transp != NULL) + { + struct __gconv_trans_data *curp = transp; + transp = transp->__next; + + if (__builtin_expect (curp->__trans_end_fct != NULL, 0)) + curp->__trans_end_fct (curp->__data); + + free (curp); + } + + if (!(drunp->__flags & __GCONV_IS_LAST) && drunp->__outbuf != NULL) + free (drunp->__outbuf); + } + while (!((drunp++)->__flags & __GCONV_IS_LAST)); + + /* Free the data allocated for the descriptor. */ + free (cd); + + /* Close the participating modules. */ + return __gconv_close_transform (srunp, nsteps); +} diff --git a/newlib/libc/sys/linux/iconv/gconv_conf.c b/newlib/libc/sys/linux/iconv/gconv_conf.c new file mode 100644 index 000000000..c714102a3 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv_conf.c @@ -0,0 +1,680 @@ +/* Handle configuration data. + Copyright (C) 1997,98,99,2000,2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <search.h> +#include <stddef.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/param.h> + +#include <dirent.h> +#include <gconv_int.h> + +/* This is the default path where we look for module lists. */ +static const char default_gconv_path[] = GCONV_PATH; + +/* The path elements, as determined by the __gconv_get_path function. + All path elements end in a slash. */ +struct path_elem *__gconv_path_elem; +/* Maximum length of a single path element in __gconv_path_elem. */ +size_t __gconv_max_path_elem_len; + +/* We use the following struct if we couldn't allocate memory. */ +static const struct path_elem empty_path_elem; + +/* Name of the file containing the module information in the directories + along the path. */ +static const char gconv_conf_filename[] = "gconv-modules"; + +/* Filename extension for the modules. */ +#ifndef MODULE_EXT +# define MODULE_EXT ".so" +#endif +static const char gconv_module_ext[] = MODULE_EXT; + +/* We have a few builtin transformations. 
*/ +static struct gconv_module builtin_modules[] = +{ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) \ + { \ + from_string: From, \ + to_string: To, \ + cost_hi: Cost, \ + cost_lo: INT_MAX, \ + module_name: Name \ + }, +#define BUILTIN_ALIAS(From, To) + +#include "gconv_builtin.h" +}; + +#undef BUILTIN_TRANSFORMATION +#undef BUILTIN_ALIAS + +static const char *builtin_aliases[] = +{ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) +#define BUILTIN_ALIAS(From, To) From " " To, + +#include "gconv_builtin.h" +}; + +#ifdef USE_IN_LIBIO +# include <libio/libioP.h> +# define __getdelim(line, len, c, fp) _IO_getdelim (line, len, c, fp) +#endif + + +/* Value of the GCONV_PATH environment variable. */ +const char *__gconv_path_envvar; + + +/* Test whether there is already a matching module known. */ +static int +internal_function +detect_conflict (const char *alias) +{ + struct gconv_module *node = __gconv_modules_db; + + while (node != NULL) + { + int cmpres = strcmp (alias, node->from_string); + + if (cmpres == 0) + /* We have a conflict. */ + return 1; + else if (cmpres < 0) + node = node->left; + else + node = node->right; + } + + return node != NULL; +} + + +/* Add new alias. */ +static inline void +add_alias (char *rp, void *modules) +{ + /* We now expect two more string. The strings are normalized + (converted to UPPER case) and strored in the alias database. */ + struct gconv_alias *new_alias; + char *from, *to, *wp; + char old_locale[20], *old_locale_p; + + /* Set locale to default C locale. */ + old_locale_p = setlocale(LC_ALL, "C"); + strncpy(old_locale, old_locale_p, 20); + + while (isspace (*rp)) + ++rp; + from = wp = rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (*rp == '\0') + { + setlocale(LC_ALL, old_locale); + /* There is no `to' string on the line. Ignore it. 
*/ + return; + } + *wp++ = '\0'; + to = ++rp; + while (isspace (*rp)) + ++rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (to == wp) + { + setlocale(LC_ALL, old_locale); + /* No `to' string, ignore the line. */ + return; + } + *wp++ = '\0'; + + /* Test whether this alias conflicts with any available module. */ + if (detect_conflict (from)) + { + setlocale(LC_ALL, old_locale); + /* It does conflict, don't add the alias. */ + return; + } + + new_alias = (struct gconv_alias *) malloc (sizeof (struct gconv_alias) + (wp - from)); + if (new_alias != NULL) + { + void **inserted; + + new_alias->fromname = memcpy ((char *) new_alias + + sizeof (struct gconv_alias), + from, wp - from); + new_alias->toname = new_alias->fromname + (to - from); + + inserted = (void **) tsearch (new_alias, &__gconv_alias_db, + __gconv_alias_compare); + if (inserted == NULL || *inserted != new_alias) + /* Something went wrong, free this entry. */ + free (new_alias); + } + setlocale(LC_ALL, old_locale); +} + + +/* Insert a data structure for a new module in the search tree. */ +static inline void +internal_function +insert_module (struct gconv_module *newp, int tobefreed) +{ + struct gconv_module **rootp = &__gconv_modules_db; + + while (*rootp != NULL) + { + struct gconv_module *root = *rootp; + int cmpres; + + cmpres = strcmp (newp->from_string, root->from_string); + if (cmpres == 0) + { + /* Both strings are identical. Insert the string at the + end of the `same' list if it is not already there. */ + while (strcmp (newp->from_string, root->from_string) != 0 + || strcmp (newp->to_string, root->to_string) != 0) + { + rootp = &root->same; + root = *rootp; + if (root == NULL) + break; + } + + if (root != NULL) + { + /* This is a no new conversion. But maybe the cost is + better. 
*/ + if (newp->cost_hi < root->cost_hi + || (newp->cost_hi == root->cost_hi + && newp->cost_lo < root->cost_lo)) + { + newp->left = root->left; + newp->right = root->right; + newp->same = root->same; + *rootp = newp; + + free (root); + } + else if (tobefreed) + free (newp); + return; + } + + break; + } + else if (cmpres < 0) + rootp = &root->left; + else + rootp = &root->right; + } + + /* Plug in the new node here. */ + *rootp = newp; +} + + +/* Add new module. */ +static void +internal_function +add_module (char *rp, const char *directory, size_t dir_len, void **modules, + size_t *nmodules, int modcounter) +{ + /* We expect now + 1. `from' name + 2. `to' name + 3. filename of the module + 4. an optional cost value + */ + struct gconv_alias fake_alias; + struct gconv_module *new_module; + char *from, *to, *module, *wp; + int need_ext; + int cost_hi; + char old_locale[20], *old_locale_p; + char *old; + size_t len; + char *new; + + /* Set locale to default C locale. */ + old_locale_p = setlocale(LC_ALL, "C"); + strncpy(old_locale, old_locale_p, 20); + + while (isspace (*rp)) + ++rp; + from = rp; + while (*rp != '\0' && !isspace (*rp)) + { + *rp = toupper (*rp); + ++rp; + } + if (*rp == '\0') + { + setlocale(LC_ALL, old_locale); + return; + } + *rp++ = '\0'; + to = wp = rp; + while (isspace (*rp)) + { + setlocale(LC_ALL, old_locale); + ++rp; + } + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (*rp == '\0') + { + setlocale(LC_ALL, old_locale); + return; + } + *wp++ = '\0'; + do + ++rp; + while (isspace (*rp)); + module = wp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = *rp++; + if (*rp == '\0') + { + /* There is no cost, use one by default. */ + *wp++ = '\0'; + cost_hi = 1; + } + else + { + /* There might be a cost value. */ + char *endp; + + *wp++ = '\0'; + cost_hi = strtol (rp, &endp, 10); + if (rp == endp || cost_hi < 1) + /* No useful information. 
*/ + cost_hi = 1; + } + + if (module[0] == '\0') + { + setlocale(LC_ALL, old_locale); + /* No module name given. */ + return; + } + if (module[0] == '/') + dir_len = 0; + + /* See whether we must add the ending. */ + need_ext = 0; + if (wp - module < (ptrdiff_t) sizeof (gconv_module_ext) + || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, + sizeof (gconv_module_ext)) != 0) + /* We must add the module extension. */ + need_ext = sizeof (gconv_module_ext) - 1; + + /* See whether we have already an alias with this name defined. */ + old = from; + len = strnlen (old, to - from); + new = (char *) alloca (len + 1); + new[len] = '\0'; + fake_alias.fromname = (char *) memcpy (new, old, len); + + if (tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) != NULL) + { + setlocale(LC_ALL, old_locale); + /* This module duplicates an alias. */ + return; + } + + new_module = (struct gconv_module *) calloc (1, + sizeof (struct gconv_module) + + (wp - from) + + dir_len + need_ext); + if (new_module != NULL) + { + char *tmp; + + new_module->from_string = tmp = (char *) (new_module + 1); + tmp = memcpy (tmp, from, to - from); + tmp += (to - from); + + new_module->to_string = tmp; + tmp = memcpy (tmp, to, module - to); + tmp += (module - to); + + new_module->cost_hi = cost_hi; + new_module->cost_lo = modcounter; + + new_module->module_name = tmp; + + if (dir_len != 0) + { + tmp = memcpy (tmp, directory, dir_len); + tmp += dir_len; + } + + tmp = memcpy (tmp, module, wp - module); + tmp += (wp - module); + + if (need_ext) + memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext)); + + /* Now insert the new module data structure in our search tree. */ + insert_module (new_module, 1); + } + setlocale(LC_ALL, old_locale); +} + + +/* Read the next configuration file. 
*/ +static void +internal_function +read_conf_file (const char *filename, const char *directory, size_t dir_len, + void **modules, size_t *nmodules) +{ + FILE *fp = fopen (filename, "r"); + char *line = NULL; + size_t line_len = 0; + static int modcounter; + char old_locale[20], *old_locale_p; + + /* Don't complain if a file is not present or readable, simply silently + ignore it. */ + if (fp == NULL) + return; + + /* Set locale to default C locale. */ + old_locale_p = setlocale(LC_ALL, "C"); + strncpy(old_locale, old_locale_p, 20); + + /* Process the known entries of the file. Comments start with `#' and + end with the end of the line. Empty lines are ignored. */ + while (!feof (fp)) + { + char *rp, *endp, *word; + ssize_t n = __getdelim (&line, &line_len, '\n', fp); + if (n < 0) + /* An error occurred. */ + break; + + rp = line; + /* Terminate the line (excluding comments or newline) by an NUL byte + to simplify the following code. */ + endp = strchr (rp, '#'); + if (endp != NULL) + *endp = '\0'; + else + if (rp[n - 1] == '\n') + rp[n - 1] = '\0'; + + while (isspace (*rp)) + ++rp; + + /* If this is an empty line go on with the next one. */ + if (rp == endp) + continue; + + word = rp; + while (*rp != '\0' && !isspace (*rp)) + ++rp; + + if (rp - word == sizeof ("alias") - 1 + && memcmp (word, "alias", sizeof ("alias") - 1) == 0) + add_alias (rp, *modules); + else if (rp - word == sizeof ("module") - 1 + && memcmp (word, "module", sizeof ("module") - 1) == 0) + add_module (rp, directory, dir_len, modules, nmodules, modcounter++); + /* else */ + /* Otherwise ignore the line. */ + } + + free (line); + + fclose (fp); + + setlocale(LC_ALL, old_locale); +} + + +/* Determine the directories we are looking for data in. */ +void +__gconv_get_path (void) +{ + struct path_elem *result; + __LOCK_INIT(static, path_lock); + +#ifdef HAVE_DD_LOCK + __lock_acquire(path_lock); +#endif + + /* Make sure there wasn't a second thread doing it already. 
*/ + result = (struct path_elem *) __gconv_path_elem; + if (result == NULL) + { + /* Determine the complete path first. */ + char *gconv_path; + size_t gconv_path_len; + char *elem; + char *oldp; + char *cp; + int nelems; + char *cwd; + size_t cwdlen; + + if (__gconv_path_envvar == NULL) + { + char * old = default_gconv_path; + size_t len = strlen (old) + 1; + char *new = (char *) alloca (len); + + /* No user-defined path. Make a modifiable copy of the + default path. */ + gconv_path = (char *) memcpy (new, old, len); + gconv_path_len = sizeof (default_gconv_path); + cwd = NULL; + cwdlen = 0; + } + else + { + /* Append the default path to the user-defined path. */ + size_t user_len = strlen (__gconv_path_envvar); + char *tmp; + + gconv_path_len = user_len + 1 + sizeof (default_gconv_path); + gconv_path = alloca (gconv_path_len); + tmp = memcpy (gconv_path, __gconv_path_envvar, + user_len); + tmp += user_len; + memcpy (tmp, ":", 1); + tmp += 1; + memcpy (tmp, + default_gconv_path, sizeof (default_gconv_path)); + + cwd = getcwd (NULL, 0); + cwdlen = strlen (cwd); + } + assert (default_gconv_path[0] == '/'); + + /* In a first pass we calculate the number of elements. */ + oldp = NULL; + cp = strchr (gconv_path, ':'); + nelems = 1; + while (cp != NULL) + { + if (cp != oldp + 1) + ++nelems; + oldp = cp; + cp = strchr (cp + 1, ':'); + } + + /* Allocate the memory for the result. */ + result = (struct path_elem *) malloc ((nelems + 1) + * sizeof (struct path_elem) + + gconv_path_len + nelems + + (nelems - 1) * (cwdlen + 1)); + if (result != NULL) + { + char *strspace = (char *) &result[nelems + 1]; + int n = 0; + + /* Separate the individual parts. 
*/ + __gconv_max_path_elem_len = 0; + elem = strtok_r (gconv_path, ":", &gconv_path); + assert (elem != NULL); + do + { + result[n].name = strspace; + if (elem[0] != '/') + { + assert (cwd != NULL); + strspace = memcpy (strspace, cwd, cwdlen); + strspace += cwdlen; + *strspace++ = '/'; + } + strspace = strcpy (strspace, elem); + while(*strspace != '\0') strspace++; + + if (strspace[-1] != '/') + *strspace++ = '/'; + + result[n].len = strspace - result[n].name; + if (result[n].len > __gconv_max_path_elem_len) + __gconv_max_path_elem_len = result[n].len; + + *strspace++ = '\0'; + ++n; + } + while ((elem = strtok_r (NULL, ":", &gconv_path)) != NULL); + + result[n].name = NULL; + result[n].len = 0; + } + + __gconv_path_elem = result ?: (struct path_elem *) &empty_path_elem; + + if (cwd != NULL) + free (cwd); + } + +#ifdef HAVE_DD_LOCK + __lock_release(path_lock); +#endif +} + + +/* Read all configuration files found in the user-specified and the default + path. */ +void +__gconv_read_conf (void) +{ + void *modules = NULL; + size_t nmodules = 0; + int save_errno = errno; + size_t cnt; + char *filename; + char *tmp; + const char *elem; + size_t elem_len; + + /* First see whether we should use the cache. */ + if (__gconv_load_cache () == 0) + { + /* Yes, we are done. */ + __set_errno (save_errno); + return; + } + +#ifndef STATIC_GCONV + /* Find out where we have to look. */ + if (__gconv_path_elem == NULL) + __gconv_get_path (); + + for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt) + { + elem = __gconv_path_elem[cnt].name; + elem_len = __gconv_path_elem[cnt].len; + + /* No slash needs to be inserted between elem and gconv_conf_filename; + elem already ends in a slash. */ + filename = alloca (elem_len + sizeof (gconv_conf_filename)); + tmp = memcpy (filename, elem, elem_len); + tmp += elem_len; + memcpy (tmp, gconv_conf_filename, sizeof (gconv_conf_filename)); + + /* Read the next configuration file. 
*/ + read_conf_file (filename, elem, elem_len, &modules, &nmodules); + } +#endif + + /* Add the internal modules. */ + for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]); + ++cnt) + { + struct gconv_alias fake_alias; + + fake_alias.fromname = (char *) builtin_modules[cnt].from_string; + + if (tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) + != NULL) + /* It'll conflict so don't add it. */ + continue; + + insert_module (&builtin_modules[cnt], 0); + } + + /* Add aliases for builtin conversions. */ + cnt = sizeof (builtin_aliases) / sizeof (builtin_aliases[0]); + while (cnt > 0) + { + char * old = builtin_aliases[--cnt]; + size_t len = strlen (old) + 1; + char *new = (char *) alloca (len); + char *copy = (char *) memcpy (new, old, len); + + add_alias (copy, modules); + } + + /* Restore the error number. */ + __set_errno (save_errno); +} + + + +/* Free all resources if necessary. */ +static void __attribute__ ((unused)) +free_mem (void) +{ + if (__gconv_path_elem != NULL && __gconv_path_elem != &empty_path_elem) + free ((void *) __gconv_path_elem); +} + +text_set_element (__libc_subfreeres, free_mem); diff --git a/newlib/libc/sys/linux/iconv/gconv_db.c b/newlib/libc/sys/linux/iconv/gconv_db.c new file mode 100644 index 000000000..749995ee3 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv_db.c @@ -0,0 +1,803 @@ +/* Provide access to the collection of available transformation modules. + Copyright (C) 1997,98,99,2000,2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <limits.h> +#include <search.h> +#include <stdlib.h> +#include <string.h> +#include <sys/param.h> +#include <dirent.h> + +#include <dlfcn.h> +#include <gconv_int.h> +#include <gconv_charset.h> + + +/* Simple data structure for alias mapping. We have two names, `from' + and `to'. */ +void *__gconv_alias_db; + +/* Array with available modules. */ +struct gconv_module *__gconv_modules_db; + +/* We modify global data. */ +__LOCK_INIT(static, lock); + + +/* Function for searching alias. */ +int +__gconv_alias_compare (const void *p1, const void *p2) +{ + const struct gconv_alias *s1 = (const struct gconv_alias *) p1; + const struct gconv_alias *s2 = (const struct gconv_alias *) p2; + return strcmp (s1->fromname, s2->fromname); +} + + +/* To search for a derivation we create a list of intermediate steps. + Each element contains a pointer to the element which precedes it + in the derivation order. 
*/ +struct derivation_step +{ + const char *result_set; + size_t result_set_len; + int cost_lo; + int cost_hi; + struct gconv_module *code; + struct derivation_step *last; + struct derivation_step *next; +}; + +#define NEW_STEP(result, hi, lo, module, last_mod) \ + ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \ + newp->result_set = result; \ + newp->result_set_len = strlen (result); \ + newp->cost_hi = hi; \ + newp->cost_lo = lo; \ + newp->code = module; \ + newp->last = last_mod; \ + newp->next = NULL; \ + newp; }) + + +/* If a specific transformation is used more than once we should not need + to start looking for it again. Instead cache each successful result. */ +struct known_derivation +{ + const char *from; + const char *to; + struct __gconv_step *steps; + size_t nsteps; +}; + +/* Compare function for database of found derivations. */ +static int +derivation_compare (const void *p1, const void *p2) +{ + const struct known_derivation *s1 = (const struct known_derivation *) p1; + const struct known_derivation *s2 = (const struct known_derivation *) p2; + int result; + + result = strcmp (s1->from, s2->from); + if (result == 0) + result = strcmp (s1->to, s2->to); + return result; +} + +/* The search tree for known derivations. */ +static void *known_derivations; + +/* Look up whether given transformation was already requested before. */ +static int +internal_function +derivation_lookup (const char *fromset, const char *toset, + struct __gconv_step **handle, size_t *nsteps) +{ + struct known_derivation key = { fromset, toset, NULL, 0 }; + struct known_derivation **result; + + result = tfind (&key, &known_derivations, derivation_compare); + + if (result == NULL) + return __GCONV_NOCONV; + + *handle = (*result)->steps; + *nsteps = (*result)->nsteps; + + /* Please note that we return GCONV_OK even if the last search for + this transformation was unsuccessful. */ + return __GCONV_OK; +} + +/* Add new derivation to list of known ones. 
*/ +static void +internal_function +add_derivation (const char *fromset, const char *toset, + struct __gconv_step *handle, size_t nsteps) +{ + struct known_derivation *new_deriv; + size_t fromset_len = strlen (fromset) + 1; + size_t toset_len = strlen (toset) + 1; + + new_deriv = (struct known_derivation *) + malloc (sizeof (struct known_derivation) + fromset_len + toset_len); + if (new_deriv != NULL) + { + char *tmp; + new_deriv->from = (char *) (new_deriv + 1); + tmp = memcpy (new_deriv + 1, fromset, fromset_len); + tmp += fromset_len; + + new_deriv->to = memcpy (tmp, + toset, toset_len); + + new_deriv->steps = handle; + new_deriv->nsteps = nsteps; + + if (tsearch (new_deriv, &known_derivations, derivation_compare) + == NULL) + /* There is some kind of memory allocation problem. */ + free (new_deriv); + } + /* Please note that we don't complain if the allocation failed. This + is not tragically but in case we use the memory debugging facilities + not all memory will be freed. */ +} + +static void +free_derivation (void *p) +{ + struct known_derivation *deriv = (struct known_derivation *) p; + size_t cnt; + + for (cnt = 0; cnt < deriv->nsteps; ++cnt) + if (deriv->steps[cnt].__counter > 0 + && deriv->steps[cnt].__end_fct != NULL) + deriv->steps[cnt].__end_fct (&deriv->steps[cnt]); + + /* Free the name strings. */ + free ((char *) deriv->steps[0].__from_name); + free ((char *) deriv->steps[deriv->nsteps - 1].__to_name); + + free ((struct __gconv_step *) deriv->steps); + free (deriv); +} + + +/* Decrement the reference count for a single step in a steps array. */ +void +internal_function +__gconv_release_step (struct __gconv_step *step) +{ + if (--step->__counter == 0) + { + /* Call the destructor. */ + if (step->__end_fct != NULL) + step->__end_fct (step); + +#ifndef STATIC_GCONV + /* Skip builtin modules; they are not reference counted. */ + if (step->__shlib_handle != NULL) + { + /* Release the loaded module. 
*/ + __gconv_release_shlib (step->__shlib_handle); + step->__shlib_handle = NULL; + } +#endif + } +} + +static int +internal_function +gen_steps (struct derivation_step *best, const char *toset, + const char *fromset, struct __gconv_step **handle, size_t *nsteps) +{ + size_t step_cnt = 0; + struct __gconv_step *result; + struct derivation_step *current; + int status = __GCONV_NOMEM; + + /* First determine number of steps. */ + for (current = best; current->last != NULL; current = current->last) + ++step_cnt; + + result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step) + * step_cnt); + if (result != NULL) + { + int failed = 0; + + status = __GCONV_OK; + *nsteps = step_cnt; + current = best; + while (step_cnt-- > 0) + { + result[step_cnt].__from_name = (step_cnt == 0 + ? strdup (fromset) + : (char *)current->last->result_set); + result[step_cnt].__to_name = (step_cnt + 1 == *nsteps + ? strdup (current->result_set) + : result[step_cnt + 1].__from_name); + + result[step_cnt].__counter = 1; + result[step_cnt].__data = NULL; + +#ifndef STATIC_GCONV + if (current->code->module_name[0] == '/') + { + /* Load the module, return handle for it. */ + struct __gconv_loaded_object *shlib_handle = + __gconv_find_shlib (current->code->module_name); + + if (shlib_handle == NULL) + { + failed = 1; + break; + } + + result[step_cnt].__shlib_handle = shlib_handle; + result[step_cnt].__modname = shlib_handle->name; + result[step_cnt].__fct = shlib_handle->fct; + result[step_cnt].__init_fct = shlib_handle->init_fct; + result[step_cnt].__end_fct = shlib_handle->end_fct; + + /* Call the init function. */ + if (result[step_cnt].__init_fct != NULL) + { + status = result[step_cnt].__init_fct (&result[step_cnt]); + + if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) + { + failed = 1; + /* Make sure we unload this modules. */ + --step_cnt; + result[step_cnt].__end_fct = NULL; + break; + } + } + } + else +#endif + /* It's a builtin transformation. 
*/ + __gconv_get_builtin_trans (current->code->module_name, + &result[step_cnt]); + + current = current->last; + } + + if (__builtin_expect (failed, 0) != 0) + { + /* Something went wrong while initializing the modules. */ + while (++step_cnt < *nsteps) + __gconv_release_step (&result[step_cnt]); + free (result); + *nsteps = 0; + *handle = NULL; + if (status == __GCONV_OK) + status = __GCONV_NOCONV; + } + else + *handle = result; + } + else + { + *nsteps = 0; + *handle = NULL; + } + + return status; +} + + +#ifndef STATIC_GCONV +static int +internal_function +increment_counter (struct __gconv_step *steps, size_t nsteps) +{ + /* Increment the user counter. */ + size_t cnt = nsteps; + int result = __GCONV_OK; + + while (cnt-- > 0) + { + struct __gconv_step *step = &steps[cnt]; + + if (step->__counter++ == 0) + { + /* Skip builtin modules. */ + if (step->__modname != NULL) + { + /* Reopen a previously used module. */ + step->__shlib_handle = __gconv_find_shlib (step->__modname); + if (step->__shlib_handle == NULL) + { + /* Oops, this is the second time we use this module + (after unloading) and this time loading failed!? */ + --step->__counter; + while (++cnt < nsteps) + __gconv_release_step (&steps[cnt]); + result = __GCONV_NOCONV; + break; + } + + /* The function addresses defined by the module may + have changed. */ + step->__fct = step->__shlib_handle->fct; + step->__init_fct = step->__shlib_handle->init_fct; + step->__end_fct = step->__shlib_handle->end_fct; + } + + if (step->__init_fct != NULL) + step->__init_fct (step); + } + } + return result; +} +#endif + + +/* The main function: find a possible derivation from the `fromset' (either + the given name or the alias) to the `toset' (again with alias). 
*/ +static int +internal_function +find_derivation (const char *toset, const char *toset_expand, + const char *fromset, const char *fromset_expand, + struct __gconv_step **handle, size_t *nsteps) +{ + /* TOSET and FROMSET are the names as requested; TOSET_EXPAND and FROMSET_EXPAND are the corresponding alias expansions, or NULL if there is none. The step array and its length are stored through HANDLE and NSTEPS (NULL and 0 if no derivation exists), and the outcome is recorded with add_derivation in either case. */ + struct derivation_step *first, *current, **lastp, *solution = NULL; + int best_cost_hi = INT_MAX; + int best_cost_lo = INT_MAX; + int result; + + /* Look whether an earlier call to `find_derivation' has already + computed a possible derivation. If so, return it immediately. */ + result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset, + handle, nsteps); + if (result == __GCONV_OK) + { +#ifndef STATIC_GCONV + result = increment_counter (*handle, *nsteps); +#endif + return result; + } + + /* The task is to find a sequence of transformations, backed by the + existing modules - whether builtin or dynamically loadable -, + starting at `fromset' (or `fromset_expand') and ending at `toset' + (or `toset_expand'), and with minimal cost. + + For computer scientists, this is a shortest path search in the + graph where the nodes are all possible charsets and the edges are + the transformations listed in __gconv_modules_db. + + For now we use a simple algorithm with quadratic runtime behaviour. + A breadth-first search, starting at `fromset' and `fromset_expand'. + The list starting at `first' contains all nodes that have been + visited up to now, in the order in which they have been visited -- + excluding the goal nodes `toset' and `toset_expand' which get + managed in the list starting at `solution'. + `current' walks through the list starting at `first' and looks + which nodes are reachable from the current node, adding them to + the end of the list [`first' or `solution' respectively] (if + they are visited the first time) or updating them in place (if + they have already been visited).
+ In each node of either list, cost_lo and cost_hi contain the + minimum cost over any paths found up to now, starting at `fromset' + or `fromset_expand', ending at that node. best_cost_lo and + best_cost_hi represent the minimum over the elements of the + `solution' list. */ + + if (fromset_expand != NULL) + { + first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL); + first->next = NEW_STEP (fromset, 0, 0, NULL, NULL); + lastp = &first->next->next; + } + else + { + first = NEW_STEP (fromset, 0, 0, NULL, NULL); + lastp = &first->next; + } + + for (current = first; current != NULL; current = current->next) + { + /* Now match all the available module specifications against the + current charset name. If any of them matches check whether + we already have a derivation for this charset. If yes, use the + one with the lower costs. Otherwise add the new charset at the + end. + + The module database is organized in a tree form which allows + searching for prefixes. So we search for the first entry with a + matching prefix and any other matching entry can be found from + this place. */ + struct gconv_module *node; + + /* Maybe it is not necessary anymore to look for a solution for + this entry since the cost is already as high (or higher) as + the cost for the best solution so far. */ + if (current->cost_hi > best_cost_hi + || (current->cost_hi == best_cost_hi + && current->cost_lo >= best_cost_lo)) + continue; + + node = __gconv_modules_db; + while (node != NULL) + { + int cmpres = strcmp (current->result_set, node->from_string); + if (cmpres == 0) + { + /* Walk through the list of modules with this prefix and + try to match the name. */ + struct gconv_module *runp; + + /* Check all the modules with this prefix. */ + runp = node; + do + { + const char *result_set = (strcmp (runp->to_string, "-") == 0 + ?
(toset_expand ?: toset) + : runp->to_string); + int cost_hi = runp->cost_hi + current->cost_hi; + int cost_lo = runp->cost_lo + current->cost_lo; + struct derivation_step *step; + + /* We managed to find a derivation. First see whether + we have reached one of the goal nodes. */ + if (strcmp (result_set, toset) == 0 + || (toset_expand != NULL + && strcmp (result_set, toset_expand) == 0)) + { + /* Append to the `solution' list if there + is no entry with this name. */ + for (step = solution; step != NULL; step = step->next) + if (strcmp (result_set, step->result_set) == 0) + break; + + if (step == NULL) + { + step = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + step->next = solution; + solution = step; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + /* A better path was found for the node, + on the `solution' list. */ + step->code = runp; + step->last = current; + step->cost_hi = cost_hi; + step->cost_lo = cost_lo; + } + + /* Update best_cost accordingly. */ + if (cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + best_cost_hi = cost_hi; + best_cost_lo = cost_lo; + } + } + else if (cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + /* Append at the end of the `first' list if there + is no entry with this name. */ + for (step = first; step != NULL; step = step->next) + if (strcmp (result_set, step->result_set) == 0) + break; + + if (step == NULL) + { + *lastp = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + lastp = &(*lastp)->next; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + /* A better path was found for the node, + on the `first' list. */ + step->code = runp; + step->last = current; + + /* Update the cost for all steps. */ + for (step = first; step != NULL; + step = step->next) + /* But don't update the start nodes.
*/ + if (step->code != NULL) + { + struct derivation_step *back; + int hi, lo; + + hi = step->code->cost_hi; + lo = step->code->cost_lo; + + for (back = step->last; back->code != NULL; + back = back->last) + { + hi += back->code->cost_hi; + lo += back->code->cost_lo; + } + + step->cost_hi = hi; + step->cost_lo = lo; + } + + /* Likewise for the nodes on the solution list. + Also update best_cost accordingly. */ + for (step = solution; step != NULL; + step = step->next) + { + step->cost_hi = (step->code->cost_hi + + step->last->cost_hi); + step->cost_lo = (step->code->cost_lo + + step->last->cost_lo); + + if (step->cost_hi < best_cost_hi + || (step->cost_hi == best_cost_hi + && step->cost_lo < best_cost_lo)) + { + best_cost_hi = step->cost_hi; + best_cost_lo = step->cost_lo; + } + } + } + } + + runp = runp->same; + } + while (runp != NULL); + + break; + } + else if (cmpres < 0) + node = node->left; + else + node = node->right; + } + } + + if (solution != NULL) + { + /* We really found a way to do the transformation. */ + + /* Choose the best solution. This is easy because we know that + the solution list has at most length 2 (one for every possible + goal node). */ + if (solution->next != NULL) + { + struct derivation_step *solution2 = solution->next; + + if (solution2->cost_hi < solution->cost_hi + || (solution2->cost_hi == solution->cost_hi + && solution2->cost_lo < solution->cost_lo)) + solution = solution2; + } + + /* Now build a data structure describing the transformation steps. */ + result = gen_steps (solution, toset_expand ?: toset, + fromset_expand ?: fromset, handle, nsteps); + } + else + { + /* We haven't found a transformation. Clear the result values. */ + *handle = NULL; + *nsteps = 0; + } + + /* Add result in any case to list of known derivations. */ + add_derivation (fromset_expand ?: fromset, toset_expand ?: toset, + *handle, *nsteps); + + return result; +} + + +/* Control of initialization.
*/ +__libc_once_define (static, once); + + +static const char * +do_lookup_alias (const char *name) +{ + struct gconv_alias key; + struct gconv_alias **found; + + key.fromname = (char *) name; + found = tfind (&key, &__gconv_alias_db, __gconv_alias_compare); + return found != NULL ? (*found)->toname : NULL; +} + + +int +internal_function +__gconv_compare_alias (const char *name1, const char *name2) +{ + int result; + + /* Ensure that the configuration data is read. */ + __libc_once (once, __gconv_read_conf); + + if (__gconv_compare_alias_cache (name1, name2, &result) != 0) + result = strcmp (do_lookup_alias (name1) ?: name1, + do_lookup_alias (name2) ?: name2); + + return result; +} + + +int +internal_function +__gconv_find_transform (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, + int flags) +{ + const char *fromset_expand; + const char *toset_expand; + int result; + + /* Ensure that the configuration data is read. */ + __libc_once (once, __gconv_read_conf); + + /* Acquire the lock. */ +#ifdef HAVE_DD_LOCK + __lock_acquire(lock); +#endif + + result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags); + if (result != __GCONV_NODB) + { + /* We have a cache and could resolve the request, successful or not. */ +#ifdef HAVE_DD_LOCK + __lock_release(lock); +#endif + + return result; + } + + /* If we don't have a module database return with an error. */ + if (__gconv_modules_db == NULL) + { +#ifdef HAVE_DD_LOCK + __lock_release(lock); +#endif + + return __GCONV_NOCONV; + } + + /* See whether the names are aliases. */ + fromset_expand = do_lookup_alias (fromset); + toset_expand = do_lookup_alias (toset); + + if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0) + /* We are not supposed to create a pseudo transformation (means + copying) when the input and output character set are the same. 
*/ + && (strcmp (toset, fromset) == 0 + || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0) + || (fromset_expand != NULL + && (strcmp (toset, fromset_expand) == 0 + || (toset_expand != NULL + && strcmp (toset_expand, fromset_expand) == 0))))) + { + /* Both character sets are the same. */ +#ifdef HAVE_DD_LOCK + __lock_release(lock); +#endif + + return __GCONV_NOCONV; + } + + result = find_derivation (toset, toset_expand, fromset, fromset_expand, + handle, nsteps); + + /* Release the lock. */ +#ifdef HAVE_DD_LOCK + __lock_release(lock); +#endif + + + /* The following code is necessary since `find_derivation' will return + GCONV_OK even when no derivation was found but the same request + was processed before. I.e., negative results will also be cached. */ + return (result == __GCONV_OK + ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK) + : result); +} + + +/* Release the entries of the modules list. */ +int +internal_function +__gconv_close_transform (struct __gconv_step *steps, size_t nsteps) +{ + int result = __GCONV_OK; + size_t cnt; + + /* Acquire the lock. */ +#ifdef HAVE_DD_LOCK + __lock_acquire(lock); +#endif + + +#ifndef STATIC_GCONV + cnt = nsteps; + while (cnt-- > 0) + __gconv_release_step (&steps[cnt]); +#endif + + /* If we use the cache we free a bit more since we don't keep any + transformation records around, they are cheap enough to + recreate. */ + __gconv_release_cache (steps, nsteps); + + /* Release the lock. */ +#ifdef HAVE_DD_LOCK + __lock_release(lock); +#endif + + + return result; +} + + +/* Free the modules mentioned. */ +static void +internal_function +free_modules_db (struct gconv_module *node) +{ + if (node->left != NULL) + free_modules_db (node->left); + if (node->right != NULL) + free_modules_db (node->right); + do + { + struct gconv_module *act = node; + node = node->same; + if (act->module_name[0] == '/') + free (act); + } + while (node != NULL); +} + + +/* Free all resources if necessary. 
*/ +static void __attribute__ ((unused)) +free_mem (void) +{ + /* Each database is torn down only if it was ever created. The alias tree is destroyed with plain `free' as the per-entry callback, the derivation cache with free_derivation. */ + if (__gconv_alias_db != NULL) + tdestroy (__gconv_alias_db, free); + + if (__gconv_modules_db != NULL) + free_modules_db (__gconv_modules_db); + + if (known_derivations != NULL) + tdestroy (known_derivations, free_derivation); +} + +text_set_element (__libc_subfreeres, free_mem); diff --git a/newlib/libc/sys/linux/iconv/gconv_dl.c b/newlib/libc/sys/linux/iconv/gconv_dl.c new file mode 100644 index 000000000..ee725149f --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv_dl.c @@ -0,0 +1,237 @@ +/* Handle loading/unloading of shared object for transformation. + Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <assert.h> +#include <dlfcn.h> +#include <search.h> +#include <stdlib.h> +#include <string.h> +#include <bits/libc-lock.h> +#include <sys/param.h> +#include <ltdl.h> + +#include <gconv_int.h> + + +#ifdef DEBUG +/* For debugging purposes. */ +static void print_all (void); +#endif + + +/* This is a tuning parameter. If a transformation module is not used + anymore it gets not immediately unloaded.
Instead we wait a certain
+   number of load attempts for further modules.  If none of the
+   subsequent load attempts name the same object it finally gets unloaded.
+   Otherwise it is still available which hopefully is the frequent case.
+   The following number is the number of unloading attempts we wait
+   before unloading.  */
+#define TRIES_BEFORE_UNLOAD     2
+
+/* Array of loaded objects.  This is shared by all threads so we have
+   to use semaphores to access it.  */
+static void *loaded;
+
+/* Comparison function for searching `loaded_object' tree.  Orders two
+   `struct __gconv_loaded_object' by their NAME string.  The lookup in
+   `__gconv_find_shlib' passes the address of a bare `const char *' as
+   P1, which works because NAME is the first member of the structure.  */
+static int
+known_compare (const void *p1, const void *p2)
+{
+  const struct __gconv_loaded_object *s1 =
+    (const struct __gconv_loaded_object *) p1;
+  const struct __gconv_loaded_object *s2 =
+    (const struct __gconv_loaded_object *) p2;
+
+  return strcmp (s1->name, s2->name);
+}
+
+/* Return the descriptor for the shared object NAME, loading the object
+   if it is not currently loaded, and account for one more user.
+
+   Returns NULL when no descriptor could be allocated or inserted, when
+   the object cannot be opened, or when it does not export a `gconv'
+   function.  On success the descriptor's FCT, INIT_FCT and END_FCT
+   members have been looked up in the object (the latter two may be
+   NULL) and COUNTER is at least 1.  */
+struct __gconv_loaded_object *
+internal_function
+__gconv_find_shlib (const char *name)
+{
+  struct __gconv_loaded_object *found;
+  void *keyp;
+
+  /* Search the tree of shared objects previously requested.  Data in
+     the tree are `loaded_object' structures, whose first member is a
+     `const char *', the lookup key.  The search returns a pointer to
+     the tree node structure; the first member of the node is a pointer
+     to our structure (i.e. what will be a `loaded_object'); since the
+     first member of that is the lookup key string, &NAME is close
+     enough to a pointer to our structure to use as a lookup key that
+     will be passed to `known_compare' (above).  */
+
+  keyp = tfind (&name, &loaded, known_compare);
+  if (keyp == NULL)
+    {
+      /* This name was not known before.  The name string is stored
+         directly behind the structure in the same allocation.  */
+      size_t namelen = strlen (name) + 1;
+
+      found = malloc (sizeof (struct __gconv_loaded_object) + namelen);
+      if (found != NULL)
+        {
+          /* Point the tree node at this new structure.  The counter
+             starts below -TRIES_BEFORE_UNLOAD, the "not loaded" state
+             tested further down.  */
+          found->name = (char *) memcpy (found + 1, name, namelen);
+          found->counter = -TRIES_BEFORE_UNLOAD - 1;
+          found->handle = NULL;
+
+          if (__builtin_expect (tsearch (found, &loaded, known_compare)
+                                == NULL, 0))
+            {
+              /* Something went wrong while inserting the entry.  */
+              free (found);
+              found = NULL;
+            }
+        }
+    }
+  else
+    found = *(struct __gconv_loaded_object **) keyp;
+
+  /* Load the shared object if it is not currently loaded, i.e. if the
+     counter is below -TRIES_BEFORE_UNLOAD.  */
+  if (found != NULL)
+    {
+      if (found->counter < -TRIES_BEFORE_UNLOAD)
+        {
+          assert (found->handle == NULL);
+          found->handle = __libc_dlopen (found->name);
+          if (found->handle != NULL)
+            {
+              found->fct = __libc_dlsym (found->handle, "gconv");
+              if (found->fct == NULL)
+                {
+                  /* Argh, no conversion function.  There is something
+                     wrong here.  Close the object right away.  We must
+                     not go through `__gconv_release_shlib' here: the
+                     counter is still negative, which violates the
+                     `counter > 0' assertion in `do_release_shlib', and
+                     without assertions the handle would stay open and
+                     break the `handle == NULL' assertion above on the
+                     next lookup.  */
+                  __libc_dlclose (found->handle);
+                  found->handle = NULL;
+                  found = NULL;
+                }
+              else
+                {
+                  found->init_fct = __libc_dlsym (found->handle, "gconv_init");
+                  found->end_fct = __libc_dlsym (found->handle, "gconv_end");
+
+                  /* We have succeeded in loading the shared object.  */
+                  found->counter = 1;
+                }
+            }
+          else
+            /* Error while loading the shared object.  */
+            found = NULL;
+        }
+      else if (found->handle != NULL)
+        /* Still loaded: one more user.  A counter that was counting
+           down towards unloading (<= 0) restarts at 1.  */
+        found->counter = MAX (found->counter + 1, 1);
+    }
+
+  return found;
+}
+
+
+/* This is very ugly but the tsearch functions provide no way to pass
+   information to the walker function.  So we use a global variable.
+   It is MT safe since we use a lock.  */
+static struct __gconv_loaded_object *release_handle;
+
+/* `twalk' callback.  Each node is handled once (on `preorder' for
+   inner nodes, on `leaf' for leaves):
+   - the object equal to `release_handle' loses one reference;
+   - every other object whose counter is in [-TRIES_BEFORE_UNLOAD, 0]
+     is decremented, and is closed once the counter drops below
+     -TRIES_BEFORE_UNLOAD.  */
+static void
+do_release_shlib (void *nodep, VISIT value, int level)
+{
+  struct __gconv_loaded_object *obj = *(struct __gconv_loaded_object **) nodep;
+
+  if (value != preorder && value != leaf)
+    return;
+
+  if (obj == release_handle)
+    {
+      /* This is the object we want to unload.  Now decrement the
+         reference counter.  */
+      assert (obj->counter > 0);
+      --obj->counter;
+    }
+  else if (obj->counter <= 0 && obj->counter >= -TRIES_BEFORE_UNLOAD
+           && --obj->counter < -TRIES_BEFORE_UNLOAD && obj->handle != NULL)
+    {
+      /* Unload the shared object.  */
+      __libc_dlclose (obj->handle);
+      obj->handle = NULL;
+    }
+}
+
+
+/* Notify system that a shared object is no longer needed.  HANDLE must
+   currently have a positive counter (asserted in `do_release_shlib').  */
+void
+internal_function
+__gconv_release_shlib (struct __gconv_loaded_object *handle)
+{
+  /* Urgh, this is ugly but we have no other possibility.  */
+  release_handle = handle;
+
+  /* Process all entries.  Please note that we also visit entries
+     with release counts <= 0.  This way we can finally unload them
+     if necessary.  */
+  twalk (loaded, (void *) do_release_shlib);
+}
+
+
+/* We run this if we debug the memory allocation.  Destructor handed to
+   `tdestroy': closes the object if it is still open and frees the
+   descriptor (the name lives in the same allocation).  */
+static void
+do_release_all (void *nodep)
+{
+  struct __gconv_loaded_object *obj = (struct __gconv_loaded_object *) nodep;
+
+  /* Unload the shared object.  */
+  if (obj->handle != NULL)
+    __libc_dlclose (obj->handle);
+
+  free (obj);
+}
+
+/* Destroy the whole tree of loaded objects.  */
+static void __attribute__ ((unused))
+free_mem (void)
+{
+  tdestroy (loaded, do_release_all);
+}
+text_set_element (__libc_subfreeres, free_mem);
+
+
+#ifdef DEBUG
+/* `twalk' callback printing the visit kind, name and counter of one
+   node.  */
+static void
+do_print (const void *nodep, VISIT value, int level)
+{
+  struct __gconv_loaded_object *obj = *(struct __gconv_loaded_object **) nodep;
+
+  printf ("%10s: \"%s\", %d\n",
+          value == leaf ? "leaf" :
+          value == preorder ? "preorder" :
+          value == postorder ? "postorder" : "endorder",
+          obj->name, obj->counter);
+}
+
+/* Dump every entry of the `loaded' tree.  Uses the public `twalk',
+   like `__gconv_release_shlib' above.  */
+static void
+print_all (void)
+{
+  twalk (loaded, do_print);
+}
+#endif
diff --git a/newlib/libc/sys/linux/iconv/gconv_int.h b/newlib/libc/sys/linux/iconv/gconv_int.h
new file mode 100644
index 000000000..bcd50a2c6
--- /dev/null
+++ b/newlib/libc/sys/linux/iconv/gconv_int.h
@@ -0,0 +1,288 @@
+/* Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _GCONV_INT_H
+#define _GCONV_INT_H 1
+
+#include "gconv.h"
+#include <libc-symbols.h>
+
+__BEGIN_DECLS
+
+
+/* Type to represent search path.  */
+struct path_elem
+{
+  const char *name;
+  size_t len;
+};
+
+/* Variable with search path for `gconv' implementation.  */
+extern struct path_elem *__gconv_path_elem;
+/* Maximum length of a single path element.  */
+extern size_t __gconv_max_path_elem_len;
+
+
+/* Structure for alias definition.  Simply two strings.  */
+struct gconv_alias
+{
+  char *fromname;
+  char *toname;
+};
+
+
+/* How many characters should be converted in one call?  */
+#define GCONV_NCHAR_GOAL 8160
+
+
+/* Structure describing one loaded shared object.  These normally are
+   objects to perform conversion but as a special case the db shared
+   object is also handled.  */
+struct __gconv_loaded_object
+{
+  /* Name of the object.  It must be the first structure element.  */
+  const char *name;
+
+  /* Reference counter for the db functionality.  If no conversion is
+     needed we unload the db library.  In gconv_dl.c a positive value
+     is a use count, while values <= 0 count down towards unloading.  */
+  int counter;
+
+  /* The handle for the shared object.  */
+  void *handle;
+
+  /* Pointer to the functions the module defines.  */
+  __gconv_fct fct;
+  __gconv_init_fct init_fct;
+  __gconv_end_fct end_fct;
+};
+
+
+/* Description for an available conversion module.  */
+struct gconv_module
+{
+  const char *from_string;
+  const char *to_string;
+
+  int cost_hi;
+  int cost_lo;
+
+  const char *module_name;
+
+  struct gconv_module *left; /* Prefix smaller.  */
+  struct gconv_module *same; /* List of entries with identical prefix.  */
+  struct gconv_module *right; /* Prefix larger.  */
+};
+
+
+/* Internal data structure to represent transliteration module.  */
+struct trans_struct
+{
+  /* Module name.  gconv_open.c leaves this NULL for the builtin
+     transliteration step.  */
+  const char *name;
+  struct trans_struct *next;
+
+  /* NCSNAMES character set names in CSNAMES.  */
+  const char **csnames;
+  size_t ncsnames;
+  __gconv_trans_fct trans_fct;
+  __gconv_trans_context_fct trans_context_fct;
+  __gconv_trans_init_fct trans_init_fct;
+  __gconv_trans_end_fct trans_end_fct;
+};
+
+
+/* Flags for `gconv_open'.  */
+enum
+{
+  GCONV_AVOID_NOCONV = 1 << 0
+};
+
+
+/* Global variables.  */
+
+/* Database of alias names.  */
+extern void *__gconv_alias_db;
+
+/* Array with available modules.  */
+extern size_t __gconv_nmodules;
+extern struct gconv_module *__gconv_modules_db;
+
+/* Value of the GCONV_PATH environment variable.  */
+extern const char *__gconv_path_envvar;
+
+
+/* The gconv functions expects the name to be in upper case and complete,
+   including the trailing slashes if necessary.
+
+   This statement expression yields a copy of STR, allocated with
+   `alloca' in the caller's frame, in which every character has been
+   upper-cased with `__toupper_l' and the C locale object.  If STR
+   contains fewer than two '/' one '/' is appended; if it contains none
+   at all a second '/' is appended, followed by SUFFIX unless SUFFIX is
+   NULL.  Note that SUFFIX is only appended in the no-slash case, and
+   that both macro arguments are evaluated more than once.  */
+#define norm_add_slashes(str,suffix) \
+  ({ \
+    const char *cp = (str); \
+    char *result; \
+    char *tmp; \
+    size_t cnt = 0; \
+    size_t suffix_len = (suffix) == NULL ? 0 : strlen (suffix); \
+ \
+    while (*cp != '\0') \
+      if (*cp++ == '/') \
+        ++cnt; \
+ \
+    tmp = result = alloca (cp - (str) + 3 + suffix_len); \
+    cp = (str); \
+    while (*cp != '\0') \
+      *tmp++ = __toupper_l (*cp++, &_nl_C_locobj); \
+    if (cnt < 2) \
+      { \
+        *tmp++ = '/'; \
+        if (cnt < 1) \
+          { \
+            *tmp++ = '/'; \
+            if (suffix != NULL) \
+              { \
+                tmp = memcpy (tmp, suffix, suffix_len); \
+                tmp += suffix_len; \
+              } \
+          } \
+      } \
+    *tmp = '\0'; \
+    result; \
+  })
+
+
+/* Return in *HANDLE descriptor for transformation from FROMSET to TOSET.  */
+extern int __gconv_open (const char *toset, const char *fromset,
+                         __gconv_t *handle, int flags)
+     internal_function;
+
+/* Free resources associated with transformation descriptor CD.  */
+extern int __gconv_close (__gconv_t cd)
+     internal_function;
+
+/* Transform the bytes from the buffer starting at *INBUF and ending at
+   INBUFEND according to rules described by CD and place the result in
+   the buffer starting at *OUTBUF and ending at OUTBUFEND.  Return
+   number of non-identical conversions in *IRREVERSIBLE if this pointer
+   is not null.  */
+extern int __gconv (__gconv_t cd, const unsigned char **inbuf,
+                    const unsigned char *inbufend, unsigned char **outbuf,
+                    unsigned char *outbufend, size_t *irreversible)
+     internal_function;
+
+/* Return in *HANDLE a pointer to an array with *NSTEPS elements describing
+   the single steps necessary for transformation from FROMSET to TOSET.  */
+extern int __gconv_find_transform (const char *toset, const char *fromset,
+                                   struct __gconv_step **handle,
+                                   size_t *nsteps, int flags)
+     internal_function;
+
+/* Search for transformation in cache data.  */
+extern int __gconv_lookup_cache (const char *toset, const char *fromset,
+                                 struct __gconv_step **handle, size_t *nsteps,
+                                 int flags)
+     internal_function;
+
+/* Compare the two name for whether they are after alias expansion the
+   same.  This function uses the cache and fails if none is
+   loaded.
*/
+extern int __gconv_compare_alias_cache (const char *name1, const char *name2,
+                                        int *result) internal_function;
+
+/* Free data associated with a step's structure.  */
+extern void __gconv_release_step (struct __gconv_step *step)
+     internal_function;
+
+/* Read all the configuration data and cache it.  */
+extern void __gconv_read_conf (void);
+
+/* Try to read module cache file.  */
+extern int __gconv_load_cache (void) internal_function;
+
+/* Determine the directories we are looking in.  */
+extern void __gconv_get_path (void);
+
+/* Comparison function to search alias.  */
+extern int __gconv_alias_compare (const void *p1, const void *p2);
+
+/* Clear reference to transformation step implementations which might
+   cause the code to be unloaded.  */
+extern int __gconv_close_transform (struct __gconv_step *steps,
+                                    size_t nsteps)
+     internal_function;
+
+/* Free all resources allocated for the transformation record when
+   using the cache.  */
+extern void __gconv_release_cache (struct __gconv_step *steps, size_t nsteps)
+     internal_function;
+
+/* Load shared object named by NAME.  If already loaded increment reference
+   count.  */
+extern struct __gconv_loaded_object *__gconv_find_shlib (const char *name)
+     internal_function;
+
+/* Release shared object.  If no further reference is available unload
+   the object.  */
+extern void __gconv_release_shlib (struct __gconv_loaded_object *handle)
+     internal_function;
+
+/* Fill STEP with information about builtin module with NAME.  */
+extern void __gconv_get_builtin_trans (const char *name,
+                                       struct __gconv_step *step)
+     internal_function;
+
+/* Try to load transliteration step module.  */
+extern int __gconv_translit_find (struct trans_struct *trans)
+     internal_function;
+
+/* Transliteration using the locale's data.  */
+extern int __gconv_transliterate (struct __gconv_step *step,
+                                  struct __gconv_step_data *step_data,
+                                  void *trans_data,
+                                  __const unsigned char *inbufstart,
+                                  __const unsigned char **inbufp,
+                                  __const unsigned char *inbufend,
+                                  unsigned char **outbufstart,
+                                  size_t *irreversible);
+
+
+/* Builtin transformations.  `__BUILTIN_TRANS' is a helper that only
+   exists to write the prototypes below; it is removed again right
+   after the list so it does not leak into files including this
+   header.  */
+#ifdef _LIBC
+# define __BUILTIN_TRANS(Name) \
+  extern int Name (struct __gconv_step *step, \
+                   struct __gconv_step_data *data, \
+                   const unsigned char **inbuf, \
+                   const unsigned char *inbufend, \
+                   unsigned char **outbufstart, size_t *irreversible, \
+                   int do_flush, int consume_incomplete)
+
+__BUILTIN_TRANS (__gconv_transform_ascii_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_ascii);
+__BUILTIN_TRANS (__gconv_transform_utf8_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_utf8);
+__BUILTIN_TRANS (__gconv_transform_ucs2_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_ucs2);
+__BUILTIN_TRANS (__gconv_transform_ucs2reverse_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_ucs2reverse);
+__BUILTIN_TRANS (__gconv_transform_internal_ucs4);
+__BUILTIN_TRANS (__gconv_transform_ucs4_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_ucs4le);
+__BUILTIN_TRANS (__gconv_transform_ucs4le_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_utf16);
+__BUILTIN_TRANS (__gconv_transform_utf16_internal);
+# undef __BUILTIN_TRANS
+
+#endif
+
+__END_DECLS
+
+#endif /* gconv_int.h */
diff --git a/newlib/libc/sys/linux/iconv/gconv_open.c b/newlib/libc/sys/linux/iconv/gconv_open.c
new file mode 100644
index 000000000..5c10d601e
--- /dev/null
+++ b/newlib/libc/sys/linux/iconv/gconv_open.c
@@ -0,0 +1,326 @@
+/* Find matching transformation algorithms and initialize steps.
+   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <errno.h>
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <gconv_int.h>
+
+/* Create a conversion descriptor for converting from FROMSET to TOSET
+   and store it in *HANDLE.
+
+   TOSET has the form "NAME/.../ERRHAND": everything after the second
+   '/' is a comma separated list of error handlers.  "TRANSLIT" selects
+   the builtin transliteration, "IGNORE" sets __GCONV_IGNORE_ERRORS for
+   every step, any other token is taken as the name of a
+   transliteration module.  The same suffix on FROMSET is dropped.
+
+   While an error handler list is processed the process locale is
+   switched to "C" for the case-insensitive comparisons; the previous
+   locale is restored before returning.
+
+   Returns __GCONV_OK with *HANDLE set to a malloc'ed descriptor, or
+   the error from `__gconv_find_transform' / __GCONV_NOMEM with *HANDLE
+   set to NULL.  On the cleanup path errno is preserved.  */
+int
+internal_function
+__gconv_open (const char *toset, const char *fromset, __gconv_t *handle,
+              int flags)
+{
+  struct __gconv_step *steps;
+  size_t nsteps;
+  __gconv_t result = NULL;
+  size_t cnt = 0;
+  int res;
+  int conv_flags = 0;
+  const char *errhand;
+  const char *ignore;
+  struct trans_struct *trans = NULL;
+  /* Copy of the locale that was active before we switched to "C";
+     NULL as long as the locale has not been changed.  */
+  char *old_locale = NULL;
+  const char *old_locale_p;
+  char *old, *new;
+  size_t len;
+
+  /* Find out whether any error handling method is specified.  */
+  errhand = strchr (toset, '/');
+  if (errhand != NULL)
+    errhand = strchr (errhand + 1, '/');
+  if (__builtin_expect (errhand != NULL, 1))
+    {
+      if (*++errhand == '\0')
+        errhand = NULL;
+      else
+        {
+          /* Make copy without the error handling description.  */
+          char *newtoset = (char *) alloca (errhand - toset + 1);
+          char *tok;
+          char *ptr;
+
+          newtoset[errhand - toset] = '\0';
+          toset = memcpy (newtoset, toset, errhand - toset);
+
+          /* Find the appropriate transliteration handlers.  `strtok_r'
+             modifies its argument, so work on a private copy.  */
+          old = (char *)(errhand);
+          len = strlen (old) + 1;
+          new = (char *) alloca (len);
+          tok = (char *) memcpy (new, old, len);
+
+          tok = strtok_r (tok, ",", &ptr);
+
+          /* Set locale to default C locale.  The current locale has to
+             be queried *before* switching: `setlocale (LC_ALL, "C")'
+             returns the new locale, not the previous one.  The returned
+             string may be overwritten by later `setlocale' calls and
+             can be arbitrarily long for mixed locales, so keep a copy
+             of exactly the right size.  */
+          old_locale_p = setlocale (LC_ALL, NULL);
+          if (old_locale_p != NULL)
+            {
+              size_t loclen = strlen (old_locale_p) + 1;
+
+              old_locale = (char *) memcpy (alloca (loclen), old_locale_p,
+                                            loclen);
+            }
+          setlocale (LC_ALL, "C");
+
+          while (tok != NULL)
+            {
+              if (strcasecmp (tok, "TRANSLIT") == 0)
+                {
+                  /* It's the builtin transliteration handling.  We only
+                     support it for working on the internal encoding.  */
+                  static const char *internal_trans_names[1] = { "INTERNAL" };
+                  struct trans_struct *lastp = NULL;
+                  struct trans_struct *runp;
+
+                  for (runp = trans; runp != NULL; runp = runp->next)
+                    if (runp->trans_fct == __gconv_transliterate)
+                      break;
+                    else
+                      lastp = runp;
+
+                  if (runp == NULL)
+                    {
+                      struct trans_struct *newp;
+
+                      newp = (struct trans_struct *) alloca (sizeof (*newp));
+                      memset (newp, '\0', sizeof (*newp));
+
+                      /* We leave the `name' field zero to signal that
+                         this is an internal transliteration step.  */
+                      newp->csnames = internal_trans_names;
+                      newp->ncsnames = 1;
+                      newp->trans_fct = __gconv_transliterate;
+
+                      if (lastp == NULL)
+                        trans = newp;
+                      else
+                        lastp->next = newp;
+                    }
+                }
+              else if (strcasecmp (tok, "IGNORE") == 0)
+                /* Set the flag to ignore all errors.  */
+                conv_flags |= __GCONV_IGNORE_ERRORS;
+              else
+                {
+                  /* `tok' is possibly a module name.  We'll see later
+                     whether we can find it.  But first see that we do
+                     not already have a module of this name.  */
+                  struct trans_struct *lastp = NULL;
+                  struct trans_struct *runp;
+
+                  for (runp = trans; runp != NULL; runp = runp->next)
+                    if (runp->name != NULL
+                        && strcasecmp (tok, runp->name) == 0)
+                      break;
+                    else
+                      lastp = runp;
+
+                  if (runp == NULL)
+                    {
+                      struct trans_struct *newp;
+
+                      newp = (struct trans_struct *) alloca (sizeof (*newp));
+                      memset (newp, '\0', sizeof (*newp));
+                      newp->name = tok;
+
+                      if (lastp == NULL)
+                        trans = newp;
+                      else
+                        lastp->next = newp;
+                    }
+                }
+
+              tok = strtok_r (NULL, ",", &ptr);
+            }
+        }
+    }
+
+  /* For the source character set we ignore the error handler specification.
+     XXX Is this really always the best?  */
+  ignore = strchr (fromset, '/');
+  if (ignore != NULL && (ignore = strchr (ignore + 1, '/')) != NULL
+      && *++ignore != '\0')
+    {
+      char *newfromset = (char *) alloca (ignore - fromset + 1);
+
+      newfromset[ignore - fromset] = '\0';
+      fromset = memcpy (newfromset, fromset, ignore - fromset);
+    }
+
+  res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags);
+  if (res == __GCONV_OK)
+    {
+      /* Find the modules.  */
+      struct trans_struct *lastp = NULL;
+      struct trans_struct *runp;
+
+      for (runp = trans; runp != NULL; runp = runp->next)
+        {
+          if (runp->name == NULL
+              || __builtin_expect (__gconv_translit_find (runp), 0) == 0)
+            lastp = runp;
+          else
+            {
+              /* This means we haven't found the module.  Remove it.
+                 (Written as a plain if/else: a conditional expression
+                 is not an lvalue in ISO C.)  */
+              if (lastp == NULL)
+                trans = runp->next;
+              else
+                lastp->next = runp->next;
+            }
+        }
+
+      /* Allocate room for handle.  */
+      result = (__gconv_t) malloc (sizeof (struct __gconv_info)
+                                   + (nsteps
+                                      * sizeof (struct __gconv_step_data)));
+      if (result == NULL)
+        res = __GCONV_NOMEM;
+      else
+        {
+          size_t n;
+
+          /* Remember the list of steps.  */
+          result->__steps = steps;
+          result->__nsteps = nsteps;
+
+          /* Clear the array for the step data.  */
+          memset (result->__data, '\0',
+                  nsteps * sizeof (struct __gconv_step_data));
+
+          /* Call all initialization functions for the transformation
+             step implementations.  */
+          for (cnt = 0; cnt < nsteps; ++cnt)
+            {
+              size_t size;
+
+              /* Would have to be done if we would not clear the whole
+                 array above.  */
+#if 0
+              /* Reset the counter.  */
+              result->__data[cnt].__invocation_counter = 0;
+
+              /* It's a regular use.  */
+              result->__data[cnt].__internal_use = 0;
+#endif
+
+              /* We use the `mbstate_t' member in DATA.  */
+              result->__data[cnt].__statep = &result->__data[cnt].__state;
+
+              /* Now see whether we can use any of the transliteration
+                 modules for this step.  */
+              for (runp = trans; runp != NULL; runp = runp->next)
+                for (n = 0; n < runp->ncsnames; ++n)
+                  if (strcasecmp (steps[cnt].__from_name, runp->csnames[n]) == 0)
+                    {
+                      void *data = NULL;
+
+                      /* Match!  Now try the initializer.  */
+                      if (runp->trans_init_fct == NULL
+                          || (runp->trans_init_fct (&data,
+                                                    steps[cnt].__to_name)
+                              == __GCONV_OK))
+                        {
+                          /* Append at the end of the list.  */
+                          struct __gconv_trans_data *newp;
+                          struct __gconv_trans_data **lastp;
+
+                          newp = (struct __gconv_trans_data *)
+                            malloc (sizeof (struct __gconv_trans_data));
+                          if (newp == NULL)
+                            {
+                              /* NOTE(review): DATA returned by the
+                                 initializer is not released on this
+                                 path.  */
+                              res = __GCONV_NOMEM;
+                              /* Step CNT may already own
+                                 transliteration records; make the
+                                 cleanup loop cover it as well.  */
+                              ++cnt;
+                              goto bail;
+                            }
+
+                          newp->__trans_fct = runp->trans_fct;
+                          newp->__trans_context_fct = runp->trans_context_fct;
+                          newp->__trans_end_fct = runp->trans_end_fct;
+                          newp->__data = data;
+                          newp->__next = NULL;
+
+                          lastp = &result->__data[cnt].__trans;
+                          while (*lastp != NULL)
+                            lastp = &(*lastp)->__next;
+
+                          *lastp = newp;
+                        }
+                      break;
+                    }
+
+              /* If this is the last step we must not allocate an
+                 output buffer.  */
+              if (cnt < nsteps - 1)
+                {
+                  result->__data[cnt].__flags = conv_flags;
+
+                  /* Allocate the buffer.  */
+                  size = (GCONV_NCHAR_GOAL * steps[cnt].__max_needed_to);
+
+                  result->__data[cnt].__outbuf = (char *) malloc (size);
+                  if (result->__data[cnt].__outbuf == NULL)
+                    {
+                      res = __GCONV_NOMEM;
+                      /* Include step CNT in the cleanup: its
+                         transliteration records were set up above and
+                         its `__outbuf' is NULL, which `free' accepts.  */
+                      ++cnt;
+                      goto bail;
+                    }
+
+                  result->__data[cnt].__outbufend =
+                    result->__data[cnt].__outbuf + size;
+                }
+              else
+                {
+                  /* Handle the last entry.  */
+                  result->__data[cnt].__flags = conv_flags | __GCONV_IS_LAST;
+
+                  break;
+                }
+            }
+        }
+
+      if (res != __GCONV_OK)
+        {
+          /* Something went wrong.  Free all the resources.  On entry
+             CNT is the number of steps whose data has to be undone.  */
+          int serrno;
+        bail:
+          serrno = errno;
+
+          if (result != NULL)
+            {
+              while (cnt-- > 0)
+                {
+                  struct __gconv_trans_data *transp;
+
+                  transp = result->__data[cnt].__trans;
+                  while (transp != NULL)
+                    {
+                      struct __gconv_trans_data *curp = transp;
+                      transp = transp->__next;
+
+                      if (__builtin_expect (curp->__trans_end_fct != NULL, 0))
+                        curp->__trans_end_fct (curp->__data);
+
+                      free (curp);
+                    }
+
+                  free (result->__data[cnt].__outbuf);
+                }
+
+              free (result);
+              result = NULL;
+            }
+
+          __gconv_close_transform (steps, nsteps);
+
+          __set_errno (serrno);
+        }
+    }
+
+  *handle = result;
+  /* Restore the caller's locale, but only if we changed it above.  */
+  if (old_locale != NULL)
+    setlocale (LC_ALL, old_locale);
+  return res;
+}
diff --git a/newlib/libc/sys/linux/iconv/gconv_simple.c b/newlib/libc/sys/linux/iconv/gconv_simple.c
new file mode 100644
index 000000000..a4a99ea81
--- /dev/null
+++ b/newlib/libc/sys/linux/iconv/gconv_simple.c
@@ -0,0 +1,1327 @@
+/* Simple transformations functions.
+   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.
*/
+
+#include <byteswap.h>
+#include <dlfcn.h>
+#include <endian.h>
+#include <errno.h>
+#include <gconv.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <sys/param.h>
+
+#define BUILTIN_ALIAS(s1, s2) /* nothing */
+#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \
+                               MinT, MaxT) \
+  extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
+                  __const unsigned char **, __const unsigned char *, \
+                  unsigned char **, size_t *, int, int);
+#include "gconv_builtin.h"
+
+
+#ifndef EILSEQ
+# define EILSEQ EINVAL
+#endif
+
+
+/* Transform from the internal, UCS4-like format, to UCS4.  The
+   difference between the internal ucs4 format and the real UCS4
+   format is, if any, the endianess.  The Unicode/ISO 10646 says that
+   unless some higher protocol specifies it differently, the byte
+   order is big endian.*/
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 4
+#define MIN_NEEDED_TO 4
+#define FROM_DIRECTION 1
+#define FROM_LOOP internal_ucs4_loop
+#define TO_LOOP internal_ucs4_loop /* This is not used.  */
+#define FUNCTION_NAME __gconv_transform_internal_ucs4
+
+
+/* Convert as many complete 4-byte characters as fit into both the
+   input range [*INPTRP, INEND) and the output range [*OUTPTRP, OUTEND),
+   byte-swapping each one on little endian hosts and copying verbatim
+   on big endian hosts.  *INPTRP and *OUTPTRP are advanced past the
+   converted data.  Returns __GCONV_EMPTY_INPUT when the input is used
+   up, __GCONV_FULL_OUTPUT when fewer than 4 output bytes remain, and
+   __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA and
+   IRREVERSIBLE are not used.  */
+static inline int
+internal_ucs4_loop (struct __gconv_step *step,
+                    struct __gconv_step_data *step_data,
+                    const unsigned char **inptrp, const unsigned char *inend,
+                    unsigned char **outptrp, unsigned char *outend,
+                    size_t *irreversible)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+  /* Sigh, we have to do some real work.  The output pointer is
+     advanced in the loop header; incrementing the result of a cast
+     (`*((uint32_t *) outptr)++') is not valid ISO C.  */
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+  /* Simply copy the data.  */
+  *inptrp = inptr + n_convert * 4;
+  *outptrp = memcpy (outptr, inptr, n_convert * 4);
+  *outptrp += n_convert * 4;
+#else
+# error "This endianess is not supported."
+#endif
+
+  /* Determine the status.  */
+  if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else if (*outptrp + 4 > outend)
+    result = __GCONV_FULL_OUTPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+
+#ifndef _STRING_ARCH_unaligned
+/* Same contract as `internal_ucs4_loop', but only byte accesses are
+   used so the buffers need not be aligned for `uint32_t'.  */
+static inline int
+internal_ucs4_loop_unaligned (struct __gconv_step *step,
+                              struct __gconv_step_data *step_data,
+                              const unsigned char **inptrp,
+                              const unsigned char *inend,
+                              unsigned char **outptrp, unsigned char *outend,
+                              size_t *irreversible)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+  /* Sigh, we have to do some real work.  */
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      outptr[0] = inptr[3];
+      outptr[1] = inptr[2];
+      outptr[2] = inptr[1];
+      outptr[3] = inptr[0];
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+# elif __BYTE_ORDER == __BIG_ENDIAN
+  /* Simply copy the data.  */
+  *inptrp = inptr + n_convert * 4;
+  *outptrp = memcpy (outptr, inptr, n_convert * 4);
+  *outptrp += n_convert * 4;
+# else
+# error "This endianess is not supported."
+# endif
+
+  /* Determine the status.  */
+  if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else if (*outptrp + 4 > outend)
+    result = __GCONV_FULL_OUTPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+#endif
+
+
+/* Convert a single character whose first bytes were saved in the
+   conversion state by an earlier call.  The low three bits of
+   `__count' hold the number of saved bytes; they are completed from
+   the input.  If still fewer than 4 bytes are available the new count
+   is stored and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the
+   character is written in big endian order to *OUTPTRP, which is
+   advanced by 4, the saved count is cleared and __GCONV_OK returned.
+   OUTEND is not checked here.  */
+static inline int
+internal_ucs4_loop_single (struct __gconv_step *step,
+                           struct __gconv_step_data *step_data,
+                           const unsigned char **inptrp,
+                           const unsigned char *inend,
+                           unsigned char **outptrp, unsigned char *outend,
+                           size_t *irreversible)
+{
+  mbstate_t *state = step_data->__statep;
+  size_t cnt = state->__count & 7;
+
+  while (*inptrp < inend && cnt < 4)
+    state->__value.__wchb[cnt++] = *(*inptrp)++;
+
+  if (__builtin_expect (cnt < 4, 0))
+    {
+      /* Still not enough bytes.  Store the ones in the input buffer.  */
+      state->__count &= ~7;
+      state->__count |= cnt;
+
+      return __GCONV_INCOMPLETE_INPUT;
+    }
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+  (*outptrp)[0] = state->__value.__wchb[3];
+  (*outptrp)[1] = state->__value.__wchb[2];
+  (*outptrp)[2] = state->__value.__wchb[1];
+  (*outptrp)[3] = state->__value.__wchb[0];
+
+  *outptrp += 4;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+  /* Copy byte by byte: the output may be unaligned, and the former
+     `*(*((uint32_t **) outptrp)++)' incremented the parameter OUTPTRP
+     itself rather than the caller's output pointer.  */
+  (*outptrp)[0] = state->__value.__wchb[0];
+  (*outptrp)[1] = state->__value.__wchb[1];
+  (*outptrp)[2] = state->__value.__wchb[2];
+  (*outptrp)[3] = state->__value.__wchb[3];
+
+  *outptrp += 4;
+#else
+# error "This endianess is not supported."
+#endif
+
+  /* Clear the state buffer.  */
+  state->__count &= ~7;
+
+  return __GCONV_OK;
+}
+
+#include <iconv/skeleton.c>
+
+
+/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
+   for the other direction we have to check for correct values here.  */
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 4
+#define MIN_NEEDED_TO 4
+#define FROM_DIRECTION 1
+#define FROM_LOOP ucs4_internal_loop
+#define TO_LOOP ucs4_internal_loop /* This is not used.
*/
+#define FUNCTION_NAME __gconv_transform_ucs4_internal
+
+
+/* Convert complete 4-byte big endian UCS4 characters from
+   [*INPTRP, INEND) into the internal format at [*OUTPTRP, OUTEND).
+
+   Values above 0x7fffffff are illegal.  For such a value:
+   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned without
+     updating *INPTRP and *OUTPTRP;
+   - else if __GCONV_IGNORE_ERRORS is set in the step flags the
+     character is skipped and *IRREVERSIBLE incremented;
+   - else *INPTRP / *OUTPTRP are set to the offending position and
+     __GCONV_ILLEGAL_INPUT is returned.
+
+   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
+   __GCONV_INCOMPLETE_INPUT depending on what remains.  */
+static inline int
+ucs4_internal_loop (struct __gconv_step *step,
+                    struct __gconv_step_data *step_data,
+                    const unsigned char **inptrp, const unsigned char *inend,
+                    unsigned char **outptrp, unsigned char *outend,
+                    size_t *irreversible)
+{
+  int flags = step_data->__flags;
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
+    {
+      uint32_t inval;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+      inval = bswap_32 (*(const uint32_t *) inptr);
+#else
+      inval = *(const uint32_t *) inptr;
+#endif
+
+      if (__builtin_expect (inval > 0x7fffffff, 0))
+        {
+          /* The value is too large.  We don't try transliteration here since
+             this is not an error because of the lack of possibilities to
+             represent the result.  This is a genuine bug in the input since
+             UCS4 does not allow such values.  */
+          if (irreversible == NULL)
+            /* We are transliterating, don't try to correct anything.  */
+            return __GCONV_ILLEGAL_INPUT;
+
+          if (flags & __GCONV_IGNORE_ERRORS)
+            {
+              /* Just ignore this character.  */
+              ++*irreversible;
+              continue;
+            }
+
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+      /* Store and advance in two steps; incrementing the result of a
+         cast (`*((uint32_t *) outptr)++') is not valid ISO C.  */
+      *(uint32_t *) outptr = inval;
+      outptr += 4;
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else if (*outptrp + 4 > outend)
+    result = __GCONV_FULL_OUTPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+
+#ifndef _STRING_ARCH_unaligned
+/* Same contract as `ucs4_internal_loop', using byte accesses only so
+   the buffers need not be aligned for `uint32_t'.  */
+static inline int
+ucs4_internal_loop_unaligned (struct __gconv_step *step,
+                              struct __gconv_step_data *step_data,
+                              const unsigned char **inptrp,
+                              const unsigned char *inend,
+                              unsigned char **outptrp, unsigned char *outend,
+                              size_t *irreversible)
+{
+  int flags = step_data->__flags;
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
+    {
+      /* The input is big endian, so the value exceeds 0x7fffffff
+         exactly when the top bit of the first byte is set.  (The test
+         used to be `> 0x80', which let 0x80xxxxxx through although the
+         aligned variant rejects it.)  */
+      if (__builtin_expect (inptr[0] > 0x7f, 0))
+        {
+          /* The value is too large.  We don't try transliteration here since
+             this is not an error because of the lack of possibilities to
+             represent the result.  This is a genuine bug in the input since
+             UCS4 does not allow such values.  */
+          if (irreversible == NULL)
+            /* We are transliterating, don't try to correct anything.  */
+            return __GCONV_ILLEGAL_INPUT;
+
+          if (flags & __GCONV_IGNORE_ERRORS)
+            {
+              /* Just ignore this character.  */
+              ++*irreversible;
+              continue;
+            }
+
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+      outptr[3] = inptr[0];
+      outptr[2] = inptr[1];
+      outptr[1] = inptr[2];
+      outptr[0] = inptr[3];
+# else
+      outptr[0] = inptr[0];
+      outptr[1] = inptr[1];
+      outptr[2] = inptr[2];
+      outptr[3] = inptr[3];
+# endif
+      outptr += 4;
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else if (*outptrp + 4 > outend)
+    result = __GCONV_FULL_OUTPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+#endif
+
+
+/* Convert a single character whose first bytes were saved in the
+   conversion state by an earlier call (count in the low three bits of
+   `__count').  Returns __GCONV_INCOMPLETE_INPUT, after storing the new
+   count, if the character is still incomplete.  An illegal value
+   (first byte above 0x7f) yields __GCONV_ILLEGAL_INPUT with *INPTRP
+   reset to where this call started, unless __GCONV_IGNORE_ERRORS is
+   set, in which case the character is dropped.  A legal value is
+   written in host byte order and *OUTPTRP advanced by 4.  OUTEND is
+   not checked here.  */
+static inline int
+ucs4_internal_loop_single (struct __gconv_step *step,
+                           struct __gconv_step_data *step_data,
+                           const unsigned char **inptrp,
+                           const unsigned char *inend,
+                           unsigned char **outptrp, unsigned char *outend,
+                           size_t *irreversible)
+{
+  mbstate_t *state = step_data->__statep;
+  int flags = step_data->__flags;
+  size_t cnt = state->__count & 7;
+
+  while (*inptrp < inend && cnt < 4)
+    state->__value.__wchb[cnt++] = *(*inptrp)++;
+
+  if (__builtin_expect (cnt < 4, 0))
+    {
+      /* Still not enough bytes.  Store the ones in the input buffer.  */
+      state->__count &= ~7;
+      state->__count |= cnt;
+
+      return __GCONV_INCOMPLETE_INPUT;
+    }
+
+  /* Same range check as in the loop functions: the first (most
+     significant) byte must not have its top bit set.  */
+  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
+                        0))
+    {
+      /* The value is too large.  We don't try transliteration here since
+         this is not an error because of the lack of possibilities to
+         represent the result.  This is a genuine bug in the input since
+         UCS4 does not allow such values.  */
+      if (!(flags & __GCONV_IGNORE_ERRORS))
+        {
+          /* Give back the bytes consumed by this call; the bytes that
+             were already saved in the state stay there.  */
+          *inptrp -= cnt - (state->__count & 7);
+          return __GCONV_ILLEGAL_INPUT;
+        }
+    }
+  else
+    {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+      (*outptrp)[0] = state->__value.__wchb[3];
+      (*outptrp)[1] = state->__value.__wchb[2];
+      (*outptrp)[2] = state->__value.__wchb[1];
+      (*outptrp)[3] = state->__value.__wchb[0];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+      (*outptrp)[0] = state->__value.__wchb[0];
+      (*outptrp)[1] = state->__value.__wchb[1];
+      (*outptrp)[2] = state->__value.__wchb[2];
+      (*outptrp)[3] = state->__value.__wchb[3];
+#endif
+
+      *outptrp += 4;
+    }
+
+  /* Clear the state buffer.  */
+  state->__count &= ~7;
+
+  return __GCONV_OK;
+}
+
+#include <iconv/skeleton.c>
+
+
+/* Similarly for the little endian form.
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs4le_loop +#define TO_LOOP internal_ucs4le_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs4le + + +static inline int +internal_ucs4le_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +#if __BYTE_ORDER == __BIG_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + *((uint32_t *) outptr)++ = bswap_32 (*(const uint32_t *) inptr); + + *inptrp = inptr; + *outptrp = outptr; +#elif __BYTE_ORDER == __LITTLE_ENDIAN + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = memcpy (outptr, inptr, n_convert * 4); + *outptrp += n_convert * 4; +#else +# error "This endianess is not supported." +#endif + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +#ifndef _STRING_ARCH_unaligned +static inline int +internal_ucs4le_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +# if __BYTE_ORDER == __BIG_ENDIAN + /* Sigh, we have to do some real work. 
*/ + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4) + { + outptr[0] = inptr[3]; + outptr[1] = inptr[2]; + outptr[2] = inptr[1]; + outptr[3] = inptr[0]; + } + + *inptrp = inptr; + *outptrp = outptr; +# elif __BYTE_ORDER == __LITTLE_ENDIAN + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = memcpy (outptr, inptr, n_convert * 4); + *outptrp += n_convert * 4; +# else +# error "This endianess is not supported." +# endif + + /* Determine the status. */ + if (*inptrp + 4 > inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} +#endif + + +static inline int +internal_ucs4le_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__builtin_expect (cnt < 4, 0)) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + +#if __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; + + *outptrp += 4; +#else + /* XXX unaligned */ + *(*((uint32_t **) outptrp)++) = state->__value.__wch; +#endif + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* And finally from UCS4-LE to the internal encoding. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs4le_internal_loop +#define TO_LOOP ucs4le_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ucs4le_internal + + +static inline int +ucs4le_internal_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + uint32_t inval; + +#if __BYTE_ORDER == __BIG_ENDIAN + inval = bswap_32 (*(const uint32_t *) inptr); +#else + inval = *(const uint32_t *) inptr; +#endif + + if (__builtin_expect (inval > 0x7fffffff, 0)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + continue; + } + + return __GCONV_ILLEGAL_INPUT; + } + + *((uint32_t *) outptr)++ = inval; + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +#ifndef _STRING_ARCH_unaligned +static inline int +ucs4le_internal_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + if (__builtin_expect (inptr[3] > 0x80, 0)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + +# if __BYTE_ORDER == __BIG_ENDIAN + outptr[3] = inptr[0]; + outptr[2] = inptr[1]; + outptr[1] = inptr[2]; + outptr[0] = inptr[3]; +# else + outptr[0] = inptr[0]; + outptr[1] = inptr[1]; + outptr[2] = inptr[2]; + outptr[3] = inptr[3]; +# endif + + outptr += 4; + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} +#endif + + +static inline int +ucs4le_internal_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + int flags = step_data->__flags; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__builtin_expect (cnt < 4, 0)) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + + if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x80, + 0)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (!(flags & __GCONV_IGNORE_ERRORS)) + return __GCONV_ILLEGAL_INPUT; + } + else + { +#if __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; +#elif __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; +#endif + + *outptrp += 4; + } + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* Convert from ISO 646-IRV to the internal (UCS4-like) format. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ascii_internal_loop +#define TO_LOOP ascii_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ascii_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (__builtin_expect (*inptr > '\x7f', 0)) \ + { \ + /* The value is too large. We don't try transliteration here since \ + this is not an error because of the lack of possibilities to \ + represent the result. This is a genuine bug in the input since \ + ASCII does not allow such values. */ \ + if (! ignore_errors_p ()) \ + { \ + /* This is no correct ANSI_X3.4-1968 character. */ \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ++*irreversible; \ + ++inptr; \ + } \ + else \ + /* It's an one byte sequence. */ \ + *((uint32_t *) outptr)++ = *inptr++; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 1 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ascii_loop +#define TO_LOOP internal_ascii_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ascii +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \ + { \ + UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \ + STANDARD_ERR_HANDLER (4); \ + } \ + else \ + /* It's an one byte sequence. 
*/ \ + *outptr++ = *((const uint32_t *) inptr)++; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UTF-8. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 1 +#define MAX_NEEDED_TO 6 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_utf8_loop +#define TO_LOOP internal_utf8_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_utf8 +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t wc = *((const uint32_t *) inptr); \ + \ + if (wc < 0x80) \ + /* It's an one byte sequence. */ \ + *outptr++ = (unsigned char) wc; \ + else if (__builtin_expect (wc <= 0x7fffffff, 1)) \ + { \ + size_t step; \ + char *start; \ + \ + for (step = 2; step < 6; ++step) \ + if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \ + break; \ + \ + if (__builtin_expect (outptr + step > outend, 0)) \ + { \ + /* Too long. */ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ + start = outptr; \ + *outptr = (unsigned char) (~0xff >> step); \ + outptr += step; \ + --step; \ + do \ + { \ + start[step] = 0x80 | (wc & 0x3f); \ + wc >>= 6; \ + } \ + while (--step > 0); \ + start[0] |= wc; \ + } \ + else \ + { \ + STANDARD_ERR_HANDLER (4); \ + } \ + \ + inptr += 4; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UTF-8 to the internal (UCS4-like) format. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 6 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP utf8_internal_loop +#define TO_LOOP utf8_internal_loop /* This is not used. 
*/ +#define FUNCTION_NAME __gconv_transform_utf8_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch; \ + uint32_t cnt; \ + uint32_t i; \ + \ + /* Next input byte. */ \ + ch = *inptr; \ + \ + if (ch < 0x80) \ + { \ + /* One byte sequence. */ \ + cnt = 1; \ + ++inptr; \ + } \ + else \ + { \ + if (ch >= 0xc2 && ch < 0xe0) \ + { \ + /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \ + otherwise the wide character could have been represented \ + using a single byte. */ \ + cnt = 2; \ + ch &= 0x1f; \ + } \ + else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ + else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ + else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ + ch &= 0x03; \ + } \ + else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \ + { \ + /* We expect six bytes. */ \ + cnt = 6; \ + ch &= 0x01; \ + } \ + else \ + { \ + int skipped; \ + \ + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal encoding. */ \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* Search the end of this ill-formed UTF-8 character. This \ + is the next byte with (x & 0xc0) != 0x80. */ \ + skipped = 0; \ + do \ + { \ + ++inptr; \ + ++skipped; \ + } \ + while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \ + \ + /* Count the dropped sequence like the other ignore paths of \ + this loop body do. */ \ + ++*irreversible; \ + continue; \ + } \ + \ + if (__builtin_expect (inptr + cnt > inend, 0)) \ + { \ + /* We don't have enough input. But before we report that check \ + that all the bytes are correct. 
*/ \ + for (i = 1; inptr + i < inend; ++i) \ + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + \ + if (__builtin_expect (inptr + i == inend, 1)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + if (ignore_errors_p ()) \ + { \ + /* Ignore it. */ \ + inptr += i; \ + ++*irreversible; \ + continue; \ + } \ + \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* Read the possible remaining bytes. */ \ + for (i = 1; i < cnt; ++i) \ + { \ + uint32_t byte = inptr[i]; \ + \ + if ((byte & 0xc0) != 0x80) \ + /* This is an illegal encoding. */ \ + break; \ + \ + ch <<= 6; \ + ch |= byte & 0x3f; \ + } \ + \ + /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ + If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ + have been represented with fewer than cnt bytes. */ \ + if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \ + { \ + /* This is an illegal encoding. */ \ + if (ignore_errors_p ()) \ + { \ + inptr += i; \ + ++*irreversible; \ + continue; \ + } \ + \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += cnt; \ + } \ + \ + /* Now adjust the pointers and store the result. */ \ + *((uint32_t *) outptr)++ = ch; \ + } +#define LOOP_NEED_FLAGS + +#define STORE_REST \ + { \ + /* We store the remaining bytes while converting them into the UCS4 \ + format. We can assume that the first byte in the buffer is \ + correct and that it requires a larger number of bytes than there \ + are in the input buffer. */ \ + wint_t ch = **inptrp; \ + size_t cnt; \ + \ + state->__count = inend - *inptrp; \ + \ + if (ch >= 0xc2 && ch < 0xe0) \ + { \ + /* We expect two bytes. The first byte cannot be 0xc0 or \ + 0xc1, otherwise the wide character could have been \ + represented using a single byte. */ \ + cnt = 2; \ + ch &= 0x1f; \ + } \ + else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ + { \ + /* We expect three bytes. 
*/ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ + else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ + else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ + ch &= 0x03; \ + } \ + else \ + { \ + /* We expect six bytes. */ \ + cnt = 6; \ + ch &= 0x01; \ + } \ + \ + /* The first byte is already consumed. */ \ + --cnt; \ + while (++(*inptrp) < inend) \ + { \ + ch <<= 6; \ + ch |= **inptrp & 0x3f; \ + --cnt; \ + } \ + \ + /* Shift for the so far missing bytes. */ \ + ch <<= cnt * 6; \ + \ + /* Store the value. */ \ + state->__value.__wch = ch; \ + } + +#define UNPACK_BYTES \ + { \ + wint_t wch = state->__value.__wch; \ + size_t ntotal; \ + inlen = state->__count; \ + \ + if (state->__value.__wch <= 0x7ff) \ + { \ + bytebuf[0] = 0xc0; \ + ntotal = 2; \ + } \ + else if (__builtin_expect (state->__value.__wch <= 0xffff, 1)) \ + { \ + bytebuf[0] = 0xe0; \ + ntotal = 3; \ + } \ + else if (__builtin_expect (state->__value.__wch < 0x1fffff, 1)) \ + { \ + bytebuf[0] = 0xf0; \ + ntotal = 4; \ + } \ + else if (__builtin_expect (state->__value.__wch < 0x3ffffff, 1)) \ + { \ + bytebuf[0] = 0xf8; \ + ntotal = 5; \ + } \ + else \ + { \ + bytebuf[0] = 0xfc; \ + ntotal = 6; \ + } \ + \ + do \ + { \ + if (--ntotal < inlen) \ + bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ + wch >>= 6; \ + } \ + while (ntotal > 1); \ + \ + bytebuf[0] |= wch; \ + } + +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UCS2 to the internal (UCS4-like) format. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs2_internal_loop +#define TO_LOOP ucs2_internal_loop /* This is not used. 
*/ +#define FUNCTION_NAME __gconv_transform_ucs2_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint16_t u1 = *((const uint16_t *) inptr); \ + \ + if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \ + { \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 2; \ + ++*irreversible; \ + continue; \ + } \ + \ + *((uint32_t *) outptr)++ = u1; \ + inptr += 2; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UCS2. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs2_loop +#define TO_LOOP internal_ucs2_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs2 +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t val = *((const uint32_t *) inptr); \ + \ + if (__builtin_expect (val >= 0x10000, 0)) \ + { \ + UNICODE_TAG_HANDLER (val, 4); \ + STANDARD_ERR_HANDLER (4); \ + } \ + else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. \ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. If we let \ + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ + if (! 
ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 4; \ + ++*irreversible; \ + continue; \ + } \ + else \ + { \ + *((uint16_t *) outptr)++ = val; \ + inptr += 4; \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs2reverse_internal_loop +#define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/ +#define FUNCTION_NAME __gconv_transform_ucs2reverse_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \ + \ + if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \ + { \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 2; \ + ++*irreversible; \ + continue; \ + } \ + \ + *((uint32_t *) outptr)++ = u1; \ + inptr += 2; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs2reverse_loop +#define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/ +#define FUNCTION_NAME __gconv_transform_internal_ucs2reverse +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t val = *((const uint32_t *) inptr); \ + if (__builtin_expect (val >= 0x10000, 0)) \ + { \ + UNICODE_TAG_HANDLER (val, 4); \ + STANDARD_ERR_HANDLER (4); \ + } \ + else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. \ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. If we let \ + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 4; \ + ++*irreversible; \ + continue; \ + } \ + else \ + { \ + *((uint16_t *) outptr)++ = bswap_16 (val); \ + inptr += 4; \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> diff --git a/newlib/libc/sys/linux/iconv/gconv_trans.c b/newlib/libc/sys/linux/iconv/gconv_trans.c new file mode 100644 index 000000000..dcc1004e1 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/gconv_trans.c @@ -0,0 +1,230 @@ +/* Transliteration using the locale's data. + Copyright (C) 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <assert.h> +#include <dlfcn.h> +#include <search.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <dirent.h> +#include <ltdl.h> + +#include "gconv_int.h" +#include "localeinfo.h" + +int +__gconv_transliterate (struct __gconv_step *step, + struct __gconv_step_data *step_data, + void *trans_data __attribute__ ((unused)), + const unsigned char *inbufstart, + const unsigned char **inbufp, + const unsigned char *inbufend, + unsigned char **outbufstart, size_t *irreversible) +{ + return 0; +} + + +/* Structure to represent results of found (or not) transliteration + modules. */ +struct known_trans +{ + /* This structure must remain the first member. */ + struct trans_struct info; + + char *fname; + void *handle; + int open_count; +}; + + +/* Tree with results of previous calls to __gconv_translit_find. */ +static void *search_tree; + +/* We modify global data. */ +__LOCK_INIT(static, lock); + +/* Compare two transliteration entries. 
*/ +static int +trans_compare (const void *p1, const void *p2) +{ + const struct known_trans *s1 = (const struct known_trans *) p1; + const struct known_trans *s2 = (const struct known_trans *) p2; + + return strcmp (s1->info.name, s2->info.name); +} + + +/* Open (maybe reopen) the module named in the struct. Get the function + and data structure pointers we need. */ +static int +open_translit (struct known_trans *trans) +{ + __gconv_trans_query_fct queryfct; + + trans->handle = __libc_dlopen (trans->fname); + if (trans->handle == NULL) + /* Not available. */ + return 1; + + /* Find the required symbol. */ + queryfct = __libc_dlsym (trans->handle, "gconv_trans_context"); + if (queryfct == NULL) + { + /* We cannot live with that. */ + close_and_out: + __libc_dlclose (trans->handle); + trans->handle = NULL; + return 1; + } + + /* Get the context. */ + if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames) + != 0) + goto close_and_out; + + /* Of course we also have to have the actual function. */ + trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans"); + if (trans->info.trans_fct == NULL) + goto close_and_out; + + /* Now the optional functions. */ + trans->info.trans_init_fct = + __libc_dlsym (trans->handle, "gconv_trans_init"); + trans->info.trans_context_fct = + __libc_dlsym (trans->handle, "gconv_trans_context"); + trans->info.trans_end_fct = + __libc_dlsym (trans->handle, "gconv_trans_end"); + + trans->open_count = 1; + + return 0; +} + + +int +internal_function +__gconv_translit_find (struct trans_struct *trans) +{ + struct known_trans **found; + const struct path_elem *runp; + int res = 1; + + /* We have to have a name. */ + assert (trans->name != NULL); + + /* Acquire the lock. */ +#ifdef HAVE_DD_LOCK + __lock_acquire(lock); +#endif + + /* See whether we know this module already. */ + found = tfind (trans, &search_tree, trans_compare); + if (found != NULL) + { + /* Is this module available? 
*/ + if ((*found)->handle != NULL) + { + /* Maybe we have to reopen the file. */ + if ((*found)->handle != (void *) -1) + /* The object is not unloaded. */ + res = 0; + else if (open_translit (*found) == 0) + { + /* Copy the data. */ + *trans = (*found)->info; + (*found)->open_count++; + res = 0; + } + } + } + else + { + size_t name_len = strlen (trans->name) + 1; + int need_so = 0; + struct known_trans *newp; + + /* We have to continue looking for the module. */ + if (__gconv_path_elem == NULL) + __gconv_get_path (); + + /* See whether we have to append .so. */ + if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0) + need_so = 1; + + /* Create a new entry. The block holds the struct, a copy of the module name, and room for the longest "<dir><name>.so" file name. */ + newp = (struct known_trans *) malloc (sizeof (struct known_trans) + + (__gconv_max_path_elem_len + + name_len + 3) + + name_len); + if (newp != NULL) + { + char *cp; + + /* Clear the struct. */ + memset (newp, '\0', sizeof (struct known_trans)); + + /* Store a copy of the module name. */ + newp->info.name = cp = (char *) (newp + 1); + cp = memcpy (cp, trans->name, name_len); + cp += name_len; + + newp->fname = cp; + + /* Search in all the directories. */ + for (runp = __gconv_path_elem; runp->name != NULL; ++runp) + { + /* Build the file name in place. newp->fname must keep pointing at the start of the buffer, so walk a separate cursor to the end of the directory part. */ + strcpy ((char *) newp->fname, runp->name); + cp = newp->fname + strlen (newp->fname); + + memcpy (cp, trans->name, name_len); + /* name_len counts the terminating NUL; step back onto it so that ".so" extends the name instead of landing behind the terminator. */ + cp += name_len - 1; + if (need_so) + memcpy (cp, ".so", sizeof (".so")); + + if (open_translit (newp) == 0) + { + /* We found a module. */ + res = 0; + break; + } + } + + if (res) + newp->fname = NULL; + + /* In any case we'll add the entry to our search tree. */ + if (tsearch (newp, &search_tree, trans_compare) == NULL) + { + /* Yickes, this should not happen. Unload the object. */ + res = 1; + /* XXX unload here. 
#ifndef PARAMS
# if __STDC__
#  define PARAMS(Args) Args
# else
#  define PARAMS(Args) ()
# endif
#endif

/* We assume to have `unsigned long int' value with at least 32 bits.  */
#define HASHWORDBITS 32


/* Defines the so called `hashpjw' function by P.J. Weinberger
   [see Aho/Sethi/Ullman, COMPILERS: Principles, Techniques and Tools,
   1986, 1987 Bell Telephone Laboratories, Inc.]  */
static unsigned long int hash_string PARAMS ((const char *__str_param));

/* Return the hash of the NUL-terminated string STR_PARAM.

   Every character shifts the accumulator left by four bits and is then
   added to it.  Whenever one of the top four bits of the HASHWORDBITS-wide
   word becomes set, that nibble is folded back into the low byte and
   removed, so the result always fits in HASHWORDBITS - 4 bits.  The empty
   string hashes to 0.  */
static inline unsigned long int
hash_string (const char *str_param)
{
  /* Mask selecting the most significant nibble of a HASHWORDBITS word.  */
  const unsigned long int top_nibble
    = (unsigned long int) 0xf << (HASHWORDBITS - 4);
  unsigned long int hval = 0;
  const char *p;

  for (p = str_param; *p != '\0'; ++p)
    {
      unsigned long int g;

      hval <<= 4;
      /* The character is converted from plain `char' exactly as before,
	 so computed values are unchanged.  */
      hval += (unsigned long int) *p;

      g = hval & top_nibble;
      if (g != 0)
	{
	  hval ^= g >> (HASHWORDBITS - 8);
	  hval ^= g;
	}
    }

  return hval;
}
*outbuf : NULL; + size_t irreversible; + int result; + + if (__builtin_expect (inbuf == NULL || *inbuf == NULL, 0)) + { + if (outbuf == NULL || *outbuf == NULL) + result = __gconv (gcd, NULL, NULL, NULL, NULL, &irreversible); + else + result = __gconv (gcd, NULL, NULL, (unsigned char **) outbuf, + (unsigned char *) (outstart + *outbytesleft), + &irreversible); + } + else + { + const char *instart = *inbuf; + + result = __gconv (gcd, (const unsigned char **) inbuf, + (const unsigned char *) (*inbuf + *inbytesleft), + (unsigned char **) outbuf, + (unsigned char *) (*outbuf + *outbytesleft), + &irreversible); + + *inbytesleft -= *inbuf - instart; + } + if (outstart != NULL) + *outbytesleft -= *outbuf - outstart; + + switch (__builtin_expect (result, __GCONV_OK)) + { + case __GCONV_ILLEGAL_DESCRIPTOR: + __set_errno (EBADF); + irreversible = (size_t) -1L; + break; + + case __GCONV_ILLEGAL_INPUT: + __set_errno (EILSEQ); + irreversible = (size_t) -1L; + break; + + case __GCONV_FULL_OUTPUT: + __set_errno (E2BIG); + irreversible = (size_t) -1L; + break; + + case __GCONV_INCOMPLETE_INPUT: + __set_errno (EINVAL); + irreversible = (size_t) -1L; + break; + + case __GCONV_EMPTY_INPUT: + case __GCONV_OK: + /* Nothing. */ + break; + + default: + assert (!"Nothing like this should happen"); + } + + return irreversible; +} diff --git a/newlib/libc/sys/linux/iconv/iconv.h b/newlib/libc/sys/linux/iconv/iconv.h new file mode 100644 index 000000000..5a795dc5d --- /dev/null +++ b/newlib/libc/sys/linux/iconv/iconv.h @@ -0,0 +1,51 @@ +/* Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _ICONV_H +#define _ICONV_H 1 + +#include <features.h> +#define __need_size_t +#include <stddef.h> + + +__BEGIN_DECLS + +/* Identifier for conversion method from one codeset to another. */ +typedef void *iconv_t; + + +/* Allocate descriptor for code conversion from codeset FROMCODE to + codeset TOCODE. */ +extern iconv_t iconv_open (__const char *__tocode, __const char *__fromcode) + __THROW; + +/* Convert at most *INBYTESLEFT bytes from *INBUF according to the + code conversion algorithm specified by CD and place up to + *OUTBYTESLEFT bytes in buffer at *OUTBUF. */ +extern size_t iconv (iconv_t __cd, char **__restrict __inbuf, + size_t *__restrict __inbytesleft, + char **__restrict __outbuf, + size_t *__restrict __outbytesleft); + +/* Free resources allocated for descriptor CD for code conversion. */ +extern int iconv_close (iconv_t __cd) __THROW; + +__END_DECLS + +#endif /* iconv.h */ diff --git a/newlib/libc/sys/linux/iconv/iconv_charmap.c b/newlib/libc/sys/linux/iconv/iconv_charmap.c new file mode 100644 index 000000000..141c8eca2 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/iconv_charmap.c @@ -0,0 +1,563 @@ +/* Convert using charmaps and possibly iconv(). + Copyright (C) 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. 
/* Prototypes for a few program-wide used functions.  */
extern void *xmalloc (size_t __n);
extern void *xcalloc (size_t __n, size_t __s);


/* One level of the byte-indexed conversion trie.  For input byte B,
   val[B] holds either a pointer to the next level (`sub') or the output
   sequence (`out'); the flag for B in `term' tells which one is valid.
   Only the low eight bits of every `term' element are used as flags.  */
struct convtable
{
  int term[256 / 8];
  union
  {
    struct convtable *sub;
    struct charseq *out;
  } val[256];
};


/* Return a new table obtained from xcalloc, i.e. with every flag clear
   and every slot zeroed.  */
static inline struct convtable *
allocate_table (void)
{
  struct convtable *fresh
    = (struct convtable *) xcalloc (1, sizeof (struct convtable));

  return fresh;
}


/* Return nonzero (the flag bit itself) if byte IDX terminates a sequence
   in TBL, zero otherwise.  */
static inline int
is_term (struct convtable *tbl, unsigned int idx)
{
  const int flag = 1 << (idx % 8);

  return tbl->term[idx / 8] & flag;
}


/* Mark byte IDX in TBL as not terminating a sequence.  */
static inline void
clear_term (struct convtable *tbl, unsigned int idx)
{
  const int flag = 1 << (idx % 8);

  tbl->term[idx / 8] &= ~flag;
}


/* Mark byte IDX in TBL as terminating a sequence.  */
static inline void
set_term (struct convtable *tbl, unsigned int idx)
{
  const int flag = 1 << (idx % 8);

  tbl->term[idx / 8] |= flag;
}
*/ +static struct convtable *use_from_charmap (struct charmap_t *from_charmap, + const char *to_code); +static struct convtable *use_to_charmap (const char *from_code, + struct charmap_t *to_charmap); +static struct convtable *use_both_charmaps (struct charmap_t *from_charmap, + struct charmap_t *to_charmap); + +/* Prototypes for the functions doing the actual work. */ +static int process_block (struct convtable *tbl, char *addr, size_t len, + FILE *output); +static int process_fd (struct convtable *tbl, int fd, FILE *output); +static int process_file (struct convtable *tbl, FILE *input, FILE *output); + + +int +charmap_conversion (const char *from_code, struct charmap_t *from_charmap, + const char *to_code, struct charmap_t *to_charmap, + int argc, int remaining, char *argv[], FILE *output) +{ + struct convtable *cvtbl; + int status = EXIT_SUCCESS; + + /* We have three different cases to handle: + + - both, from_charmap and to_charmap, are available. This means we + can assume that the symbolic names match and use them to create + the mapping. + + - only from_charmap is available. In this case we can only hope that + the symbolic names used are of the <Uxxxx> form in which case we + can use a UCS4->"to_code" iconv() conversion for the second step. + + - only to_charmap is available. This is similar, only that we would + use iconv() for the "to_code"->UCS4 conversion. + + We first create a table which maps input bytes into output bytes. + Once this is done we can handle all three of the cases above + equally. */ + if (from_charmap != NULL) + { + if (to_charmap == NULL) + cvtbl = use_from_charmap (from_charmap, to_code); + else + cvtbl = use_both_charmaps (from_charmap, to_charmap); + } + else + { + assert (to_charmap != NULL); + cvtbl = use_to_charmap (from_code, to_charmap); + } + + /* If we couldn't generate a table stop now. */ + if (cvtbl == NULL) + return EXIT_FAILURE; + + /* We can now start the conversion. 
*/ + if (remaining == argc) + { + if (process_file (cvtbl, stdin, output) != 0) + status = EXIT_FAILURE; + } + else + do + { + struct stat st; + char *addr; + int fd; + + if (verbose) + printf ("%s:\n", argv[remaining]); + if (strcmp (argv[remaining], "-") == 0) + fd = 0; + else + { + fd = open (argv[remaining], O_RDONLY); + + if (fd == -1) + { + error (0, errno, _("cannot open input file `%s'"), + argv[remaining]); + status = EXIT_FAILURE; + continue; + } + } + +#ifdef _POSIX_MAPPED_FILES + /* We have possibilities for reading the input file. First try + to mmap() it since this will provide the fastest solution. */ + if (fstat (fd, &st) == 0 + && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, + fd, 0)) != MAP_FAILED)) + { + /* Yes, we can use mmap(). The descriptor is not needed + anymore. */ + if (close (fd) != 0) + error (EXIT_FAILURE, errno, + _("error while closing input `%s'"), argv[remaining]); + + if (process_block (cvtbl, addr, st.st_size, output) < 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + } + else +#endif /* _POSIX_MAPPED_FILES */ + { + /* Read the file in pieces. */ + if (process_fd (cvtbl, fd, output) != 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + /* We don't need the input file anymore. */ + close (fd); + + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + + /* Now close the file. */ + close (fd); + } + } + while (++remaining < argc); + + /* All done. 
*/ + return status; +} + + +static void +add_bytes (struct convtable *tbl, struct charseq *in, struct charseq *out) +{ + int n = 0; + unsigned int byte; + + assert (in->nbytes > 0); + + byte = ((unsigned char *) in->bytes)[n]; + while (n + 1 < in->nbytes) + { + if (is_term (tbl, byte) || tbl->val[byte].sub == NULL) + { + /* Note that we simply ignore a definition for a byte sequence + which is also the prefix for a longer one. */ + clear_term (tbl, byte); + tbl->val[byte].sub = + (struct convtable *) xcalloc (1, sizeof (struct convtable)); + } + + tbl = tbl->val[byte].sub; + + byte = ((unsigned char *) in->bytes)[++n]; + } + + /* Only add the new sequence if there is none yet and the byte sequence + is not part of an even longer one. */ + if (! is_term (tbl, byte) && tbl->val[byte].sub == NULL) + { + set_term (tbl, byte); + tbl->val[byte].out = out; + } +} + + +static struct convtable * +use_from_charmap (struct charmap_t *from_charmap, const char *to_code) +{ + /* We iterate over all entries in the from_charmap and for those which + have a known UCS4 representation we use an iconv() call to determine + the mapping to the to_code charset. */ + struct convtable *rettbl; + iconv_t cd; + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + cd = iconv_open (to_code, "WCHAR_T"); + if (cd == (iconv_t) -1) + /* We cannot do anything. */ + return NULL; + + rettbl = allocate_table (); + + while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *in = (struct charseq *) data; + + if (in->ucs4 != UNINITIALIZED_CHAR_VALUE) + { + /* There is a chance. Try the iconv module. */ + wchar_t inbuf[1] = { in->ucs4 }; + unsigned char outbuf[64]; + char *inptr = (char *) inbuf; + size_t inlen = sizeof (inbuf); + char *outptr = (char *) outbuf; + size_t outlen = sizeof (outbuf); + + (void) iconv (cd, &inptr, &inlen, &outptr, &outlen); + + if (outptr != (char *) outbuf) + { + /* We got some output. Good, use it. 
*/ + struct charseq *newp; + + outlen = sizeof (outbuf) - outlen; + assert ((char *) outbuf + outlen == outptr); + + newp = (struct charseq *) xmalloc (sizeof (struct charseq) + + outlen); + newp->name = in->name; + newp->ucs4 = in->ucs4; + newp->nbytes = outlen; + memcpy (newp->bytes, outbuf, outlen); + + add_bytes (rettbl, in, newp); + } + + /* Clear any possible state left behind. */ + (void) iconv (cd, NULL, NULL, NULL, NULL); + } + } + + iconv_close (cd); + + return rettbl; +} + + +static struct convtable * +use_to_charmap (const char *from_code, struct charmap_t *to_charmap) +{ + /* We iterate over all entries in the to_charmap and for those which + have a known UCS4 representation we use an iconv() call to determine + the mapping to the from_code charset. */ + struct convtable *rettbl; + iconv_t cd; + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + /* Note that the conversion we use here is the reverse direction. Without + exhaustive search we cannot figure out which input yields the UCS4 + character we are looking for. Therefore we determine it the other + way round. */ + cd = iconv_open (from_code, "WCHAR_T"); + if (cd == (iconv_t) -1) + /* We cannot do anything. */ + return NULL; + + rettbl = allocate_table (); + + while (iterate_table (&to_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *out = (struct charseq *) data; + + if (out->ucs4 != UNINITIALIZED_CHAR_VALUE) + { + /* There is a chance. Try the iconv module. */ + wchar_t inbuf[1] = { out->ucs4 }; + unsigned char outbuf[64]; + char *inptr = (char *) inbuf; + size_t inlen = sizeof (inbuf); + char *outptr = (char *) outbuf; + size_t outlen = sizeof (outbuf); + + (void) iconv (cd, &inptr, &inlen, &outptr, &outlen); + + if (outptr != (char *) outbuf) + { + /* We got some output. Good, use it. 
*/ + struct charseq *newp; + + outlen = sizeof (outbuf) - outlen; + assert ((char *) outbuf + outlen == outptr); + + newp = (struct charseq *) xmalloc (sizeof (struct charseq) + + outlen); + newp->name = out->name; + newp->ucs4 = out->ucs4; + newp->nbytes = outlen; + memcpy (newp->bytes, outbuf, outlen); + + add_bytes (rettbl, newp, out); + } + + /* Clear any possible state left behind. */ + (void) iconv (cd, NULL, NULL, NULL, NULL); + } + } + + iconv_close (cd); + + return rettbl; +} + + +static struct convtable * +use_both_charmaps (struct charmap_t *from_charmap, + struct charmap_t *to_charmap) +{ + /* In this case we iterate over all the entries in the from_charmap, + determine the internal name, and find an appropriate entry in the + to_charmap (if it exists). */ + struct convtable *rettbl = allocate_table (); + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *in = (struct charseq *) data; + struct charseq *out = charmap_find_value (to_charmap, key, keylen); + + if (out != NULL) + add_bytes (rettbl, in, out); + } + + return rettbl; +} + + +static int +process_block (struct convtable *tbl, char *addr, size_t len, FILE *output) +{ + size_t n = 0; + + while (n < len) + { + struct convtable *cur = tbl; + unsigned char *curp = (unsigned char *) addr; + unsigned int byte = *curp; + int cnt; + struct charseq *out; + + while (! is_term (cur, byte)) + if (cur->val[byte].sub == NULL) + { + /* This is a invalid sequence. Skip the first byte if we are + ignoring errors. Otherwise punt. */ + if (! omit_invalid) + { + error (0, 0, _("illegal input sequence at position %Zd"), n); + return -1; + } + + n -= curp - (unsigned char *) addr; + + byte = *(curp = (unsigned char *) ++addr); + if (++n >= len) + /* All converted. 
/* Read everything available from descriptor FD and convert it with TBL,
   writing the result to OUTPUT.

   We have a problem with reading from a descriptor since we must not hand
   the converter an incomplete character or shift sequence at the end of
   the buffer.  Since we have to deal with arbitrary encodings we read the
   whole text into one buffer and process it in a single step.

   The buffer lives in function-static storage and is reused (never
   shrunk or freed) by later calls.

   Returns the result of process_block, or -1 after reporting a read or
   allocation error.  */
static int
process_fd (struct convtable *tbl, int fd, FILE *output)
{
  /* Number of bytes by which the input buffer grows each time.  */
  enum { INBUF_STEP = 32768 };

  static char *inbuf = NULL;
  static size_t maxlen = 0;
  size_t actlen = 0;

  for (;;)
    {
      ssize_t n;

      if (actlen == maxlen)
	{
	  /* Buffer is full (or not allocated yet).  Grow it through a
	     temporary so that a failed realloc neither loses the old
	     buffer nor leaves MAXLEN larger than the real allocation.  */
	  char *grown = (char *) realloc (inbuf, maxlen + INBUF_STEP);

	  if (grown == NULL)
	    {
	      error (0, errno, _("unable to allocate buffer for input"));
	      return -1;
	    }

	  inbuf = grown;
	  maxlen += INBUF_STEP;
	}

      /* Always read behind the data collected so far.  The original code
	 started from a NULL pointer when the buffer was left over from an
	 earlier call.  */
      n = read (fd, inbuf + actlen, maxlen - actlen);

      if (n == 0)
	/* No more text to read.  */
	break;

      if (n == -1)
	{
	  /* Error while reading.  */
	  error (0, errno, _("error while reading the input"));
	  return -1;
	}

      actlen += n;
    }

  /* Now we have all the input in the buffer.  Process it in one run.  */
  return process_block (tbl, inbuf, actlen, output);
}
-1 : 0; +} diff --git a/newlib/libc/sys/linux/iconv/iconv_open.c b/newlib/libc/sys/linux/iconv/iconv_open.c new file mode 100644 index 000000000..ac44af80c --- /dev/null +++ b/newlib/libc/sys/linux/iconv/iconv_open.c @@ -0,0 +1,65 @@ +/* Get descriptor for character set conversion. + Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <errno.h> +#include <iconv.h> +#include <stdlib.h> +#include <string.h> + +#include <gconv_int.h> +#include "gconv_charset.h" + + +iconv_t +iconv_open (const char *tocode, const char *fromcode) +{ + char *tocode_conv; + char *fromcode_conv; + size_t tocode_len; + size_t fromcode_len; + __gconv_t cd; + int res; + + /* Normalize the name. We remove all characters beside alpha-numeric, + '_', '-', '/', and '.'. */ + tocode_len = strlen (tocode); + tocode_conv = alloca (tocode_len + 3); + strip (tocode_conv, tocode); + tocode = tocode_conv[2] == '\0' ? upstr (tocode_conv, tocode) : tocode_conv; + + fromcode_len = strlen (fromcode); + fromcode_conv = alloca (fromcode_len + 3); + strip (fromcode_conv, fromcode); + fromcode = (fromcode_conv[2] == '\0' + ? 
upstr (fromcode_conv, fromcode) : fromcode_conv); + + res = __gconv_open (tocode, fromcode, &cd, 0); + + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* We must set the error number according to the specs. */ + if (res == __GCONV_NOCONV || res == __GCONV_NODB) + __set_errno (EINVAL); + + return (iconv_t) -1; + } + + return (iconv_t) cd; +} diff --git a/newlib/libc/sys/linux/iconv/iconvconfig.c b/newlib/libc/sys/linux/iconv/iconvconfig.c new file mode 100644 index 000000000..8ad6c93e2 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/iconvconfig.c @@ -0,0 +1,1176 @@ +/* Generate fastloading iconv module configuration files. + Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <argp.h> +#include <assert.h> +#include <error.h> +#include <errno.h> +#include <fcntl.h> +#include <libintl.h> +#include <locale.h> +#include <mcheck.h> +#include <search.h> +#include <stdint.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/cdefs.h> +#include <sys/uio.h> + +#include "iconvconfig.h" + +/* Get libc version number. 
*/ +#include "../version.h" + +#define PACKAGE _libc_intl_domainname + + +/* The hashing function we use. */ +#include "../intl/hash-string.h" + + +/* Types used. */ +struct module +{ + char *fromname; + struct Strent *fromname_strent; + char *filename; + struct Strent *filename_strent; + const char *directory; + struct Strent *directory_strent; + struct module *next; + int cost; + struct Strent *toname_strent; + char toname[0]; +}; + +struct alias +{ + char *fromname; + struct Strent *froment; + struct module *module; + struct Strent *toent; + char toname[0]; +}; + +struct name +{ + const char *name; + struct Strent *strent; + int module_idx; + uint32_t hashval; +}; + +struct name_info +{ + const char *canonical_name; + struct Strent *canonical_strent; + + struct module *from_internal; + struct module *to_internal; + + struct other_conv_list + { + int dest_idx; + struct other_conv + { + gidx_t module_idx; + struct module *module; + struct other_conv *next; + } other_conv; + struct other_conv_list *next; + } *other_conv_list; +}; + + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +/* Short description of program. */ +static const char doc[] = N_("\ +Create fastloading iconv module configuration file."); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("[DIR...]"); + +/* Function to print some extra text in the help message. */ +static char *more_help (int key, const char *text, void *input); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + NULL, NULL, args_doc, doc, NULL, more_help +}; + + +/* The function doing the actual work. */ +static int handle_dir (const char *dir); + +/* Add all known builtin conversions and aliases. */ +static void add_builtins (void); + +/* Create list of all aliases without circular aliases. 
*/ +static void get_aliases (void); + +/* Create list of all modules. */ +static void get_modules (void); + +/* Get list of all the names and thereby indexing them. */ +static void generate_name_list (void); + +/* Collect information about all the names. */ +static void generate_name_info (void); + +/* Write the output file. */ +static int write_output (void); + + +/* Search tree of the modules we know. */ +static void *modules; + +/* Search tree of the aliases we know. */ +static void *aliases; + +/* Search tree for name to index mapping. */ +static void *names; + +/* Number of names we know about. */ +static int nnames; + +/* List of all aliases. */ +static struct alias **alias_list; +static size_t nalias_list; +static size_t nalias_list_max; + +/* List of all modules. */ +static struct module **module_list; +static size_t nmodule_list; +static size_t nmodule_list_max; + +/* Names and information about them. */ +static struct name_info *name_info; +static size_t nname_info; + +/* Number of translations not from or to INTERNAL. */ +static size_t nextra_modules; + + +/* Names and aliases for the builtin transformations. */ +static struct +{ + const char *from; + const char *to; +} builtin_alias[] = + { +#define BUILTIN_ALIAS(alias, real) \ + { .from = alias, .to = real }, +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) +#include <gconv_builtin.h> + }; +#undef BUILTIN_ALIAS +#undef BUILTIN_TRANSFORMATION +#define nbuiltin_alias (sizeof (builtin_alias) / sizeof (builtin_alias[0])) + +static struct +{ + const char *from; + const char *to; + const char *module; + int cost; +} builtin_trans[] = + { +#define BUILTIN_ALIAS(alias, real) +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) \ + { .from = From, .to = To, .module = Name, .cost = Cost }, +#include <gconv_builtin.h> + }; +#define nbuiltin_trans (sizeof (builtin_trans) / sizeof (builtin_trans[0])) + + +/* Filename extension for the modules. 
*/ +#ifndef MODULE_EXT +# define MODULE_EXT ".so" +#endif +static const char gconv_module_ext[] = MODULE_EXT; + + +extern void *xmalloc (size_t n) __attribute_malloc__; +extern void *xcalloc (size_t n, size_t m) __attribute_malloc__; +extern void *xrealloc (void *p, size_t n); + + +/* C string table handling. */ +struct Strtab; +struct Strent; + +/* Create new C string table object in memory. */ +extern struct Strtab *strtabinit (void); + +/* Free resources allocated for C string table ST. */ +extern void strtabfree (struct Strtab *st); + +/* Add string STR (length LEN is != 0) to C string table ST. */ +extern struct Strent *strtabadd (struct Strtab *st, const char *str, + size_t len); + +/* Finalize string table ST and store size in *SIZE and return a pointer. */ +extern void *strtabfinalize (struct Strtab *st, size_t *size); + +/* Get offset in string table for string associated with SE. */ +extern size_t strtaboffset (struct Strent *se); + +/* String table we construct. */ +static struct Strtab *strtab; + + + +int +main (int argc, char *argv[]) +{ + int remaining; + int status = 0; + char *path; + char *tp; + const char *old = GCONV_PATH; + size_t len = strlen (old) + 1; + char *new = alloca(len); + + /* Enable memory use testing. */ + /* mcheck_pedantic (NULL); */ + mtrace (); + + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); + + /* Set the text message domain. */ + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + /* Initialize the string table. */ + strtab = strtabinit (); + + /* Handle all directories mentioned. */ + while (remaining < argc) + status |= handle_dir (argv[remaining++]); + + /* In any case also handle the standard directory. */ + path = memcpy (new, old, len); + tp = strtok (path, ":"); + while (tp != NULL) + { + status |= handle_dir (tp); + + tp = strtok (NULL, ":"); + } + + /* Add the builtin transformations and aliases without overwriting + anything. 
*/ + add_builtins (); + + /* Store aliases in an array. */ + get_aliases (); + + /* Get list of all modules. */ + get_modules (); + + /* Generate list of all the names we know to handle in some way. */ + generate_name_list (); + + /* Now we know all the names we will handle, collect information + about them. */ + generate_name_info (); + + /* Write the output file, but only if we haven't seen any error. */ + if (status == 0) + status = write_output (); + else + error (1, 0, _("no output file produced because warning were issued")); + + return status; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + return strdup (gettext ("\ +Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n")); + default: + break; + } + return (char *) text; +} + + +/* Print the version information. */ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "iconvconfig (GNU %s) %s\n", PACKAGE, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. 
/* Add new alias.

   RP points at the remainder of a configuration line and is modified in
   place.  Two whitespace-separated names are expected: the alias (`from')
   and the name it stands for (`to').  Both are converted to upper case,
   packed next to each other as NUL-terminated strings inside the line
   buffer, and passed to new_alias with lengths that include the NUL.
   Lines that do not contain two names are ignored.  */
static void
add_alias (char *rp)
{
  char *from;
  char *to;
  char *wp;

  /* The <ctype.h> functions are only defined for values representable as
     `unsigned char' (or EOF), so every character is converted before it is
     classified; a plain `char' holding a byte >= 0x80 may be negative.  */
  while (isspace ((unsigned char) *rp))
    ++rp;

  /* Upper-case the `from' name where it stands.  */
  from = wp = rp;
  while (*rp != '\0' && !isspace ((unsigned char) *rp))
    *wp++ = toupper ((unsigned char) *rp++);
  if (*rp == '\0')
    /* There is no `to' string on the line.  Ignore it.  */
    return;
  *wp++ = '\0';

  /* The `to' name is written directly behind the terminated `from' name,
     regardless of how much whitespace separated them in the input.  */
  to = ++rp;
  while (isspace ((unsigned char) *rp))
    ++rp;
  while (*rp != '\0' && !isspace ((unsigned char) *rp))
    *wp++ = toupper ((unsigned char) *rp++);
  if (to == wp)
    /* No `to' string, ignore the line.  */
    return;
  *wp++ = '\0';

  assert (strlen (from) + 1 == (size_t) (to - from));
  assert (strlen (to) + 1 == (size_t) (wp - to));

  new_alias (from, to - from, to, wp - to);
}
*/ + inserted = (void **) tsearch (new_module, &modules, module_compare); + if (inserted == NULL) + error (EXIT_FAILURE, errno, "while inserting in search tree"); + if (*inserted != new_module) + free (new_module); + else + { + new_module->fromname_strent = strtabadd (strtab, new_module->fromname, + fromlen); + new_module->toname_strent = strtabadd (strtab, new_module->toname, + tolen); + new_module->filename_strent = strtabadd (strtab, new_module->filename, + filelen); + new_module->directory_strent = strtabadd (strtab, directory, dirlen); + } +} + + +/* Add new module. */ +static void +internal_function +add_module (char *rp, const char *directory) +{ + /* We expect now + 1. `from' name + 2. `to' name + 3. filename of the module + 4. an optional cost value + */ + char *from; + char *to; + char *module; + char *wp; + int need_ext; + int cost; + + while (isspace (*rp)) + ++rp; + from = rp; + while (*rp != '\0' && !isspace (*rp)) + { + *rp = toupper (*rp); + ++rp; + } + if (*rp == '\0') + return; + *rp++ = '\0'; + to = wp = rp; + while (isspace (*rp)) + ++rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (*rp == '\0') + return; + *wp++ = '\0'; + do + ++rp; + while (isspace (*rp)); + module = wp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = *rp++; + if (*rp == '\0') + { + /* There is no cost, use one by default. */ + *wp++ = '\0'; + cost = 1; + } + else + { + /* There might be a cost value. */ + char *endp; + + *wp++ = '\0'; + cost = strtol (rp, &endp, 10); + if (rp == endp || cost < 1) + /* No useful information. */ + cost = 1; + } + + if (module[0] == '\0') + /* No module name given. */ + return; + + /* See whether we must add the ending. */ + need_ext = 0; + if (wp - module < sizeof (gconv_module_ext) + || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, + sizeof (gconv_module_ext)) != 0) + /* We must add the module extension. 
*/ + need_ext = sizeof (gconv_module_ext) - 1; + + assert (strlen (from) + 1 == to - from); + assert (strlen (to) + 1 == module - to); + assert (strlen (module) + 1 == wp - module); + + new_module (from, to - from, to, module - to, directory, module, wp - module, + cost, need_ext); +} + + +/* Read the config file and add the data for this directory to that. */ +static int +handle_dir (const char *dir) +{ + char *infile; + FILE *fp; + char *line = NULL; + size_t linelen = 0; + size_t dirlen = strlen (dir); + char *tmp; + + if (dir[dirlen - 1] != '/') + { + char *newp = (char *) xmalloc (dirlen + 2); + dir = memcpy (newp, dir, dirlen); + newp[dirlen++] = '/'; + newp[dirlen] = '\0'; + } + + infile = (char *) alloca (dirlen + sizeof "gconv-modules"); + tmp = mempcpy (infile, dir, dirlen); + tmp += dirlen; + strcpy (tmp, "gconv-modules"); + + fp = fopen (infile, "r"); + if (fp == NULL) + { + error (0, errno, "cannot open `%s'", infile); + return 1; + } + + /* No threads present. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + while (!feof_unlocked (fp)) + { + char *rp, *endp, *word; + ssize_t n = __getdelim (&line, &linelen, '\n', fp); + + if (n < 0) + /* An error occurred. */ + break; + + rp = line; + /* Terminate the line (excluding comments or newline) with a NUL + byte to simplify the following code. */ + endp = strchr (rp, '#'); + if (endp != NULL) + *endp = '\0'; + else + if (rp[n - 1] == '\n') + rp[n - 1] = '\0'; + + while (isspace (*rp)) + ++rp; + + /* If this is an empty line go on with the next one. */ + if (rp == endp) + continue; + + word = rp; + while (*rp != '\0' && !isspace (*rp)) + ++rp; + + if (rp - word == sizeof ("alias") - 1 + && memcmp (word, "alias", sizeof ("alias") - 1) == 0) + add_alias (rp); + else if (rp - word == sizeof ("module") - 1 + && memcmp (word, "module", sizeof ("module") - 1) == 0) + add_module (rp, dir); + /* else */ + /* Otherwise ignore the line. 
*/ + } + + free (line); + + fclose (fp); + + return 0; +} + + +static void +append_module (const void *nodep, VISIT value, int level) +{ + struct module *mo; + + if (value != leaf && value != postorder) + return; + + mo = *(struct module **) nodep; + + if (nmodule_list > 0 + && strcmp (module_list[nmodule_list - 1]->fromname, mo->fromname) == 0) + { + /* Same name. */ + mo->next = module_list[nmodule_list - 1]; + module_list[nmodule_list - 1] = mo; + + return; + } + + if (nmodule_list_max == nmodule_list) + { + nmodule_list_max += 50; + module_list = (struct module **) xrealloc (module_list, + (nmodule_list_max + * sizeof (struct module *))); + } + + module_list[nmodule_list++] = mo; +} + + +static void +get_modules (void) +{ + twalk (modules, append_module); +} + + +static void +add_builtins (void) +{ + size_t cnt; + + /* Add all aliases. */ + for (cnt = 0; cnt < nbuiltin_alias; ++cnt) + new_alias (builtin_alias[cnt].from, + strlen (builtin_alias[cnt].from) + 1, + builtin_alias[cnt].to, + strlen (builtin_alias[cnt].to) + 1); + + /* add the builtin transformations. */ + for (cnt = 0; cnt < nbuiltin_trans; ++cnt) + new_module (builtin_trans[cnt].from, + strlen (builtin_trans[cnt].from) + 1, + builtin_trans[cnt].to, + strlen (builtin_trans[cnt].to) + 1, + "", builtin_trans[cnt].module, + strlen (builtin_trans[cnt].module) + 1, + builtin_trans[cnt].cost, 0); +} + + +static int +name_compare (const void *p1, const void *p2) +{ + const struct name *n1 = (const struct name *) p1; + const struct name *n2 = (const struct name *) p2; + + return strcmp (n1->name, n2->name); +} + + +static struct name * +new_name (const char *str, struct Strent *strent) +{ + struct name *newp = (struct name *) xmalloc (sizeof (struct name)); + + newp->name = str; + newp->strent = strent; + newp->module_idx = -1; + newp->hashval = hash_string (str); + + ++nnames; + + return newp; +} + + +static void +generate_name_list (void) +{ + size_t i; + + /* A name we always need. 
*/ + tsearch (new_name ("INTERNAL", strtabadd (strtab, "INTERNAL", + sizeof ("INTERNAL"))), + &names, name_compare); + + for (i = 0; i < nmodule_list; ++i) + { + struct module *runp; + + if (strcmp (module_list[i]->fromname, "INTERNAL") != 0) + tsearch (new_name (module_list[i]->fromname, + module_list[i]->fromname_strent), + &names, name_compare); + + for (runp = module_list[i]; runp != NULL; runp = runp->next) + if (strcmp (runp->toname, "INTERNAL") != 0) + tsearch (new_name (runp->toname, runp->toname_strent), + &names, name_compare); + } +} + + +static int +name_to_module_idx (const char *name, int add) +{ + struct name **res; + struct name fake_name = { .name = name }; + int idx; + + res = (struct name **) tfind (&fake_name, &names, name_compare); + if (res == NULL) + abort (); + + idx = (*res)->module_idx; + if (idx == -1 && add) + /* No module index assigned yet. */ + idx = (*res)->module_idx = nname_info++; + + return idx; +} + + +static void +generate_name_info (void) +{ + size_t i; + int idx; + + name_info = (struct name_info *) xcalloc (nmodule_list + 1, + sizeof (struct name_info)); + + /* First add a special entry for the INTERNAL name. This must have + index zero. 
*/ + idx = name_to_module_idx ("INTERNAL", 1); + name_info[0].canonical_name = "INTERNAL"; + name_info[0].canonical_strent = strtabadd (strtab, "INTERNAL", + sizeof ("INTERNAL")); + assert (nname_info == 1); + + for (i = 0; i < nmodule_list; ++i) + { + struct module *runp; + + for (runp = module_list[i]; runp != NULL; runp = runp->next) + if (strcmp (runp->fromname, "INTERNAL") == 0) + { + idx = name_to_module_idx (runp->toname, 1); + name_info[idx].from_internal = runp; + assert (name_info[idx].canonical_name == NULL + || strcmp (name_info[idx].canonical_name, + runp->toname) == 0); + name_info[idx].canonical_name = runp->toname; + name_info[idx].canonical_strent = runp->toname_strent; + } + else if (strcmp (runp->toname, "INTERNAL") == 0) + { + idx = name_to_module_idx (runp->fromname, 1); + name_info[idx].to_internal = runp; + assert (name_info[idx].canonical_name == NULL + || strcmp (name_info[idx].canonical_name, + runp->fromname) == 0); + name_info[idx].canonical_name = runp->fromname; + name_info[idx].canonical_strent = runp->fromname_strent; + } + else + { + /* This is a transformation not to or from the INTERNAL + encoding. */ + int from_idx = name_to_module_idx (runp->fromname, 1); + int to_idx = name_to_module_idx (runp->toname, 1); + struct other_conv_list *newp; + + newp = (struct other_conv_list *) + xmalloc (sizeof (struct other_conv_list)); + newp->other_conv.module_idx = to_idx; + newp->other_conv.module = runp; + newp->other_conv.next = NULL; /* XXX Allow multiple module sequence */ + newp->dest_idx = to_idx; + newp->next = name_info[from_idx].other_conv_list; + name_info[from_idx].other_conv_list = newp; + assert (name_info[from_idx].canonical_name == NULL + || strcmp (name_info[from_idx].canonical_name, + runp->fromname) == 0); + name_info[from_idx].canonical_name = runp->fromname; + name_info[from_idx].canonical_strent = runp->fromname_strent; + + ++nextra_modules; + } + } + + /* Now add the module index information for all the aliases. 
*/ + for (i = 0; i < nalias_list; ++i) + { + struct name fake_name = { .name = alias_list[i]->toname }; + struct name **tonamep; + + tonamep = (struct name **) tfind (&fake_name, &names, name_compare); + if (tonamep != NULL) + { + struct name *newp = new_name (alias_list[i]->fromname, + alias_list[i]->froment); + newp->module_idx = (*tonamep)->module_idx; + tsearch (newp, &names, name_compare); + } + } +} + + +static int +is_prime (unsigned long int candidate) +{ + /* No even number and none less than 10 will be passed here. */ + unsigned long int divn = 3; + unsigned long int sq = divn * divn; + + while (sq < candidate && candidate % divn != 0) + { + ++divn; + sq += 4 * divn; + ++divn; + } + + return candidate % divn != 0; +} + + +static uint32_t +next_prime (uint32_t seed) +{ + /* Make it definitely odd. */ + seed |= 1; + + while (!is_prime (seed)) + seed += 2; + + return seed; +} + + +/* Format of the output file. + + Offset Length Description + 0000 4 Magic header bytes + 0004 4 Offset of string table (stoff) + 0008 4 Offset of name hashing table (hoff) + 000C 4 Hashing table size (hsize) + 0010 4 Offset of module table (moff) + 0014 4 Offset of other conversion module table (ooff) + + stoff ??? String table + + hoff 8*hsize Array of tuples + string table offset + module index + + moff ??? Array of tuples + canonical name offset + from-internal module dir name offset + from-internal module name off + to-internal module dir name offset + to-internal module name offset + offset into other conversion table + + ooff ??? 
One or more of + number of steps/modules + one or more of tuple + canonical name offset for output + module dir name offset + module name offset + (following last entry with step count 0) +*/ +static int +write_output (void) +{ + int fd; + char *string_table; + size_t string_table_size; + struct gconvcache_header header; + struct hash_entry *hash_table; + size_t hash_size; + struct module_entry *module_table; + char *extra_table; + char *cur_extra_table; + size_t n; + int idx; + struct iovec iov[6]; + static const gidx_t null_word; + size_t total; + char tmpfname[sizeof (GCONV_MODULES_CACHE) + strlen (".XXXXXX")]; + + /* Function to insert the names. */ + static void name_insert (const void *nodep, VISIT value, int level) + { + struct name *name; + unsigned int idx; + unsigned int hval2; + + if (value != leaf && value != postorder) + return; + + name = *(struct name **) nodep; + idx = name->hashval % hash_size; + hval2 = 1 + name->hashval % (hash_size - 2); + + while (hash_table[idx].string_offset != 0) + if ((idx += hval2) >= hash_size) + idx -= hash_size; + + hash_table[idx].string_offset = strtaboffset (name->strent); + + assert (name->module_idx != -1); + hash_table[idx].module_idx = name->module_idx; + } + + /* Open the output file. */ + strcpy (stpcpy (tmpfname, GCONV_MODULES_CACHE), ".XXXXXX"); + fd = mkstemp (tmpfname); + if (fd == -1) + return 1; + + /* Create the string table. */ + string_table = strtabfinalize (strtab, &string_table_size); + + /* Create the hashing table. We know how many strings we have. + Creating a perfect hash table is not reasonable here. Therefore + we use open hashing and a table size which is the next prime 40% + larger than the number of strings. */ + hash_size = next_prime (nnames * 1.4); + hash_table = (struct hash_entry *) xcalloc (hash_size, + sizeof (struct hash_entry)); + /* Fill the hash table. */ + twalk (names, name_insert); + + /* Create the section for the module list. 
*/ + module_table = (struct module_entry *) xcalloc (sizeof (struct module_entry), + nname_info); + + /* Allocate memory for the non-INTERNAL conversions. The allocated + memory can be more than is actually needed. */ + extra_table = (char *) xcalloc (sizeof (struct extra_entry) + + sizeof (gidx_t) + + sizeof (struct extra_entry_module), + nextra_modules); + cur_extra_table = extra_table; + + /* Fill in the module information. */ + for (n = 0; n < nname_info; ++n) + { + module_table[n].canonname_offset = + strtaboffset (name_info[n].canonical_strent); + + if (name_info[n].from_internal == NULL) + { + module_table[n].fromdir_offset = 0; + module_table[n].fromname_offset = 0; + } + else + { + module_table[n].fromdir_offset = + strtaboffset (name_info[n].from_internal->directory_strent); + module_table[n].fromname_offset = + strtaboffset (name_info[n].from_internal->filename_strent); + } + + if (name_info[n].to_internal == NULL) + { + module_table[n].todir_offset = 0; + module_table[n].toname_offset = 0; + } + else + { + module_table[n].todir_offset = + strtaboffset (name_info[n].to_internal->directory_strent); + module_table[n].toname_offset = + strtaboffset (name_info[n].to_internal->filename_strent); + } + + if (name_info[n].other_conv_list != NULL) + { + struct other_conv_list *other = name_info[n].other_conv_list; + + /* Store the reference. We add 1 to distinguish the entry + at offset zero from the case where no extra modules are + available. The file reader has to account for the + offset. */ + module_table[n].extra_offset = 1 + cur_extra_table - extra_table; + + do + { + struct other_conv *runp; + struct extra_entry *extra; + + /* Allocate new entry. */ + extra = (struct extra_entry *) cur_extra_table; + cur_extra_table += sizeof (struct extra_entry); + extra->module_cnt = 0; + + runp = &other->other_conv; + do + { + cur_extra_table += sizeof (struct extra_entry_module); + extra->module[extra->module_cnt].outname_offset = + runp->next == NULL + ? 
other->dest_idx : runp->next->module_idx; + extra->module[extra->module_cnt].dir_offset = + strtaboffset (runp->module->directory_strent); + extra->module[extra->module_cnt].name_offset = + strtaboffset (runp->module->filename_strent); + ++extra->module_cnt; + + runp = runp->next; + } + while (runp != NULL); + + other = other->next; + } + while (other != NULL); + + /* Final module_cnt is zero. */ + *((gidx_t *) cur_extra_table) = 0; + cur_extra_table += sizeof (gidx_t); + } + } + + header.magic = GCONVCACHE_MAGIC; + + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct gconvcache_header); + total = iov[0].iov_len; + + header.string_offset = total; + iov[1].iov_base = string_table; + iov[1].iov_len = string_table_size; + total += iov[1].iov_len; + + idx = 2; + if ((string_table_size & (sizeof (gidx_t) - 1)) != 0) + { + iov[2].iov_base = (void *) &null_word; + iov[2].iov_len = (sizeof (gidx_t) + - (string_table_size & (sizeof (gidx_t) - 1))); + total += iov[2].iov_len; + ++idx; + } + + header.hash_offset = total; + header.hash_size = hash_size; + iov[idx].iov_base = hash_table; + iov[idx].iov_len = hash_size * sizeof (struct hash_entry); + total += iov[idx].iov_len; + ++idx; + + header.module_offset = total; + iov[idx].iov_base = module_table; + iov[idx].iov_len = nname_info * sizeof (struct module_entry); + total += iov[idx].iov_len; + ++idx; + + assert (cur_extra_table - extra_table + <= ((sizeof (struct extra_entry) + sizeof (gidx_t) + + sizeof (struct extra_entry_module)) + * nextra_modules)); + header.otherconv_offset = total; + iov[idx].iov_base = extra_table; + iov[idx].iov_len = cur_extra_table - extra_table; + total += iov[idx].iov_len; + ++idx; + + if (TEMP_FAILURE_RETRY (writev (fd, iov, idx)) != total + /* The file was created with mode 0600. Make it world-readable. */ + || fchmod (fd, 0644) != 0 + /* Rename the file, possibly replacing an old one. 
*/ + || rename (tmpfname, GCONV_MODULES_CACHE) != 0) + { + int save_errno = errno; + close (fd); + unlink (tmpfname); + error (EXIT_FAILURE, save_errno, + gettext ("cannot generate output file")); + } + + close (fd); + + return 0; +} diff --git a/newlib/libc/sys/linux/iconv/iconvconfig.h b/newlib/libc/sys/linux/iconv/iconvconfig.h new file mode 100644 index 000000000..c52ceaefe --- /dev/null +++ b/newlib/libc/sys/linux/iconv/iconvconfig.h @@ -0,0 +1,67 @@ +/* Copyright (C) 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ + +#ifndef _ICONVCONFIG_H +#define _ICONVCONFIG_H 1 + +#include <stdint.h> + + +/* Type of the offsets and indices stored in the cache file. */ +typedef uint16_t gidx_t; + + +/* Header at the start of the cache file; the offsets locate the tables + that follow it. */ +struct gconvcache_header +{ + uint32_t magic; + gidx_t string_offset; + gidx_t hash_offset; + gidx_t hash_size; + gidx_t module_offset; + gidx_t otherconv_offset; +}; + +/* One slot of the name hashing table. */ +struct hash_entry +{ + gidx_t string_offset; + gidx_t module_idx; +}; + +/* One record of the module table. */ +struct module_entry +{ + gidx_t canonname_offset; + gidx_t fromdir_offset; + gidx_t fromname_offset; + gidx_t todir_offset; + gidx_t toname_offset; + gidx_t extra_offset; +}; + +/* One record of the other conversion table: MODULE_CNT followed by that + many module descriptions. */ +struct extra_entry +{ + gidx_t module_cnt; + struct extra_entry_module + { + gidx_t outname_offset; + gidx_t dir_offset; + gidx_t name_offset; + } module[0]; +}; + + +#define GCONVCACHE_MAGIC 0x20010324 + + +#define GCONV_MODULES_CACHE GCONV_DIR "/gconv-modules.cache" + +#endif /* iconvconfig.h */ diff --git a/newlib/libc/sys/linux/iconv/loadinfo.h b/newlib/libc/sys/linux/iconv/loadinfo.h new file mode 100644 index 000000000..1de6cfd24 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/loadinfo.h @@ -0,0 +1,100 @@ +/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA.
*/ + +#ifndef _LOADINFO_H +#define _LOADINFO_H 1 + +#ifndef PARAMS +# if __STDC__ +# define PARAMS(args) args +# else +# define PARAMS(args) () +# endif +#endif + +#ifndef internal_function +# define internal_function +#endif + +/* Tell the compiler when a conditional or integer expression is + almost always true or almost always false. */ +#ifndef HAVE_BUILTIN_EXPECT +# define __builtin_expect(expr, val) (expr) +#endif + +/* Encoding of locale name parts. */ +#define CEN_REVISION 1 +#define CEN_SPONSOR 2 +#define CEN_SPECIAL 4 +#define XPG_NORM_CODESET 8 +#define XPG_CODESET 16 +#define TERRITORY 32 +#define CEN_AUDIENCE 64 +#define XPG_MODIFIER 128 + +#define CEN_SPECIFIC (CEN_REVISION|CEN_SPONSOR|CEN_SPECIAL|CEN_AUDIENCE) +#define XPG_SPECIFIC (XPG_CODESET|XPG_NORM_CODESET|XPG_MODIFIER) + + +struct loaded_l10nfile +{ + const char *filename; + int decided; + + const void *data; + + struct loaded_l10nfile *next; + struct loaded_l10nfile *successor[1]; +}; + + +/* Normalize codeset name. There is no standard for the codeset + names. Normalization allows the user to use any of the common + names. The return value is dynamically allocated and has to be + freed by the caller. */ +extern const char *_nl_normalize_codeset PARAMS ((const char *codeset, + size_t name_len)); + +extern struct loaded_l10nfile * +_nl_make_l10nflist PARAMS ((struct loaded_l10nfile **l10nfile_list, + const char *dirlist, size_t dirlist_len, int mask, + const char *language, const char *territory, + const char *codeset, + const char *normalized_codeset, + const char *modifier, const char *special, + const char *sponsor, const char *revision, + const char *filename, int do_allocate)); + + +extern const char *_nl_expand_alias PARAMS ((const char *name)); + +/* normalized_codeset is dynamically allocated and has to be freed by + the caller. 
*/ +extern int _nl_explode_name PARAMS ((char *name, const char **language, + const char **modifier, + const char **territory, + const char **codeset, + const char **normalized_codeset, + const char **special, + const char **sponsor, + const char **revision)); + +extern char *_nl_find_language PARAMS ((const char *name)); + +#endif /* loadinfo.h */ diff --git a/newlib/libc/sys/linux/iconv/localeinfo.h b/newlib/libc/sys/linux/iconv/localeinfo.h new file mode 100644 index 000000000..026463ffb --- /dev/null +++ b/newlib/libc/sys/linux/iconv/localeinfo.h @@ -0,0 +1,209 @@ +/* Declarations for internal libc locale interfaces + Copyright (C) 1995, 96, 97, 98, 99,2000,2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _LOCALEINFO_H +#define _LOCALEINFO_H 1 + +#include <stddef.h> +#include <langinfo.h> +#include <limits.h> +#include <time.h> +#include <stdint.h> +#include <sys/types.h> + +/* This has to be changed whenever a new locale is defined. */ +#define __LC_LAST 13 + +#include "loadinfo.h" /* For loaded_l10nfile definition. */ + +/* Magic number at the beginning of a locale data file for CATEGORY. 
*/ +#define LIMAGIC(category) ((unsigned int) (0x20000828 ^ (category))) + +/* Two special weight constants for the collation data. */ +#define IGNORE_CHAR 2 + +/* We use a special value for the usage counter in `locale_data' to + signal that this data must never be removed anymore. */ +#define MAX_USAGE_COUNT (UINT_MAX - 1) +#define UNDELETABLE UINT_MAX + +/* Structure describing locale data in core for a category. */ +struct locale_data +{ + const char *name; + const char *filedata; /* Region mapping the file data. */ + off_t filesize; /* Size of the file (and the region). */ + int mmaped; /* If nonzero the data is mmaped. */ + + unsigned int usage_count; /* Counter for users. */ + + int use_translit; /* Nonzero if the mb*towv*() and wc*tomb() + functions should use transliteration. */ + const char *options; /* Extra options from the locale name, + not used in the path to the locale data. */ + + unsigned int nstrings; /* Number of strings below. */ + union locale_data_value + { + const uint32_t *wstr; + const char *string; + unsigned int word; + } + values __flexarr; /* Items, usually pointers into `filedata'. */ +}; + +/* We know three kinds of collation sorting rules. */ +enum coll_sort_rule +{ + illegal_0__, + sort_forward, + sort_backward, + illegal_3__, + sort_position, + sort_forward_position, + sort_backward_position, + sort_mask +}; + +/* We can map the types of the entries into a few categories. */ +enum value_type +{ + none, + string, + stringarray, + byte, + bytearray, + word, + stringlist, + wordarray, + wstring, + wstringarray, + wstringlist +}; + + +/* Definitions for `era' information from LC_TIME. */ +#define ERA_NAME_FORMAT_MEMBERS 4 +#define ERA_M_NAME 0 +#define ERA_M_FORMAT 1 +#define ERA_W_NAME 2 +#define ERA_W_FORMAT 3 + + +/* Structure to access `era' information from LC_TIME. */ +struct era_entry +{ + uint32_t direction; /* Contains '+' or '-'. 
*/ + int32_t offset; + int32_t start_date[3]; + int32_t stop_date[3]; + const char *era_name; + const char *era_format; + const wchar_t *era_wname; + const wchar_t *era_wformat; + int absolute_direction; + /* absolute direction: + +1 indicates that year number is higher in the future. (like A.D.) + -1 indicates that year number is higher in the past. (like B.C.) */ +}; + + +/* LC_CTYPE specific: + Hardwired indices for standard wide character translation mappings. */ +enum +{ + __TOW_toupper = 0, + __TOW_tolower = 1 +}; + + +/* LC_CTYPE specific: + Access a wide character class with a single character index. + _ISCTYPE (c, desc) = iswctype (btowc (c), desc). + c must be an `unsigned char'. desc must be a nonzero wctype_t. */ +#define _ISCTYPE(c, desc) \ + (((((const uint32_t *) (desc)) - 8)[(c) >> 5] >> ((c) & 0x1f)) & 1) + + +/* For each category declare the variable for the current locale data. */ +#define DEFINE_CATEGORY(category, category_name, items, a) \ +extern struct locale_data *_nl_current_##category; +#include "categories.def" +#undef DEFINE_CATEGORY + +extern const char *const _nl_category_names[__LC_LAST]; +extern const size_t _nl_category_name_sizes[__LC_LAST]; +extern struct locale_data * *const _nl_current[__LC_LAST]; + +/* Extract the current CATEGORY locale's string for ITEM. */ +#define _NL_CURRENT(category, item) \ + (_nl_current_##category->values[_NL_ITEM_INDEX (item)].string) + +/* Extract the current CATEGORY locale's string for ITEM. */ +#define _NL_CURRENT_WSTR(category, item) \ + ((wchar_t *) (_nl_current_##category->values[_NL_ITEM_INDEX (item)].wstr)) + +/* Extract the current CATEGORY locale's word for ITEM. */ +#define _NL_CURRENT_WORD(category, item) \ + (_nl_current_##category->values[_NL_ITEM_INDEX (item)].word) + +/* This is used in lc-CATEGORY.c to define _nl_current_CATEGORY. 
*/ +#define _NL_CURRENT_DEFINE(category) \ + extern struct locale_data _nl_C_##category; \ + struct locale_data *_nl_current_##category = &_nl_C_##category + +/* Load the locale data for CATEGORY from the file specified by *NAME. + If *NAME is "", use environment variables as specified by POSIX, + and fill in *NAME with the actual name used. The directories + listed in LOCALE_PATH are searched for the locale files. */ +extern struct locale_data *_nl_find_locale (const char *locale_path, + size_t locale_path_len, + int category, const char **name); + +/* Try to load the file described by FILE. */ +extern void _nl_load_locale (struct loaded_l10nfile *file, int category); + +/* Free all resource. */ +extern void _nl_unload_locale (struct locale_data *locale); + +/* Free the locale and give back all memory if the usage count is one. */ +extern void _nl_remove_locale (int locale, struct locale_data *data); + + +/* Return `era' entry which corresponds to TP. Used in strftime. */ +extern struct era_entry *_nl_get_era_entry (const struct tm *tp); + +/* Return `era' cnt'th entry . Used in strptime. */ +extern struct era_entry *_nl_select_era_entry (int cnt); + +/* Return `alt_digit' which corresponds to NUMBER. Used in strftime. */ +extern const char *_nl_get_alt_digit (unsigned int number); + +/* Similar, but now for wide characters. */ +extern const wchar_t *_nl_get_walt_digit (unsigned int number); + +/* Parse string as alternative digit and return numeric value. */ +extern int _nl_parse_alt_digit (const char **strp); + +/* Postload processing. */ +extern void _nl_postload_ctype (void); +extern void _nl_postload_time (void); + + +#endif /* localeinfo.h */ diff --git a/newlib/libc/sys/linux/iconv/loop.c b/newlib/libc/sys/linux/iconv/loop.c new file mode 100644 index 000000000..7d3bf472d --- /dev/null +++ b/newlib/libc/sys/linux/iconv/loop.c @@ -0,0 +1,450 @@ +/* Conversion loop frame work. + Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This file provides a frame for the reader loop in all conversion modules. + The actual code must (of course) be provided in the actual module source + code but certain actions can be written down generically, with some + customization options which are these: + + MIN_NEEDED_INPUT minimal number of input bytes needed for the next + conversion. + MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round + of conversion. + + MAX_NEEDED_INPUT you guessed it, this is the maximal number of input + bytes needed. It defaults to MIN_NEEDED_INPUT. + MAX_NEEDED_OUTPUT likewise for output bytes. + + LOOPFCT name of the function created. If not specified + the name is `loop' but this prevents the use + of multiple functions in the same file. + + BODY this is supposed to expand to the body of the loop. + The user must provide this. + + EXTRA_LOOP_DECLS extra arguments passed from conversion loop call. + + INIT_PARAMS code to define and initialize variables from params. + UPDATE_PARAMS code to store result in params.
+*/ + +#include <assert.h> +#include <machine/endian.h> +#include <gconv.h> +#include <stdint.h> +#include <string.h> +#include <wchar.h> +#include <sys/param.h> /* For MIN. */ +#define __need_size_t +#include <stddef.h> + + +/* We have to provide support for machines which are not able to handle + unaligned memory accesses. Some of the character encodings have + representations with a fixed width of 2 or 4 bytes. But if we cannot + access unaligned memory we still have to read byte-wise. */ +#undef FCTNAME2 +#if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED +/* Either we can handle unaligned memory access, or DEFINE_UNALIGNED is + not set and we are generating the plainly named function: access + the values directly. */ +# define get16(addr) *((__const uint16_t *) (addr)) +# define get32(addr) *((__const uint32_t *) (addr)) + +/* We need no special support for writing values either. */ +# define put16(addr, val) *((uint16_t *) (addr)) = (val) +# define put32(addr, val) *((uint32_t *) (addr)) = (val) + +# define FCTNAME2(name) name +#else +/* Distinguish between big endian and little endian.
*/ +# if __BYTE_ORDER == __LITTLE_ENDIAN +/* Little endian: the byte at the lowest address is the least significant. */ +# define get16(addr) \ + (((__const unsigned char *) (addr))[1] << 8 \ + | ((__const unsigned char *) (addr))[0]) +# define get32(addr) \ + (((((__const unsigned char *) (addr))[3] << 8 \ + | ((__const unsigned char *) (addr))[2]) << 8 \ + | ((__const unsigned char *) (addr))[1]) << 8 \ + | ((__const unsigned char *) (addr))[0]) + +# define put16(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + ((unsigned char *) (addr))[1] = __val >> 8; \ + (void) 0; }) +# define put32(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[3] = __val; \ + (void) 0; }) +# else +/* Big endian: the byte at the lowest address is the most significant. */ +# define get16(addr) \ + (((__const unsigned char *) (addr))[0] << 8 \ + | ((__const unsigned char *) (addr))[1]) +# define get32(addr) \ + (((((__const unsigned char *) (addr))[0] << 8 \ + | ((__const unsigned char *) (addr))[1]) << 8 \ + | ((__const unsigned char *) (addr))[2]) << 8 \ + | ((__const unsigned char *) (addr))[3]) + +# define put16(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[1] = __val; \ + ((unsigned char *) (addr))[0] = __val >> 8; \ + (void) 0; }) +# define put32(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[3] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[0] = __val; \ + (void) 0; }) +# endif + +/* The byte-wise variant of the loop function gets the suffix `_unaligned'. */ +# define FCTNAME2(name) name##_unaligned +#endif +#define FCTNAME(name) FCTNAME2(name) + + +/* We need at least one byte for the next round. */ +#ifndef MIN_NEEDED_INPUT +# error "MIN_NEEDED_INPUT definition missing" +#endif + +/* The maximal number of input bytes defaults to the minimal number.
*/ +#ifndef MAX_NEEDED_INPUT +# define MAX_NEEDED_INPUT MIN_NEEDED_INPUT +#endif + +/* We produce at least one byte in the next round. */ +#ifndef MIN_NEEDED_OUTPUT +# error "MIN_NEEDED_OUTPUT definition missing" +#endif + +/* Let's see how many bytes we produce at most. */ +#ifndef MAX_NEEDED_OUTPUT +# define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT +#endif + +/* Default name for the function. */ +#ifndef LOOPFCT +# define LOOPFCT loop +#endif + +/* Make sure we have a loop body. */ +#ifndef BODY +# error "Definition of BODY missing for function" LOOPFCT +#endif + + +/* If no arguments have to be passed to the loop function define the macro + as empty. */ +#ifndef EXTRA_LOOP_DECLS +# define EXTRA_LOOP_DECLS +#endif + + +/* To make it easier for the writers of the modules, we define a macro + to test whether we have to ignore errors. It reads the variables + `irreversible' and `flags' of the function it is used in. */ +#define ignore_errors_p() \ + (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS)) + + +/* Error handling with transliteration/transcription function use and + ignoring of errors. Note that we cannot use the do while (0) trick + since `break' and `continue' must reach certain points. */ +#define STANDARD_ERR_HANDLER(Incr) \ + { \ + struct __gconv_trans_data *trans; \ + \ + result = __GCONV_ILLEGAL_INPUT; \ + \ + if (irreversible == NULL) \ + /* This means we are in a call from __gconv_transliterate. In this \ + case we are not doing any error recovery ourselves. */ \ + break; \ + \ + /* First try the transliteration methods. */ \ + for (trans = step_data->__trans; trans != NULL; trans = trans->__next) \ + { \ + result = trans->__trans_fct (step, step_data, trans->__data, *inptrp, \ + &inptr, inend, &outptr, irreversible); \ + if (result != __GCONV_ILLEGAL_INPUT) \ + break; \ + } \ + /* If any of them recognized the input continue with the loop. */ \ + if (result != __GCONV_ILLEGAL_INPUT) \ + continue; \ + \ + /* Next see whether we have to ignore the error. If not, stop. */ \ + if (!
ignore_errors_p ()) \ + break; \ + \ + /* When we come here it means we ignore the character: count it as \ + irreversible and skip INCR input bytes. */ \ + ++*irreversible; \ + inptr += Incr; \ + continue; \ + } + + +/* Handling of Unicode 3.1 TAG characters. Unicode recommends + "If language codes are not relevant to the particular processing + operation, then they should be ignored." + This macro is usually called right before STANDARD_ERR_HANDLER (Incr). */ +#define UNICODE_TAG_HANDLER(Character, Incr) \ + { \ + /* TAG characters are those in the range U+E0000..U+E007F. */ \ + if (((Character) >> 7) == (0xe0000 >> 7)) \ + { \ + inptr += Incr; \ + continue; \ + } \ + } + + +/* The conversion loop proper. Starting at *INPTRP it runs BODY until + INEND is reached, the output buffer ending at OUTEND cannot take + another MIN_NEEDED_OUTPUT bytes, fewer than MIN_NEEDED_INPUT input + bytes are left, or BODY leaves the loop. The advanced pointers are + stored back through INPTRP and OUTPTRP. + The function returns the status, as defined in gconv.h. */ +static inline int +FCTNAME (LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, const unsigned char *outend, + size_t *irreversible EXTRA_LOOP_DECLS) +{ +#ifdef LOOP_NEED_STATE + mbstate_t *state = step_data->__statep; +#endif +#ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +#endif +#ifdef LOOP_NEED_DATA + void *data = step->__data; +#endif + int result = __GCONV_EMPTY_INPUT; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + +#ifdef INIT_PARAMS + INIT_PARAMS; +#endif + + while (inptr != inend) + { + /* The separate `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are there + to help the compiler generate better code. They will be + optimized away since MIN_NEEDED_OUTPUT is always a constant. */ + if ((MIN_NEEDED_OUTPUT != 1 + && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0)) + || (MIN_NEEDED_OUTPUT == 1 + && __builtin_expect (outptr >= outend, 0))) + { + /* Overflow in the output buffer. */ + result = __GCONV_FULL_OUTPUT; + break; + } + if (MIN_NEEDED_INPUT > 1 + && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0)) + { + /* We don't have enough input for another complete input + character.
*/ + result = __GCONV_INCOMPLETE_INPUT; + break; + } + + /* Here comes the body the user provides. It can stop with + RESULT set to __GCONV_INCOMPLETE_INPUT (if the input + characters vary in size), __GCONV_ILLEGAL_INPUT, or + __GCONV_FULL_OUTPUT (if the output characters vary in size). */ + BODY + } + + /* Update the pointers pointed to by the parameters. */ + *inptrp = inptr; + *outptrp = outptr; +#ifdef UPDATE_PARAMS + UPDATE_PARAMS; +#endif + + return result; +} + + +/* Include the file a second time to define the function to handle + unaligned access. */ +#if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \ + && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ + && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 +# undef get16 +# undef get32 +# undef put16 +# undef put32 +# undef unaligned + +# define DEFINE_UNALIGNED +# include "loop.c" +# undef DEFINE_UNALIGNED +#endif + + +#if MAX_NEEDED_INPUT > 1 +# define SINGLE(fct) SINGLE2 (fct) +# define SINGLE2(fct) fct##_single +/* Convert a single character, some bytes of which were left in the + state object by an earlier call. The saved bytes and further bytes + from *INPTRP are collected in a local buffer and BODY is run once + on that buffer. Returns a status as defined in gconv.h. */ +static inline int +SINGLE(LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible EXTRA_LOOP_DECLS) +{ + mbstate_t *state = step_data->__statep; +#ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +#endif +#ifdef LOOP_NEED_DATA + void *data = step->__data; +#endif + int result = __GCONV_OK; + unsigned char bytebuf[MAX_NEEDED_INPUT]; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t inlen; + +#ifdef INIT_PARAMS + INIT_PARAMS; +#endif + +#ifdef UNPACK_BYTES + UNPACK_BYTES +#else + /* Add the bytes from the state to the input buffer. The low three + bits of __count hold the number of saved bytes. */ + for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen) + bytebuf[inlen] = state->__value.__wchb[inlen]; +#endif + + /* Are there enough bytes in the input buffer?
*/ + if (__builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0)) + { + *inptrp = inend; +#ifdef STORE_REST + inptr = bytebuf; + inptrp = &inptr; + inend = &bytebuf[inlen]; + + STORE_REST +#else + /* We don't have enough input for another complete input + character. Append what is there to the bytes in the state. + NOTE(review): state->__count is not updated after this copy, + so the next call may not see the bytes stored here -- confirm. */ + while (inptr < inend) + state->__value.__wchb[inlen++] = *inptr++; +#endif + + return __GCONV_INCOMPLETE_INPUT; + } + + /* Is there enough space in the output buffer? */ + if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) + || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) + /* Overflow in the output buffer. */ + return __GCONV_FULL_OUTPUT; + + /* Now add characters from the normal input buffer. */ + do + bytebuf[inlen++] = *inptr++; + while (inlen < MAX_NEEDED_INPUT && inptr < inend); + + /* From here on INPTR and INEND describe the local buffer. */ + inptr = bytebuf; + inend = &bytebuf[inlen]; + + /* Run BODY exactly once; a `break' or `continue' in it ends up here. */ + do + { + BODY + } + while (0); + + /* Now we either have produced an output character and consumed all the + bytes from the state and at least one more, or the character is still + incomplete, or we have some other error (like illegal input character, + no space in output buffer). */ + if (__builtin_expect (inptr != bytebuf, 1)) + { + /* We found a new character. */ + assert (inptr - bytebuf > (state->__count & 7)); + + /* Only the bytes beyond those taken from the state came from the + caller's input buffer. */ + *inptrp += inptr - bytebuf - (state->__count & 7); + *outptrp = outptr; + + result = __GCONV_OK; + + /* Clear the state buffer. */ + state->__count &= ~7; + } + else if (result == __GCONV_INCOMPLETE_INPUT) + { + /* This can only happen if we have less than MAX_NEEDED_INPUT bytes + available. */ + assert (inend != &bytebuf[MAX_NEEDED_INPUT]); + + *inptrp += inend - bytebuf - (state->__count & 7); +#ifdef STORE_REST + inptrp = &inptr; + + STORE_REST +#else + /* We don't have enough input for another complete input + character.
*/ + while (inptr < inend) + state->__value.__wchb[inlen++] = *inptr++; +#endif + } + + return result; +} +# undef SINGLE +# undef SINGLE2 +#endif + + +/* We remove the macro definitions so that we can include this file again + for the definition of another function. */ +#undef MIN_NEEDED_INPUT +#undef MAX_NEEDED_INPUT +#undef MIN_NEEDED_OUTPUT +#undef MAX_NEEDED_OUTPUT +#undef LOOPFCT +#undef BODY +#undef LOOPFCT +#undef EXTRA_LOOP_DECLS +#undef INIT_PARAMS +#undef UPDATE_PARAMS +#undef UNPACK_BYTES +#undef LOOP_NEED_STATE +#undef LOOP_NEED_FLAGS +#undef LOOP_NEED_DATA +#undef get16 +#undef get32 +#undef put16 +#undef put32 +#undef unaligned diff --git a/newlib/libc/sys/linux/iconv/skeleton.c b/newlib/libc/sys/linux/iconv/skeleton.c new file mode 100644 index 000000000..cde8533c5 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/skeleton.c @@ -0,0 +1,696 @@ +/* Skeleton for a conversion module. + Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This file can be included to provide definitions of several things + many modules have in common. 
It can be customized using the following + macros: + + DEFINE_INIT define the default initializer. This requires the + following symbol to be defined. + + CHARSET_NAME string with official name of the coded character + set (in all-caps) + + DEFINE_FINI define the default destructor function. + + MIN_NEEDED_FROM minimal number of bytes needed for the from-charset. + MIN_NEEDED_TO likewise for the to-charset. + + MAX_NEEDED_FROM maximal number of bytes needed for the from-charset. + This macro is optional, it defaults to MIN_NEEDED_FROM. + MAX_NEEDED_TO likewise for the to-charset. + + DEFINE_DIRECTION_OBJECTS + two objects will be defined to be used when the + `gconv' function must only distinguish two + directions. This is implied by DEFINE_INIT. + If this macro is not defined the following + macro must be available. + + FROM_DIRECTION this macro is supposed to return a value != 0 + if we convert from the current character set, + otherwise it returns 0. + + EMIT_SHIFT_TO_INIT this symbol is optional. If it is defined it + defines some code which writes out a sequence + of characters which brings the current state into + the initial state. + + FROM_LOOP name of the function implementing the conversion + from the current character set. + TO_LOOP likewise for the other direction. + + ONE_DIRECTION optional. If defined to 1, only one conversion + direction is defined instead of two. In this + case, FROM_DIRECTION should be defined to 1, and + FROM_LOOP and TO_LOOP should have the same value. + + SAVE_RESET_STATE in case of an error we must reset the state for + the rerun so this macro must be defined for + stateful encodings. It takes an argument which + is nonzero when saving. + + RESET_INPUT_BUFFER If the input character sets allow this the macro + can be defined to reset the input buffer pointers + to cover only those characters up to the error. + + FUNCTION_NAME if not set the conversion function is named `gconv'.
+ + PREPARE_LOOP optional code preparing the conversion loop. Can + contain variable definitions. + END_LOOP also optional, may be used to store information. + + EXTRA_LOOP_ARGS optional macro specifying extra arguments passed + to loop function. + */ + +#include <assert.h> +#include <gconv.h> +#include <string.h> +#define __need_size_t +#define __need_NULL +#include <stddef.h> + +#include <wchar.h> + +#ifndef STATIC_GCONV +# include <dlfcn.h> +#endif + +/* Here a function is called directly: FCT is simply applied to ARGS. */ +# define DL_CALL_FCT(fct, args) fct args + +/* The direction objects. Only their addresses are used: the default + FROM_DIRECTION compares step->__data against &from_object. */ +#if DEFINE_DIRECTION_OBJECTS || DEFINE_INIT +static int from_object; +static int to_object; + +# ifndef FROM_DIRECTION +# define FROM_DIRECTION (step->__data == &from_object) +# endif +#else +# ifndef FROM_DIRECTION +# error "FROM_DIRECTION must be provided if direction objects are not used" +# endif +#endif + + +/* How many bytes are needed at most for the from-charset. */ +#ifndef MAX_NEEDED_FROM +# define MAX_NEEDED_FROM MIN_NEEDED_FROM +#endif + +/* Same for the to-charset. */ +#ifndef MAX_NEEDED_TO +# define MAX_NEEDED_TO MIN_NEEDED_TO +#endif + + +/* Define macros which can access unaligned buffers. These macros are + supposed to be used only in code outside the inner loops. For the inner + loops we have other definitions which allow optimized access. */ +#ifdef _STRING_ARCH_unaligned +/* We can handle unaligned memory access. */ +# define get16u(addr) *((__const uint16_t *) (addr)) +# define get32u(addr) *((__const uint32_t *) (addr)) + +/* We need no special support for writing values either. */ +# define put16u(addr, val) *((uint16_t *) (addr)) = (val) +# define put32u(addr, val) *((uint32_t *) (addr)) = (val) +#else +/* Distinguish between big endian and little endian.
*/ +# if __BYTE_ORDER == __LITTLE_ENDIAN +/* Little endian: the byte at the lowest address is the least significant. */ +# define get16u(addr) \ + (((__const unsigned char *) (addr))[1] << 8 \ + | ((__const unsigned char *) (addr))[0]) +# define get32u(addr) \ + (((((__const unsigned char *) (addr))[3] << 8 \ + | ((__const unsigned char *) (addr))[2]) << 8 \ + | ((__const unsigned char *) (addr))[1]) << 8 \ + | ((__const unsigned char *) (addr))[0]) + +# define put16u(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + ((unsigned char *) (addr))[1] = __val >> 8; \ + (void) 0; }) +# define put32u(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[3] = __val; \ + (void) 0; }) +# else +/* Big endian: the byte at the lowest address is the most significant. */ +# define get16u(addr) \ + (((__const unsigned char *) (addr))[0] << 8 \ + | ((__const unsigned char *) (addr))[1]) +# define get32u(addr) \ + (((((__const unsigned char *) (addr))[0] << 8 \ + | ((__const unsigned char *) (addr))[1]) << 8 \ + | ((__const unsigned char *) (addr))[2]) << 8 \ + | ((__const unsigned char *) (addr))[3]) + +# define put16u(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[1] = __val; \ + ((unsigned char *) (addr))[0] = __val >> 8; \ + (void) 0; }) +# define put32u(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[3] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[0] = __val; \ + (void) 0; }) +# endif +#endif + + +/* For conversions from a fixed width character set to another fixed width + character set we can define RESET_INPUT_BUFFER in a very fast way.
*/ +#if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE +# if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO +/* We have to use these `if's here since the compiler cannot know that + (outbuf - outerr) is always divisible by MIN_NEEDED_TO. */ +# define RESET_INPUT_BUFFER \ + if (MIN_NEEDED_FROM % MIN_NEEDED_TO == 0) \ + *inptrp -= (outbuf - outerr) * (MIN_NEEDED_FROM / MIN_NEEDED_TO); \ + else if (MIN_NEEDED_TO % MIN_NEEDED_FROM == 0) \ + *inptrp -= (outbuf - outerr) / (MIN_NEEDED_TO / MIN_NEEDED_FROM); \ + else \ + *inptrp -= ((outbuf - outerr) / MIN_NEEDED_TO) * MIN_NEEDED_FROM +# endif +#endif + + +/* The default init function. It simply matches the name and initializes + the step data to point to one of the objects above. It also fills in + the minimal and maximal byte counts of STEP (swapped for the reverse + direction) and its __stateful flag. Returns __GCONV_OK, or + __GCONV_NOCONV if CHARSET_NAME is neither the from-name nor the + to-name of STEP. */ +#if DEFINE_INIT +# ifndef CHARSET_NAME +# error "CHARSET_NAME not defined" +# endif + +extern int gconv_init (struct __gconv_step *step); +int +gconv_init (struct __gconv_step *step) +{ + /* Determine which direction. */ + if (strcmp (step->__from_name, CHARSET_NAME) == 0) + { + step->__data = &from_object; + + step->__min_needed_from = MIN_NEEDED_FROM; + step->__max_needed_from = MAX_NEEDED_FROM; + step->__min_needed_to = MIN_NEEDED_TO; + step->__max_needed_to = MAX_NEEDED_TO; + } + else if (__builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) == 0) + { + step->__data = &to_object; + + step->__min_needed_from = MIN_NEEDED_TO; + step->__max_needed_from = MAX_NEEDED_TO; + step->__min_needed_to = MIN_NEEDED_FROM; + step->__max_needed_to = MAX_NEEDED_FROM; + } + else + return __GCONV_NOCONV; + + /* A module which defines SAVE_RESET_STATE is marked as stateful. */ +#ifdef SAVE_RESET_STATE + step->__stateful = 1; +#else + step->__stateful = 0; +#endif + + return __GCONV_OK; +} +#endif + + +/* The default destructor function does nothing at the moment and so + we don't define it at all. But we still provide the macro just in + case we need it some day. */ +#if DEFINE_FINI +#endif + + +/* If no arguments have to be passed to the loop function define the macro + as empty.
*/ +#ifndef EXTRA_LOOP_ARGS +# define EXTRA_LOOP_ARGS +#endif + + +/* This is the actual conversion function. It is named `gconv' unless + the including module defines FUNCTION_NAME. */ +#ifndef FUNCTION_NAME +# define FUNCTION_NAME gconv +#endif + +/* The macros are used to access the function to convert single characters. */ +#define SINGLE(fct) SINGLE2 (fct) +#define SINGLE2(fct) fct##_single + + +/* Convert the input between *INPTRP and INEND for STEP and, unless DATA + is flagged __GCONV_IS_LAST, hand the output on to the function of the + following step. A nonzero DO_FLUSH requests a reset of the conversion + state instead. The return value is a __GCONV_* status. */ +extern int FUNCTION_NAME (struct __gconv_step *step, + struct __gconv_step_data *data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outbufstart, size_t *irreversible, + int do_flush, int consume_incomplete); +int +FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outbufstart, size_t *irreversible, int do_flush, + int consume_incomplete) +{ + struct __gconv_step *next_step = step + 1; + struct __gconv_step_data *next_data = data + 1; + __gconv_fct fct; + int status; + + /* The last step has no successor to call. */ + fct = (data->__flags & __GCONV_IS_LAST) ? NULL : next_step->__fct; + + /* If the function is called with DO_FLUSH set this means we have to + reset to the initial state. The possibly partly converted input is + dropped. */ + if (__builtin_expect (do_flush, 0)) + { + /* This should never happen during error handling. */ + assert (outbufstart == NULL); + + status = __GCONV_OK; + +#ifdef EMIT_SHIFT_TO_INIT + if (do_flush == 1) + { + /* We preserve the initial values of the pointer variables. */ + unsigned char *outbuf = data->__outbuf; + unsigned char *outstart = outbuf; + unsigned char *outend = data->__outbufend; + +# ifdef PREPARE_LOOP + PREPARE_LOOP +# endif + +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (1); +# endif + + /* Emit the escape sequence to reset the state. */ + EMIT_SHIFT_TO_INIT; + + /* Call the steps down the chain if there are any but only if we + successfully emitted the escape sequence. This should only + fail if the output buffer is full.
If the input is invalid + it should be discarded since the user wants to start from a + clean state. */ + if (status == __GCONV_OK) + { + if (data->__flags & __GCONV_IS_LAST) + /* Store information about how many bytes are available. */ + data->__outbuf = outbuf; + else + { + /* Write out all output which was produced. */ + if (outbuf > outstart) + { + const unsigned char *outerr = outstart; + int result; + + result = DL_CALL_FCT (fct, (next_step, next_data, + &outerr, outbuf, NULL, + irreversible, 0, + consume_incomplete)); + + if (result != __GCONV_EMPTY_INPUT) + { + if (__builtin_expect (outerr != outbuf, 0)) + { + /* We have a problem: the next step did not + consume everything. Undo the conversion. */ + outbuf = outstart; + + /* Restore the state. */ +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (0); +# endif + } + + /* Change the status. */ + status = result; + } + } + + if (status == __GCONV_OK) + /* Now flush the remaining steps. */ + status = DL_CALL_FCT (fct, (next_step, next_data, NULL, + NULL, NULL, irreversible, 1, + consume_incomplete)); + } + } + } + else +#endif + { + /* Clear the state object. There might be bytes in there from + previous calls with CONSUME_INCOMPLETE == 1. But don't emit + escape sequences. */ + memset (data->__statep, '\0', sizeof (*data->__statep)); + + if (! (data->__flags & __GCONV_IS_LAST)) + /* Now flush the remaining steps. */ + status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL, + NULL, irreversible, do_flush, + consume_incomplete)); + } + } + else + { + /* We preserve the initial values of the pointer variables. */ + const unsigned char *inptr = *inptrp; + unsigned char *outbuf = (__builtin_expect (outbufstart == NULL, 1) + ? data->__outbuf : *outbufstart); + unsigned char *outend = data->__outbufend; + unsigned char *outstart; + /* This variable is used to count the number of characters we + converted in an irreversible way; it is added to *IRREVERSIBLE + once the last step is done. */ + size_t lirreversible = 0; + size_t *lirreversiblep = irreversible ? 
&lirreversible : NULL; + /* `unaligned' is the constant 0 when the byte-wise loop variants are + not needed, otherwise a variable computed below. */ +#if defined _STRING_ARCH_unaligned \ + || MIN_NEEDED_FROM == 1 || MAX_NEEDED_FROM % MIN_NEEDED_FROM != 0 \ + || MIN_NEEDED_TO == 1 || MAX_NEEDED_TO % MIN_NEEDED_TO != 0 +# define unaligned 0 +#else + int unaligned; +# define GEN_unaligned(name) GEN_unaligned2 (name) +# define GEN_unaligned2(name) name##_unaligned +#endif + +#ifdef PREPARE_LOOP + PREPARE_LOOP +#endif + +#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1 + /* If the function is used to implement the mb*towc*() or wc*tomb*() + functions we must test whether any bytes from the last call are + stored in the `state' object. */ + if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1) + || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION) + || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION)) + && consume_incomplete && (data->__statep->__count & 7) != 0) + { + /* Yep, we have some bytes left over. Process them now. + But this must not happen while we are called from an + error handler. */ + assert (outbufstart == NULL); + +# if MAX_NEEDED_FROM > 1 + if (MAX_NEEDED_TO == 1 || FROM_DIRECTION) + status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf, + outend, lirreversiblep + EXTRA_LOOP_ARGS); +# endif +# if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION + else +# endif +# if MAX_NEEDED_TO > 1 && !ONE_DIRECTION + status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf, + outend, lirreversiblep EXTRA_LOOP_ARGS); +# endif + + if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) + return status; + } +#endif + +#if !defined _STRING_ARCH_unaligned \ + && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ + && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 + /* The following assumes that encodings which have a variable length, + which might unalign a buffer even though it is aligned in the + beginning, either don't have the minimal number of bytes as a divisor + of the maximum length or have a minimum length of 1. This is true + for all known and supported encodings.
*/ + unaligned = ((FROM_DIRECTION + && ((uintptr_t) inptr % MIN_NEEDED_FROM != 0 + || ((data->__flags & __GCONV_IS_LAST) + && (uintptr_t) outbuf % MIN_NEEDED_TO != 0))) + || (!FROM_DIRECTION + && (((data->__flags & __GCONV_IS_LAST) + && (uintptr_t) outbuf % MIN_NEEDED_FROM != 0) + || (uintptr_t) inptr % MIN_NEEDED_TO != 0))); +#endif + + while (1) + { + struct __gconv_trans_data *trans; + + /* Remember the start value for this round. */ + inptr = *inptrp; + /* Nothing has been written in this round yet; remember where + the output starts. */ + outstart = outbuf; + +#ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (1); +#endif + + if (__builtin_expect (!unaligned, 1)) + { + if (FROM_DIRECTION) + /* Run the conversion loop. */ + status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend, + lirreversiblep EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend, + lirreversiblep EXTRA_LOOP_ARGS); + } +#if !defined _STRING_ARCH_unaligned \ + && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ + && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 + else + { + if (FROM_DIRECTION) + /* Run the `_unaligned' variant of the conversion loop. */ + status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend, + &outbuf, outend, + lirreversiblep + EXTRA_LOOP_ARGS); + else + /* Run the `_unaligned' variant of the conversion loop. */ + status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend, + &outbuf, outend, + lirreversiblep + EXTRA_LOOP_ARGS); + } +#endif + + /* If we were called as part of an error handling module we + don't do anything else here. */ + if (__builtin_expect (outbufstart != NULL, 0)) + { + *outbufstart = outbuf; + return status; + } + + /* Give the transliteration module the chance to store the + original text and the result in case it needs a context.
*/ + for (trans = data->__trans; trans != NULL; trans = trans->__next) + if (trans->__trans_context_fct != NULL) + DL_CALL_FCT (trans->__trans_context_fct, + (trans->__data, inptr, *inptrp, outstart, outbuf)); + + /* We finished one use of the loops. */ + ++data->__invocation_counter; + + /* If this is the last step leave the loop, there is nothing + we can do. */ + if (__builtin_expect (data->__flags & __GCONV_IS_LAST, 0)) + { + /* Store information about how many bytes are available. */ + data->__outbuf = outbuf; + + /* Remember how many non-identical characters we + converted in an irreversible way. */ + *irreversible += lirreversible; + + break; + } + + /* Write out all output which was produced. */ + if (__builtin_expect (outbuf > outstart, 1)) + { + const unsigned char *outerr = data->__outbuf; + int result; + + result = DL_CALL_FCT (fct, (next_step, next_data, &outerr, + outbuf, NULL, irreversible, 0, + consume_incomplete)); + + if (result != __GCONV_EMPTY_INPUT) + { + if (__builtin_expect (outerr != outbuf, 0)) + { +#ifdef RESET_INPUT_BUFFER + RESET_INPUT_BUFFER; +#else + /* We have a problem in one of the functions + below. Undo the conversion up to the error point. */ + size_t nstatus; + + /* Reload the pointers. */ + *inptrp = inptr; + outbuf = outstart; + + /* Restore the state. */ +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (0); +# endif + + /* Rerun the conversion with OUTERR as the end of the + output buffer. */ + if (__builtin_expect (!unaligned, 1)) + { + if (FROM_DIRECTION) + /* Run the conversion loop. */ + nstatus = FROM_LOOP (step, data, inptrp, inend, + &outbuf, outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + nstatus = TO_LOOP (step, data, inptrp, inend, + &outbuf, outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + } +# if !defined _STRING_ARCH_unaligned \ + && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ + && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 + else + { + if (FROM_DIRECTION) + /* Run the conversion loop.
*/ + nstatus = GEN_unaligned (FROM_LOOP) (step, data, + inptrp, inend, + &outbuf, + outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + nstatus = GEN_unaligned (TO_LOOP) (step, data, + inptrp, inend, + &outbuf, outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + } +# endif + + /* We must run out of output buffer space in this + rerun. */ + assert (outbuf == outerr); + assert (nstatus == __GCONV_FULL_OUTPUT); + + /* If we haven't consumed a single byte decrement + the invocation counter. */ + if (__builtin_expect (outbuf == outstart, 0)) + --data->__invocation_counter; +#endif /* reset input buffer */ + } + + /* Change the status. */ + status = result; + } + else + /* All the output is consumed, we can make another run + if everything was ok. */ + if (status == __GCONV_FULL_OUTPUT) + { + status = __GCONV_OK; + outbuf = data->__outbuf; + } + } + + if (status != __GCONV_OK) + break; + + /* Reset the output buffer pointer for the next round. */ + outbuf = data->__outbuf; + } + +#ifdef END_LOOP + END_LOOP +#endif + + /* If we are supposed to consume all characters, store now all of the + remaining characters in the `state' object. */ +#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1 + if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1) + || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION) + || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION)) + && __builtin_expect (consume_incomplete, 0) + && status == __GCONV_INCOMPLETE_INPUT) + { +# ifdef STORE_REST + mbstate_t *state = data->__statep; + + STORE_REST +# else + size_t cnt; + + /* Make sure the remaining bytes fit into the state object's + buffer.
*/ + assert (inend - *inptrp < 4); + + /* Copy the leftover input bytes into the state object and record + their number in the low three bits of __count. */ + for (cnt = 0; *inptrp < inend; ++cnt) + data->__statep->__value.__wchb[cnt] = *(*inptrp)++; + data->__statep->__count &= ~7; + data->__statep->__count |= cnt; +# endif + } +#endif + } + + return status; +} + +/* Drop the customization macros described at the top of this file. */ +#undef DEFINE_INIT +#undef CHARSET_NAME +#undef DEFINE_FINI +#undef MIN_NEEDED_FROM +#undef MIN_NEEDED_TO +#undef MAX_NEEDED_FROM +#undef MAX_NEEDED_TO +#undef DEFINE_DIRECTION_OBJECTS +#undef FROM_DIRECTION +#undef EMIT_SHIFT_TO_INIT +#undef FROM_LOOP +#undef TO_LOOP +#undef SAVE_RESET_STATE +#undef RESET_INPUT_BUFFER +#undef FUNCTION_NAME +#undef PREPARE_LOOP +#undef END_LOOP +#undef ONE_DIRECTION +#undef STORE_REST diff --git a/newlib/libc/sys/linux/iconv/strtab.c b/newlib/libc/sys/linux/iconv/strtab.c new file mode 100644 index 000000000..0f5e2b632 --- /dev/null +++ b/newlib/libc/sys/linux/iconv/strtab.c @@ -0,0 +1,341 @@ +/* C string table handling. + Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Written by Ulrich Drepper <drepper@redhat.com>, 2000. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
/* End of the license header that begins on the preceding lines.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <assert.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/cdefs.h>
#include <sys/param.h>

/* Fallback for C libraries whose <sys/cdefs.h> does not provide the
   attribute macro used on the xmalloc prototype below.  */
#ifndef __attribute_malloc__
# define __attribute_malloc__
#endif


/* Page size assumed when sysconf cannot report one.  */
enum { FALLBACK_PAGESIZE = 4096 };


/* One string known to the table.  Entries form a binary search tree
   keyed on the reversed string, so that strings sharing a tail end up
   on the same node and can share storage in the final table.  */
struct Strent
{
  const char *string;     /* The caller's string; it is not copied.  */
  size_t len;             /* Length including the terminating NUL.  */
  struct Strent *next;    /* Shorter strings that are tails of this one.  */
  struct Strent *left;    /* Subtree whose keys sort before this key.  */
  struct Strent *right;   /* Subtree whose keys sort after this key.  */
  size_t offset;          /* Offset in the final table, set by
                             strtabfinalize.  */
  char reverse[0];        /* Search key: the string back to front,
                             NUL terminated.  */
};


/* Header of one chunk of the arena the entries are carved from.  */
struct memoryblock
{
  struct memoryblock *next;   /* Previously allocated chunk.  */
  char memory[0];             /* Start of the usable bytes.  */
};


struct Strtab
{
  struct Strent *root;          /* Root of the search tree.  */
  struct memoryblock *memory;   /* Newest chunk; head of the chunk list.  */
  char *backp;                  /* First unused byte in the newest chunk.  */
  size_t left;                  /* Unused bytes remaining after BACKP.  */
  size_t total;                 /* Bytes the strings need in the final
                                   table, not counting its leading NUL.  */

  struct Strent null;           /* Shared entry for the empty string.  */
};


/* Cache for the pagesize.  We correct this value a bit so that `malloc'
   is not allocating more than a page.  */
static size_t ps;


extern void *xmalloc (size_t n) __attribute_malloc__;

/* Prototypes for our functions that are used from iconvconfig.c.  If
   you change these, change also iconvconfig.c.  */
/* Create new C string table object in memory.  */
extern struct Strtab *strtabinit (void);

/* Free resources allocated for C string table ST.  */
extern void strtabfree (struct Strtab *st);

/* Add string STR (length LEN is != 0) to C string table ST.  */
extern struct Strent *strtabadd (struct Strtab *st, const char *str,
                                 size_t len);

/* Finalize string table ST and store size in *SIZE and return a pointer.  */
extern void *strtabfinalize (struct Strtab *st, size_t *size);

/* Get offset in string table for string associated with SE.  */
extern size_t strtaboffset (struct Strent *se);


/* Allocate an empty string table.  Returns NULL if the memory for the
   table object cannot be obtained.  */
struct Strtab *
strtabinit (void)
{
  struct Strtab *ret;

  if (ps == 0)
    {
      long int pagesize = sysconf (_SC_PAGESIZE);

      /* sysconf reports failure with -1; converting that to size_t
         would yield an enormous chunk size.  */
      if (pagesize <= 0)
        pagesize = FALLBACK_PAGESIZE;

      ps = (size_t) pagesize - 2 * sizeof (void *);
      assert (sizeof (struct memoryblock) < ps);
    }

  ret = calloc (1, sizeof (struct Strtab));
  if (ret != NULL)
    {
      ret->null.len = 1;
      ret->null.string = "";
    }
  return ret;
}


/* Put a fresh chunk with at least LEN usable bytes at the head of ST's
   chunk list and make it the current allocation area.  Aborts when no
   memory is available.  */
static void
morememory (struct Strtab *st, size_t len)
{
  const size_t overhead = offsetof (struct memoryblock, memory);
  struct memoryblock *newmem;

  /* LEN counts usable bytes only; the chunk header comes on top.  */
  if (len > SIZE_MAX - overhead)
    abort ();
  len += overhead;

  if (len < ps)
    len = ps;
  newmem = malloc (len);
  if (newmem == NULL)
    abort ();

  newmem->next = st->memory;
  st->memory = newmem;
  st->backp = newmem->memory;
  st->left = len - overhead;
}


/* Release ST together with every chunk allocated for it.  A null ST is
   ignored.  The strings themselves belong to the caller and are not
   touched.  */
void
strtabfree (struct Strtab *st)
{
  struct memoryblock *mb;

  if (st == NULL)
    return;

  mb = st->memory;
  while (mb != NULL)
    {
      void *old = mb;
      mb = mb->next;
      free (old);
    }

  free (st);
}


/* Carve a new entry for STR out of ST's arena.  LEN includes the
   terminating NUL.  The entry is initialized but not yet linked into
   the tree.  */
static struct Strent *
newstring (struct Strtab *st, const char *str, size_t len)
{
  struct Strent *newstr;
  size_t align;
  size_t i;

  /* Compute the amount of padding needed to make the structure aligned.  */
  align = ((__alignof__ (struct Strent)
            - (((uintptr_t) st->backp)
               & (__alignof__ (struct Strent) - 1)))
           & (__alignof__ (struct Strent) - 1));

  /* Make sure there is enough room in the memory block.  */
  if (st->left < align + sizeof (struct Strent) + len)
    {
      morememory (st, sizeof (struct Strent) + len);
      /* The usable area of a fresh chunk needs no padding.  */
      align = 0;
    }

  /* Create the reserved string.  */
  newstr = (struct Strent *) (st->backp + align);
  newstr->string = str;
  newstr->len = len;
  newstr->next = NULL;
  newstr->left = NULL;
  newstr->right = NULL;
  newstr->offset = 0;

  /* Store the characters back to front, leaving out the terminating
     NUL, so that strings with a common tail get a common key prefix.  */
  for (i = 0; i + 1 < len; ++i)
    newstr->reverse[i] = str[len - 2 - i];
  newstr->reverse[len - 1] = '\0';

  st->backp += align + sizeof (struct Strent) + len;
  st->left -= align + sizeof (struct Strent) + len;

  return newstr;
}


/* XXX This function should definitely be rewritten to use a balancing
   tree algorithm (AVL, red-black trees).  For now a simple, correct
   implementation is enough.

   Walk the tree whose root pointer is *SEP looking for an entry whose
   key agrees with NEWSTR's over the length of the shorter of the two.
   Returns the address of the link holding that entry.  If there is
   none, NEWSTR is stored in the empty link where the walk ended and
   that link is returned.  */
static struct Strent **
searchstring (struct Strent **sep, struct Strent *newstr)
{
  while (*sep != NULL)
    {
      /* Compare the strings.  */
      int cmpres = memcmp ((*sep)->reverse, newstr->reverse,
                           MIN ((*sep)->len, newstr->len) - 1);

      if (cmpres == 0)
        /* We found a matching string.  */
        return sep;

      sep = cmpres > 0 ? &(*sep)->left : &(*sep)->right;
    }

  /* No more strings: this is the place for the new one.  */
  *sep = newstr;
  return sep;
}


/* Add new string.  The actual string is assumed to be permanent.
   LEN is the length of STR including the terminating NUL, or zero to
   have it computed here.  Returns the entry representing STR, which
   is an already existing entry if the string was added before.  */
struct Strent *
strtabadd (struct Strtab *st, const char *str, size_t len)
{
  struct Strent *newstr;
  struct Strent **sep;

  /* Compute the string length if the caller doesn't know it.  */
  if (len == 0)
    len = strlen (str) + 1;

  /* Make sure all "" strings get offset 0.  */
  if (len == 1)
    return &st->null;

  /* Allocate memory for the new string and its associated information.  */
  newstr = newstring (st, str, len);

  /* Search in the tree for the place to insert the string.  If no
     existing string shares its tail with the new one, the new entry
     becomes a node of its own.  */
  sep = searchstring (&st->root, newstr);
  if (*sep != newstr)
    {
      /* This is not the same entry.  This means the tails match.  */
      if ((*sep)->len > newstr->len)
        {
          struct Strent *subs;

          for (subs = (*sep)->next; subs; subs = subs->next)
            if (subs->len == newstr->len)
              {
                /* We have an exact match with a substring.  Free the memory
                   we allocated.  */
                st->left += st->backp - (char *) newstr;
                st->backp = (char *) newstr;

                return subs;
              }

          /* We have a new substring.  This means we don't need the reverse
             string of this entry anymore.  */
          st->backp -= newstr->len;
          st->left += newstr->len;

          newstr->next = (*sep)->next;
          (*sep)->next = newstr;
        }
      else if ((*sep)->len != newstr->len)
        {
          /* When we get here it means that the string we are about to
             add shares its tail with a string we already have but
             it is longer.  In this case we have to put it first.  Only
             the additional characters enlarge the final table.  */
          st->total += newstr->len - (*sep)->len;
          newstr->next = *sep;
          newstr->left = (*sep)->left;
          newstr->right = (*sep)->right;
          *sep = newstr;
        }
      else
        {
          /* We have an exact match.  Free the memory we allocated.  */
          st->left += st->backp - (char *) newstr;
          st->backp = (char *) newstr;

          newstr = *sep;
        }
    }
  else
    st->total += newstr->len;

  return newstr;
}


/* Copy the strings of the subtree rooted at NODEP, in tree order, to
   *FREEP and record the offset of every entry.  *FREEP is advanced
   past the copied bytes and *OFFSETP by their number.  NODEP must not
   be null.  */
static void
copystrings (struct Strent *nodep, char **freep, size_t *offsetp)
{
  struct Strent *subs;

  if (nodep->left != NULL)
    copystrings (nodep->left, freep, offsetp);

  /* Process the current node.  */
  nodep->offset = *offsetp;
  *freep = (char *) memcpy (*freep, nodep->string, nodep->len);
  *freep += nodep->len;
  *offsetp += nodep->len;

  /* The chained strings are tails of the node's string, so each one
     lives inside the bytes just copied.  */
  for (subs = nodep->next; subs != NULL; subs = subs->next)
    {
      assert (subs->len < nodep->len);
      subs->offset = nodep->offset + nodep->len - subs->len;
    }

  if (nodep->right != NULL)
    copystrings (nodep->right, freep, offsetp);
}


/* Build the final string table for ST in a buffer obtained from
   xmalloc, store the number of bytes it holds in *SIZE and return the
   buffer.  The offset of every entry is assigned as a side effect.  */
void *
strtabfinalize (struct Strtab *st, size_t *size)
{
  size_t copylen;
  char *endp;
  char *retval;

  /* Fill in the information.  */
  endp = retval = (char *) xmalloc (st->total + 1);

  /* Always put an empty string at the beginning so that a zero offset
     can mean error.  */
  *endp++ = '\0';

  /* Now run through the tree and add all the strings while also updating
     the offset members of the entries.  The tree is empty if nothing
     but "" was ever added.  */
  copylen = 1;
  if (st->root != NULL)
    copystrings (st->root, &endp, &copylen);
  assert (copylen == st->total + 1);
  assert (endp == retval + st->total + 1);
  *size = copylen;

  return retval;
}


/* Return the offset assigned to SE in the finalized table.  */
size_t
strtaboffset (struct Strent *se)
{
  return se->offset;
}