diff options
author | Jim Meyering <meyering@redhat.com> | 2008-01-04 16:16:50 +0100 |
---|---|---|
committer | Jim Meyering <meyering@redhat.com> | 2008-01-04 16:16:50 +0100 |
commit | 447bbb9ae4fd8190817001e3c7065a504721cd0a (patch) | |
tree | bfde839cfd2018b8a65d463630abb9539f749d5a /lib | |
parent | a924da06a11ad14176660412d9be9a7905503267 (diff) | |
download | idutils-447bbb9ae4fd8190817001e3c7065a504721cd0a.tar.gz idutils-447bbb9ae4fd8190817001e3c7065a504721cd0a.tar.bz2 idutils-447bbb9ae4fd8190817001e3c7065a504721cd0a.zip |
Remove files now pulled from gnulib.
Also remove all intl/ files.
Signed-off-by: Jim Meyering <meyering@redhat.com>
Diffstat (limited to 'lib')
90 files changed, 0 insertions, 22795 deletions
diff --git a/lib/__fpending.c b/lib/__fpending.c deleted file mode 100644 index 221aee6..0000000 --- a/lib/__fpending.c +++ /dev/null @@ -1,30 +0,0 @@ -/* __fpending.c -- return the number of pending output bytes on a stream - Copyright (C) 2000, 2004, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Jim Meyering. */ - -#include <config.h> - -#include "__fpending.h" - -/* Return the number of pending (aka buffered, unflushed) - bytes on the stream, FP, that is open for writing. */ -size_t -__fpending (FILE *fp) -{ - return PENDING_OUTPUT_N_BYTES; -} diff --git a/lib/__fpending.h b/lib/__fpending.h deleted file mode 100644 index 8a8aabc..0000000 --- a/lib/__fpending.h +++ /dev/null @@ -1,34 +0,0 @@ -/* Declare __fpending. - - Copyright (C) 2000, 2003, 2005, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - Written by Jim Meyering. */ - -#include <stddef.h> -#include <stdio.h> - -#ifndef HAVE_DECL___FPENDING -"this configure-time declaration test was not run" -#endif - -#if HAVE_DECL___FPENDING -# if HAVE_STDIO_EXT_H -# include <stdio_ext.h> -# endif -#else -size_t __fpending (FILE *); -#endif diff --git a/lib/alloca.c b/lib/alloca.c deleted file mode 100644 index 3a1f4e2..0000000 --- a/lib/alloca.c +++ /dev/null @@ -1,489 +0,0 @@ -/* alloca.c -- allocate automatically reclaimed memory - (Mostly) portable public-domain implementation -- D A Gwyn - - This implementation of the PWB library alloca function, - which is used to allocate space off the run-time stack so - that it is automatically reclaimed upon procedure exit, - was inspired by discussions with J. Q. Johnson of Cornell. - J.Otto Tennant <jot@cray.com> contributed the Cray support. - - There are some preprocessor constants that can - be defined when compiling for your specific system, for - improved efficiency; however, the defaults should be okay. - - The general concept of this implementation is to keep - track of all alloca-allocated blocks, and reclaim any - that are found to be deeper in the stack than the current - invocation. This heuristic does not reclaim storage as - soon as it becomes invalid, but it will do so eventually. - - As a special case, alloca(0) reclaims storage without - allocating any. It is a good idea to use alloca(0) in - your main control loop, etc. to force garbage collection. */ - -#include <config.h> - -#include <alloca.h> - -#include <string.h> -#include <stdlib.h> - -#ifdef emacs -# include "lisp.h" -# include "blockinput.h" -# ifdef EMACS_FREE -# undef free -# define free EMACS_FREE -# endif -#else -# define memory_full() abort () -#endif - -/* If compiling with GCC 2, this file's not needed. */ -#if !defined (__GNUC__) || __GNUC__ < 2 - -/* If someone has defined alloca as a macro, - there must be some other way alloca is supposed to work. */ -# ifndef alloca - -# ifdef emacs -# ifdef static -/* actually, only want this if static is defined as "" - -- this is for usg, in which emacs must undefine static - in order to make unexec workable - */ -# ifndef STACK_DIRECTION -you -lose --- must know STACK_DIRECTION at compile-time -/* Using #error here is not wise since this file should work for - old and obscure compilers. */ -# endif /* STACK_DIRECTION undefined */ -# endif /* static */ -# endif /* emacs */ - -/* If your stack is a linked list of frames, you have to - provide an "address metric" ADDRESS_FUNCTION macro. */ - -# if defined (CRAY) && defined (CRAY_STACKSEG_END) -long i00afunc (); -# define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg)) -# else -# define ADDRESS_FUNCTION(arg) &(arg) -# endif - -/* Define STACK_DIRECTION if you know the direction of stack - growth for your system; otherwise it will be automatically - deduced at run-time. - - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ - -# ifndef STACK_DIRECTION -# define STACK_DIRECTION 0 /* Direction unknown. */ -# endif - -# if STACK_DIRECTION != 0 - -# define STACK_DIR STACK_DIRECTION /* Known at compile-time. */ - -# else /* STACK_DIRECTION == 0; need run-time code. */ - -static int stack_dir; /* 1 or -1 once known. */ -# define STACK_DIR stack_dir - -static void -find_stack_direction (void) -{ - static char *addr = NULL; /* Address of first `dummy', once known. */ - auto char dummy; /* To get stack address. */ - - if (addr == NULL) - { /* Initial entry. */ - addr = ADDRESS_FUNCTION (dummy); - - find_stack_direction (); /* Recurse once. */ - } - else - { - /* Second entry. */ - if (ADDRESS_FUNCTION (dummy) > addr) - stack_dir = 1; /* Stack grew upward. */ - else - stack_dir = -1; /* Stack grew downward. */ - } -} - -# endif /* STACK_DIRECTION == 0 */ - -/* An "alloca header" is used to: - (a) chain together all alloca'ed blocks; - (b) keep track of stack depth. - - It is very important that sizeof(header) agree with malloc - alignment chunk size. The following default should work okay. */ - -# ifndef ALIGN_SIZE -# define ALIGN_SIZE sizeof(double) -# endif - -typedef union hdr -{ - char align[ALIGN_SIZE]; /* To force sizeof(header). */ - struct - { - union hdr *next; /* For chaining headers. */ - char *deep; /* For stack depth measure. */ - } h; -} header; - -static header *last_alloca_header = NULL; /* -> last alloca header. */ - -/* Return a pointer to at least SIZE bytes of storage, - which will be automatically reclaimed upon exit from - the procedure that called alloca. Originally, this space - was supposed to be taken from the current stack frame of the - caller, but that method cannot be made to work for some - implementations of C, for example under Gould's UTX/32. */ - -void * -alloca (size_t size) -{ - auto char probe; /* Probes stack depth: */ - register char *depth = ADDRESS_FUNCTION (probe); - -# if STACK_DIRECTION == 0 - if (STACK_DIR == 0) /* Unknown growth direction. */ - find_stack_direction (); -# endif - - /* Reclaim garbage, defined as all alloca'd storage that - was allocated from deeper in the stack than currently. */ - - { - register header *hp; /* Traverses linked list. */ - -# ifdef emacs - BLOCK_INPUT; -# endif - - for (hp = last_alloca_header; hp != NULL;) - if ((STACK_DIR > 0 && hp->h.deep > depth) - || (STACK_DIR < 0 && hp->h.deep < depth)) - { - register header *np = hp->h.next; - - free (hp); /* Collect garbage. */ - - hp = np; /* -> next header. */ - } - else - break; /* Rest are not deeper. */ - - last_alloca_header = hp; /* -> last valid storage. */ - -# ifdef emacs - UNBLOCK_INPUT; -# endif - } - - if (size == 0) - return NULL; /* No allocation required. */ - - /* Allocate combined header + user data storage. */ - - { - /* Address of header. */ - register header *new; - - size_t combined_size = sizeof (header) + size; - if (combined_size < sizeof (header)) - memory_full (); - - new = malloc (combined_size); - - if (! new) - memory_full (); - - new->h.next = last_alloca_header; - new->h.deep = depth; - - last_alloca_header = new; - - /* User storage begins just after header. */ - - return (void *) (new + 1); - } -} - -# if defined (CRAY) && defined (CRAY_STACKSEG_END) - -# ifdef DEBUG_I00AFUNC -# include <stdio.h> -# endif - -# ifndef CRAY_STACK -# define CRAY_STACK -# ifndef CRAY2 -/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */ -struct stack_control_header - { - long shgrow:32; /* Number of times stack has grown. */ - long shaseg:32; /* Size of increments to stack. */ - long shhwm:32; /* High water mark of stack. */ - long shsize:32; /* Current size of stack (all segments). */ - }; - -/* The stack segment linkage control information occurs at - the high-address end of a stack segment. (The stack - grows from low addresses to high addresses.) The initial - part of the stack segment linkage control information is - 0200 (octal) words. This provides for register storage - for the routine which overflows the stack. */ - -struct stack_segment_linkage - { - long ss[0200]; /* 0200 overflow words. */ - long sssize:32; /* Number of words in this segment. */ - long ssbase:32; /* Offset to stack base. */ - long:32; - long sspseg:32; /* Offset to linkage control of previous - segment of stack. */ - long:32; - long sstcpt:32; /* Pointer to task common address block. */ - long sscsnm; /* Private control structure number for - microtasking. */ - long ssusr1; /* Reserved for user. */ - long ssusr2; /* Reserved for user. */ - long sstpid; /* Process ID for pid based multi-tasking. */ - long ssgvup; /* Pointer to multitasking thread giveup. */ - long sscray[7]; /* Reserved for Cray Research. */ - long ssa0; - long ssa1; - long ssa2; - long ssa3; - long ssa4; - long ssa5; - long ssa6; - long ssa7; - long sss0; - long sss1; - long sss2; - long sss3; - long sss4; - long sss5; - long sss6; - long sss7; - }; - -# else /* CRAY2 */ -/* The following structure defines the vector of words - returned by the STKSTAT library routine. */ -struct stk_stat - { - long now; /* Current total stack size. */ - long maxc; /* Amount of contiguous space which would - be required to satisfy the maximum - stack demand to date. */ - long high_water; /* Stack high-water mark. */ - long overflows; /* Number of stack overflow ($STKOFEN) calls. */ - long hits; /* Number of internal buffer hits. */ - long extends; /* Number of block extensions. */ - long stko_mallocs; /* Block allocations by $STKOFEN. */ - long underflows; /* Number of stack underflow calls ($STKRETN). */ - long stko_free; /* Number of deallocations by $STKRETN. */ - long stkm_free; /* Number of deallocations by $STKMRET. */ - long segments; /* Current number of stack segments. */ - long maxs; /* Maximum number of stack segments so far. */ - long pad_size; /* Stack pad size. */ - long current_address; /* Current stack segment address. */ - long current_size; /* Current stack segment size. This - number is actually corrupted by STKSTAT to - include the fifteen word trailer area. */ - long initial_address; /* Address of initial segment. */ - long initial_size; /* Size of initial segment. */ - }; - -/* The following structure describes the data structure which trails - any stack segment. I think that the description in 'asdef' is - out of date. I only describe the parts that I am sure about. */ - -struct stk_trailer - { - long this_address; /* Address of this block. */ - long this_size; /* Size of this block (does not include - this trailer). */ - long unknown2; - long unknown3; - long link; /* Address of trailer block of previous - segment. */ - long unknown5; - long unknown6; - long unknown7; - long unknown8; - long unknown9; - long unknown10; - long unknown11; - long unknown12; - long unknown13; - long unknown14; - }; - -# endif /* CRAY2 */ -# endif /* not CRAY_STACK */ - -# ifdef CRAY2 -/* Determine a "stack measure" for an arbitrary ADDRESS. - I doubt that "lint" will like this much. */ - -static long -i00afunc (long *address) -{ - struct stk_stat status; - struct stk_trailer *trailer; - long *block, size; - long result = 0; - - /* We want to iterate through all of the segments. The first - step is to get the stack status structure. We could do this - more quickly and more directly, perhaps, by referencing the - $LM00 common block, but I know that this works. */ - - STKSTAT (&status); - - /* Set up the iteration. */ - - trailer = (struct stk_trailer *) (status.current_address - + status.current_size - - 15); - - /* There must be at least one stack segment. Therefore it is - a fatal error if "trailer" is null. */ - - if (trailer == 0) - abort (); - - /* Discard segments that do not contain our argument address. */ - - while (trailer != 0) - { - block = (long *) trailer->this_address; - size = trailer->this_size; - if (block == 0 || size == 0) - abort (); - trailer = (struct stk_trailer *) trailer->link; - if ((block <= address) && (address < (block + size))) - break; - } - - /* Set the result to the offset in this segment and add the sizes - of all predecessor segments. */ - - result = address - block; - - if (trailer == 0) - { - return result; - } - - do - { - if (trailer->this_size <= 0) - abort (); - result += trailer->this_size; - trailer = (struct stk_trailer *) trailer->link; - } - while (trailer != 0); - - /* We are done. Note that if you present a bogus address (one - not in any segment), you will get a different number back, formed - from subtracting the address of the first block. This is probably - not what you want. */ - - return (result); -} - -# else /* not CRAY2 */ -/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP. - Determine the number of the cell within the stack, - given the address of the cell. The purpose of this - routine is to linearize, in some sense, stack addresses - for alloca. */ - -static long -i00afunc (long address) -{ - long stkl = 0; - - long size, pseg, this_segment, stack; - long result = 0; - - struct stack_segment_linkage *ssptr; - - /* Register B67 contains the address of the end of the - current stack segment. If you (as a subprogram) store - your registers on the stack and find that you are past - the contents of B67, you have overflowed the segment. - - B67 also points to the stack segment linkage control - area, which is what we are really interested in. */ - - stkl = CRAY_STACKSEG_END (); - ssptr = (struct stack_segment_linkage *) stkl; - - /* If one subtracts 'size' from the end of the segment, - one has the address of the first word of the segment. - - If this is not the first segment, 'pseg' will be - nonzero. */ - - pseg = ssptr->sspseg; - size = ssptr->sssize; - - this_segment = stkl - size; - - /* It is possible that calling this routine itself caused - a stack overflow. Discard stack segments which do not - contain the target address. */ - - while (!(this_segment <= address && address <= stkl)) - { -# ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl); -# endif - if (pseg == 0) - break; - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - this_segment = stkl - size; - } - - result = address - this_segment; - - /* If you subtract pseg from the current end of the stack, - you get the address of the previous stack segment's end. - This seems a little convoluted to me, but I'll bet you save - a cycle somewhere. */ - - while (pseg != 0) - { -# ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o\n", pseg, size); -# endif - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - result += size; - } - return (result); -} - -# endif /* not CRAY2 */ -# endif /* CRAY */ - -# endif /* no alloca */ -#endif /* not GCC version 2 */ diff --git a/lib/alloca_.h b/lib/alloca_.h deleted file mode 100644 index dd0b3e9..0000000 --- a/lib/alloca_.h +++ /dev/null @@ -1,54 +0,0 @@ -/* Memory allocation on the stack. - - Copyright (C) 1995, 1999, 2001, 2002, 2003, 2004, 2006 Free Software - Foundation, Inc. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - USA. */ - -/* Avoid using the symbol _ALLOCA_H here, as Bison assumes _ALLOCA_H - means there is a real alloca function. */ -#ifndef _GNULIB_ALLOCA_H -# define _GNULIB_ALLOCA_H - -/* alloca (N) returns a pointer to N bytes of memory - allocated on the stack, which will last until the function returns. - Use of alloca should be avoided: - - inside arguments of function calls - undefined behaviour, - - in inline functions - the allocation may actually last until the - calling function returns, - - for huge N (say, N >= 65536) - you never know how large (or small) - the stack is, and when the stack cannot fulfill the memory allocation - request, the program just crashes. - */ - -#ifndef alloca -# ifdef __GNUC__ -# define alloca __builtin_alloca -# elif defined _AIX -# define alloca __alloca -# elif defined _MSC_VER -# include <malloc.h> -# define alloca _alloca -# else -# include <stddef.h> -# ifdef __cplusplus -extern "C" -# endif -void *alloca (size_t); -# endif -#endif - -#endif /* _GNULIB_ALLOCA_H */ diff --git a/lib/atexit.c b/lib/atexit.c deleted file mode 100644 index 5ef33e5..0000000 --- a/lib/atexit.c +++ /dev/null @@ -1,13 +0,0 @@ -/* Wrapper to implement ANSI C's atexit using SunOS's on_exit. */ -/* This function is in the public domain. --Mike Stump. */ - -#include <config.h> - -int -atexit (void (*f) (void)) -{ - /* If the system doesn't provide a definition for atexit, use on_exit - if the system provides that. */ - on_exit (f, 0); - return 0; -} diff --git a/lib/basename.c b/lib/basename.c deleted file mode 100644 index fbe17ff..0000000 --- a/lib/basename.c +++ /dev/null @@ -1,129 +0,0 @@ -/* basename.c -- return the last element in a file name - - Copyright (C) 1990, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free - Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include "dirname.h" - -#include <string.h> -#include "xalloc.h" -#include "xstrndup.h" - -/* Return the address of the last file name component of NAME. If - NAME has no relative file name components because it is a file - system root, return the empty string. */ - -char * -last_component (char const *name) -{ - char const *base = name + FILE_SYSTEM_PREFIX_LEN (name); - char const *p; - bool saw_slash = false; - - while (ISSLASH (*base)) - base++; - - for (p = base; *p; p++) - { - if (ISSLASH (*p)) - saw_slash = true; - else if (saw_slash) - { - base = p; - saw_slash = false; - } - } - - return (char *) base; -} - - -/* In general, we can't use the builtin `basename' function if available, - since it has different meanings in different environments. - In some environments the builtin `basename' modifies its argument. - - Return the last file name component of NAME, allocated with - xmalloc. On systems with drive letters, a leading "./" - distinguishes relative names that would otherwise look like a drive - letter. Unlike POSIX basename(), NAME cannot be NULL, - base_name("") returns "", and the first trailing slash is not - stripped. - - If lstat (NAME) would succeed, then { chdir (dir_name (NAME)); - lstat (base_name (NAME)); } will access the same file. Likewise, - if the sequence { chdir (dir_name (NAME)); - rename (base_name (NAME), "foo"); } succeeds, you have renamed NAME - to "foo" in the same directory NAME was in. */ - -char * -base_name (char const *name) -{ - char const *base = last_component (name); - size_t length; - - /* If there is no last component, then name is a file system root or the - empty string. */ - if (! *base) - return xstrndup (name, base_len (name)); - - /* Collapse a sequence of trailing slashes into one. */ - length = base_len (base); - if (ISSLASH (base[length])) - length++; - - /* On systems with drive letters, `a/b:c' must return `./b:c' rather - than `b:c' to avoid confusion with a drive letter. On systems - with pure POSIX semantics, this is not an issue. */ - if (FILE_SYSTEM_PREFIX_LEN (base)) - { - char *p = xmalloc (length + 3); - p[0] = '.'; - p[1] = '/'; - memcpy (p + 2, base, length); - p[length + 2] = '\0'; - return p; - } - - /* Finally, copy the basename. */ - return xstrndup (base, length); -} - -/* Return the length of the basename NAME. Typically NAME is the - value returned by base_name or last_component. Act like strlen - (NAME), except omit all trailing slashes. */ - -size_t -base_len (char const *name) -{ - size_t len; - size_t prefix_len = FILE_SYSTEM_PREFIX_LEN (name); - - for (len = strlen (name); 1 < len && ISSLASH (name[len - 1]); len--) - continue; - - if (DOUBLE_SLASH_IS_DISTINCT_ROOT && len == 1 - && ISSLASH (name[0]) && ISSLASH (name[1]) && ! name[2]) - return 2; - - if (FILE_SYSTEM_DRIVE_PREFIX_CAN_BE_RELATIVE && prefix_len - && len == prefix_len && ISSLASH (name[prefix_len])) - return prefix_len + 1; - - return len; -} diff --git a/lib/close-stream.c b/lib/close-stream.c deleted file mode 100644 index 72d0d68..0000000 --- a/lib/close-stream.c +++ /dev/null @@ -1,76 +0,0 @@ -/* Close a stream, with nicer error checking than fclose's. - - Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2006 Free - Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include "close-stream.h" - -#include <errno.h> -#include <stdbool.h> - -#include "__fpending.h" - -#if USE_UNLOCKED_IO -# include "unlocked-io.h" -#endif - -/* Close STREAM. Return 0 if successful, EOF (setting errno) - otherwise. A failure might set errno to 0 if the error number - cannot be determined. - - If a program writes *anything* to STREAM, that program should close - STREAM and make sure that it succeeds before exiting. Otherwise, - suppose that you go to the extreme of checking the return status - of every function that does an explicit write to STREAM. The last - printf can succeed in writing to the internal stream buffer, and yet - the fclose(STREAM) could still fail (due e.g., to a disk full error) - when it tries to write out that buffered data. Thus, you would be - left with an incomplete output file and the offending program would - exit successfully. Even calling fflush is not always sufficient, - since some file systems (NFS and CODA) buffer written/flushed data - until an actual close call. - - Besides, it's wasteful to check the return value from every call - that writes to STREAM -- just let the internal stream state record - the failure. That's what the ferror test is checking below. */ - -int -close_stream (FILE *stream) -{ - bool some_pending = (__fpending (stream) != 0); - bool prev_fail = (ferror (stream) != 0); - bool fclose_fail = (fclose (stream) != 0); - - /* Return an error indication if there was a previous failure or if - fclose failed, with one exception: ignore an fclose failure if - there was no previous error, no data remains to be flushed, and - fclose failed with EBADF. That can happen when a program like cp - is invoked like this `cp a b >&-' (i.e., with standard output - closed) and doesn't generate any output (hence no previous error - and nothing to be flushed). */ - - if (prev_fail || (fclose_fail && (some_pending || errno != EBADF))) - { - if (! fclose_fail) - errno = 0; - return EOF; - } - - return 0; -} diff --git a/lib/close-stream.h b/lib/close-stream.h deleted file mode 100644 index be3d419..0000000 --- a/lib/close-stream.h +++ /dev/null @@ -1,2 +0,0 @@ -#include <stdio.h> -int close_stream (FILE *stream); diff --git a/lib/closeout.c b/lib/closeout.c deleted file mode 100644 index 830f16f..0000000 --- a/lib/closeout.c +++ /dev/null @@ -1,86 +0,0 @@ -/* Close standard output and standard error, exiting with a diagnostic on error. - - Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2006 Free - Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include "closeout.h" - -#include <errno.h> -#include <stdio.h> -#include <unistd.h> - -#include "gettext.h" -#define _(msgid) gettext (msgid) - -#include "close-stream.h" -#include "error.h" -#include "exitfail.h" -#include "quotearg.h" - -static const char *file_name; - -/* Set the file name to be reported in the event an error is detected - by close_stdout. */ -void -close_stdout_set_file_name (const char *file) -{ - file_name = file; -} - -/* Close standard output. On error, issue a diagnostic and _exit - with status 'exit_failure'. - - Also close standard error. On error, _exit with status 'exit_failure'. - - Since close_stdout is commonly registered via 'atexit', POSIX - and the C standard both say that it should not call 'exit', - because the behavior is undefined if 'exit' is called more than - once. So it calls '_exit' instead of 'exit'. If close_stdout - is registered via atexit before other functions are registered, - the other functions can act before this _exit is invoked. - - Applications that use close_stdout should flush any streams - other than stdout and stderr before exiting, since the call to - _exit will bypass other buffer flushing. Applications should - be flushing and closing other streams anyway, to check for I/O - errors. Also, applications should not use tmpfile, since _exit - can bypass the removal of these files. - - It's important to detect such failures and exit nonzero because many - tools (most notably `make' and other build-management systems) depend - on being able to detect failure in other tools via their exit status. */ - -void -close_stdout (void) -{ - if (close_stream (stdout) != 0) - { - char const *write_error = _("write error"); - if (file_name) - error (0, errno, "%s: %s", quotearg_colon (file_name), - write_error); - else - error (0, errno, "%s", write_error); - - _exit (exit_failure); - } - - if (close_stream (stderr) != 0) - _exit (exit_failure); -} diff --git a/lib/closeout.h b/lib/closeout.h deleted file mode 100644 index 8bed23b..0000000 --- a/lib/closeout.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Close standard output and standard error. - - Copyright (C) 1998, 2000, 2003, 2004, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef CLOSEOUT_H -# define CLOSEOUT_H 1 - -# ifdef __cplusplus -extern "C" { -# endif - -void close_stdout_set_file_name (const char *file); -void close_stdout (void); - -# ifdef __cplusplus -} -# endif - -#endif diff --git a/lib/dirname.c b/lib/dirname.c deleted file mode 100644 index 16552c6..0000000 --- a/lib/dirname.c +++ /dev/null @@ -1,85 +0,0 @@ -/* dirname.c -- return all but the last element in a file name - - Copyright (C) 1990, 1998, 2000, 2001, 2003, 2004, 2005, 2006 Free Software - Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include "dirname.h" - -#include <string.h> -#include "xalloc.h" - -/* Return the length of the prefix of FILE that will be used by - dir_name. If FILE is in the working directory, this returns zero - even though `dir_name (FILE)' will return ".". Works properly even - if there are trailing slashes (by effectively ignoring them). */ - -size_t -dir_len (char const *file) -{ - size_t prefix_length = FILE_SYSTEM_PREFIX_LEN (file); - size_t length; - - /* Advance prefix_length beyond important leading slashes. */ - prefix_length += (prefix_length != 0 - ? (FILE_SYSTEM_DRIVE_PREFIX_CAN_BE_RELATIVE - && ISSLASH (file[prefix_length])) - : (ISSLASH (file[0]) - ? ((DOUBLE_SLASH_IS_DISTINCT_ROOT - && ISSLASH (file[1]) && ! ISSLASH (file[2]) - ? 2 : 1)) - : 0)); - - /* Strip the basename and any redundant slashes before it. */ - for (length = last_component (file) - file; - prefix_length < length; length--) - if (! ISSLASH (file[length - 1])) - break; - return length; -} - - -/* In general, we can't use the builtin `dirname' function if available, - since it has different meanings in different environments. - In some environments the builtin `dirname' modifies its argument. - - Return the leading directories part of FILE, allocated with xmalloc. - Works properly even if there are trailing slashes (by effectively - ignoring them). Unlike POSIX dirname(), FILE cannot be NULL. - - If lstat (FILE) would succeed, then { chdir (dir_name (FILE)); - lstat (base_name (FILE)); } will access the same file. Likewise, - if the sequence { chdir (dir_name (FILE)); - rename (base_name (FILE), "foo"); } succeeds, you have renamed FILE - to "foo" in the same directory FILE was in. */ - -char * -dir_name (char const *file) -{ - size_t length = dir_len (file); - bool append_dot = (length == 0 - || (FILE_SYSTEM_DRIVE_PREFIX_CAN_BE_RELATIVE - && length == FILE_SYSTEM_PREFIX_LEN (file) - && file[2] != '\0' && ! ISSLASH (file[2]))); - char *dir = xmalloc (length + append_dot + 1); - memcpy (dir, file, length); - if (append_dot) - dir[length++] = '.'; - dir[length] = '\0'; - return dir; -} diff --git a/lib/dirname.h b/lib/dirname.h deleted file mode 100644 index 91e7ed3..0000000 --- a/lib/dirname.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Take file names apart into directory and base names. - - Copyright (C) 1998, 2001, 2003-2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef DIRNAME_H_ -# define DIRNAME_H_ 1 - -# include <stdbool.h> -# include <stddef.h> - -# ifndef DIRECTORY_SEPARATOR -# define DIRECTORY_SEPARATOR '/' -# endif - -# ifndef ISSLASH -# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR) -# endif - -# ifndef FILE_SYSTEM_PREFIX_LEN -# if FILE_SYSTEM_ACCEPTS_DRIVE_LETTER_PREFIX - /* This internal macro assumes ASCII, but all hosts that support drive - letters use ASCII. */ -# define _IS_DRIVE_LETTER(c) (((unsigned int) (c) | ('a' - 'A')) - 'a' \ - <= 'z' - 'a') -# define FILE_SYSTEM_PREFIX_LEN(Filename) \ - (_IS_DRIVE_LETTER ((Filename)[0]) && (Filename)[1] == ':' ? 2 : 0) -# else -# define FILE_SYSTEM_PREFIX_LEN(Filename) 0 -# endif -# endif - -# ifndef FILE_SYSTEM_DRIVE_PREFIX_CAN_BE_RELATIVE -# define FILE_SYSTEM_DRIVE_PREFIX_CAN_BE_RELATIVE 0 -# endif - -# ifndef DOUBLE_SLASH_IS_DISTINCT_ROOT -# define DOUBLE_SLASH_IS_DISTINCT_ROOT 0 -# endif - -# if FILE_SYSTEM_DRIVE_PREFIX_CAN_BE_RELATIVE -# define IS_ABSOLUTE_FILE_NAME(F) ISSLASH ((F)[FILE_SYSTEM_PREFIX_LEN (F)]) -# else -# define IS_ABSOLUTE_FILE_NAME(F) \ - (ISSLASH ((F)[0]) || 0 < FILE_SYSTEM_PREFIX_LEN (F)) -# endif -# define IS_RELATIVE_FILE_NAME(F) (! IS_ABSOLUTE_FILE_NAME (F)) - -char *base_name (char const *file); -char *dir_name (char const *file); -size_t base_len (char const *file); -size_t dir_len (char const *file); -char *last_component (char const *file); - -bool strip_trailing_slashes (char *file); - -#endif /* not DIRNAME_H_ */ diff --git a/lib/error.c b/lib/error.c deleted file mode 100644 index cf86343..0000000 --- a/lib/error.c +++ /dev/null @@ -1,338 +0,0 @@ -/* Error handler for noninteractive utilities - Copyright (C) 1990-1998, 2000-2005, 2006 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */ - -#if !_LIBC -# include <config.h> -#endif - -#include "error.h" - -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#if !_LIBC && ENABLE_NLS -# include "gettext.h" -#endif - -#ifdef _LIBC -# include <libintl.h> -# include <stdbool.h> -# include <stdint.h> -# include <wchar.h> -# define mbsrtowcs __mbsrtowcs -#endif - -#if USE_UNLOCKED_IO -# include "unlocked-io.h" -#endif - -#ifndef _ -# define _(String) String -#endif - -/* If NULL, error will flush stdout, then print on stderr the program - name, a colon and a space. Otherwise, error will call this - function without parameters instead. */ -void (*error_print_progname) (void); - -/* This variable is incremented each time `error' is called. */ -unsigned int error_message_count; - -#ifdef _LIBC -/* In the GNU C library, there is a predefined variable for this. */ - -# define program_name program_invocation_name -# include <errno.h> -# include <limits.h> -# include <libio/libioP.h> - -/* In GNU libc we want do not want to use the common name `error' directly. - Instead make it a weak alias. */ -extern void __error (int status, int errnum, const char *message, ...) - __attribute__ ((__format__ (__printf__, 3, 4))); -extern void __error_at_line (int status, int errnum, const char *file_name, - unsigned int line_number, const char *message, - ...) - __attribute__ ((__format__ (__printf__, 5, 6)));; -# define error __error -# define error_at_line __error_at_line - -# include <libio/iolibio.h> -# define fflush(s) INTUSE(_IO_fflush) (s) -# undef putc -# define putc(c, fp) INTUSE(_IO_putc) (c, fp) - -# include <bits/libc-lock.h> - -#else /* not _LIBC */ - -# if !HAVE_DECL_STRERROR_R && STRERROR_R_CHAR_P -# ifndef HAVE_DECL_STRERROR_R -"this configure-time declaration test was not run" -# endif -char *strerror_r (); -# endif - -/* The calling program should define program_name and set it to the - name of the executing program. */ -extern char *program_name; - -# if HAVE_STRERROR_R || defined strerror_r -# define __strerror_r strerror_r -# endif /* HAVE_STRERROR_R || defined strerror_r */ -#endif /* not _LIBC */ - -static void -print_errno_message (int errnum) -{ - char const *s; - -#if defined HAVE_STRERROR_R || _LIBC - char errbuf[1024]; -# if STRERROR_R_CHAR_P || _LIBC - s = __strerror_r (errnum, errbuf, sizeof errbuf); -# else - if (__strerror_r (errnum, errbuf, sizeof errbuf) == 0) - s = errbuf; - else - s = 0; -# endif -#else - s = strerror (errnum); -#endif - -#if !_LIBC - if (! s) - s = _("Unknown system error"); -#endif - -#if _LIBC - __fxprintf (NULL, ": %s", s); -#else - fprintf (stderr, ": %s", s); -#endif -} - -static void -error_tail (int status, int errnum, const char *message, va_list args) -{ -#if _LIBC - if (_IO_fwide (stderr, 0) > 0) - { -# define ALLOCA_LIMIT 2000 - size_t len = strlen (message) + 1; - wchar_t *wmessage = NULL; - mbstate_t st; - size_t res; - const char *tmp; - bool use_malloc = false; - - while (1) - { - if (__libc_use_alloca (len * sizeof (wchar_t))) - wmessage = (wchar_t *) alloca (len * sizeof (wchar_t)); - else - { - if (!use_malloc) - wmessage = NULL; - - wchar_t *p = (wchar_t *) realloc (wmessage, - len * sizeof (wchar_t)); - if (p == NULL) - { - free (wmessage); - fputws_unlocked (L"out of memory\n", stderr); - return; - } - wmessage = p; - use_malloc = true; - } - - memset (&st, '\0', sizeof (st)); - tmp = message; - - res = mbsrtowcs (wmessage, &tmp, len, &st); - if (res != len) - break; - - if (__builtin_expect (len >= SIZE_MAX / 2, 0)) - { - /* This really should not happen if everything is fine. */ - res = (size_t) -1; - break; - } - - len *= 2; - } - - if (res == (size_t) -1) - { - /* The string cannot be converted. */ - if (use_malloc) - { - free (wmessage); - use_malloc = false; - } - wmessage = (wchar_t *) L"???"; - } - - __vfwprintf (stderr, wmessage, args); - - if (use_malloc) - free (wmessage); - } - else -#endif - vfprintf (stderr, message, args); - va_end (args); - - ++error_message_count; - if (errnum) - print_errno_message (errnum); -#if _LIBC - __fxprintf (NULL, "\n"); -#else - putc ('\n', stderr); -#endif - fflush (stderr); - if (status) - exit (status); -} - - -/* Print the program name and error message MESSAGE, which is a printf-style - format string with optional args. - If ERRNUM is nonzero, print its corresponding system error message. - Exit with status STATUS if it is nonzero. */ -void -error (int status, int errnum, const char *message, ...) -{ - va_list args; - -#if defined _LIBC && defined __libc_ptf_call - /* We do not want this call to be cut short by a thread - cancellation. Therefore disable cancellation for now. */ - int state = PTHREAD_CANCEL_ENABLE; - __libc_ptf_call (pthread_setcancelstate, (PTHREAD_CANCEL_DISABLE, &state), - 0); -#endif - - fflush (stdout); -#ifdef _LIBC - _IO_flockfile (stderr); -#endif - if (error_print_progname) - (*error_print_progname) (); - else - { -#if _LIBC - __fxprintf (NULL, "%s: ", program_name); -#else - fprintf (stderr, "%s: ", program_name); -#endif - } - - va_start (args, message); - error_tail (status, errnum, message, args); - -#ifdef _LIBC - _IO_funlockfile (stderr); -# ifdef __libc_ptf_call - __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0); -# endif -#endif -} - -/* Sometimes we want to have at most one error per line. This - variable controls whether this mode is selected or not. */ -int error_one_per_line; - -void -error_at_line (int status, int errnum, const char *file_name, - unsigned int line_number, const char *message, ...) -{ - va_list args; - - if (error_one_per_line) - { - static const char *old_file_name; - static unsigned int old_line_number; - - if (old_line_number == line_number - && (file_name == old_file_name - || strcmp (old_file_name, file_name) == 0)) - /* Simply return and print nothing. */ - return; - - old_file_name = file_name; - old_line_number = line_number; - } - -#if defined _LIBC && defined __libc_ptf_call - /* We do not want this call to be cut short by a thread - cancellation. Therefore disable cancellation for now. */ - int state = PTHREAD_CANCEL_ENABLE; - __libc_ptf_call (pthread_setcancelstate, (PTHREAD_CANCEL_DISABLE, &state), - 0); -#endif - - fflush (stdout); -#ifdef _LIBC - _IO_flockfile (stderr); -#endif - if (error_print_progname) - (*error_print_progname) (); - else - { -#if _LIBC - __fxprintf (NULL, "%s:", program_name); -#else - fprintf (stderr, "%s:", program_name); -#endif - } - -#if _LIBC - __fxprintf (NULL, file_name != NULL ? "%s:%d: " : " ", - file_name, line_number); -#else - fprintf (stderr, file_name != NULL ? "%s:%d: " : " ", - file_name, line_number); -#endif - - va_start (args, message); - error_tail (status, errnum, message, args); - -#ifdef _LIBC - _IO_funlockfile (stderr); -# ifdef __libc_ptf_call - __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0); -# endif -#endif -} - -#ifdef _LIBC -/* Make the weak alias. */ -# undef error -# undef error_at_line -weak_alias (__error, error) -weak_alias (__error_at_line, error_at_line) -#endif diff --git a/lib/error.h b/lib/error.h deleted file mode 100644 index 5a5f247..0000000 --- a/lib/error.h +++ /dev/null @@ -1,66 +0,0 @@ -/* Declaration for error-reporting function - Copyright (C) 1995, 1996, 1997, 2003, 2006 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _ERROR_H -#define _ERROR_H 1 - -#ifndef __attribute__ -/* This feature is available in gcc versions 2.5 and later. */ -# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__ -# define __attribute__(Spec) /* empty */ -# endif -/* The __-protected variants of `format' and `printf' attributes - are accepted by gcc versions 2.6.4 (effectively 2.7) and later. */ -# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7) -# define __format__ format -# define __printf__ printf -# endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* Print a message with `fprintf (stderr, FORMAT, ...)'; - if ERRNUM is nonzero, follow it with ": " and strerror (ERRNUM). - If STATUS is nonzero, terminate the program with `exit (STATUS)'. */ - -extern void error (int __status, int __errnum, const char *__format, ...) - __attribute__ ((__format__ (__printf__, 3, 4))); - -extern void error_at_line (int __status, int __errnum, const char *__fname, - unsigned int __lineno, const char *__format, ...) - __attribute__ ((__format__ (__printf__, 5, 6))); - -/* If NULL, error will flush stdout, then print on stderr the program - name, a colon and a space. Otherwise, error will call this - function without parameters instead. */ -extern void (*error_print_progname) (void); - -/* This variable is incremented each time `error' is called. */ -extern unsigned int error_message_count; - -/* Sometimes we want to have at most one error per line. This - variable controls whether this mode is selected or not. */ -extern int error_one_per_line; - -#ifdef __cplusplus -} -#endif - -#endif /* error.h */ diff --git a/lib/exclude.c b/lib/exclude.c deleted file mode 100644 index 5b726ff..0000000 --- a/lib/exclude.c +++ /dev/null @@ -1,250 +0,0 @@ -/* exclude.c -- exclude file names - - Copyright (C) 1992, 1993, 1994, 1997, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Paul Eggert <eggert@twinsun.com> */ - -#include <config.h> - -#include <stdbool.h> - -#include <ctype.h> -#include <errno.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "exclude.h" -#include "fnmatch.h" -#include "xalloc.h" -#include "verify.h" - -#if USE_UNLOCKED_IO -# include "unlocked-io.h" -#endif - -/* Non-GNU systems lack these options, so we don't need to check them. */ -#ifndef FNM_CASEFOLD -# define FNM_CASEFOLD 0 -#endif -#ifndef FNM_LEADING_DIR -# define FNM_LEADING_DIR 0 -#endif - -verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS) - & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR - | FNM_CASEFOLD)) - == 0); - -/* An exclude pattern-options pair. The options are fnmatch options - ORed with EXCLUDE_* options. */ - -struct patopts - { - char const *pattern; - int options; - }; - -/* An exclude list, of pattern-options pairs. */ - -struct exclude - { - struct patopts *exclude; - size_t exclude_alloc; - size_t exclude_count; - }; - -/* Return a newly allocated and empty exclude list. */ - -struct exclude * -new_exclude (void) -{ - return xzalloc (sizeof *new_exclude ()); -} - -/* Free the storage associated with an exclude list. */ - -void -free_exclude (struct exclude *ex) -{ - free (ex->exclude); - free (ex); -} - -/* Return zero if PATTERN matches F, obeying OPTIONS, except that - (unlike fnmatch) wildcards are disabled in PATTERN. */ - -static int -fnmatch_no_wildcards (char const *pattern, char const *f, int options) -{ - if (! (options & FNM_LEADING_DIR)) - return ((options & FNM_CASEFOLD) - ? strcasecmp (pattern, f) - : strcmp (pattern, f)); - else - { - size_t patlen = strlen (pattern); - int r = ((options & FNM_CASEFOLD) - ? strncasecmp (pattern, f, patlen) - : strncmp (pattern, f, patlen)); - if (! r) - { - r = f[patlen]; - if (r == '/') - r = 0; - } - return r; - } -} - -bool -exclude_fnmatch (char const *pattern, char const *f, int options) -{ - int (*matcher) (char const *, char const *, int) = - (options & EXCLUDE_WILDCARDS - ? fnmatch - : fnmatch_no_wildcards); - bool matched = ((*matcher) (pattern, f, options) == 0); - char const *p; - - if (! (options & EXCLUDE_ANCHORED)) - for (p = f; *p && ! matched; p++) - if (*p == '/' && p[1] != '/') - matched = ((*matcher) (pattern, p + 1, options) == 0); - - return matched; -} - -/* Return true if EX excludes F. */ - -bool -excluded_file_name (struct exclude const *ex, char const *f) -{ - size_t exclude_count = ex->exclude_count; - - /* If no options are given, the default is to include. */ - if (exclude_count == 0) - return false; - else - { - struct patopts const *exclude = ex->exclude; - size_t i; - - /* Otherwise, the default is the opposite of the first option. */ - bool excluded = !! (exclude[0].options & EXCLUDE_INCLUDE); - - /* Scan through the options, seeing whether they change F from - excluded to included or vice versa. */ - for (i = 0; i < exclude_count; i++) - { - char const *pattern = exclude[i].pattern; - int options = exclude[i].options; - if (excluded == !! (options & EXCLUDE_INCLUDE)) - excluded ^= exclude_fnmatch (pattern, f, options); - } - - return excluded; - } -} - -/* Append to EX the exclusion PATTERN with OPTIONS. */ - -void -add_exclude (struct exclude *ex, char const *pattern, int options) -{ - struct patopts *patopts; - - if (ex->exclude_count == ex->exclude_alloc) - ex->exclude = x2nrealloc (ex->exclude, &ex->exclude_alloc, - sizeof *ex->exclude); - - patopts = &ex->exclude[ex->exclude_count++]; - patopts->pattern = pattern; - patopts->options = options; -} - -/* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with - OPTIONS. LINE_END terminates each pattern in the file. If - LINE_END is a space character, ignore trailing spaces and empty - lines in FILE. Return -1 on failure, 0 on success. */ - -int -add_exclude_file (void (*add_func) (struct exclude *, char const *, int), - struct exclude *ex, char const *file_name, int options, - char line_end) -{ - bool use_stdin = file_name[0] == '-' && !file_name[1]; - FILE *in; - char *buf = NULL; - char *p; - char const *pattern; - char const *lim; - size_t buf_alloc = 0; - size_t buf_count = 0; - int c; - int e = 0; - - if (use_stdin) - in = stdin; - else if (! (in = fopen (file_name, "r"))) - return -1; - - while ((c = getc (in)) != EOF) - { - if (buf_count == buf_alloc) - buf = x2realloc (buf, &buf_alloc); - buf[buf_count++] = c; - } - - if (ferror (in)) - e = errno; - - if (!use_stdin && fclose (in) != 0) - e = errno; - - buf = xrealloc (buf, buf_count + 1); - buf[buf_count] = line_end; - lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end); - pattern = buf; - - for (p = buf; p < lim; p++) - if (*p == line_end) - { - char *pattern_end = p; - - if (isspace ((unsigned char) line_end)) - { - for (; ; pattern_end--) - if (pattern_end == pattern) - goto next_pattern; - else if (! isspace ((unsigned char) pattern_end[-1])) - break; - } - - *pattern_end = '\0'; - (*add_func) (ex, pattern, options); - - next_pattern: - pattern = p + 1; - } - - errno = e; - return e ? -1 : 0; -} diff --git a/lib/exclude.h b/lib/exclude.h deleted file mode 100644 index 203e384..0000000 --- a/lib/exclude.h +++ /dev/null @@ -1,44 +0,0 @@ -/* exclude.h -- declarations for excluding file names - - Copyright (C) 1992, 1993, 1994, 1997, 1999, 2001, 2002, 2003, 2005, - 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Paul Eggert <eggert@twinsun.com> */ - -/* Exclude options, which can be ORed with fnmatch options. */ - -/* Patterns must match the start of file names, instead of matching - anywhere after a '/'. */ -#define EXCLUDE_ANCHORED (1 << 30) - -/* Include instead of exclude. */ -#define EXCLUDE_INCLUDE (1 << 29) - -/* '?', '*', '[', and '\\' are special in patterns. Without this - option, these characters are ordinary and fnmatch is not used. */ -#define EXCLUDE_WILDCARDS (1 << 28) - -struct exclude; - -struct exclude *new_exclude (void); -void free_exclude (struct exclude *); -void add_exclude (struct exclude *, char const *, int); -int add_exclude_file (void (*) (struct exclude *, char const *, int), - struct exclude *, char const *, int, char); -bool excluded_file_name (struct exclude const *, char const *); -bool exclude_fnmatch (char const *pattern, char const *f, int options); diff --git a/lib/exit.h b/lib/exit.h deleted file mode 100644 index e8f7738..0000000 --- a/lib/exit.h +++ /dev/null @@ -1,32 +0,0 @@ -/* exit() function. - Copyright (C) 1995, 2001 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _EXIT_H -#define _EXIT_H - -/* Get exit() declaration. */ -#include <stdlib.h> - -/* Some systems do not define EXIT_*, despite otherwise supporting C89. */ -#ifndef EXIT_SUCCESS -# define EXIT_SUCCESS 0 -#endif -#ifndef EXIT_FAILURE -# define EXIT_FAILURE 1 -#endif - -#endif /* _EXIT_H */ diff --git a/lib/exitfail.c b/lib/exitfail.c deleted file mode 100644 index 97abc67..0000000 --- a/lib/exitfail.c +++ /dev/null @@ -1,25 +0,0 @@ -/* Failure exit status - - Copyright (C) 2002, 2003, 2005, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include "exitfail.h" -#include "exit.h" - -int volatile exit_failure = EXIT_FAILURE; diff --git a/lib/exitfail.h b/lib/exitfail.h deleted file mode 100644 index e46cf9c..0000000 --- a/lib/exitfail.h +++ /dev/null @@ -1,20 +0,0 @@ -/* Failure exit status - - Copyright (C) 2002 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -extern int volatile exit_failure; diff --git a/lib/fnmatch.c b/lib/fnmatch.c deleted file mode 100644 index 02dd365..0000000 --- a/lib/fnmatch.c +++ /dev/null @@ -1,354 +0,0 @@ -/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _LIBC -# include <config.h> -#endif - -/* Enable GNU extensions in fnmatch.h. */ -#ifndef _GNU_SOURCE -# define _GNU_SOURCE 1 -#endif - -#if ! defined __builtin_expect && __GNUC__ < 3 -# define __builtin_expect(expr, expected) (expr) -#endif - -#include <fnmatch.h> - -#include <alloca.h> -#include <assert.h> -#include <ctype.h> -#include <errno.h> -#include <stddef.h> -#include <stdbool.h> -#include <stdlib.h> -#include <string.h> - -#define WIDE_CHAR_SUPPORT \ - (HAVE_WCTYPE_H && HAVE_BTOWC && HAVE_ISWCTYPE \ - && HAVE_WMEMCHR && (HAVE_WMEMCPY || HAVE_WMEMPCPY)) - -/* For platform which support the ISO C amendement 1 functionality we - support user defined character classes. */ -#if defined _LIBC || WIDE_CHAR_SUPPORT -# include <wctype.h> -# include <wchar.h> -#endif - -/* We need some of the locale data (the collation sequence information) - but there is no interface to get this information in general. Therefore - we support a correct implementation only in glibc. */ -#ifdef _LIBC -# include "../locale/localeinfo.h" -# include "../locale/elem-hash.h" -# include "../locale/coll-lookup.h" -# include <shlib-compat.h> - -# define CONCAT(a,b) __CONCAT(a,b) -# define mbsrtowcs __mbsrtowcs -# define fnmatch __fnmatch -extern int fnmatch (const char *pattern, const char *string, int flags); -#endif - -#ifndef SIZE_MAX -# define SIZE_MAX ((size_t) -1) -#endif - -/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set. */ -#define NO_LEADING_PERIOD(flags) \ - ((flags & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD)) - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself, and have not detected a bug - in the library. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#if defined _LIBC || !defined __GNU_LIBRARY__ || !HAVE_FNMATCH_GNU - - -# if ! (defined isblank || HAVE_DECL_ISBLANK) -# define isblank(c) ((c) == ' ' || (c) == '\t') -# endif - -# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) - -# if defined _LIBC || WIDE_CHAR_SUPPORT -/* The GNU C library provides support for user-defined character classes - and the functions from ISO C amendement 1. */ -# ifdef CHARCLASS_NAME_MAX -# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX -# else -/* This shouldn't happen but some implementation might still have this - problem. Use a reasonable default value. */ -# define CHAR_CLASS_MAX_LENGTH 256 -# endif - -# ifdef _LIBC -# define IS_CHAR_CLASS(string) __wctype (string) -# else -# define IS_CHAR_CLASS(string) wctype (string) -# endif - -# ifdef _LIBC -# define ISWCTYPE(WC, WT) __iswctype (WC, WT) -# else -# define ISWCTYPE(WC, WT) iswctype (WC, WT) -# endif - -# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC -/* In this case we are implementing the multibyte character handling. */ -# define HANDLE_MULTIBYTE 1 -# endif - -# else -# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ - -# define IS_CHAR_CLASS(string) \ - (STREQ (string, "alpha") || STREQ (string, "upper") \ - || STREQ (string, "lower") || STREQ (string, "digit") \ - || STREQ (string, "alnum") || STREQ (string, "xdigit") \ - || STREQ (string, "space") || STREQ (string, "print") \ - || STREQ (string, "punct") || STREQ (string, "graph") \ - || STREQ (string, "cntrl") || STREQ (string, "blank")) -# endif - -/* Avoid depending on library functions or files - whose names are inconsistent. */ - -/* Global variable. */ -static int posixly_correct; - -# ifndef internal_function -/* Inside GNU libc we mark some function in a special way. In other - environments simply ignore the marking. */ -# define internal_function -# endif - -/* Note that this evaluates C many times. */ -# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c)) -# define CHAR char -# define UCHAR unsigned char -# define INT int -# define FCT internal_fnmatch -# define EXT ext_match -# define END end_pattern -# define L_(CS) CS -# ifdef _LIBC -# define BTOWC(C) __btowc (C) -# else -# define BTOWC(C) btowc (C) -# endif -# define STRLEN(S) strlen (S) -# define STRCAT(D, S) strcat (D, S) -# ifdef _LIBC -# define MEMPCPY(D, S, N) __mempcpy (D, S, N) -# else -# if HAVE_MEMPCPY -# define MEMPCPY(D, S, N) mempcpy (D, S, N) -# else -# define MEMPCPY(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N))) -# endif -# endif -# define MEMCHR(S, C, N) memchr (S, C, N) -# define STRCOLL(S1, S2) strcoll (S1, S2) -# include "fnmatch_loop.c" - - -# if HANDLE_MULTIBYTE -# define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c)) -# define CHAR wchar_t -# define UCHAR wint_t -# define INT wint_t -# define FCT internal_fnwmatch -# define EXT ext_wmatch -# define END end_wpattern -# define L_(CS) L##CS -# define BTOWC(C) (C) -# ifdef _LIBC -# define STRLEN(S) __wcslen (S) -# define STRCAT(D, S) __wcscat (D, S) -# define MEMPCPY(D, S, N) __wmempcpy (D, S, N) -# else -# define STRLEN(S) wcslen (S) -# define STRCAT(D, S) wcscat (D, S) -# if HAVE_WMEMPCPY -# define MEMPCPY(D, S, N) wmempcpy (D, S, N) -# else -# define MEMPCPY(D, S, N) (wmemcpy (D, S, N) + (N)) -# endif -# endif -# define MEMCHR(S, C, N) wmemchr (S, C, N) -# define STRCOLL(S1, S2) wcscoll (S1, S2) -# define WIDE_CHAR_VERSION 1 - -# undef IS_CHAR_CLASS -/* We have to convert the wide character string in a multibyte string. But - we know that the character class names consist of alphanumeric characters - from the portable character set, and since the wide character encoding - for a member of the portable character set is the same code point as - its single-byte encoding, we can use a simplified method to convert the - string to a multibyte character string. */ -static wctype_t -is_char_class (const wchar_t *wcs) -{ - char s[CHAR_CLASS_MAX_LENGTH + 1]; - char *cp = s; - - do - { - /* Test for a printable character from the portable character set. */ -# ifdef _LIBC - if (*wcs < 0x20 || *wcs > 0x7e - || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60) - return (wctype_t) 0; -# else - switch (*wcs) - { - case L' ': case L'!': case L'"': case L'#': case L'%': - case L'&': case L'\'': case L'(': case L')': case L'*': - case L'+': case L',': case L'-': case L'.': case L'/': - case L'0': case L'1': case L'2': case L'3': case L'4': - case L'5': case L'6': case L'7': case L'8': case L'9': - case L':': case L';': case L'<': case L'=': case L'>': - case L'?': - case L'A': case L'B': case L'C': case L'D': case L'E': - case L'F': case L'G': case L'H': case L'I': case L'J': - case L'K': case L'L': case L'M': case L'N': case L'O': - case L'P': case L'Q': case L'R': case L'S': case L'T': - case L'U': case L'V': case L'W': case L'X': case L'Y': - case L'Z': - case L'[': case L'\\': case L']': case L'^': case L'_': - case L'a': case L'b': case L'c': case L'd': case L'e': - case L'f': case L'g': case L'h': case L'i': case L'j': - case L'k': case L'l': case L'm': case L'n': case L'o': - case L'p': case L'q': case L'r': case L's': case L't': - case L'u': case L'v': case L'w': case L'x': case L'y': - case L'z': case L'{': case L'|': case L'}': case L'~': - break; - default: - return (wctype_t) 0; - } -# endif - - /* Avoid overrunning the buffer. */ - if (cp == s + CHAR_CLASS_MAX_LENGTH) - return (wctype_t) 0; - - *cp++ = (char) *wcs++; - } - while (*wcs != L'\0'); - - *cp = '\0'; - -# ifdef _LIBC - return __wctype (s); -# else - return wctype (s); -# endif -} -# define IS_CHAR_CLASS(string) is_char_class (string) - -# include "fnmatch_loop.c" -# endif - - -int -fnmatch (const char *pattern, const char *string, int flags) -{ -# if HANDLE_MULTIBYTE -# define ALLOCA_LIMIT 2000 - if (__builtin_expect (MB_CUR_MAX, 1) != 1) - { - mbstate_t ps; - size_t patsize; - size_t strsize; - size_t totsize; - wchar_t *wpattern; - wchar_t *wstring; - int res; - - /* Calculate the size needed to convert the strings to - wide characters. */ - memset (&ps, '\0', sizeof (ps)); - patsize = mbsrtowcs (NULL, &pattern, 0, &ps) + 1; - if (__builtin_expect (patsize != 0, 1)) - { - assert (mbsinit (&ps)); - strsize = mbsrtowcs (NULL, &string, 0, &ps) + 1; - if (__builtin_expect (strsize != 0, 1)) - { - assert (mbsinit (&ps)); - totsize = patsize + strsize; - if (__builtin_expect (! (patsize <= totsize - && totsize <= SIZE_MAX / sizeof (wchar_t)), - 0)) - { - errno = ENOMEM; - return -1; - } - - /* Allocate room for the wide characters. */ - if (__builtin_expect (totsize < ALLOCA_LIMIT, 1)) - wpattern = (wchar_t *) alloca (totsize * sizeof (wchar_t)); - else - { - wpattern = malloc (totsize * sizeof (wchar_t)); - if (__builtin_expect (! wpattern, 0)) - { - errno = ENOMEM; - return -1; - } - } - wstring = wpattern + patsize; - - /* Convert the strings into wide characters. */ - mbsrtowcs (wpattern, &pattern, patsize, &ps); - assert (mbsinit (&ps)); - mbsrtowcs (wstring, &string, strsize, &ps); - - res = internal_fnwmatch (wpattern, wstring, wstring + strsize - 1, - flags & FNM_PERIOD, flags); - - if (__builtin_expect (! (totsize < ALLOCA_LIMIT), 0)) - free (wpattern); - return res; - } - } - } - -# endif /* HANDLE_MULTIBYTE */ - - return internal_fnmatch (pattern, string, string + strlen (string), - flags & FNM_PERIOD, flags); -} - -# ifdef _LIBC -# undef fnmatch -versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3); -# if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3) -strong_alias (__fnmatch, __fnmatch_old) -compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0); -# endif -libc_hidden_ver (__fnmatch, fnmatch) -# endif - -#endif /* _LIBC or not __GNU_LIBRARY__. */ diff --git a/lib/fnmatch_.h b/lib/fnmatch_.h deleted file mode 100644 index c406aef..0000000 --- a/lib/fnmatch_.h +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2001, 2002, 2003, - 2005 Free Software Foundation, Inc. - - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _FNMATCH_H -#define _FNMATCH_H 1 - -#ifdef __cplusplus -extern "C" { -#endif - -/* We #undef these before defining them because some losing systems - (HP-UX A.08.07 for example) define these in <unistd.h>. */ -#undef FNM_PATHNAME -#undef FNM_NOESCAPE -#undef FNM_PERIOD - -/* Bits set in the FLAGS argument to `fnmatch'. */ -#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */ -#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */ -#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */ - -#if !defined _POSIX_C_SOURCE || _POSIX_C_SOURCE < 2 || defined _GNU_SOURCE -# define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */ -# define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */ -# define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */ -# define FNM_EXTMATCH (1 << 5) /* Use ksh-like extended matching. */ -#endif - -/* Value returned by `fnmatch' if STRING does not match PATTERN. */ -#define FNM_NOMATCH 1 - -/* This value is returned if the implementation does not support - `fnmatch'. Since this is not the case here it will never be - returned but the conformance test suites still require the symbol - to be defined. */ -#ifdef _XOPEN_SOURCE -# define FNM_NOSYS (-1) -#endif - -/* Match NAME against the file name pattern PATTERN, - returning zero if it matches, FNM_NOMATCH if not. */ -extern int fnmatch (const char *__pattern, const char *__name, - int __flags); - -#ifdef __cplusplus -} -#endif - -#endif /* fnmatch.h */ diff --git a/lib/fnmatch_loop.c b/lib/fnmatch_loop.c deleted file mode 100644 index d1008c2..0000000 --- a/lib/fnmatch_loop.c +++ /dev/null @@ -1,1210 +0,0 @@ -/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Match STRING against the file name pattern PATTERN, returning zero if - it matches, nonzero if not. */ -static int EXT (INT opt, const CHAR *pattern, const CHAR *string, - const CHAR *string_end, bool no_leading_period, int flags) - internal_function; -static const CHAR *END (const CHAR *patternp) internal_function; - -static int -internal_function -FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, - bool no_leading_period, int flags) -{ - register const CHAR *p = pattern, *n = string; - register UCHAR c; -#ifdef _LIBC -# if WIDE_CHAR_VERSION - const char *collseq = (const char *) - _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); -# else - const UCHAR *collseq = (const UCHAR *) - _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); -# endif -#endif - - while ((c = *p++) != L_('\0')) - { - bool new_no_leading_period = false; - c = FOLD (c); - - switch (c) - { - case L_('?'): - if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') - { - int res; - - res = EXT (c, p, n, string_end, no_leading_period, - flags); - if (res != -1) - return res; - } - - if (n == string_end) - return FNM_NOMATCH; - else if (*n == L_('/') && (flags & FNM_FILE_NAME)) - return FNM_NOMATCH; - else if (*n == L_('.') && no_leading_period) - return FNM_NOMATCH; - break; - - case L_('\\'): - if (!(flags & FNM_NOESCAPE)) - { - c = *p++; - if (c == L_('\0')) - /* Trailing \ loses. */ - return FNM_NOMATCH; - c = FOLD (c); - } - if (n == string_end || FOLD ((UCHAR) *n) != c) - return FNM_NOMATCH; - break; - - case L_('*'): - if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') - { - int res; - - res = EXT (c, p, n, string_end, no_leading_period, - flags); - if (res != -1) - return res; - } - - if (n != string_end && *n == L_('.') && no_leading_period) - return FNM_NOMATCH; - - for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) - { - if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) - { - const CHAR *endp = END (p); - if (endp != p) - { - /* This is a pattern. Skip over it. */ - p = endp; - continue; - } - } - - if (c == L_('?')) - { - /* A ? needs to match one character. */ - if (n == string_end) - /* There isn't another character; no match. */ - return FNM_NOMATCH; - else if (*n == L_('/') - && __builtin_expect (flags & FNM_FILE_NAME, 0)) - /* A slash does not match a wildcard under - FNM_FILE_NAME. */ - return FNM_NOMATCH; - else - /* One character of the string is consumed in matching - this ? wildcard, so *??? won't match if there are - less than three characters. */ - ++n; - } - } - - if (c == L_('\0')) - /* The wildcard(s) is/are the last element of the pattern. - If the name is a file name and contains another slash - this means it cannot match, unless the FNM_LEADING_DIR - flag is set. */ - { - int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; - - if (flags & FNM_FILE_NAME) - { - if (flags & FNM_LEADING_DIR) - result = 0; - else - { - if (MEMCHR (n, L_('/'), string_end - n) == NULL) - result = 0; - } - } - - return result; - } - else - { - const CHAR *endp; - - endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), - string_end - n); - if (endp == NULL) - endp = string_end; - - if (c == L_('[') - || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 - && (c == L_('@') || c == L_('+') || c == L_('!')) - && *p == L_('('))) - { - int flags2 = ((flags & FNM_FILE_NAME) - ? flags : (flags & ~FNM_PERIOD)); - bool no_leading_period2 = no_leading_period; - - for (--p; n < endp; ++n, no_leading_period2 = false) - if (FCT (p, n, string_end, no_leading_period2, flags2) - == 0) - return 0; - } - else if (c == L_('/') && (flags & FNM_FILE_NAME)) - { - while (n < string_end && *n != L_('/')) - ++n; - if (n < string_end && *n == L_('/') - && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags) - == 0)) - return 0; - } - else - { - int flags2 = ((flags & FNM_FILE_NAME) - ? flags : (flags & ~FNM_PERIOD)); - int no_leading_period2 = no_leading_period; - - if (c == L_('\\') && !(flags & FNM_NOESCAPE)) - c = *p; - c = FOLD (c); - for (--p; n < endp; ++n, no_leading_period2 = false) - if (FOLD ((UCHAR) *n) == c - && (FCT (p, n, string_end, no_leading_period2, flags2) - == 0)) - return 0; - } - } - - /* If we come here no match is possible with the wildcard. */ - return FNM_NOMATCH; - - case L_('['): - { - /* Nonzero if the sense of the character class is inverted. */ - register bool not; - CHAR cold; - UCHAR fn; - - if (posixly_correct == 0) - posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; - - if (n == string_end) - return FNM_NOMATCH; - - if (*n == L_('.') && no_leading_period) - return FNM_NOMATCH; - - if (*n == L_('/') && (flags & FNM_FILE_NAME)) - /* `/' cannot be matched. */ - return FNM_NOMATCH; - - not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); - if (not) - ++p; - - fn = FOLD ((UCHAR) *n); - - c = *p++; - for (;;) - { - if (!(flags & FNM_NOESCAPE) && c == L_('\\')) - { - if (*p == L_('\0')) - return FNM_NOMATCH; - c = FOLD ((UCHAR) *p); - ++p; - - goto normal_bracket; - } - else if (c == L_('[') && *p == L_(':')) - { - /* Leave room for the null. */ - CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; - size_t c1 = 0; -#if defined _LIBC || WIDE_CHAR_SUPPORT - wctype_t wt; -#endif - const CHAR *startp = p; - - for (;;) - { - if (c1 == CHAR_CLASS_MAX_LENGTH) - /* The name is too long and therefore the pattern - is ill-formed. */ - return FNM_NOMATCH; - - c = *++p; - if (c == L_(':') && p[1] == L_(']')) - { - p += 2; - break; - } - if (c < L_('a') || c >= L_('z')) - { - /* This cannot possibly be a character class name. - Match it as a normal range. */ - p = startp; - c = L_('['); - goto normal_bracket; - } - str[c1++] = c; - } - str[c1] = L_('\0'); - -#if defined _LIBC || WIDE_CHAR_SUPPORT - wt = IS_CHAR_CLASS (str); - if (wt == 0) - /* Invalid character class name. */ - return FNM_NOMATCH; - -# if defined _LIBC && ! WIDE_CHAR_VERSION - /* The following code is glibc specific but does - there a good job in speeding up the code since - we can avoid the btowc() call. */ - if (_ISCTYPE ((UCHAR) *n, wt)) - goto matched; -# else - if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) - goto matched; -# endif -#else - if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n)) - || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n)) - || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n)) - || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n)) - || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n)) - || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n)) - || (STREQ (str, L_("lower")) && islower ((UCHAR) *n)) - || (STREQ (str, L_("print")) && isprint ((UCHAR) *n)) - || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n)) - || (STREQ (str, L_("space")) && isspace ((UCHAR) *n)) - || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n)) - || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n))) - goto matched; -#endif - c = *p++; - } -#ifdef _LIBC - else if (c == L_('[') && *p == L_('=')) - { - UCHAR str[1]; - uint32_t nrules = - _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - const CHAR *startp = p; - - c = *++p; - if (c == L_('\0')) - { - p = startp; - c = L_('['); - goto normal_bracket; - } - str[0] = c; - - c = *++p; - if (c != L_('=') || p[1] != L_(']')) - { - p = startp; - c = L_('['); - goto normal_bracket; - } - p += 2; - - if (nrules == 0) - { - if ((UCHAR) *n == str[0]) - goto matched; - } - else - { - const int32_t *table; -# if WIDE_CHAR_VERSION - const int32_t *weights; - const int32_t *extra; -# else - const unsigned char *weights; - const unsigned char *extra; -# endif - const int32_t *indirect; - int32_t idx; - const UCHAR *cp = (const UCHAR *) str; - - /* This #include defines a local function! */ -# if WIDE_CHAR_VERSION -# include <locale/weightwc.h> -# else -# include <locale/weight.h> -# endif - -# if WIDE_CHAR_VERSION - table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); - weights = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); - extra = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); - indirect = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); -# else - table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - weights = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); -# endif - - idx = findidx (&cp); - if (idx != 0) - { - /* We found a table entry. Now see whether the - character we are currently at has the same - equivalance class value. */ - int len = weights[idx]; - int32_t idx2; - const UCHAR *np = (const UCHAR *) n; - - idx2 = findidx (&np); - if (idx2 != 0 && len == weights[idx2]) - { - int cnt = 0; - - while (cnt < len - && (weights[idx + 1 + cnt] - == weights[idx2 + 1 + cnt])) - ++cnt; - - if (cnt == len) - goto matched; - } - } - } - - c = *p++; - } -#endif - else if (c == L_('\0')) - /* [ (unterminated) loses. */ - return FNM_NOMATCH; - else - { - bool is_range = false; - -#ifdef _LIBC - bool is_seqval = false; - - if (c == L_('[') && *p == L_('.')) - { - uint32_t nrules = - _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - const CHAR *startp = p; - size_t c1 = 0; - - while (1) - { - c = *++p; - if (c == L_('.') && p[1] == L_(']')) - { - p += 2; - break; - } - if (c == '\0') - return FNM_NOMATCH; - ++c1; - } - - /* We have to handling the symbols differently in - ranges since then the collation sequence is - important. */ - is_range = *p == L_('-') && p[1] != L_('\0'); - - if (nrules == 0) - { - /* There are no names defined in the collation - data. Therefore we only accept the trivial - names consisting of the character itself. */ - if (c1 != 1) - return FNM_NOMATCH; - - if (!is_range && *n == startp[1]) - goto matched; - - cold = startp[1]; - c = *p++; - } - else - { - int32_t table_size; - const int32_t *symb_table; -# ifdef WIDE_CHAR_VERSION - char str[c1]; - size_t strcnt; -# else -# define str (startp + 1) -# endif - const unsigned char *extra; - int32_t idx; - int32_t elem; - int32_t second; - int32_t hash; - -# ifdef WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. */ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; -# endif - - table_size = - _NL_CURRENT_WORD (LC_COLLATE, - _NL_COLLATE_SYMB_HASH_SIZEMB); - symb_table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_TABLEMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_EXTRAMB); - - /* Locate the character in the hashing table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem - + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - - if (symb_table[2 * elem] != 0) - { - /* Compare the byte sequence but only if - this is not part of a range. */ -# ifdef WIDE_CHAR_VERSION - int32_t *wextra; - - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - - wextra = (int32_t *) &extra[idx + 4]; -# endif - - if (! is_range) - { -# ifdef WIDE_CHAR_VERSION - for (c1 = 0; - (int32_t) c1 < wextra[idx]; - ++c1) - if (n[c1] != wextra[1 + c1]) - break; - - if ((int32_t) c1 == wextra[idx]) - goto matched; -# else - for (c1 = 0; c1 < extra[idx]; ++c1) - if (n[c1] != extra[1 + c1]) - break; - - if (c1 == extra[idx]) - goto matched; -# endif - } - - /* Get the collation sequence value. */ - is_seqval = true; -# ifdef WIDE_CHAR_VERSION - cold = wextra[1 + wextra[idx]]; -# else - /* Adjust for the alignment. */ - idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; - cold = *((int32_t *) &extra[idx]); -# endif - - c = *p++; - } - else if (c1 == 1) - { - /* No valid character. Match it as a - single byte. */ - if (!is_range && *n == str[0]) - goto matched; - - cold = str[0]; - c = *p++; - } - else - return FNM_NOMATCH; - } - } - else -# undef str -#endif - { - c = FOLD (c); - normal_bracket: - - /* We have to handling the symbols differently in - ranges since then the collation sequence is - important. */ - is_range = (*p == L_('-') && p[1] != L_('\0') - && p[1] != L_(']')); - - if (!is_range && c == fn) - goto matched; - -#if _LIBC - /* This is needed if we goto normal_bracket; from - outside of is_seqval's scope. */ - is_seqval = false; -#endif - - cold = c; - c = *p++; - } - - if (c == L_('-') && *p != L_(']')) - { -#if _LIBC - /* We have to find the collation sequence - value for C. Collation sequence is nothing - we can regularly access. The sequence - value is defined by the order in which the - definitions of the collation values for the - various characters appear in the source - file. A strange concept, nowhere - documented. */ - uint32_t fcollseq; - uint32_t lcollseq; - UCHAR cend = *p++; - -# ifdef WIDE_CHAR_VERSION - /* Search in the `names' array for the characters. */ - fcollseq = __collseq_table_lookup (collseq, fn); - if (fcollseq == ~((uint32_t) 0)) - /* XXX We don't know anything about the character - we are supposed to match. This means we are - failing. */ - goto range_not_matched; - - if (is_seqval) - lcollseq = cold; - else - lcollseq = __collseq_table_lookup (collseq, cold); -# else - fcollseq = collseq[fn]; - lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; -# endif - - is_seqval = false; - if (cend == L_('[') && *p == L_('.')) - { - uint32_t nrules = - _NL_CURRENT_WORD (LC_COLLATE, - _NL_COLLATE_NRULES); - const CHAR *startp = p; - size_t c1 = 0; - - while (1) - { - c = *++p; - if (c == L_('.') && p[1] == L_(']')) - { - p += 2; - break; - } - if (c == '\0') - return FNM_NOMATCH; - ++c1; - } - - if (nrules == 0) - { - /* There are no names defined in the - collation data. Therefore we only - accept the trivial names consisting - of the character itself. */ - if (c1 != 1) - return FNM_NOMATCH; - - cend = startp[1]; - } - else - { - int32_t table_size; - const int32_t *symb_table; -# ifdef WIDE_CHAR_VERSION - char str[c1]; - size_t strcnt; -# else -# define str (startp + 1) -# endif - const unsigned char *extra; - int32_t idx; - int32_t elem; - int32_t second; - int32_t hash; - -# ifdef WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. */ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; -# endif - - table_size = - _NL_CURRENT_WORD (LC_COLLATE, - _NL_COLLATE_SYMB_HASH_SIZEMB); - symb_table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_TABLEMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_EXTRAMB); - - /* Locate the character in the hashing - table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - - if (symb_table[2 * elem] != 0) - { - /* Compare the byte sequence but only if - this is not part of a range. */ -# ifdef WIDE_CHAR_VERSION - int32_t *wextra; - - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~4; - - wextra = (int32_t *) &extra[idx + 4]; -# endif - /* Get the collation sequence value. */ - is_seqval = true; -# ifdef WIDE_CHAR_VERSION - cend = wextra[1 + wextra[idx]]; -# else - /* Adjust for the alignment. */ - idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; - cend = *((int32_t *) &extra[idx]); -# endif - } - else if (symb_table[2 * elem] != 0 && c1 == 1) - { - cend = str[0]; - c = *p++; - } - else - return FNM_NOMATCH; - } -# undef str - } - else - { - if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) - cend = *p++; - if (cend == L_('\0')) - return FNM_NOMATCH; - cend = FOLD (cend); - } - - /* XXX It is not entirely clear to me how to handle - characters which are not mentioned in the - collation specification. */ - if ( -# ifdef WIDE_CHAR_VERSION - lcollseq == 0xffffffff || -# endif - lcollseq <= fcollseq) - { - /* We have to look at the upper bound. */ - uint32_t hcollseq; - - if (is_seqval) - hcollseq = cend; - else - { -# ifdef WIDE_CHAR_VERSION - hcollseq = - __collseq_table_lookup (collseq, cend); - if (hcollseq == ~((uint32_t) 0)) - { - /* Hum, no information about the upper - bound. The matching succeeds if the - lower bound is matched exactly. */ - if (lcollseq != fcollseq) - goto range_not_matched; - - goto matched; - } -# else - hcollseq = collseq[cend]; -# endif - } - - if (lcollseq <= hcollseq && fcollseq <= hcollseq) - goto matched; - } -# ifdef WIDE_CHAR_VERSION - range_not_matched: -# endif -#else - /* We use a boring value comparison of the character - values. This is better than comparing using - `strcoll' since the latter would have surprising - and sometimes fatal consequences. */ - UCHAR cend = *p++; - - if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) - cend = *p++; - if (cend == L_('\0')) - return FNM_NOMATCH; - - /* It is a range. */ - if (cold <= fn && fn <= cend) - goto matched; -#endif - - c = *p++; - } - } - - if (c == L_(']')) - break; - } - - if (!not) - return FNM_NOMATCH; - break; - - matched: - /* Skip the rest of the [...] that already matched. */ - do - { - ignore_next: - c = *p++; - - if (c == L_('\0')) - /* [... (unterminated) loses. */ - return FNM_NOMATCH; - - if (!(flags & FNM_NOESCAPE) && c == L_('\\')) - { - if (*p == L_('\0')) - return FNM_NOMATCH; - /* XXX 1003.2d11 is unclear if this is right. */ - ++p; - } - else if (c == L_('[') && *p == L_(':')) - { - int c1 = 0; - const CHAR *startp = p; - - while (1) - { - c = *++p; - if (++c1 == CHAR_CLASS_MAX_LENGTH) - return FNM_NOMATCH; - - if (*p == L_(':') && p[1] == L_(']')) - break; - - if (c < L_('a') || c >= L_('z')) - { - p = startp; - goto ignore_next; - } - } - p += 2; - c = *p++; - } - else if (c == L_('[') && *p == L_('=')) - { - c = *++p; - if (c == L_('\0')) - return FNM_NOMATCH; - c = *++p; - if (c != L_('=') || p[1] != L_(']')) - return FNM_NOMATCH; - p += 2; - c = *p++; - } - else if (c == L_('[') && *p == L_('.')) - { - ++p; - while (1) - { - c = *++p; - if (c == '\0') - return FNM_NOMATCH; - - if (*p == L_('.') && p[1] == L_(']')) - break; - } - p += 2; - c = *p++; - } - } - while (c != L_(']')); - if (not) - return FNM_NOMATCH; - } - break; - - case L_('+'): - case L_('@'): - case L_('!'): - if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') - { - int res; - - res = EXT (c, p, n, string_end, no_leading_period, flags); - if (res != -1) - return res; - } - goto normal_match; - - case L_('/'): - if (NO_LEADING_PERIOD (flags)) - { - if (n == string_end || c != (UCHAR) *n) - return FNM_NOMATCH; - - new_no_leading_period = true; - break; - } - /* FALLTHROUGH */ - default: - normal_match: - if (n == string_end || c != FOLD ((UCHAR) *n)) - return FNM_NOMATCH; - } - - no_leading_period = new_no_leading_period; - ++n; - } - - if (n == string_end) - return 0; - - if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) - /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ - return 0; - - return FNM_NOMATCH; -} - - -static const CHAR * -internal_function -END (const CHAR *pattern) -{ - const CHAR *p = pattern; - - while (1) - if (*++p == L_('\0')) - /* This is an invalid pattern. */ - return pattern; - else if (*p == L_('[')) - { - /* Handle brackets special. */ - if (posixly_correct == 0) - posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; - - /* Skip the not sign. We have to recognize it because of a possibly - following ']'. */ - if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) - ++p; - /* A leading ']' is recognized as such. */ - if (*p == L_(']')) - ++p; - /* Skip over all characters of the list. */ - while (*p != L_(']')) - if (*p++ == L_('\0')) - /* This is no valid pattern. */ - return pattern; - } - else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') - || *p == L_('!')) && p[1] == L_('(')) - p = END (p + 1); - else if (*p == L_(')')) - break; - - return p + 1; -} - - -static int -internal_function -EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, - bool no_leading_period, int flags) -{ - const CHAR *startp; - size_t level; - struct patternlist - { - struct patternlist *next; - CHAR str[1]; - } *list = NULL; - struct patternlist **lastp = &list; - size_t pattern_len = STRLEN (pattern); - const CHAR *p; - const CHAR *rs; - enum { ALLOCA_LIMIT = 8000 }; - - /* Parse the pattern. Store the individual parts in the list. */ - level = 0; - for (startp = p = pattern + 1; ; ++p) - if (*p == L_('\0')) - /* This is an invalid pattern. */ - return -1; - else if (*p == L_('[')) - { - /* Handle brackets special. */ - if (posixly_correct == 0) - posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; - - /* Skip the not sign. We have to recognize it because of a possibly - following ']'. */ - if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) - ++p; - /* A leading ']' is recognized as such. */ - if (*p == L_(']')) - ++p; - /* Skip over all characters of the list. */ - while (*p != L_(']')) - if (*p++ == L_('\0')) - /* This is no valid pattern. */ - return -1; - } - else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') - || *p == L_('!')) && p[1] == L_('(')) - /* Remember the nesting level. */ - ++level; - else if (*p == L_(')')) - { - if (level-- == 0) - { - /* This means we found the end of the pattern. */ -#define NEW_PATTERN \ - struct patternlist *newp; \ - size_t plen; \ - size_t plensize; \ - size_t newpsize; \ - \ - plen = (opt == L_('?') || opt == L_('@') \ - ? pattern_len \ - : p - startp + 1); \ - plensize = plen * sizeof (CHAR); \ - newpsize = offsetof (struct patternlist, str) + plensize; \ - if ((size_t) -1 / sizeof (CHAR) < plen \ - || newpsize < offsetof (struct patternlist, str) \ - || ALLOCA_LIMIT <= newpsize) \ - return -1; \ - newp = (struct patternlist *) alloca (newpsize); \ - *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \ - newp->next = NULL; \ - *lastp = newp; \ - lastp = &newp->next - NEW_PATTERN; - break; - } - } - else if (*p == L_('|')) - { - if (level == 0) - { - NEW_PATTERN; - startp = p + 1; - } - } - assert (list != NULL); - assert (p[-1] == L_(')')); -#undef NEW_PATTERN - - switch (opt) - { - case L_('*'): - if (FCT (p, string, string_end, no_leading_period, flags) == 0) - return 0; - /* FALLTHROUGH */ - - case L_('+'): - do - { - for (rs = string; rs <= string_end; ++rs) - /* First match the prefix with the current pattern with the - current pattern. */ - if (FCT (list->str, string, rs, no_leading_period, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0 - /* This was successful. Now match the rest with the rest - of the pattern. */ - && (FCT (p, rs, string_end, - rs == string - ? no_leading_period - : rs[-1] == '/' && NO_LEADING_PERIOD (flags), - flags & FNM_FILE_NAME - ? flags : flags & ~FNM_PERIOD) == 0 - /* This didn't work. Try the whole pattern. */ - || (rs != string - && FCT (pattern - 1, rs, string_end, - rs == string - ? no_leading_period - : rs[-1] == '/' && NO_LEADING_PERIOD (flags), - flags & FNM_FILE_NAME - ? flags : flags & ~FNM_PERIOD) == 0))) - /* It worked. Signal success. */ - return 0; - } - while ((list = list->next) != NULL); - - /* None of the patterns lead to a match. */ - return FNM_NOMATCH; - - case L_('?'): - if (FCT (p, string, string_end, no_leading_period, flags) == 0) - return 0; - /* FALLTHROUGH */ - - case L_('@'): - do - /* I cannot believe it but `strcat' is actually acceptable - here. Match the entire string with the prefix from the - pattern list and the rest of the pattern following the - pattern list. */ - if (FCT (STRCAT (list->str, p), string, string_end, - no_leading_period, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) - /* It worked. Signal success. */ - return 0; - while ((list = list->next) != NULL); - - /* None of the patterns lead to a match. */ - return FNM_NOMATCH; - - case L_('!'): - for (rs = string; rs <= string_end; ++rs) - { - struct patternlist *runp; - - for (runp = list; runp != NULL; runp = runp->next) - if (FCT (runp->str, string, rs, no_leading_period, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) - break; - - /* If none of the patterns matched see whether the rest does. */ - if (runp == NULL - && (FCT (p, rs, string_end, - rs == string - ? no_leading_period - : rs[-1] == '/' && NO_LEADING_PERIOD (flags), - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) - == 0)) - /* This is successful. */ - return 0; - } - - /* None of the patterns together with the rest of the pattern - lead to a match. */ - return FNM_NOMATCH; - - default: - assert (! "Invalid extended matching operator"); - break; - } - - return -1; -} - - -#undef FOLD -#undef CHAR -#undef UCHAR -#undef INT -#undef FCT -#undef EXT -#undef END -#undef MEMPCPY -#undef MEMCHR -#undef STRCOLL -#undef STRLEN -#undef STRCAT -#undef L_ -#undef BTOWC diff --git a/lib/getcwd.c b/lib/getcwd.c deleted file mode 100644 index 0ac5cdc..0000000 --- a/lib/getcwd.c +++ /dev/null @@ -1,425 +0,0 @@ -/* Copyright (C) 1991,92,93,94,95,96,97,98,99,2004,2005,2006,2007 Free Software - Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#if !_LIBC -# include <config.h> -# include "getcwd.h" -#endif - -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <stdbool.h> -#include <stddef.h> - -#include <fcntl.h> /* For AT_FDCWD on Solaris 9. */ - -#ifndef __set_errno -# define __set_errno(val) (errno = (val)) -#endif - -#include <dirent.h> -#ifndef _D_EXACT_NAMLEN -# define _D_EXACT_NAMLEN(d) strlen ((d)->d_name) -#endif -#ifndef _D_ALLOC_NAMLEN -# define _D_ALLOC_NAMLEN(d) (_D_EXACT_NAMLEN (d) + 1) -#endif - -#include <unistd.h> -#include <stdlib.h> -#include <string.h> - -#if _LIBC -# ifndef mempcpy -# define mempcpy __mempcpy -# endif -#endif - -#include <limits.h> - -/* Work around a bug in Solaris 9 and 10: AT_FDCWD is positive. Its - value exceeds INT_MAX, so its use as an int doesn't conform to the - C standard, and GCC and Sun C complain in some cases. */ -#if 0 < AT_FDCWD && AT_FDCWD == 0xffd19553 -# undef AT_FDCWD -# define AT_FDCWD (-3041965) -#endif - -#ifdef ENAMETOOLONG -# define is_ENAMETOOLONG(x) ((x) == ENAMETOOLONG) -#else -# define is_ENAMETOOLONG(x) 0 -#endif - -#ifndef MAX -# define MAX(a, b) ((a) < (b) ? (b) : (a)) -#endif -#ifndef MIN -# define MIN(a, b) ((a) < (b) ? (a) : (b)) -#endif - -#ifndef PATH_MAX -# ifdef MAXPATHLEN -# define PATH_MAX MAXPATHLEN -# else -# define PATH_MAX 1024 -# endif -#endif - -#if D_INO_IN_DIRENT -# define MATCHING_INO(dp, ino) ((dp)->d_ino == (ino)) -#else -# define MATCHING_INO(dp, ino) true -#endif - -#if !_LIBC -# define __getcwd getcwd -# define __lstat lstat -# define __closedir closedir -# define __opendir opendir -# define __readdir readdir -#endif - -/* The results of opendir() in this file are not used with dirfd and fchdir, - therefore save some unnecessary recursion in fchdir.c. */ -#undef opendir -#undef closedir - -/* Get the name of the current working directory, and put it in SIZE - bytes of BUF. Returns NULL if the directory couldn't be determined or - SIZE was too small. If successful, returns BUF. In GNU, if BUF is - NULL, an array is allocated with `malloc'; the array is SIZE bytes long, - unless SIZE == 0, in which case it is as big as necessary. */ - -char * -__getcwd (char *buf, size_t size) -{ - /* Lengths of big file name components and entire file names, and a - deep level of file name nesting. These numbers are not upper - bounds; they are merely large values suitable for initial - allocations, designed to be large enough for most real-world - uses. */ - enum - { - BIG_FILE_NAME_COMPONENT_LENGTH = 255, - BIG_FILE_NAME_LENGTH = MIN (4095, PATH_MAX - 1), - DEEP_NESTING = 100 - }; - -#ifdef AT_FDCWD - int fd = AT_FDCWD; - bool fd_needs_closing = false; -#else - char dots[DEEP_NESTING * sizeof ".." + BIG_FILE_NAME_COMPONENT_LENGTH + 1]; - char *dotlist = dots; - size_t dotsize = sizeof dots; - size_t dotlen = 0; -#endif - DIR *dirstream = NULL; - dev_t rootdev, thisdev; - ino_t rootino, thisino; - char *dir; - register char *dirp; - struct stat st; - size_t allocated = size; - size_t used; - -#if HAVE_PARTLY_WORKING_GETCWD - /* The system getcwd works, except it sometimes fails when it - shouldn't, setting errno to ERANGE, ENAMETOOLONG, or ENOENT. If - AT_FDCWD is not defined, the algorithm below is O(N**2) and this - is much slower than the system getcwd (at least on GNU/Linux). - So trust the system getcwd's results unless they look - suspicious. - - Use the system getcwd even if we have openat support, since the - system getcwd works even when a parent is unreadable, while the - openat-based approach does not. */ - -# undef getcwd - dir = getcwd (buf, size); - if (dir || (errno != ERANGE && !is_ENAMETOOLONG (errno) && errno != ENOENT)) - return dir; -#endif - - if (size == 0) - { - if (buf != NULL) - { - __set_errno (EINVAL); - return NULL; - } - - allocated = BIG_FILE_NAME_LENGTH + 1; - } - - if (buf == NULL) - { - dir = malloc (allocated); - if (dir == NULL) - return NULL; - } - else - dir = buf; - - dirp = dir + allocated; - *--dirp = '\0'; - - if (__lstat (".", &st) < 0) - goto lose; - thisdev = st.st_dev; - thisino = st.st_ino; - - if (__lstat ("/", &st) < 0) - goto lose; - rootdev = st.st_dev; - rootino = st.st_ino; - - while (!(thisdev == rootdev && thisino == rootino)) - { - struct dirent *d; - dev_t dotdev; - ino_t dotino; - bool mount_point; - int parent_status; - size_t dirroom; - size_t namlen; - bool use_d_ino = true; - - /* Look at the parent directory. */ -#ifdef AT_FDCWD - fd = openat (fd, "..", O_RDONLY); - if (fd < 0) - goto lose; - fd_needs_closing = true; - parent_status = fstat (fd, &st); -#else - dotlist[dotlen++] = '.'; - dotlist[dotlen++] = '.'; - dotlist[dotlen] = '\0'; - parent_status = __lstat (dotlist, &st); -#endif - if (parent_status != 0) - goto lose; - - if (dirstream && __closedir (dirstream) != 0) - { - dirstream = NULL; - goto lose; - } - - /* Figure out if this directory is a mount point. */ - dotdev = st.st_dev; - dotino = st.st_ino; - mount_point = dotdev != thisdev; - - /* Search for the last directory. */ -#ifdef AT_FDCWD - dirstream = fdopendir (fd); - if (dirstream == NULL) - goto lose; - fd_needs_closing = false; -#else - dirstream = __opendir (dotlist); - if (dirstream == NULL) - goto lose; - dotlist[dotlen++] = '/'; -#endif - for (;;) - { - /* Clear errno to distinguish EOF from error if readdir returns - NULL. */ - __set_errno (0); - d = __readdir (dirstream); - - /* When we've iterated through all directory entries without finding - one with a matching d_ino, rewind the stream and consider each - name again, but this time, using lstat. This is necessary in a - chroot on at least one system (glibc-2.3.6 + linux 2.6.12), where - .., ../.., ../../.., etc. all had the same device number, yet the - d_ino values for entries in / did not match those obtained - via lstat. */ - if (d == NULL && errno == 0 && use_d_ino) - { - use_d_ino = false; - rewinddir (dirstream); - d = __readdir (dirstream); - } - - if (d == NULL) - { - if (errno == 0) - /* EOF on dirstream, which can mean e.g., that the current - directory has been removed. */ - __set_errno (ENOENT); - goto lose; - } - if (d->d_name[0] == '.' && - (d->d_name[1] == '\0' || - (d->d_name[1] == '.' && d->d_name[2] == '\0'))) - continue; - - if (use_d_ino) - { - bool match = (MATCHING_INO (d, thisino) || mount_point); - if (! match) - continue; - } - - { - int entry_status; -#ifdef AT_FDCWD - entry_status = fstatat (fd, d->d_name, &st, AT_SYMLINK_NOFOLLOW); -#else - /* Compute size needed for this file name, or for the file - name ".." in the same directory, whichever is larger. - Room for ".." might be needed the next time through - the outer loop. */ - size_t name_alloc = _D_ALLOC_NAMLEN (d); - size_t filesize = dotlen + MAX (sizeof "..", name_alloc); - - if (filesize < dotlen) - goto memory_exhausted; - - if (dotsize < filesize) - { - /* My, what a deep directory tree you have, Grandma. */ - size_t newsize = MAX (filesize, dotsize * 2); - size_t i; - if (newsize < dotsize) - goto memory_exhausted; - if (dotlist != dots) - free (dotlist); - dotlist = malloc (newsize); - if (dotlist == NULL) - goto lose; - dotsize = newsize; - - i = 0; - do - { - dotlist[i++] = '.'; - dotlist[i++] = '.'; - dotlist[i++] = '/'; - } - while (i < dotlen); - } - - memcpy (dotlist + dotlen, d->d_name, _D_ALLOC_NAMLEN (d)); - entry_status = __lstat (dotlist, &st); -#endif - /* We don't fail here if we cannot stat() a directory entry. - This can happen when (network) file systems fail. If this - entry is in fact the one we are looking for we will find - out soon as we reach the end of the directory without - having found anything. */ - if (entry_status == 0 && S_ISDIR (st.st_mode) - && st.st_dev == thisdev && st.st_ino == thisino) - break; - } - } - - dirroom = dirp - dir; - namlen = _D_EXACT_NAMLEN (d); - - if (dirroom <= namlen) - { - if (size != 0) - { - __set_errno (ERANGE); - goto lose; - } - else - { - char *tmp; - size_t oldsize = allocated; - - allocated += MAX (allocated, namlen); - if (allocated < oldsize - || ! (tmp = realloc (dir, allocated))) - goto memory_exhausted; - - /* Move current contents up to the end of the buffer. - This is guaranteed to be non-overlapping. */ - dirp = memcpy (tmp + allocated - (oldsize - dirroom), - tmp + dirroom, - oldsize - dirroom); - dir = tmp; - } - } - dirp -= namlen; - memcpy (dirp, d->d_name, namlen); - *--dirp = '/'; - - thisdev = dotdev; - thisino = dotino; - } - - if (dirstream && __closedir (dirstream) != 0) - { - dirstream = NULL; - goto lose; - } - - if (dirp == &dir[allocated - 1]) - *--dirp = '/'; - -#ifndef AT_FDCWD - if (dotlist != dots) - free (dotlist); -#endif - - used = dir + allocated - dirp; - memmove (dir, dirp, used); - - if (buf == NULL && size == 0) - /* Ensure that the buffer is only as large as necessary. */ - buf = realloc (dir, used); - - if (buf == NULL) - /* Either buf was NULL all along, or `realloc' failed but - we still have the original string. */ - buf = dir; - - return buf; - - memory_exhausted: - __set_errno (ENOMEM); - lose: - { - int save = errno; - if (dirstream) - __closedir (dirstream); -#ifdef AT_FDCWD - if (fd_needs_closing) - close (fd); -#else - if (dotlist != dots) - free (dotlist); -#endif - if (buf == NULL) - free (dir); - __set_errno (save); - } - return NULL; -} - -#ifdef weak_alias -weak_alias (__getcwd, getcwd) -#endif diff --git a/lib/getcwd.h b/lib/getcwd.h deleted file mode 100644 index 59606dd..0000000 --- a/lib/getcwd.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Get the working directory, compatibly with the GNU C Library. - - Copyright (C) 2004-2005 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Paul Eggert. */ - -/* Include the headers that might declare getcwd so that they will not - cause confusion if included after this file. */ - -#include <stdlib.h> -#include <unistd.h> - -/* If necessary, systematically rename identifiers so that they do not - collide with the system function. Renaming avoids problems with - some compilers and linkers. */ - -#ifdef __GETCWD_PREFIX -# undef getcwd -# define __GETCWD_CONCAT(x, y) x ## y -# define __GETCWD_XCONCAT(x, y) __GETCWD_CONCAT (x, y) -# define __GETCWD_ID(y) __GETCWD_XCONCAT (__GETCWD_PREFIX, y) -# define getcwd __GETCWD_ID (getcwd) -/* See the POSIX:2001 specification - <http://www.opengroup.org/susv3xsh/getcwd.html>. */ -char *getcwd (char *, size_t); -#endif diff --git a/lib/getopt.c b/lib/getopt.c deleted file mode 100644 index 3580ad8..0000000 --- a/lib/getopt.c +++ /dev/null @@ -1,1191 +0,0 @@ -/* Getopt for GNU. - NOTE: getopt is now part of the C library, so if you don't know what - "Keep this file name-space clean" means, talk to drepper@gnu.org - before changing it! - Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002,2003,2004,2006 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _LIBC -# include <config.h> -#endif - -#include "getopt.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#ifdef __VMS -# include <unixlib.h> -#endif - -#ifdef _LIBC -# include <libintl.h> -#else -# include "gettext.h" -# define _(msgid) gettext (msgid) -#endif - -#if defined _LIBC && defined USE_IN_LIBIO -# include <wchar.h> -#endif - -#ifndef attribute_hidden -# define attribute_hidden -#endif - -/* Unlike standard Unix `getopt', functions like `getopt_long' - let the user intersperse the options with the other arguments. - - As `getopt_long' works, it permutes the elements of ARGV so that, - when it is done, all the options precede everything else. Thus - all application programs are extended to handle flexible argument order. - - Using `getopt' or setting the environment variable POSIXLY_CORRECT - disables permutation. - Then the application's behavior is completely standard. - - GNU application programs can use a third alternative mode in which - they can distinguish the relative order of options and other arguments. */ - -#include "getopt_int.h" - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -char *optarg; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns -1, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -/* 1003.2 says this must be 1 before any call. */ -int optind = 1; - -/* Callers store zero here to inhibit the error message - for unrecognized options. */ - -int opterr = 1; - -/* Set to an option character which was unrecognized. - This must be initialized on some systems to avoid linking in the - system's own getopt implementation. */ - -int optopt = '?'; - -/* Keep a global copy of all internal members of getopt_data. */ - -static struct _getopt_data getopt_data; - - -#if defined HAVE_DECL_GETENV && !HAVE_DECL_GETENV -extern char *getenv (); -#endif - -#ifdef _LIBC -/* Stored original parameters. - XXX This is no good solution. We should rather copy the args so - that we can compare them later. But we must not use malloc(3). */ -extern int __libc_argc; -extern char **__libc_argv; - -/* Bash 2.0 gives us an environment variable containing flags - indicating ARGV elements that should not be considered arguments. */ - -# ifdef USE_NONOPTION_FLAGS -/* Defined in getopt_init.c */ -extern char *__getopt_nonoption_flags; -# endif - -# ifdef USE_NONOPTION_FLAGS -# define SWAP_FLAGS(ch1, ch2) \ - if (d->__nonoption_flags_len > 0) \ - { \ - char __tmp = __getopt_nonoption_flags[ch1]; \ - __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ - __getopt_nonoption_flags[ch2] = __tmp; \ - } -# else -# define SWAP_FLAGS(ch1, ch2) -# endif -#else /* !_LIBC */ -# define SWAP_FLAGS(ch1, ch2) -#endif /* _LIBC */ - -/* Exchange two adjacent subsequences of ARGV. - One subsequence is elements [first_nonopt,last_nonopt) - which contains all the non-options that have been skipped so far. - The other is elements [last_nonopt,optind), which contains all - the options processed since those non-options were skipped. - - `first_nonopt' and `last_nonopt' are relocated so that they describe - the new indices of the non-options in ARGV after they are moved. */ - -static void -exchange (char **argv, struct _getopt_data *d) -{ - int bottom = d->__first_nonopt; - int middle = d->__last_nonopt; - int top = d->optind; - char *tem; - - /* Exchange the shorter segment with the far end of the longer segment. - That puts the shorter segment into the right place. - It leaves the longer segment in the right place overall, - but it consists of two parts that need to be swapped next. */ - -#if defined _LIBC && defined USE_NONOPTION_FLAGS - /* First make sure the handling of the `__getopt_nonoption_flags' - string can work normally. Our top argument must be in the range - of the string. */ - if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len) - { - /* We must extend the array. The user plays games with us and - presents new arguments. */ - char *new_str = malloc (top + 1); - if (new_str == NULL) - d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0; - else - { - memset (__mempcpy (new_str, __getopt_nonoption_flags, - d->__nonoption_flags_max_len), - '\0', top + 1 - d->__nonoption_flags_max_len); - d->__nonoption_flags_max_len = top + 1; - __getopt_nonoption_flags = new_str; - } - } -#endif - - while (top > middle && middle > bottom) - { - if (top - middle > middle - bottom) - { - /* Bottom segment is the short one. */ - int len = middle - bottom; - register int i; - - /* Swap it with the top part of the top segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[top - (middle - bottom) + i]; - argv[top - (middle - bottom) + i] = tem; - SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); - } - /* Exclude the moved bottom segment from further swapping. */ - top -= len; - } - else - { - /* Top segment is the short one. */ - int len = top - middle; - register int i; - - /* Swap it with the bottom part of the bottom segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[middle + i]; - argv[middle + i] = tem; - SWAP_FLAGS (bottom + i, middle + i); - } - /* Exclude the moved top segment from further swapping. */ - bottom += len; - } - } - - /* Update records for the slots the non-options now occupy. */ - - d->__first_nonopt += (d->optind - d->__last_nonopt); - d->__last_nonopt = d->optind; -} - -/* Initialize the internal data when the first call is made. */ - -static const char * -_getopt_initialize (int argc, char **argv, const char *optstring, - int posixly_correct, struct _getopt_data *d) -{ - /* Start processing options with ARGV-element 1 (since ARGV-element 0 - is the program name); the sequence of previously skipped - non-option ARGV-elements is empty. */ - - d->__first_nonopt = d->__last_nonopt = d->optind; - - d->__nextchar = NULL; - - d->__posixly_correct = posixly_correct || !!getenv ("POSIXLY_CORRECT"); - - /* Determine how to handle the ordering of options and nonoptions. */ - - if (optstring[0] == '-') - { - d->__ordering = RETURN_IN_ORDER; - ++optstring; - } - else if (optstring[0] == '+') - { - d->__ordering = REQUIRE_ORDER; - ++optstring; - } - else if (d->__posixly_correct) - d->__ordering = REQUIRE_ORDER; - else - d->__ordering = PERMUTE; - -#if defined _LIBC && defined USE_NONOPTION_FLAGS - if (!d->__posixly_correct - && argc == __libc_argc && argv == __libc_argv) - { - if (d->__nonoption_flags_max_len == 0) - { - if (__getopt_nonoption_flags == NULL - || __getopt_nonoption_flags[0] == '\0') - d->__nonoption_flags_max_len = -1; - else - { - const char *orig_str = __getopt_nonoption_flags; - int len = d->__nonoption_flags_max_len = strlen (orig_str); - if (d->__nonoption_flags_max_len < argc) - d->__nonoption_flags_max_len = argc; - __getopt_nonoption_flags = - (char *) malloc (d->__nonoption_flags_max_len); - if (__getopt_nonoption_flags == NULL) - d->__nonoption_flags_max_len = -1; - else - memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), - '\0', d->__nonoption_flags_max_len - len); - } - } - d->__nonoption_flags_len = d->__nonoption_flags_max_len; - } - else - d->__nonoption_flags_len = 0; -#endif - - return optstring; -} - -/* Scan elements of ARGV (whose length is ARGC) for option characters - given in OPTSTRING. - - If an element of ARGV starts with '-', and is not exactly "-" or "--", - then it is an option element. The characters of this element - (aside from the initial '-') are option characters. If `getopt' - is called repeatedly, it returns successively each of the option characters - from each of the option elements. - - If `getopt' finds another option character, it returns that character, - updating `optind' and `nextchar' so that the next call to `getopt' can - resume the scan with the following option character or ARGV-element. - - If there are no more option characters, `getopt' returns -1. - Then `optind' is the index in ARGV of the first ARGV-element - that is not an option. (The ARGV-elements have been permuted - so that those that are not options now come last.) - - OPTSTRING is a string containing the legitimate option characters. - If an option character is seen that is not listed in OPTSTRING, - return '?' after printing an error message. If you set `opterr' to - zero, the error message is suppressed but we still return '?'. - - If a char in OPTSTRING is followed by a colon, that means it wants an arg, - so the following text in the same ARGV-element, or the text of the following - ARGV-element, is returned in `optarg'. Two colons mean an option that - wants an optional arg; if there is text in the current ARGV-element, - it is returned in `optarg', otherwise `optarg' is set to zero. - - If OPTSTRING starts with `-' or `+', it requests different methods of - handling the non-option ARGV-elements. - See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. - - Long-named options begin with `--' instead of `-'. - Their names may be abbreviated as long as the abbreviation is unique - or is an exact match for some defined option. If they have an - argument, it follows the option name in the same ARGV-element, separated - from the option name by a `=', or else the in next ARGV-element. - When `getopt' finds a long-named option, it returns 0 if that option's - `flag' field is nonzero, the value of the option's `val' field - if the `flag' field is zero. - - LONGOPTS is a vector of `struct option' terminated by an - element containing a name which is zero. - - LONGIND returns the index in LONGOPT of the long-named option found. - It is only valid when a long-named option has been found by the most - recent call. - - If LONG_ONLY is nonzero, '-' as well as '--' can introduce - long-named options. - - If POSIXLY_CORRECT is nonzero, behave as if the POSIXLY_CORRECT - environment variable were set. */ - -int -_getopt_internal_r (int argc, char **argv, const char *optstring, - const struct option *longopts, int *longind, - int long_only, int posixly_correct, struct _getopt_data *d) -{ - int print_errors = d->opterr; - if (optstring[0] == ':') - print_errors = 0; - - if (argc < 1) - return -1; - - d->optarg = NULL; - - if (d->optind == 0 || !d->__initialized) - { - if (d->optind == 0) - d->optind = 1; /* Don't scan ARGV[0], the program name. */ - optstring = _getopt_initialize (argc, argv, optstring, - posixly_correct, d); - d->__initialized = 1; - } - - /* Test whether ARGV[optind] points to a non-option argument. - Either it does not have option syntax, or there is an environment flag - from the shell indicating it is not an option. The later information - is only used when the used in the GNU libc. */ -#if defined _LIBC && defined USE_NONOPTION_FLAGS -# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \ - || (d->optind < d->__nonoption_flags_len \ - && __getopt_nonoption_flags[d->optind] == '1')) -#else -# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0') -#endif - - if (d->__nextchar == NULL || *d->__nextchar == '\0') - { - /* Advance to the next ARGV-element. */ - - /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been - moved back by the user (who may also have changed the arguments). */ - if (d->__last_nonopt > d->optind) - d->__last_nonopt = d->optind; - if (d->__first_nonopt > d->optind) - d->__first_nonopt = d->optind; - - if (d->__ordering == PERMUTE) - { - /* If we have just processed some options following some non-options, - exchange them so that the options come first. */ - - if (d->__first_nonopt != d->__last_nonopt - && d->__last_nonopt != d->optind) - exchange ((char **) argv, d); - else if (d->__last_nonopt != d->optind) - d->__first_nonopt = d->optind; - - /* Skip any additional non-options - and extend the range of non-options previously skipped. */ - - while (d->optind < argc && NONOPTION_P) - d->optind++; - d->__last_nonopt = d->optind; - } - - /* The special ARGV-element `--' means premature end of options. - Skip it like a null option, - then exchange with previous non-options as if it were an option, - then skip everything else like a non-option. */ - - if (d->optind != argc && !strcmp (argv[d->optind], "--")) - { - d->optind++; - - if (d->__first_nonopt != d->__last_nonopt - && d->__last_nonopt != d->optind) - exchange ((char **) argv, d); - else if (d->__first_nonopt == d->__last_nonopt) - d->__first_nonopt = d->optind; - d->__last_nonopt = argc; - - d->optind = argc; - } - - /* If we have done all the ARGV-elements, stop the scan - and back over any non-options that we skipped and permuted. */ - - if (d->optind == argc) - { - /* Set the next-arg-index to point at the non-options - that we previously skipped, so the caller will digest them. */ - if (d->__first_nonopt != d->__last_nonopt) - d->optind = d->__first_nonopt; - return -1; - } - - /* If we have come to a non-option and did not permute it, - either stop the scan or describe it to the caller and pass it by. */ - - if (NONOPTION_P) - { - if (d->__ordering == REQUIRE_ORDER) - return -1; - d->optarg = argv[d->optind++]; - return 1; - } - - /* We have found another option-ARGV-element. - Skip the initial punctuation. */ - - d->__nextchar = (argv[d->optind] + 1 - + (longopts != NULL && argv[d->optind][1] == '-')); - } - - /* Decode the current option-ARGV-element. */ - - /* Check whether the ARGV-element is a long option. - - If long_only and the ARGV-element has the form "-f", where f is - a valid short option, don't consider it an abbreviated form of - a long option that starts with f. Otherwise there would be no - way to give the -f short option. - - On the other hand, if there's a long option "fubar" and - the ARGV-element is "-fu", do consider that an abbreviation of - the long option, just like "--fu", and not "-f" with arg "u". - - This distinction seems to be the most useful approach. */ - - if (longopts != NULL - && (argv[d->optind][1] == '-' - || (long_only && (argv[d->optind][2] - || !strchr (optstring, argv[d->optind][1]))))) - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound = -1; - int option_index; - - for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++) - /* Do nothing. */ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar)) - { - if ((unsigned int) (nameend - d->__nextchar) - == (unsigned int) strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else if (long_only - || pfound->has_arg != p->has_arg - || pfound->flag != p->flag - || pfound->val != p->val) - /* Second or later nonexact match found. */ - ambig = 1; - } - - if (ambig && !exact) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"), - argv[0], argv[d->optind]) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, _("%s: option `%s' is ambiguous\n"), - argv[0], argv[d->optind]); -#endif - } - d->__nextchar += strlen (d->__nextchar); - d->optind++; - d->optopt = 0; - return '?'; - } - - if (pfound != NULL) - { - option_index = indfound; - d->optind++; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. */ - if (pfound->has_arg) - d->optarg = nameend + 1; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - int n; -#endif - - if (argv[d->optind - 1][1] == '-') - { - /* --option */ -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("\ -%s: option `--%s' doesn't allow an argument\n"), - argv[0], pfound->name); -#else - fprintf (stderr, _("\ -%s: option `--%s' doesn't allow an argument\n"), - argv[0], pfound->name); -#endif - } - else - { - /* +option or -option */ -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("\ -%s: option `%c%s' doesn't allow an argument\n"), - argv[0], argv[d->optind - 1][0], - pfound->name); -#else - fprintf (stderr, _("\ -%s: option `%c%s' doesn't allow an argument\n"), - argv[0], argv[d->optind - 1][0], - pfound->name); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (n >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 - |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#endif - } - - d->__nextchar += strlen (d->__nextchar); - - d->optopt = pfound->val; - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (d->optind < argc) - d->optarg = argv[d->optind++]; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("\ -%s: option `%s' requires an argument\n"), - argv[0], argv[d->optind - 1]) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 - |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[d->optind - 1]); -#endif - } - d->__nextchar += strlen (d->__nextchar); - d->optopt = pfound->val; - return optstring[0] == ':' ? ':' : '?'; - } - } - d->__nextchar += strlen (d->__nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - - /* Can't find it as a long option. If this is not getopt_long_only, - or the option starts with '--' or is not a valid short - option, then it's an error. - Otherwise interpret it as a short option. */ - if (!long_only || argv[d->optind][1] == '-' - || strchr (optstring, *d->__nextchar) == NULL) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - int n; -#endif - - if (argv[d->optind][1] == '-') - { - /* --option */ -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), - argv[0], d->__nextchar); -#else - fprintf (stderr, _("%s: unrecognized option `--%s'\n"), - argv[0], d->__nextchar); -#endif - } - else - { - /* +option or -option */ -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), - argv[0], argv[d->optind][0], d->__nextchar); -#else - fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), - argv[0], argv[d->optind][0], d->__nextchar); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (n >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#endif - } - d->__nextchar = (char *) ""; - d->optind++; - d->optopt = 0; - return '?'; - } - } - - /* Look at and handle the next short option-character. */ - - { - char c = *d->__nextchar++; - char *temp = strchr (optstring, c); - - /* Increment `optind' when we start to process its last character. */ - if (*d->__nextchar == '\0') - ++d->optind; - - if (temp == NULL || c == ':') - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - int n; -#endif - - if (d->__posixly_correct) - { - /* 1003.2 specifies the format of this message. */ -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("%s: illegal option -- %c\n"), - argv[0], c); -#else - fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); -#endif - } - else - { -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("%s: invalid option -- %c\n"), - argv[0], c); -#else - fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (n >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#endif - } - d->optopt = c; - return '?'; - } - /* Convenience. Treat POSIX -W foo same as long option --foo */ - if (temp[0] == 'W' && temp[1] == ';') - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound = 0; - int option_index; - - /* This is an option that requires an argument. */ - if (*d->__nextchar != '\0') - { - d->optarg = d->__nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - d->optind++; - } - else if (d->optind == argc) - { - if (print_errors) - { - /* 1003.2 specifies the format of this message. */ -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, - _("%s: option requires an argument -- %c\n"), - argv[0], c) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, _("%s: option requires an argument -- %c\n"), - argv[0], c); -#endif - } - d->optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - return c; - } - else - /* We already incremented `d->optind' once; - increment it again when taking next ARGV-elt as argument. */ - d->optarg = argv[d->optind++]; - - /* optarg is now the argument, see if it's in the - table of longopts. */ - - for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '='; - nameend++) - /* Do nothing. */ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar)) - { - if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else - /* Second or later nonexact match found. */ - ambig = 1; - } - if (ambig && !exact) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), - argv[0], argv[d->optind]) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), - argv[0], argv[d->optind]); -#endif - } - d->__nextchar += strlen (d->__nextchar); - d->optind++; - return '?'; - } - if (pfound != NULL) - { - option_index = indfound; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. */ - if (pfound->has_arg) - d->optarg = nameend + 1; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("\ -%s: option `-W %s' doesn't allow an argument\n"), - argv[0], pfound->name) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 - |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, _("\ -%s: option `-W %s' doesn't allow an argument\n"), - argv[0], pfound->name); -#endif - } - - d->__nextchar += strlen (d->__nextchar); - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (d->optind < argc) - d->optarg = argv[d->optind++]; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("\ -%s: option `%s' requires an argument\n"), - argv[0], argv[d->optind - 1]) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 - |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[d->optind - 1]); -#endif - } - d->__nextchar += strlen (d->__nextchar); - return optstring[0] == ':' ? ':' : '?'; - } - } - d->__nextchar += strlen (d->__nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - d->__nextchar = NULL; - return 'W'; /* Let the application handle it. */ - } - if (temp[1] == ':') - { - if (temp[2] == ':') - { - /* This is an option that accepts an argument optionally. */ - if (*d->__nextchar != '\0') - { - d->optarg = d->__nextchar; - d->optind++; - } - else - d->optarg = NULL; - d->__nextchar = NULL; - } - else - { - /* This is an option that requires an argument. */ - if (*d->__nextchar != '\0') - { - d->optarg = d->__nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - d->optind++; - } - else if (d->optind == argc) - { - if (print_errors) - { - /* 1003.2 specifies the format of this message. */ -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("\ -%s: option requires an argument -- %c\n"), - argv[0], c) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, - _("%s: option requires an argument -- %c\n"), - argv[0], c); -#endif - } - d->optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. */ - d->optarg = argv[d->optind++]; - d->__nextchar = NULL; - } - } - return c; - } -} - -int -_getopt_internal (int argc, char **argv, const char *optstring, - const struct option *longopts, int *longind, - int long_only, int posixly_correct) -{ - int result; - - getopt_data.optind = optind; - getopt_data.opterr = opterr; - - result = _getopt_internal_r (argc, argv, optstring, longopts, longind, - long_only, posixly_correct, &getopt_data); - - optind = getopt_data.optind; - optarg = getopt_data.optarg; - optopt = getopt_data.optopt; - - return result; -} - -/* glibc gets a LSB-compliant getopt. - Standalone applications get a POSIX-compliant getopt. */ -#if _LIBC -enum { POSIXLY_CORRECT = 0 }; -#else -enum { POSIXLY_CORRECT = 1 }; -#endif - -int -getopt (int argc, char *const *argv, const char *optstring) -{ - return _getopt_internal (argc, (char **) argv, optstring, NULL, NULL, 0, - POSIXLY_CORRECT); -} - - -#ifdef TEST - -/* Compile with -DTEST to make an executable for use in testing - the above definition of `getopt'. */ - -int -main (int argc, char **argv) -{ - int c; - int digit_optind = 0; - - while (1) - { - int this_option_optind = optind ? optind : 1; - - c = getopt (argc, argv, "abc:d:0123456789"); - if (c == -1) - break; - - switch (c) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value `%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } - } - - if (optind < argc) - { - printf ("non-option ARGV-elements: "); - while (optind < argc) - printf ("%s ", argv[optind++]); - printf ("\n"); - } - - exit (0); -} - -#endif /* TEST */ diff --git a/lib/getopt1.c b/lib/getopt1.c deleted file mode 100644 index cc0746e..0000000 --- a/lib/getopt1.c +++ /dev/null @@ -1,171 +0,0 @@ -/* getopt_long and getopt_long_only entry points for GNU getopt. - Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2004,2006 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifdef _LIBC -# include <getopt.h> -#else -# include <config.h> -# include "getopt.h" -#endif -#include "getopt_int.h" - -#include <stdio.h> - -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. */ -#ifdef __GNU_LIBRARY__ -#include <stdlib.h> -#endif - -#ifndef NULL -#define NULL 0 -#endif - -int -getopt_long (int argc, char *__getopt_argv_const *argv, const char *options, - const struct option *long_options, int *opt_index) -{ - return _getopt_internal (argc, (char **) argv, options, long_options, - opt_index, 0, 0); -} - -int -_getopt_long_r (int argc, char **argv, const char *options, - const struct option *long_options, int *opt_index, - struct _getopt_data *d) -{ - return _getopt_internal_r (argc, argv, options, long_options, opt_index, - 0, 0, d); -} - -/* Like getopt_long, but '-' as well as '--' can indicate a long option. - If an option that starts with '-' (not '--') doesn't match a long option, - but does match a short option, it is parsed as a short option - instead. */ - -int -getopt_long_only (int argc, char *__getopt_argv_const *argv, - const char *options, - const struct option *long_options, int *opt_index) -{ - return _getopt_internal (argc, (char **) argv, options, long_options, - opt_index, 1, 0); -} - -int -_getopt_long_only_r (int argc, char **argv, const char *options, - const struct option *long_options, int *opt_index, - struct _getopt_data *d) -{ - return _getopt_internal_r (argc, argv, options, long_options, opt_index, - 1, 0, d); -} - - -#ifdef TEST - -#include <stdio.h> - -int -main (int argc, char **argv) -{ - int c; - int digit_optind = 0; - - while (1) - { - int this_option_optind = optind ? optind : 1; - int option_index = 0; - static struct option long_options[] = - { - {"add", 1, 0, 0}, - {"append", 0, 0, 0}, - {"delete", 1, 0, 0}, - {"verbose", 0, 0, 0}, - {"create", 0, 0, 0}, - {"file", 1, 0, 0}, - {0, 0, 0, 0} - }; - - c = getopt_long (argc, argv, "abc:d:0123456789", - long_options, &option_index); - if (c == -1) - break; - - switch (c) - { - case 0: - printf ("option %s", long_options[option_index].name); - if (optarg) - printf (" with arg %s", optarg); - printf ("\n"); - break; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value `%s'\n", optarg); - break; - - case 'd': - printf ("option d with value `%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } - } - - if (optind < argc) - { - printf ("non-option ARGV-elements: "); - while (optind < argc) - printf ("%s ", argv[optind++]); - printf ("\n"); - } - - exit (0); -} - -#endif /* TEST */ diff --git a/lib/getopt_.h b/lib/getopt_.h deleted file mode 100644 index 27fce3d..0000000 --- a/lib/getopt_.h +++ /dev/null @@ -1,226 +0,0 @@ -/* Declarations for getopt. - Copyright (C) 1989-1994,1996-1999,2001,2003,2004,2005,2006 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _GETOPT_H - -#ifndef __need_getopt -# define _GETOPT_H 1 -#endif - -/* Standalone applications should #define __GETOPT_PREFIX to an - identifier that prefixes the external functions and variables - defined in this header. When this happens, include the - headers that might declare getopt so that they will not cause - confusion if included after this file. Then systematically rename - identifiers so that they do not collide with the system functions - and variables. Renaming avoids problems with some compilers and - linkers. */ -#if defined __GETOPT_PREFIX && !defined __need_getopt -# include <stdlib.h> -# include <stdio.h> -# include <unistd.h> -# undef __need_getopt -# undef getopt -# undef getopt_long -# undef getopt_long_only -# undef optarg -# undef opterr -# undef optind -# undef optopt -# define __GETOPT_CONCAT(x, y) x ## y -# define __GETOPT_XCONCAT(x, y) __GETOPT_CONCAT (x, y) -# define __GETOPT_ID(y) __GETOPT_XCONCAT (__GETOPT_PREFIX, y) -# define getopt __GETOPT_ID (getopt) -# define getopt_long __GETOPT_ID (getopt_long) -# define getopt_long_only __GETOPT_ID (getopt_long_only) -# define optarg __GETOPT_ID (optarg) -# define opterr __GETOPT_ID (opterr) -# define optind __GETOPT_ID (optind) -# define optopt __GETOPT_ID (optopt) -#endif - -/* Standalone applications get correct prototypes for getopt_long and - getopt_long_only; they declare "char **argv". libc uses prototypes - with "char *const *argv" that are incorrect because getopt_long and - getopt_long_only can permute argv; this is required for backward - compatibility (e.g., for LSB 2.0.1). - - This used to be `#if defined __GETOPT_PREFIX && !defined __need_getopt', - but it caused redefinition warnings if both unistd.h and getopt.h were - included, since unistd.h includes getopt.h having previously defined - __need_getopt. - - The only place where __getopt_argv_const is used is in definitions - of getopt_long and getopt_long_only below, but these are visible - only if __need_getopt is not defined, so it is quite safe to rewrite - the conditional as follows: -*/ -#if !defined __need_getopt -# if defined __GETOPT_PREFIX -# define __getopt_argv_const /* empty */ -# else -# define __getopt_argv_const const -# endif -#endif - -/* If __GNU_LIBRARY__ is not already defined, either we are being used - standalone, or this is the first header included in the source file. - If we are being used with glibc, we need to include <features.h>, but - that does not exist if we are standalone. So: if __GNU_LIBRARY__ is - not defined, include <ctype.h>, which will pull in <features.h> for us - if it's from glibc. (Why ctype.h? It's guaranteed to exist and it - doesn't flood the namespace with stuff the way some other headers do.) */ -#if !defined __GNU_LIBRARY__ -# include <ctype.h> -#endif - -#ifndef __THROW -# ifndef __GNUC_PREREQ -# define __GNUC_PREREQ(maj, min) (0) -# endif -# if defined __cplusplus && __GNUC_PREREQ (2,8) -# define __THROW throw () -# else -# define __THROW -# endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -extern char *optarg; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns -1, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -extern int optind; - -/* Callers store zero here to inhibit the error message `getopt' prints - for unrecognized options. */ - -extern int opterr; - -/* Set to an option character which was unrecognized. */ - -extern int optopt; - -#ifndef __need_getopt -/* Describe the long-named options requested by the application. - The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector - of `struct option' terminated by an element containing a name which is - zero. - - The field `has_arg' is: - no_argument (or 0) if the option does not take an argument, - required_argument (or 1) if the option requires an argument, - optional_argument (or 2) if the option takes an optional argument. - - If the field `flag' is not NULL, it points to a variable that is set - to the value given in the field `val' when the option is found, but - left unchanged if the option is not found. - - To have a long-named option do something other than set an `int' to - a compiled-in constant, such as set a value from `optarg', set the - option's `flag' field to zero and its `val' field to a nonzero - value (the equivalent single-letter option character, if there is - one). For long options that have a zero `flag' field, `getopt' - returns the contents of the `val' field. */ - -struct option -{ - const char *name; - /* has_arg can't be an enum because some compilers complain about - type mismatches in all the code that assumes it is an int. */ - int has_arg; - int *flag; - int val; -}; - -/* Names for the values of the `has_arg' field of `struct option'. */ - -# define no_argument 0 -# define required_argument 1 -# define optional_argument 2 -#endif /* need getopt */ - - -/* Get definitions and prototypes for functions to process the - arguments in ARGV (ARGC of them, minus the program name) for - options given in OPTS. - - Return the option character from OPTS just read. Return -1 when - there are no more options. For unrecognized options, or options - missing arguments, `optopt' is set to the option letter, and '?' is - returned. - - The OPTS string is a list of characters which are recognized option - letters, optionally followed by colons, specifying that that letter - takes an argument, to be placed in `optarg'. - - If a letter in OPTS is followed by two colons, its argument is - optional. This behavior is specific to the GNU `getopt'. - - The argument `--' causes premature termination of argument - scanning, explicitly telling `getopt' that there are no more - options. - - If OPTS begins with `-', then non-option arguments are treated as - arguments to the option '\1'. This behavior is specific to the GNU - `getopt'. If OPTS begins with `+', or POSIXLY_CORRECT is set in - the environment, then do not permute arguments. */ - -extern int getopt (int ___argc, char *const *___argv, const char *__shortopts) - __THROW; - -#ifndef __need_getopt -extern int getopt_long (int ___argc, char *__getopt_argv_const *___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind) - __THROW; -extern int getopt_long_only (int ___argc, char *__getopt_argv_const *___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind) - __THROW; - -#endif - -#ifdef __cplusplus -} -#endif - -/* Make sure we later can get all the definitions and declarations. */ -#undef __need_getopt - -#endif /* getopt.h */ diff --git a/lib/getopt_int.h b/lib/getopt_int.h deleted file mode 100644 index 401579f..0000000 --- a/lib/getopt_int.h +++ /dev/null @@ -1,131 +0,0 @@ -/* Internal declarations for getopt. - Copyright (C) 1989-1994,1996-1999,2001,2003,2004 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _GETOPT_INT_H -#define _GETOPT_INT_H 1 - -extern int _getopt_internal (int ___argc, char **___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind, - int __long_only, int __posixly_correct); - - -/* Reentrant versions which can handle parsing multiple argument - vectors at the same time. */ - -/* Data type for reentrant functions. */ -struct _getopt_data -{ - /* These have exactly the same meaning as the corresponding global - variables, except that they are used for the reentrant - versions of getopt. */ - int optind; - int opterr; - int optopt; - char *optarg; - - /* Internal members. */ - - /* True if the internal members have been initialized. */ - int __initialized; - - /* The next char to be scanned in the option-element - in which the last option character we returned was found. - This allows us to pick up the scan where we left off. - - If this is zero, or a null string, it means resume the scan - by advancing to the next ARGV-element. */ - char *__nextchar; - - /* Describe how to deal with options that follow non-option ARGV-elements. - - If the caller did not specify anything, - the default is REQUIRE_ORDER if the environment variable - POSIXLY_CORRECT is defined, PERMUTE otherwise. - - REQUIRE_ORDER means don't recognize them as options; - stop option processing when the first non-option is seen. - This is what Unix does. - This mode of operation is selected by either setting the environment - variable POSIXLY_CORRECT, or using `+' as the first character - of the list of option characters, or by calling getopt. - - PERMUTE is the default. We permute the contents of ARGV as we - scan, so that eventually all the non-options are at the end. - This allows options to be given in any order, even with programs - that were not written to expect this. - - RETURN_IN_ORDER is an option available to programs that were - written to expect options and other ARGV-elements in any order - and that care about the ordering of the two. We describe each - non-option ARGV-element as if it were the argument of an option - with character code 1. Using `-' as the first character of the - list of option characters selects this mode of operation. - - The special argument `--' forces an end of option-scanning regardless - of the value of `ordering'. In the case of RETURN_IN_ORDER, only - `--' can cause `getopt' to return -1 with `optind' != ARGC. */ - - enum - { - REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER - } __ordering; - - /* If the POSIXLY_CORRECT environment variable is set - or getopt was called. */ - int __posixly_correct; - - - /* Handle permutation of arguments. */ - - /* Describe the part of ARGV that contains non-options that have - been skipped. `first_nonopt' is the index in ARGV of the first - of them; `last_nonopt' is the index after the last of them. */ - - int __first_nonopt; - int __last_nonopt; - -#if defined _LIBC && defined USE_NONOPTION_FLAGS - int __nonoption_flags_max_len; - int __nonoption_flags_len; -# endif -}; - -/* The initializer is necessary to set OPTIND and OPTERR to their - default values and to clear the initialization flag. */ -#define _GETOPT_DATA_INITIALIZER { 1, 1 } - -extern int _getopt_internal_r (int ___argc, char **___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind, - int __long_only, int __posixly_correct, - struct _getopt_data *__data); - -extern int _getopt_long_r (int ___argc, char **___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind, - struct _getopt_data *__data); - -extern int _getopt_long_only_r (int ___argc, char **___argv, - const char *__shortopts, - const struct option *__longopts, - int *__longind, - struct _getopt_data *__data); - -#endif /* getopt_int.h */ diff --git a/lib/gettext.h b/lib/gettext.h deleted file mode 100644 index 9d76ec9..0000000 --- a/lib/gettext.h +++ /dev/null @@ -1,270 +0,0 @@ -/* Convenience header for conditional use of GNU <libintl.h>. - Copyright (C) 1995-1998, 2000-2002, 2004-2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _LIBGETTEXT_H -#define _LIBGETTEXT_H 1 - -/* NLS can be disabled through the configure --disable-nls option. */ -#if ENABLE_NLS - -/* Get declarations of GNU message catalog functions. */ -# include <libintl.h> - -/* You can set the DEFAULT_TEXT_DOMAIN macro to specify the domain used by - the gettext() and ngettext() macros. This is an alternative to calling - textdomain(), and is useful for libraries. */ -# ifdef DEFAULT_TEXT_DOMAIN -# undef gettext -# define gettext(Msgid) \ - dgettext (DEFAULT_TEXT_DOMAIN, Msgid) -# undef ngettext -# define ngettext(Msgid1, Msgid2, N) \ - dngettext (DEFAULT_TEXT_DOMAIN, Msgid1, Msgid2, N) -# endif - -#else - -/* Solaris /usr/include/locale.h includes /usr/include/libintl.h, which - chokes if dcgettext is defined as a macro. So include it now, to make - later inclusions of <locale.h> a NOP. We don't include <libintl.h> - as well because people using "gettext.h" will not include <libintl.h>, - and also including <libintl.h> would fail on SunOS 4, whereas <locale.h> - is OK. */ -#if defined(__sun) -# include <locale.h> -#endif - -/* Many header files from the libstdc++ coming with g++ 3.3 or newer include - <libintl.h>, which chokes if dcgettext is defined as a macro. So include - it now, to make later inclusions of <libintl.h> a NOP. */ -#if defined(__cplusplus) && defined(__GNUG__) && (__GNUC__ >= 3) -# include <cstdlib> -# if (__GLIBC__ >= 2) || _GLIBCXX_HAVE_LIBINTL_H -# include <libintl.h> -# endif -#endif - -/* Disabled NLS. - The casts to 'const char *' serve the purpose of producing warnings - for invalid uses of the value returned from these functions. - On pre-ANSI systems without 'const', the config.h file is supposed to - contain "#define const". */ -# define gettext(Msgid) ((const char *) (Msgid)) -# define dgettext(Domainname, Msgid) ((void) (Domainname), gettext (Msgid)) -# define dcgettext(Domainname, Msgid, Category) \ - ((void) (Category), dgettext (Domainname, Msgid)) -# define ngettext(Msgid1, Msgid2, N) \ - ((N) == 1 \ - ? ((void) (Msgid2), (const char *) (Msgid1)) \ - : ((void) (Msgid1), (const char *) (Msgid2))) -# define dngettext(Domainname, Msgid1, Msgid2, N) \ - ((void) (Domainname), ngettext (Msgid1, Msgid2, N)) -# define dcngettext(Domainname, Msgid1, Msgid2, N, Category) \ - ((void) (Category), dngettext(Domainname, Msgid1, Msgid2, N)) -# define textdomain(Domainname) ((const char *) (Domainname)) -# define bindtextdomain(Domainname, Dirname) \ - ((void) (Domainname), (const char *) (Dirname)) -# define bind_textdomain_codeset(Domainname, Codeset) \ - ((void) (Domainname), (const char *) (Codeset)) - -#endif - -/* A pseudo function call that serves as a marker for the automated - extraction of messages, but does not call gettext(). The run-time - translation is done at a different place in the code. - The argument, String, should be a literal string. Concatenated strings - and other string expressions won't work. - The macro's expansion is not parenthesized, so that it is suitable as - initializer for static 'char[]' or 'const char[]' variables. */ -#define gettext_noop(String) String - -/* The separator between msgctxt and msgid in a .mo file. */ -#define GETTEXT_CONTEXT_GLUE "\004" - -/* Pseudo function calls, taking a MSGCTXT and a MSGID instead of just a - MSGID. MSGCTXT and MSGID must be string literals. MSGCTXT should be - short and rarely need to change. - The letter 'p' stands for 'particular' or 'special'. */ -#ifdef DEFAULT_TEXT_DOMAIN -# define pgettext(Msgctxt, Msgid) \ - pgettext_aux (DEFAULT_TEXT_DOMAIN, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES) -#else -# define pgettext(Msgctxt, Msgid) \ - pgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES) -#endif -#define dpgettext(Domainname, Msgctxt, Msgid) \ - pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES) -#define dcpgettext(Domainname, Msgctxt, Msgid, Category) \ - pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, Category) -#ifdef DEFAULT_TEXT_DOMAIN -# define npgettext(Msgctxt, Msgid, MsgidPlural, N) \ - npgettext_aux (DEFAULT_TEXT_DOMAIN, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES) -#else -# define npgettext(Msgctxt, Msgid, MsgidPlural, N) \ - npgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES) -#endif -#define dnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N) \ - npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES) -#define dcnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N, Category) \ - npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, Category) - -#ifdef __GNUC__ -__inline -#else -#ifdef __cplusplus -inline -#endif -#endif -static const char * -pgettext_aux (const char *domain, - const char *msg_ctxt_id, const char *msgid, - int category) -{ - const char *translation = dcgettext (domain, msg_ctxt_id, category); - if (translation == msg_ctxt_id) - return msgid; - else - return translation; -} - -#ifdef __GNUC__ -__inline -#else -#ifdef __cplusplus -inline -#endif -#endif -static const char * -npgettext_aux (const char *domain, - const char *msg_ctxt_id, const char *msgid, - const char *msgid_plural, unsigned long int n, - int category) -{ - const char *translation = - dcngettext (domain, msg_ctxt_id, msgid_plural, n, category); - if (translation == msg_ctxt_id || translation == msgid_plural) - return (n == 1 ? msgid : msgid_plural); - else - return translation; -} - -/* The same thing extended for non-constant arguments. Here MSGCTXT and MSGID - can be arbitrary expressions. But for string literals these macros are - less efficient than those above. */ - -#include <string.h> - -#define _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS \ - (((__GNUC__ >= 3 || __GNUG__ >= 2) && !__STRICT_ANSI__) \ - /* || __STDC_VERSION__ >= 199901L */ ) - -#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS -#include <stdlib.h> -#endif - -#define pgettext_expr(Msgctxt, Msgid) \ - dcpgettext_expr (NULL, Msgctxt, Msgid, LC_MESSAGES) -#define dpgettext_expr(Domainname, Msgctxt, Msgid) \ - dcpgettext_expr (Domainname, Msgctxt, Msgid, LC_MESSAGES) - -#ifdef __GNUC__ -__inline -#else -#ifdef __cplusplus -inline -#endif -#endif -static const char * -dcpgettext_expr (const char *domain, - const char *msgctxt, const char *msgid, - int category) -{ - size_t msgctxt_len = strlen (msgctxt) + 1; - size_t msgid_len = strlen (msgid) + 1; - const char *translation; -#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS - char msg_ctxt_id[msgctxt_len + msgid_len]; -#else - char buf[1024]; - char *msg_ctxt_id = - (msgctxt_len + msgid_len <= sizeof (buf) - ? buf - : (char *) malloc (msgctxt_len + msgid_len)); - if (msg_ctxt_id != NULL) -#endif - { - memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1); - msg_ctxt_id[msgctxt_len - 1] = '\004'; - memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len); - translation = dcgettext (domain, msg_ctxt_id, category); -#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS - if (msg_ctxt_id != buf) - free (msg_ctxt_id); -#endif - if (translation != msg_ctxt_id) - return translation; - } - return msgid; -} - -#define npgettext_expr(Msgctxt, Msgid, MsgidPlural, N) \ - dcnpgettext_expr (NULL, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES) -#define dnpgettext_expr(Domainname, Msgctxt, Msgid, MsgidPlural, N) \ - dcnpgettext_expr (Domainname, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES) - -#ifdef __GNUC__ -__inline -#else -#ifdef __cplusplus -inline -#endif -#endif -static const char * -dcnpgettext_expr (const char *domain, - const char *msgctxt, const char *msgid, - const char *msgid_plural, unsigned long int n, - int category) -{ - size_t msgctxt_len = strlen (msgctxt) + 1; - size_t msgid_len = strlen (msgid) + 1; - const char *translation; -#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS - char msg_ctxt_id[msgctxt_len + msgid_len]; -#else - char buf[1024]; - char *msg_ctxt_id = - (msgctxt_len + msgid_len <= sizeof (buf) - ? buf - : (char *) malloc (msgctxt_len + msgid_len)); - if (msg_ctxt_id != NULL) -#endif - { - memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1); - msg_ctxt_id[msgctxt_len - 1] = '\004'; - memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len); - translation = dcngettext (domain, msg_ctxt_id, msgid_plural, n, category); -#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS - if (msg_ctxt_id != buf) - free (msg_ctxt_id); -#endif - if (!(translation == msg_ctxt_id || translation == msgid_plural)) - return translation; - } - return (n == 1 ? msgid : msgid_plural); -} - -#endif /* _LIBGETTEXT_H */ diff --git a/lib/imaxtostr.c b/lib/imaxtostr.c deleted file mode 100644 index 5e87ad5..0000000 --- a/lib/imaxtostr.c +++ /dev/null @@ -1,3 +0,0 @@ -#define inttostr imaxtostr -#define inttype intmax_t -#include "inttostr.c" diff --git a/lib/intprops.h b/lib/intprops.h deleted file mode 100644 index 34f971c..0000000 --- a/lib/intprops.h +++ /dev/null @@ -1,78 +0,0 @@ -/* intprops.h -- properties of integer types - - Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Paul Eggert. */ - -#include <limits.h> - -/* The extra casts in the following macros work around compiler bugs, - e.g., in Cray C 5.0.3.0. */ - -/* True if the arithmetic type T is an integer type. bool counts as - an integer. */ -#define TYPE_IS_INTEGER(t) ((t) 1.5 == 1) - -/* True if negative values of the signed integer type T use two's - complement, ones' complement, or signed magnitude representation, - respectively. Much GNU code assumes two's complement, but some - people like to be portable to all possible C hosts. */ -#define TYPE_TWOS_COMPLEMENT(t) ((t) ~ (t) 0 == (t) -1) -#define TYPE_ONES_COMPLEMENT(t) ((t) ~ (t) 0 == 0) -#define TYPE_SIGNED_MAGNITUDE(t) ((t) ~ (t) 0 < (t) -1) - -/* True if the arithmetic type T is signed. */ -#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) - -/* The maximum and minimum values for the integer type T. These - macros have undefined behavior if T is signed and has padding bits. - If this is a problem for you, please let us know how to fix it for - your host. */ -#define TYPE_MINIMUM(t) \ - ((t) (! TYPE_SIGNED (t) \ - ? (t) 0 \ - : TYPE_SIGNED_MAGNITUDE (t) \ - ? ~ (t) 0 \ - : ~ (t) 0 << (sizeof (t) * CHAR_BIT - 1))) -#define TYPE_MAXIMUM(t) \ - ((t) (! TYPE_SIGNED (t) \ - ? (t) -1 \ - : ~ (~ (t) 0 << (sizeof (t) * CHAR_BIT - 1)))) - -/* Return zero if T can be determined to be an unsigned type. - Otherwise, return 1. - When compiling with GCC, INT_STRLEN_BOUND uses this macro to obtain a - tighter bound. Otherwise, it overestimates the true bound by one byte - when applied to unsigned types of size 2, 4, 16, ... bytes. - The symbol signed_type_or_expr__ is private to this header file. */ -#if __GNUC__ >= 2 -# define signed_type_or_expr__(t) TYPE_SIGNED (__typeof__ (t)) -#else -# define signed_type_or_expr__(t) 1 -#endif - -/* Bound on length of the string representing an integer type or expression T. - Subtract 1 for the sign bit if T is signed; log10 (2.0) < 146/485; - add 1 for integer division truncation; add 1 more for a minus sign - if needed. */ -#define INT_STRLEN_BOUND(t) \ - ((sizeof (t) * CHAR_BIT - signed_type_or_expr__ (t)) * 146 / 485 \ - + signed_type_or_expr__ (t) + 1) - -/* Bound on buffer size needed to represent an integer type or expression T, - including the terminating null. */ -#define INT_BUFSIZE_BOUND(t) (INT_STRLEN_BOUND (t) + 1) diff --git a/lib/inttostr.c b/lib/inttostr.c deleted file mode 100644 index 246d658..0000000 --- a/lib/inttostr.c +++ /dev/null @@ -1,51 +0,0 @@ -/* inttostr.c -- convert integers to printable strings - - Copyright (C) 2001, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Paul Eggert */ - -#include <config.h> - -#include "inttostr.h" - -/* Convert I to a printable string in BUF, which must be at least - INT_BUFSIZE_BOUND (INTTYPE) bytes long. Return the address of the - printable string, which need not start at BUF. */ - -char * -inttostr (inttype i, char *buf) -{ - char *p = buf + INT_STRLEN_BOUND (inttype); - *p = 0; - - if (i < 0) - { - do - *--p = '0' - i % 10; - while ((i /= 10) != 0); - - *--p = '-'; - } - else - { - do - *--p = '0' + i % 10; - while ((i /= 10) != 0); - } - - return p; -} diff --git a/lib/inttostr.h b/lib/inttostr.h deleted file mode 100644 index 31258ca..0000000 --- a/lib/inttostr.h +++ /dev/null @@ -1,30 +0,0 @@ -/* inttostr.h -- convert integers to printable strings - - Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 Free Software - Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Paul Eggert */ - -#include <stdint.h> -#include <sys/types.h> - -#include "intprops.h" - -char *offtostr (off_t, char *); -char *imaxtostr (intmax_t, char *); -char *umaxtostr (uintmax_t, char *); -char *uinttostr (unsigned int, char *); diff --git a/lib/lstat.c b/lib/lstat.c deleted file mode 100644 index 77dd228..0000000 --- a/lib/lstat.c +++ /dev/null @@ -1,76 +0,0 @@ -/* Work around a bug of lstat on some systems - - Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free - Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* written by Jim Meyering */ - -#include <config.h> - -/* The specification of these functions is in sys_stat.h. But we cannot - include this include file here, because on some systems, a - "#define lstat lstat64" is being used, and sys_stat.h deletes this - definition. */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include <errno.h> - -/* lstat works differently on Linux and Solaris systems. POSIX (see - `pathname resolution' in the glossary) requires that programs like - `ls' take into consideration the fact that FILE has a trailing slash - when FILE is a symbolic link. On Linux and Solaris 10 systems, the - lstat function already has the desired semantics (in treating - `lstat ("symlink/", sbuf)' just like `lstat ("symlink/.", sbuf)', - but on Solaris 9 and earlier it does not. - - If FILE has a trailing slash and specifies a symbolic link, - then use stat() to get more info on the referent of FILE. - If the referent is a non-directory, then set errno to ENOTDIR - and return -1. Otherwise, return stat's result. */ - -int -rpl_lstat (const char *file, struct stat *sbuf) -{ - size_t len; - int lstat_result = lstat (file, sbuf); - - if (lstat_result != 0 || !S_ISLNK (sbuf->st_mode)) - return lstat_result; - - len = strlen (file); - if (len == 0 || file[len - 1] != '/') - return 0; - - /* FILE refers to a symbolic link and the name ends with a slash. - Call stat() to get info about the link's referent. */ - - /* If stat fails, then we do the same. */ - if (stat (file, sbuf) != 0) - return -1; - - /* If FILE references a directory, return 0. */ - if (S_ISDIR (sbuf->st_mode)) - return 0; - - /* Here, we know stat succeeded and FILE references a non-directory. - But it was specified via a name including a trailing slash. - Fail with errno set to ENOTDIR to indicate the contradiction. */ - errno = ENOTDIR; - return -1; -} diff --git a/lib/lstat.h b/lib/lstat.h deleted file mode 100644 index 6a83fbf..0000000 --- a/lib/lstat.h +++ /dev/null @@ -1,24 +0,0 @@ -/* Retrieving information about files. - Copyright (C) 2005 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <sys/stat.h> - -#if !LSTAT_FOLLOWS_SLASHED_SYMLINK -extern int rpl_lstat (const char *name, struct stat *buf); -# undef lstat -# define lstat rpl_lstat -#endif diff --git a/lib/malloc.c b/lib/malloc.c deleted file mode 100644 index d4dae3e..0000000 --- a/lib/malloc.c +++ /dev/null @@ -1,35 +0,0 @@ -/* malloc() function that is glibc compatible. - - Copyright (C) 1997, 1998, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* written by Jim Meyering */ - -#include <config.h> -#undef malloc - -#include <stdlib.h> - -/* Allocate an N-byte block of memory from the heap. - If N is zero, allocate a 1-byte block. */ - -void * -rpl_malloc (size_t n) -{ - if (n == 0) - n = 1; - return malloc (n); -} diff --git a/lib/mbchar.c b/lib/mbchar.c deleted file mode 100644 index 95373f5..0000000 --- a/lib/mbchar.c +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright (C) 2001, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - - -#include <config.h> - -#include <limits.h> - -#include "mbchar.h" - -#if IS_BASIC_ASCII - -/* Bit table of characters in the ISO C "basic character set". */ -const unsigned int is_basic_table [UCHAR_MAX / 32 + 1] = -{ - 0x00001a00, /* '\t' '\v' '\f' */ - 0xffffffef, /* ' '...'#' '%'...'?' */ - 0xfffffffe, /* 'A'...'Z' '[' '\\' ']' '^' '_' */ - 0x7ffffffe /* 'a'...'z' '{' '|' '}' '~' */ - /* The remaining bits are 0. */ -}; - -#endif /* IS_BASIC_ASCII */ diff --git a/lib/mbchar.h b/lib/mbchar.h deleted file mode 100644 index f3e28ef..0000000 --- a/lib/mbchar.h +++ /dev/null @@ -1,353 +0,0 @@ -/* Multibyte character data type. - Copyright (C) 2001, 2005-2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -/* Written by Bruno Haible <bruno@clisp.org>. */ - -/* A multibyte character is a short subsequence of a char* string, - representing a single wide character. - - We use multibyte characters instead of wide characters because of - the following goals: - 1) correct multibyte handling, i.e. operate according to the LC_CTYPE - locale, - 2) ease of maintenance, i.e. the maintainer needs not know all details - of the ISO C 99 standard, - 3) don't fail grossly if the input is not in the encoding set by the - locale, because often different encodings are in use in the same - countries (ISO-8859-1/UTF-8, EUC-JP/Shift_JIS, ...), - 4) fast in the case of ASCII characters, - 5) portability, i.e. don't make unportable assumptions about wchar_t. - - Multibyte characters are only accessed through the mb* macros. - - mb_ptr (mbc) - return a pointer to the beginning of the multibyte sequence. - - mb_len (mbc) - returns the number of bytes occupied by the multibyte sequence. - Always > 0. - - mb_iseq (mbc, sc) - returns true if mbc is the standard ASCII character sc. - - mb_isnul (mbc) - returns true if mbc is the nul character. - - mb_cmp (mbc1, mbc2) - returns a positive, zero, or negative value depending on whether mbc1 - sorts after, same or before mbc2. - - mb_casecmp (mbc1, mbc2) - returns a positive, zero, or negative value depending on whether mbc1 - sorts after, same or before mbc2, modulo upper/lowercase conversion. - - mb_equal (mbc1, mbc2) - returns true if mbc1 and mbc2 are equal. - - mb_caseequal (mbc1, mbc2) - returns true if mbc1 and mbc2 are equal modulo upper/lowercase conversion. - - mb_isalnum (mbc) - returns true if mbc is alphanumeric. - - mb_isalpha (mbc) - returns true if mbc is alphabetic. - - mb_isascii(mbc) - returns true if mbc is plain ASCII. - - mb_isblank (mbc) - returns true if mbc is a blank. - - mb_iscntrl (mbc) - returns true if mbc is a control character. - - mb_isdigit (mbc) - returns true if mbc is a decimal digit. - - mb_isgraph (mbc) - returns true if mbc is a graphic character. - - mb_islower (mbc) - returns true if mbc is lowercase. - - mb_isprint (mbc) - returns true if mbc is a printable character. - - mb_ispunct (mbc) - returns true if mbc is a punctuation character. - - mb_isspace (mbc) - returns true if mbc is a space character. - - mb_isupper (mbc) - returns true if mbc is uppercase. - - mb_isxdigit (mbc) - returns true if mbc is a hexadecimal digit. - - mb_width (mbc) - returns the number of columns on the output device occupied by mbc. - Always >= 0. - - mb_putc (mbc, stream) - outputs mbc on stream, a byte oriented FILE stream opened for output. - - mb_setascii (&mbc, sc) - assigns the standard ASCII character sc to mbc. - - mb_copy (&destmbc, &srcmbc) - copies srcmbc to destmbc. - - Here are the function prototypes of the macros. - - extern const char * mb_ptr (const mbchar_t mbc); - extern size_t mb_len (const mbchar_t mbc); - extern bool mb_iseq (const mbchar_t mbc, char sc); - extern bool mb_isnul (const mbchar_t mbc); - extern int mb_cmp (const mbchar_t mbc1, const mbchar_t mbc2); - extern int mb_casecmp (const mbchar_t mbc1, const mbchar_t mbc2); - extern bool mb_equal (const mbchar_t mbc1, const mbchar_t mbc2); - extern bool mb_caseequal (const mbchar_t mbc1, const mbchar_t mbc2); - extern bool mb_isalnum (const mbchar_t mbc); - extern bool mb_isalpha (const mbchar_t mbc); - extern bool mb_isascii (const mbchar_t mbc); - extern bool mb_isblank (const mbchar_t mbc); - extern bool mb_iscntrl (const mbchar_t mbc); - extern bool mb_isdigit (const mbchar_t mbc); - extern bool mb_isgraph (const mbchar_t mbc); - extern bool mb_islower (const mbchar_t mbc); - extern bool mb_isprint (const mbchar_t mbc); - extern bool mb_ispunct (const mbchar_t mbc); - extern bool mb_isspace (const mbchar_t mbc); - extern bool mb_isupper (const mbchar_t mbc); - extern bool mb_isxdigit (const mbchar_t mbc); - extern int mb_width (const mbchar_t mbc); - extern void mb_putc (const mbchar_t mbc, FILE *stream); - extern void mb_setascii (mbchar_t *new, char sc); - extern void mb_copy (mbchar_t *new, const mbchar_t *old); - */ - -#ifndef _MBCHAR_H -#define _MBCHAR_H 1 - -#include <stdbool.h> -#include <string.h> - -/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before - <wchar.h>. - BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before - <wchar.h>. */ -#include <stdio.h> -#include <time.h> -#include <wchar.h> -#include <wctype.h> - -#include "wcwidth.h" - -#define MBCHAR_BUF_SIZE 24 - -struct mbchar -{ - const char *ptr; /* pointer to current character */ - size_t bytes; /* number of bytes of current character, > 0 */ - bool wc_valid; /* true if wc is a valid wide character */ - wchar_t wc; /* if wc_valid: the current character */ - char buf[MBCHAR_BUF_SIZE]; /* room for the bytes, used for file input only */ -}; - -/* EOF (not a real character) is represented with bytes = 0 and - wc_valid = false. */ - -typedef struct mbchar mbchar_t; - -/* Access the current character. */ -#define mb_ptr(mbc) ((mbc).ptr) -#define mb_len(mbc) ((mbc).bytes) - -/* Comparison of characters. */ -#define mb_iseq(mbc, sc) ((mbc).wc_valid && (mbc).wc == (sc)) -#define mb_isnul(mbc) ((mbc).wc_valid && (mbc).wc == 0) -#define mb_cmp(mbc1, mbc2) \ - ((mbc1).wc_valid \ - ? ((mbc2).wc_valid \ - ? (int) (mbc1).wc - (int) (mbc2).wc \ - : -1) \ - : ((mbc2).wc_valid \ - ? 1 \ - : (mbc1).bytes == (mbc2).bytes \ - ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) \ - : (mbc1).bytes < (mbc2).bytes \ - ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \ - : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1))) -#define mb_casecmp(mbc1, mbc2) \ - ((mbc1).wc_valid \ - ? ((mbc2).wc_valid \ - ? (int) towlower ((mbc1).wc) - (int) towlower ((mbc2).wc) \ - : -1) \ - : ((mbc2).wc_valid \ - ? 1 \ - : (mbc1).bytes == (mbc2).bytes \ - ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) \ - : (mbc1).bytes < (mbc2).bytes \ - ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \ - : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1))) -#define mb_equal(mbc1, mbc2) \ - ((mbc1).wc_valid && (mbc2).wc_valid \ - ? (mbc1).wc == (mbc2).wc \ - : (mbc1).bytes == (mbc2).bytes \ - && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0) -#define mb_caseequal(mbc1, mbc2) \ - ((mbc1).wc_valid && (mbc2).wc_valid \ - ? towlower ((mbc1).wc) == towlower ((mbc2).wc) \ - : (mbc1).bytes == (mbc2).bytes \ - && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0) - -/* <ctype.h>, <wctype.h> classification. */ -#define mb_isascii(mbc) \ - ((mbc).wc_valid && (mbc).wc >= 0 && (mbc).wc <= 127) -#define mb_isalnum(mbc) ((mbc).wc_valid && iswalnum ((mbc).wc)) -#define mb_isalpha(mbc) ((mbc).wc_valid && iswalpha ((mbc).wc)) -#define mb_isblank(mbc) ((mbc).wc_valid && iswblank ((mbc).wc)) -#define mb_iscntrl(mbc) ((mbc).wc_valid && iswcntrl ((mbc).wc)) -#define mb_isdigit(mbc) ((mbc).wc_valid && iswdigit ((mbc).wc)) -#define mb_isgraph(mbc) ((mbc).wc_valid && iswgraph ((mbc).wc)) -#define mb_islower(mbc) ((mbc).wc_valid && iswlower ((mbc).wc)) -#define mb_isprint(mbc) ((mbc).wc_valid && iswprint ((mbc).wc)) -#define mb_ispunct(mbc) ((mbc).wc_valid && iswpunct ((mbc).wc)) -#define mb_isspace(mbc) ((mbc).wc_valid && iswspace ((mbc).wc)) -#define mb_isupper(mbc) ((mbc).wc_valid && iswupper ((mbc).wc)) -#define mb_isxdigit(mbc) ((mbc).wc_valid && iswxdigit ((mbc).wc)) - -/* Extra <wchar.h> function. */ - -/* Unprintable characters appear as a small box of width 1. */ -#define MB_UNPRINTABLE_WIDTH 1 - -static inline int -mb_width_aux (wint_t wc) -{ - int w = wcwidth (wc); - /* For unprintable characters, arbitrarily return 0 for control characters - and MB_UNPRINTABLE_WIDTH otherwise. */ - return (w >= 0 ? w : iswcntrl (wc) ? 0 : MB_UNPRINTABLE_WIDTH); -} - -#define mb_width(mbc) \ - ((mbc).wc_valid ? mb_width_aux ((mbc).wc) : MB_UNPRINTABLE_WIDTH) - -/* Output. */ -#define mb_putc(mbc, stream) fwrite ((mbc).ptr, 1, (mbc).bytes, (stream)) - -/* Assignment. */ -#define mb_setascii(mbc, sc) \ - ((mbc)->ptr = (mbc)->buf, (mbc)->bytes = 1, (mbc)->wc_valid = 1, \ - (mbc)->wc = (mbc)->buf[0] = (sc)) - -/* Copying a character. */ -static inline void -mb_copy (mbchar_t *new_mbc, const mbchar_t *old_mbc) -{ - if (old_mbc->ptr == &old_mbc->buf[0]) - { - memcpy (&new_mbc->buf[0], &old_mbc->buf[0], old_mbc->bytes); - new_mbc->ptr = &new_mbc->buf[0]; - } - else - new_mbc->ptr = old_mbc->ptr; - new_mbc->bytes = old_mbc->bytes; - if ((new_mbc->wc_valid = old_mbc->wc_valid)) - new_mbc->wc = old_mbc->wc; -} - - -/* is_basic(c) tests whether the single-byte character c is in the - ISO C "basic character set". - This is a convenience function, and is in this file only to share code - between mbiter_multi.h and mbfile_multi.h. */ -#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ - && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ - && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ - && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ - && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ - && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ - && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ - && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ - && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ - && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ - && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ - && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ - && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ - && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ - && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ - && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ - && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ - && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ - && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ - && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ - && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ - && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ - && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126) -/* The character set is ISO-646, not EBCDIC. */ -# define IS_BASIC_ASCII 1 - -extern const unsigned int is_basic_table[]; - -static inline bool -is_basic (char c) -{ - return (is_basic_table [(unsigned char) c >> 5] >> ((unsigned char) c & 31)) - & 1; -} - -#else - -static inline bool -is_basic (char c) -{ - switch (c) - { - case '\t': case '\v': case '\f': - case ' ': case '!': case '"': case '#': case '%': - case '&': case '\'': case '(': case ')': case '*': - case '+': case ',': case '-': case '.': case '/': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case ':': case ';': case '<': case '=': case '>': - case '?': - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': case 'G': case 'H': case 'I': case 'J': - case 'K': case 'L': case 'M': case 'N': case 'O': - case 'P': case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': case 'Y': - case 'Z': - case '[': case '\\': case ']': case '^': case '_': - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'l': case 'm': case 'n': case 'o': - case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': - case 'z': case '{': case '|': case '}': case '~': - return 1; - default: - return 0; - } -} - -#endif - -#endif /* _MBCHAR_H */ diff --git a/lib/mbuiter.h b/lib/mbuiter.h deleted file mode 100644 index 9da3a6c..0000000 --- a/lib/mbuiter.h +++ /dev/null @@ -1,203 +0,0 @@ -/* Iterating through multibyte strings: macros for multi-byte encodings. - Copyright (C) 2001, 2005 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -/* Written by Bruno Haible <bruno@clisp.org>. */ - -/* The macros in this file implement forward iteration through a - multi-byte string, without knowing its length a-priori. - - With these macros, an iteration loop that looks like - - char *iter; - for (iter = buf; *iter != '\0'; iter++) - { - do_something (*iter); - } - - becomes - - mbui_iterator_t iter; - for (mbui_init (iter, buf); mbui_avail (iter); mbui_advance (iter)) - { - do_something (mbui_cur_ptr (iter), mb_len (mbui_cur (iter))); - } - - The benefit of these macros over plain use of mbrtowc is: - - Handling of invalid multibyte sequences is possible without - making the code more complicated, while still preserving the - invalid multibyte sequences. - - Compared to mbiter.h, the macros here don't need to know the string's - length a-priori. The downside is that at each step, the look-ahead - that guards against overrunning the terminating '\0' is more expensive. - The mbui_* macros are therefore suitable when there is a high probability - that only the first few multibyte characters need to be inspected. - Whereas the mbi_* macros are better if usually the iteration runs - through the entire string. - - mbui_iterator_t - is a type usable for variable declarations. - - mbui_init (iter, startptr) - initializes the iterator, starting at startptr. - - mbui_avail (iter) - returns true if there are more multibyte chracters available before - the end of string is reached. In this case, mbui_cur (iter) is - initialized to the next multibyte chracter. - - mbui_advance (iter) - advances the iterator by one multibyte character. - - mbui_cur (iter) - returns the current multibyte character, of type mbchar_t. All the - macros defined in mbchar.h can be used on it. - - mbui_cur_ptr (iter) - return a pointer to the beginning of the current multibyte character. - - mbui_reloc (iter, ptrdiff) - relocates iterator when the string is moved by ptrdiff bytes. - - Here are the function prototypes of the macros. - - extern void mbui_init (mbui_iterator_t iter, const char *startptr); - extern bool mbui_avail (mbui_iterator_t iter); - extern void mbui_advance (mbui_iterator_t iter); - extern mbchar_t mbui_cur (mbui_iterator_t iter); - extern const char * mbui_cur_ptr (mbui_iterator_t iter); - extern void mbui_reloc (mbui_iterator_t iter, ptrdiff_t ptrdiff); - */ - -#ifndef _MBUITER_H -#define _MBUITER_H 1 - -#include <assert.h> -#include <stdbool.h> -#include <stdlib.h> - -/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before - <wchar.h>. - BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before - <wchar.h>. */ -#include <stdio.h> -#include <time.h> -#include <wchar.h> - -#include "mbchar.h" -#include "strnlen1.h" - -struct mbuiter_multi -{ - bool in_shift; /* true if next byte may not be interpreted as ASCII */ - mbstate_t state; /* if in_shift: current shift state */ - bool next_done; /* true if mbui_avail has already filled the following */ - struct mbchar cur; /* the current character: - const char *cur.ptr pointer to current character - The following are only valid after mbui_avail. - size_t cur.bytes number of bytes of current character - bool cur.wc_valid true if wc is a valid wide character - wchar_t cur.wc if wc_valid: the current character - */ -}; - -static inline void -mbuiter_multi_next (struct mbuiter_multi *iter) -{ - if (iter->next_done) - return; - if (iter->in_shift) - goto with_shift; - /* Handle most ASCII characters quickly, without calling mbrtowc(). */ - if (is_basic (*iter->cur.ptr)) - { - /* These characters are part of the basic character set. ISO C 99 - guarantees that their wide character code is identical to their - char code. */ - iter->cur.bytes = 1; - iter->cur.wc = *iter->cur.ptr; - iter->cur.wc_valid = true; - } - else - { - assert (mbsinit (&iter->state)); - iter->in_shift = true; - with_shift: - iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr, - strnlen1 (iter->cur.ptr, MB_CUR_MAX), - &iter->state); - if (iter->cur.bytes == (size_t) -1) - { - /* An invalid multibyte sequence was encountered. */ - iter->cur.bytes = 1; - iter->cur.wc_valid = false; - /* Whether to set iter->in_shift = false and reset iter->state - or not is not very important; the string is bogus anyway. */ - } - else if (iter->cur.bytes == (size_t) -2) - { - /* An incomplete multibyte character at the end. */ - iter->cur.bytes = strlen (iter->cur.ptr); - iter->cur.wc_valid = false; - /* Whether to set iter->in_shift = false and reset iter->state - or not is not important; the string end is reached anyway. */ - } - else - { - if (iter->cur.bytes == 0) - { - /* A null wide character was encountered. */ - iter->cur.bytes = 1; - assert (*iter->cur.ptr == '\0'); - assert (iter->cur.wc == 0); - } - iter->cur.wc_valid = true; - - /* When in the initial state, we can go back treating ASCII - characters more quickly. */ - if (mbsinit (&iter->state)) - iter->in_shift = false; - } - } - iter->next_done = true; -} - -static inline void -mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff) -{ - iter->cur.ptr += ptrdiff; -} - -/* Iteration macros. */ -typedef struct mbuiter_multi mbui_iterator_t; -#define mbui_init(iter, startptr) \ - ((iter).cur.ptr = (startptr), \ - (iter).in_shift = false, memset (&(iter).state, '\0', sizeof (mbstate_t)), \ - (iter).next_done = false) -#define mbui_avail(iter) \ - (mbuiter_multi_next (&(iter)), !mb_isnul ((iter).cur)) -#define mbui_advance(iter) \ - ((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false) - -/* Access to the current character. */ -#define mbui_cur(iter) (iter).cur -#define mbui_cur_ptr(iter) (iter).cur.ptr - -/* Relocation. */ -#define mbui_reloc(iter, ptrdiff) mbuiter_multi_reloc (&iter, ptrdiff) - -#endif /* _MBUITER_H */ diff --git a/lib/memchr.c b/lib/memchr.c deleted file mode 100644 index d44ad6d..0000000 --- a/lib/memchr.c +++ /dev/null @@ -1,201 +0,0 @@ -/* Copyright (C) 1991, 1993, 1996, 1997, 1999, 2000, 2003, 2004, 2006 Free - Software Foundation, Inc. - - Based on strlen implementation by Torbjorn Granlund (tege@sics.se), - with help from Dan Sahlin (dan@sics.se) and - commentary by Jim Blandy (jimb@ai.mit.edu); - adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), - and implemented by Roland McGrath (roland@ai.mit.edu). - -NOTE: The canonical source of this file is maintained with the GNU C Library. -Bugs can be reported to bug-glibc@prep.ai.mit.edu. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software Foundation, -Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _LIBC -# include <config.h> -#endif - -#include <string.h> - -#include <stddef.h> - -#if defined _LIBC -# include <memcopy.h> -#else -# define reg_char char -#endif - -#include <limits.h> - -#if HAVE_BP_SYM_H || defined _LIBC -# include <bp-sym.h> -#else -# define BP_SYM(sym) sym -#endif - -#undef memchr -#undef __memchr - -/* Search no more than N bytes of S for C. */ -void * -__memchr (void const *s, int c_in, size_t n) -{ - const unsigned char *char_ptr; - const unsigned long int *longword_ptr; - unsigned long int longword, magic_bits, charmask; - unsigned reg_char c; - int i; - - c = (unsigned char) c_in; - - /* Handle the first few characters by reading one character at a time. - Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = (const unsigned char *) s; - n > 0 && (size_t) char_ptr % sizeof longword != 0; - --n, ++char_ptr) - if (*char_ptr == c) - return (void *) char_ptr; - - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to any size longwords. */ - - longword_ptr = (const unsigned long int *) char_ptr; - - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits - the "holes." Note that there is a hole just to the left of - each byte, with an extra at the end: - - bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD - - The 1-bits make sure that carries propagate to the next 0-bit. - The 0-bits provide holes for carries to fall into. */ - - /* Set MAGIC_BITS to be this pattern of 1 and 0 bits. - Set CHARMASK to be a longword, each of whose bytes is C. */ - - magic_bits = 0xfefefefe; - charmask = c | (c << 8); - charmask |= charmask << 16; -#if 0xffffffffU < ULONG_MAX - magic_bits |= magic_bits << 32; - charmask |= charmask << 32; - if (8 < sizeof longword) - for (i = 64; i < sizeof longword * 8; i *= 2) - { - magic_bits |= magic_bits << i; - charmask |= charmask << i; - } -#endif - magic_bits = (ULONG_MAX >> 1) & (magic_bits | 1); - - /* Instead of the traditional loop which tests each character, - we will test a longword at a time. The tricky part is testing - if *any of the four* bytes in the longword in question are zero. */ - while (n >= sizeof longword) - { - /* We tentatively exit the loop if adding MAGIC_BITS to - LONGWORD fails to change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-30 is set, there will be a carry - into bit 31, so all of the hole bits will be changed. - - The one misfire occurs when bits 24-30 are clear and bit - 31 is set; in this case, the hole at bit 31 is not - changed. If we had access to the processor carry flag, - we could close this loophole by putting the fourth hole - at bit 32! - - So it ignores everything except 128's, when they're aligned - properly. - - 3) But wait! Aren't we looking for C, not zero? - Good point. So what we do is XOR LONGWORD with a longword, - each of whose bytes is C. This turns each byte that is C - into a zero. */ - - longword = *longword_ptr++ ^ charmask; - - /* Add MAGIC_BITS to LONGWORD. */ - if ((((longword + magic_bits) - - /* Set those bits that were unchanged by the addition. */ - ^ ~longword) - - /* Look at only the hole bits. If any of the hole bits - are unchanged, most likely one of the bytes was a - zero. */ - & ~magic_bits) != 0) - { - /* Which of the bytes was C? If none of them were, it was - a misfire; continue the search. */ - - const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); - - if (cp[0] == c) - return (void *) cp; - if (cp[1] == c) - return (void *) &cp[1]; - if (cp[2] == c) - return (void *) &cp[2]; - if (cp[3] == c) - return (void *) &cp[3]; - if (4 < sizeof longword && cp[4] == c) - return (void *) &cp[4]; - if (5 < sizeof longword && cp[5] == c) - return (void *) &cp[5]; - if (6 < sizeof longword && cp[6] == c) - return (void *) &cp[6]; - if (7 < sizeof longword && cp[7] == c) - return (void *) &cp[7]; - if (8 < sizeof longword) - for (i = 8; i < sizeof longword; i++) - if (cp[i] == c) - return (void *) &cp[i]; - } - - n -= sizeof longword; - } - - char_ptr = (const unsigned char *) longword_ptr; - - while (n-- > 0) - { - if (*char_ptr == c) - return (void *) char_ptr; - else - ++char_ptr; - } - - return 0; -} -#ifdef weak_alias -weak_alias (__memchr, BP_SYM (memchr)) -#endif diff --git a/lib/memcmp.c b/lib/memcmp.c deleted file mode 100644 index cf98bfa..0000000 --- a/lib/memcmp.c +++ /dev/null @@ -1,363 +0,0 @@ -/* Copyright (C) 1991, 1993, 1995, 1997, 1998, 2003, 2006 Free Software - Foundation, Inc. - - Contributed by Torbjorn Granlund (tege@sics.se). - - NOTE: The canonical source of this file is maintained with the GNU C Library. - Bugs can be reported to bug-glibc@prep.ai.mit.edu. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - USA. */ - -#ifndef _LIBC -# include <config.h> -#endif - -#include <string.h> - -#undef memcmp - -#ifdef _LIBC - -# include <memcopy.h> -# include <endian.h> - -# if __BYTE_ORDER == __BIG_ENDIAN -# define WORDS_BIGENDIAN -# endif - -#else /* Not in the GNU C library. */ - -# include <sys/types.h> - -/* Type to use for aligned memory operations. - This should normally be the biggest type supported by a single load - and store. Must be an unsigned type. */ -# define op_t unsigned long int -# define OPSIZ (sizeof(op_t)) - -/* Threshold value for when to enter the unrolled loops. */ -# define OP_T_THRES 16 - -/* Type to use for unaligned operations. */ -typedef unsigned char byte; - -# ifndef WORDS_BIGENDIAN -# define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) -# else -# define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2))) -# endif - -#endif /* In the GNU C library. */ - -#ifdef WORDS_BIGENDIAN -# define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1) -#else -# define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b)) -#endif - -/* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */ - -/* The strategy of this memcmp is: - - 1. Compare bytes until one of the block pointers is aligned. - - 2. Compare using memcmp_common_alignment or - memcmp_not_common_alignment, regarding the alignment of the other - block after the initial byte operations. The maximum number of - full words (of type op_t) are compared in this way. - - 3. Compare the few remaining bytes. */ - -#ifndef WORDS_BIGENDIAN -/* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine. - A and B are known to be different. - This is needed only on little-endian machines. */ - -# ifdef __GNUC__ -__inline -# endif -static int -memcmp_bytes (long unsigned int a, long unsigned int b) -{ - long int srcp1 = (long int) &a; - long int srcp2 = (long int) &b; - op_t a0, b0; - - do - { - a0 = ((byte *) srcp1)[0]; - b0 = ((byte *) srcp2)[0]; - srcp1 += 1; - srcp2 += 1; - } - while (a0 == b0); - return a0 - b0; -} -#endif - -/* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t' - objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for - memory operations on `op_t's. */ -#ifdef __GNUC__ -__inline -#endif -static int -memcmp_common_alignment (long int srcp1, long int srcp2, size_t len) -{ - op_t a0, a1; - op_t b0, b1; - - switch (len % 4) - { - default: /* Avoid warning about uninitialized local variables. */ - case 2: - a0 = ((op_t *) srcp1)[0]; - b0 = ((op_t *) srcp2)[0]; - srcp1 -= 2 * OPSIZ; - srcp2 -= 2 * OPSIZ; - len += 2; - goto do1; - case 3: - a1 = ((op_t *) srcp1)[0]; - b1 = ((op_t *) srcp2)[0]; - srcp1 -= OPSIZ; - srcp2 -= OPSIZ; - len += 1; - goto do2; - case 0: - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - return 0; - a0 = ((op_t *) srcp1)[0]; - b0 = ((op_t *) srcp2)[0]; - goto do3; - case 1: - a1 = ((op_t *) srcp1)[0]; - b1 = ((op_t *) srcp2)[0]; - srcp1 += OPSIZ; - srcp2 += OPSIZ; - len -= 1; - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - goto do0; - /* Fall through. */ - } - - do - { - a0 = ((op_t *) srcp1)[0]; - b0 = ((op_t *) srcp2)[0]; - if (a1 != b1) - return CMP_LT_OR_GT (a1, b1); - - do3: - a1 = ((op_t *) srcp1)[1]; - b1 = ((op_t *) srcp2)[1]; - if (a0 != b0) - return CMP_LT_OR_GT (a0, b0); - - do2: - a0 = ((op_t *) srcp1)[2]; - b0 = ((op_t *) srcp2)[2]; - if (a1 != b1) - return CMP_LT_OR_GT (a1, b1); - - do1: - a1 = ((op_t *) srcp1)[3]; - b1 = ((op_t *) srcp2)[3]; - if (a0 != b0) - return CMP_LT_OR_GT (a0, b0); - - srcp1 += 4 * OPSIZ; - srcp2 += 4 * OPSIZ; - len -= 4; - } - while (len != 0); - - /* This is the right position for do0. Please don't move - it into the loop. */ - do0: - if (a1 != b1) - return CMP_LT_OR_GT (a1, b1); - return 0; -} - -/* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN - `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory - operations on `op_t', but SRCP1 *should be unaligned*. */ -#ifdef __GNUC__ -__inline -#endif -static int -memcmp_not_common_alignment (long int srcp1, long int srcp2, size_t len) -{ - op_t a0, a1, a2, a3; - op_t b0, b1, b2, b3; - op_t x; - int shl, shr; - - /* Calculate how to shift a word read at the memory operation - aligned srcp1 to make it aligned for comparison. */ - - shl = 8 * (srcp1 % OPSIZ); - shr = 8 * OPSIZ - shl; - - /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t' - it points in the middle of. */ - srcp1 &= -OPSIZ; - - switch (len % 4) - { - default: /* Avoid warning about uninitialized local variables. */ - case 2: - a1 = ((op_t *) srcp1)[0]; - a2 = ((op_t *) srcp1)[1]; - b2 = ((op_t *) srcp2)[0]; - srcp1 -= 1 * OPSIZ; - srcp2 -= 2 * OPSIZ; - len += 2; - goto do1; - case 3: - a0 = ((op_t *) srcp1)[0]; - a1 = ((op_t *) srcp1)[1]; - b1 = ((op_t *) srcp2)[0]; - srcp2 -= 1 * OPSIZ; - len += 1; - goto do2; - case 0: - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - return 0; - a3 = ((op_t *) srcp1)[0]; - a0 = ((op_t *) srcp1)[1]; - b0 = ((op_t *) srcp2)[0]; - srcp1 += 1 * OPSIZ; - goto do3; - case 1: - a2 = ((op_t *) srcp1)[0]; - a3 = ((op_t *) srcp1)[1]; - b3 = ((op_t *) srcp2)[0]; - srcp1 += 2 * OPSIZ; - srcp2 += 1 * OPSIZ; - len -= 1; - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - goto do0; - /* Fall through. */ - } - - do - { - a0 = ((op_t *) srcp1)[0]; - b0 = ((op_t *) srcp2)[0]; - x = MERGE(a2, shl, a3, shr); - if (x != b3) - return CMP_LT_OR_GT (x, b3); - - do3: - a1 = ((op_t *) srcp1)[1]; - b1 = ((op_t *) srcp2)[1]; - x = MERGE(a3, shl, a0, shr); - if (x != b0) - return CMP_LT_OR_GT (x, b0); - - do2: - a2 = ((op_t *) srcp1)[2]; - b2 = ((op_t *) srcp2)[2]; - x = MERGE(a0, shl, a1, shr); - if (x != b1) - return CMP_LT_OR_GT (x, b1); - - do1: - a3 = ((op_t *) srcp1)[3]; - b3 = ((op_t *) srcp2)[3]; - x = MERGE(a1, shl, a2, shr); - if (x != b2) - return CMP_LT_OR_GT (x, b2); - - srcp1 += 4 * OPSIZ; - srcp2 += 4 * OPSIZ; - len -= 4; - } - while (len != 0); - - /* This is the right position for do0. Please don't move - it into the loop. */ - do0: - x = MERGE(a2, shl, a3, shr); - if (x != b3) - return CMP_LT_OR_GT (x, b3); - return 0; -} - -int -rpl_memcmp (const void *s1, const void *s2, size_t len) -{ - op_t a0; - op_t b0; - long int srcp1 = (long int) s1; - long int srcp2 = (long int) s2; - op_t res; - - if (len >= OP_T_THRES) - { - /* There are at least some bytes to compare. No need to test - for LEN == 0 in this alignment loop. */ - while (srcp2 % OPSIZ != 0) - { - a0 = ((byte *) srcp1)[0]; - b0 = ((byte *) srcp2)[0]; - srcp1 += 1; - srcp2 += 1; - res = a0 - b0; - if (res != 0) - return res; - len -= 1; - } - - /* SRCP2 is now aligned for memory operations on `op_t'. - SRCP1 alignment determines if we can do a simple, - aligned compare or need to shuffle bits. */ - - if (srcp1 % OPSIZ == 0) - res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ); - else - res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ); - if (res != 0) - return res; - - /* Number of bytes remaining in the interval [0..OPSIZ-1]. */ - srcp1 += len & -OPSIZ; - srcp2 += len & -OPSIZ; - len %= OPSIZ; - } - - /* There are just a few bytes to compare. Use byte memory operations. */ - while (len != 0) - { - a0 = ((byte *) srcp1)[0]; - b0 = ((byte *) srcp2)[0]; - srcp1 += 1; - srcp2 += 1; - res = a0 - b0; - if (res != 0) - return res; - len -= 1; - } - - return 0; -} - -#ifdef weak_alias -# undef bcmp -weak_alias (memcmp, bcmp) -#endif diff --git a/lib/memcpy.c b/lib/memcpy.c deleted file mode 100644 index 2ab5ed6..0000000 --- a/lib/memcpy.c +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright (C) 1995, 1997, 2000, 2003 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Jim Meyering <meyering@na-net.ornl.gov>. */ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -#include <stddef.h> - -/* Copy LEN bytes starting at SRCADDR to DESTADDR. Result undefined - if the source overlaps with the destination. - Return DESTADDR. */ - -void * -memcpy (void *destaddr, void const *srcaddr, size_t len) -{ - char *dest = destaddr; - char const *src = srcaddr; - - while (len-- > 0) - *dest++ = *src++; - return destaddr; -} diff --git a/lib/mempcpy.c b/lib/mempcpy.c deleted file mode 100644 index 3502da2..0000000 --- a/lib/mempcpy.c +++ /dev/null @@ -1,29 +0,0 @@ -/* Copy memory area and return pointer after last written byte. - Copyright (C) 2003 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Specification. */ -#include "mempcpy.h" - -#include <string.h> - -/* Copy N bytes of SRC to DEST, return pointer to bytes after the - last written byte. */ -void * -mempcpy (void *dest, const void *src, size_t n) -{ - return (char *) memcpy (dest, src, n) + n; -} diff --git a/lib/mempcpy.h b/lib/mempcpy.h deleted file mode 100644 index fa20321..0000000 --- a/lib/mempcpy.h +++ /dev/null @@ -1,36 +0,0 @@ -/* Copy memory area and return pointer after last written byte. - Copyright (C) 2003, 2004 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef mempcpy - -# if HAVE_MEMPCPY - -/* Get mempcpy() declaration. */ -# include <string.h> - -# else - -/* Get size_t */ -# include <stddef.h> - -/* Copy N bytes of SRC to DEST, return pointer to bytes after the - last written byte. */ -extern void *mempcpy (void *dest, const void *src, size_t n); - -# endif - -#endif diff --git a/lib/memset.c b/lib/memset.c deleted file mode 100644 index 890cbf1..0000000 --- a/lib/memset.c +++ /dev/null @@ -1,28 +0,0 @@ -/* memset.c -- set an area of memory to a given value - Copyright (C) 1991, 2003 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <stddef.h> - -void * -memset (void *str, int c, size_t len) -{ - register char *st = str; - - while (len-- > 0) - *st++ = c; - return str; -} diff --git a/lib/obstack.c b/lib/obstack.c deleted file mode 100644 index 5cd0b7a..0000000 --- a/lib/obstack.c +++ /dev/null @@ -1,431 +0,0 @@ -/* obstack.c - subroutines used implicitly by object stack macros - - Copyright (C) 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1996, 1997, - 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software - Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifdef _LIBC -# include <obstack.h> -# include <shlib-compat.h> -#else -# include <config.h> -# include "obstack.h" -#endif - -/* NOTE BEFORE MODIFYING THIS FILE: This version number must be - incremented whenever callers compiled using an old obstack.h can no - longer properly call the functions in this obstack.c. */ -#define OBSTACK_INTERFACE_VERSION 1 - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself, and the installed library - supports the same library interface we do. This code is part of the GNU - C Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object - files, it is simpler to just do this in the source for each such file. */ - -#include <stdio.h> /* Random thing to get __GNU_LIBRARY__. */ -#if !defined _LIBC && defined __GNU_LIBRARY__ && __GNU_LIBRARY__ > 1 -# include <gnu-versions.h> -# if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION -# define ELIDE_CODE -# endif -#endif - -#include <stddef.h> - -#ifndef ELIDE_CODE - -# include <stdint.h> - -/* Determine default alignment. */ -union fooround -{ - uintmax_t i; - long double d; - void *p; -}; -struct fooalign -{ - char c; - union fooround u; -}; -/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT. - But in fact it might be less smart and round addresses to as much as - DEFAULT_ROUNDING. So we prepare for it to do that. */ -enum - { - DEFAULT_ALIGNMENT = offsetof (struct fooalign, u), - DEFAULT_ROUNDING = sizeof (union fooround) - }; - -/* When we copy a long block of data, this is the unit to do it with. - On some machines, copying successive ints does not work; - in such a case, redefine COPYING_UNIT to `long' (if that works) - or `char' as a last resort. */ -# ifndef COPYING_UNIT -# define COPYING_UNIT int -# endif - - -/* The functions allocating more room by calling `obstack_chunk_alloc' - jump to the handler pointed to by `obstack_alloc_failed_handler'. - This can be set to a user defined function which should either - abort gracefully or use longjump - but shouldn't return. This - variable by default points to the internal function - `print_and_abort'. */ -static void print_and_abort (void); -void (*obstack_alloc_failed_handler) (void) = print_and_abort; - -/* Exit value used when `print_and_abort' is used. */ -# include <stdlib.h> -# ifdef _LIBC -int obstack_exit_failure = EXIT_FAILURE; -# else -# include "exitfail.h" -# define obstack_exit_failure exit_failure -# endif - -# ifdef _LIBC -# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) -/* A looong time ago (before 1994, anyway; we're not sure) this global variable - was used by non-GNU-C macros to avoid multiple evaluation. The GNU C - library still exports it because somebody might use it. */ -struct obstack *_obstack_compat; -compat_symbol (libc, _obstack_compat, _obstack, GLIBC_2_0); -# endif -# endif - -/* Define a macro that either calls functions with the traditional malloc/free - calling interface, or calls functions with the mmalloc/mfree interface - (that adds an extra first argument), based on the state of use_extra_arg. - For free, do not use ?:, since some compilers, like the MIPS compilers, - do not allow (expr) ? void : void. */ - -# define CALL_CHUNKFUN(h, size) \ - (((h) -> use_extra_arg) \ - ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \ - : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size))) - -# define CALL_FREEFUN(h, old_chunk) \ - do { \ - if ((h) -> use_extra_arg) \ - (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \ - else \ - (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \ - } while (0) - - -/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default). - Objects start on multiples of ALIGNMENT (0 means use default). - CHUNKFUN is the function to use to allocate chunks, - and FREEFUN the function to free them. - - Return nonzero if successful, calls obstack_alloc_failed_handler if - allocation fails. */ - -int -_obstack_begin (struct obstack *h, - int size, int alignment, - void *(*chunkfun) (long), - void (*freefun) (void *)) -{ - register struct _obstack_chunk *chunk; /* points to new chunk */ - - if (alignment == 0) - alignment = DEFAULT_ALIGNMENT; - if (size == 0) - /* Default size is what GNU malloc can fit in a 4096-byte block. */ - { - /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc. - Use the values for range checking, because if range checking is off, - the extra bytes won't be missed terribly, but if range checking is on - and we used a larger request, a whole extra 4096 bytes would be - allocated. - - These number are irrelevant to the new GNU malloc. I suspect it is - less sensitive to the size of the request. */ - int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1)) - + 4 + DEFAULT_ROUNDING - 1) - & ~(DEFAULT_ROUNDING - 1)); - size = 4096 - extra; - } - - h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun; - h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun; - h->chunk_size = size; - h->alignment_mask = alignment - 1; - h->use_extra_arg = 0; - - chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); - if (!chunk) - (*obstack_alloc_failed_handler) (); - h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents, - alignment - 1); - h->chunk_limit = chunk->limit - = (char *) chunk + h->chunk_size; - chunk->prev = 0; - /* The initial chunk now contains no empty object. */ - h->maybe_empty_object = 0; - h->alloc_failed = 0; - return 1; -} - -int -_obstack_begin_1 (struct obstack *h, int size, int alignment, - void *(*chunkfun) (void *, long), - void (*freefun) (void *, void *), - void *arg) -{ - register struct _obstack_chunk *chunk; /* points to new chunk */ - - if (alignment == 0) - alignment = DEFAULT_ALIGNMENT; - if (size == 0) - /* Default size is what GNU malloc can fit in a 4096-byte block. */ - { - /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc. - Use the values for range checking, because if range checking is off, - the extra bytes won't be missed terribly, but if range checking is on - and we used a larger request, a whole extra 4096 bytes would be - allocated. - - These number are irrelevant to the new GNU malloc. I suspect it is - less sensitive to the size of the request. */ - int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1)) - + 4 + DEFAULT_ROUNDING - 1) - & ~(DEFAULT_ROUNDING - 1)); - size = 4096 - extra; - } - - h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun; - h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun; - h->chunk_size = size; - h->alignment_mask = alignment - 1; - h->extra_arg = arg; - h->use_extra_arg = 1; - - chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); - if (!chunk) - (*obstack_alloc_failed_handler) (); - h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents, - alignment - 1); - h->chunk_limit = chunk->limit - = (char *) chunk + h->chunk_size; - chunk->prev = 0; - /* The initial chunk now contains no empty object. */ - h->maybe_empty_object = 0; - h->alloc_failed = 0; - return 1; -} - -/* Allocate a new current chunk for the obstack *H - on the assumption that LENGTH bytes need to be added - to the current object, or a new object of length LENGTH allocated. - Copies any partial object from the end of the old chunk - to the beginning of the new one. */ - -void -_obstack_newchunk (struct obstack *h, int length) -{ - register struct _obstack_chunk *old_chunk = h->chunk; - register struct _obstack_chunk *new_chunk; - register long new_size; - register long obj_size = h->next_free - h->object_base; - register long i; - long already; - char *object_base; - - /* Compute size for new chunk. */ - new_size = (obj_size + length) + (obj_size >> 3) + h->alignment_mask + 100; - if (new_size < h->chunk_size) - new_size = h->chunk_size; - - /* Allocate and initialize the new chunk. */ - new_chunk = CALL_CHUNKFUN (h, new_size); - if (!new_chunk) - (*obstack_alloc_failed_handler) (); - h->chunk = new_chunk; - new_chunk->prev = old_chunk; - new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size; - - /* Compute an aligned object_base in the new chunk */ - object_base = - __PTR_ALIGN ((char *) new_chunk, new_chunk->contents, h->alignment_mask); - - /* Move the existing object to the new chunk. - Word at a time is fast and is safe if the object - is sufficiently aligned. */ - if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT) - { - for (i = obj_size / sizeof (COPYING_UNIT) - 1; - i >= 0; i--) - ((COPYING_UNIT *)object_base)[i] - = ((COPYING_UNIT *)h->object_base)[i]; - /* We used to copy the odd few remaining bytes as one extra COPYING_UNIT, - but that can cross a page boundary on a machine - which does not do strict alignment for COPYING_UNITS. */ - already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT); - } - else - already = 0; - /* Copy remaining bytes one by one. */ - for (i = already; i < obj_size; i++) - object_base[i] = h->object_base[i]; - - /* If the object just copied was the only data in OLD_CHUNK, - free that chunk and remove it from the chain. - But not if that chunk might contain an empty object. */ - if (! h->maybe_empty_object - && (h->object_base - == __PTR_ALIGN ((char *) old_chunk, old_chunk->contents, - h->alignment_mask))) - { - new_chunk->prev = old_chunk->prev; - CALL_FREEFUN (h, old_chunk); - } - - h->object_base = object_base; - h->next_free = h->object_base + obj_size; - /* The new chunk certainly contains no empty object yet. */ - h->maybe_empty_object = 0; -} -# ifdef _LIBC -libc_hidden_def (_obstack_newchunk) -# endif - -/* Return nonzero if object OBJ has been allocated from obstack H. - This is here for debugging. - If you use it in a program, you are probably losing. */ - -/* Suppress -Wmissing-prototypes warning. We don't want to declare this in - obstack.h because it is just for debugging. */ -int _obstack_allocated_p (struct obstack *h, void *obj); - -int -_obstack_allocated_p (struct obstack *h, void *obj) -{ - register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ - register struct _obstack_chunk *plp; /* point to previous chunk if any */ - - lp = (h)->chunk; - /* We use >= rather than > since the object cannot be exactly at - the beginning of the chunk but might be an empty object exactly - at the end of an adjacent chunk. */ - while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj)) - { - plp = lp->prev; - lp = plp; - } - return lp != 0; -} - -/* Free objects in obstack H, including OBJ and everything allocate - more recently than OBJ. If OBJ is zero, free everything in H. */ - -# undef obstack_free - -void -__obstack_free (struct obstack *h, void *obj) -{ - register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ - register struct _obstack_chunk *plp; /* point to previous chunk if any */ - - lp = h->chunk; - /* We use >= because there cannot be an object at the beginning of a chunk. - But there can be an empty object at that address - at the end of another chunk. */ - while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj)) - { - plp = lp->prev; - CALL_FREEFUN (h, lp); - lp = plp; - /* If we switch chunks, we can't tell whether the new current - chunk contains an empty object, so assume that it may. */ - h->maybe_empty_object = 1; - } - if (lp) - { - h->object_base = h->next_free = (char *) (obj); - h->chunk_limit = lp->limit; - h->chunk = lp; - } - else if (obj != 0) - /* obj is not in any of the chunks! */ - abort (); -} - -# ifdef _LIBC -/* Older versions of libc used a function _obstack_free intended to be - called by non-GCC compilers. */ -strong_alias (obstack_free, _obstack_free) -# endif - -int -_obstack_memory_used (struct obstack *h) -{ - register struct _obstack_chunk* lp; - register int nbytes = 0; - - for (lp = h->chunk; lp != 0; lp = lp->prev) - { - nbytes += lp->limit - (char *) lp; - } - return nbytes; -} - -/* Define the error handler. */ -# ifdef _LIBC -# include <libintl.h> -# else -# include "gettext.h" -# endif -# ifndef _ -# define _(msgid) gettext (msgid) -# endif - -# ifdef _LIBC -# include <libio/iolibio.h> -# endif - -# ifndef __attribute__ -/* This feature is available in gcc versions 2.5 and later. */ -# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) -# define __attribute__(Spec) /* empty */ -# endif -# endif - -static void -__attribute__ ((noreturn)) -print_and_abort (void) -{ - /* Don't change any of these strings. Yes, it would be possible to add - the newline to the string and use fputs or so. But this must not - happen because the "memory exhausted" message appears in other places - like this and the translation should be reused instead of creating - a very similar string which requires a separate translation. */ -# ifdef _LIBC - (void) __fxprintf (NULL, "%s\n", _("memory exhausted")); -# else - fprintf (stderr, "%s\n", _("memory exhausted")); -# endif - exit (obstack_exit_failure); -} - -#endif /* !ELIDE_CODE */ diff --git a/lib/obstack.h b/lib/obstack.h deleted file mode 100644 index 3315dfe..0000000 --- a/lib/obstack.h +++ /dev/null @@ -1,513 +0,0 @@ -/* obstack.h - object stack macros - Copyright (C) 1988-1994,1996-1999,2003,2004,2005,2006 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Summary: - -All the apparent functions defined here are macros. The idea -is that you would use these pre-tested macros to solve a -very specific set of problems, and they would run fast. -Caution: no side-effects in arguments please!! They may be -evaluated MANY times!! - -These macros operate a stack of objects. Each object starts life -small, and may grow to maturity. (Consider building a word syllable -by syllable.) An object can move while it is growing. Once it has -been "finished" it never changes address again. So the "top of the -stack" is typically an immature growing object, while the rest of the -stack is of mature, fixed size and fixed address objects. - -These routines grab large chunks of memory, using a function you -supply, called `obstack_chunk_alloc'. On occasion, they free chunks, -by calling `obstack_chunk_free'. You must define them and declare -them before using any obstack macros. - -Each independent stack is represented by a `struct obstack'. -Each of the obstack macros expects a pointer to such a structure -as the first argument. - -One motivation for this package is the problem of growing char strings -in symbol tables. Unless you are "fascist pig with a read-only mind" ---Gosper's immortal quote from HAKMEM item 154, out of context--you -would not like to put any arbitrary upper limit on the length of your -symbols. - -In practice this often means you will build many short symbols and a -few long symbols. At the time you are reading a symbol you don't know -how long it is. One traditional method is to read a symbol into a -buffer, realloc()ating the buffer every time you try to read a symbol -that is longer than the buffer. This is beaut, but you still will -want to copy the symbol from the buffer to a more permanent -symbol-table entry say about half the time. - -With obstacks, you can work differently. Use one obstack for all symbol -names. As you read a symbol, grow the name in the obstack gradually. -When the name is complete, finalize it. Then, if the symbol exists already, -free the newly read name. - -The way we do this is to take a large chunk, allocating memory from -low addresses. When you want to build a symbol in the chunk you just -add chars above the current "high water mark" in the chunk. When you -have finished adding chars, because you got to the end of the symbol, -you know how long the chars are, and you can create a new object. -Mostly the chars will not burst over the highest address of the chunk, -because you would typically expect a chunk to be (say) 100 times as -long as an average object. - -In case that isn't clear, when we have enough chars to make up -the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed) -so we just point to it where it lies. No moving of chars is -needed and this is the second win: potentially long strings need -never be explicitly shuffled. Once an object is formed, it does not -change its address during its lifetime. - -When the chars burst over a chunk boundary, we allocate a larger -chunk, and then copy the partly formed object from the end of the old -chunk to the beginning of the new larger chunk. We then carry on -accreting characters to the end of the object as we normally would. - -A special macro is provided to add a single char at a time to a -growing object. This allows the use of register variables, which -break the ordinary 'growth' macro. - -Summary: - We allocate large chunks. - We carve out one object at a time from the current chunk. - Once carved, an object never moves. - We are free to append data of any size to the currently - growing object. - Exactly one object is growing in an obstack at any one time. - You can run one obstack per control block. - You may have as many control blocks as you dare. - Because of the way we do it, you can `unwind' an obstack - back to a previous state. (You may remove objects much - as you would with a stack.) -*/ - - -/* Don't do the contents of this file more than once. */ - -#ifndef _OBSTACK_H -#define _OBSTACK_H 1 - -#ifdef __cplusplus -extern "C" { -#endif - -/* We need the type of a pointer subtraction. If __PTRDIFF_TYPE__ is - defined, as with GNU C, use that; that way we don't pollute the - namespace with <stddef.h>'s symbols. Otherwise, include <stddef.h> - and use ptrdiff_t. */ - -#ifdef __PTRDIFF_TYPE__ -# define PTR_INT_TYPE __PTRDIFF_TYPE__ -#else -# include <stddef.h> -# define PTR_INT_TYPE ptrdiff_t -#endif - -/* If B is the base of an object addressed by P, return the result of - aligning P to the next multiple of A + 1. B and P must be of type - char *. A + 1 must be a power of 2. */ - -#define __BPTR_ALIGN(B, P, A) ((B) + (((P) - (B) + (A)) & ~(A))) - -/* Similiar to _BPTR_ALIGN (B, P, A), except optimize the common case - where pointers can be converted to integers, aligned as integers, - and converted back again. If PTR_INT_TYPE is narrower than a - pointer (e.g., the AS/400), play it safe and compute the alignment - relative to B. Otherwise, use the faster strategy of computing the - alignment relative to 0. */ - -#define __PTR_ALIGN(B, P, A) \ - __BPTR_ALIGN (sizeof (PTR_INT_TYPE) < sizeof (void *) ? (B) : (char *) 0, \ - P, A) - -#include <string.h> - -struct _obstack_chunk /* Lives at front of each chunk. */ -{ - char *limit; /* 1 past end of this chunk */ - struct _obstack_chunk *prev; /* address of prior chunk or NULL */ - char contents[4]; /* objects begin here */ -}; - -struct obstack /* control current object in current chunk */ -{ - long chunk_size; /* preferred size to allocate chunks in */ - struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */ - char *object_base; /* address of object we are building */ - char *next_free; /* where to add next char to current object */ - char *chunk_limit; /* address of char after current chunk */ - union - { - PTR_INT_TYPE tempint; - void *tempptr; - } temp; /* Temporary for some macros. */ - int alignment_mask; /* Mask of alignment for each object. */ - /* These prototypes vary based on `use_extra_arg', and we use - casts to the prototypeless function type in all assignments, - but having prototypes here quiets -Wstrict-prototypes. */ - struct _obstack_chunk *(*chunkfun) (void *, long); - void (*freefun) (void *, struct _obstack_chunk *); - void *extra_arg; /* first arg for chunk alloc/dealloc funcs */ - unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */ - unsigned maybe_empty_object:1;/* There is a possibility that the current - chunk contains a zero-length object. This - prevents freeing the chunk if we allocate - a bigger chunk to replace it. */ - unsigned alloc_failed:1; /* No longer used, as we now call the failed - handler on error, but retained for binary - compatibility. */ -}; - -/* Declare the external functions we use; they are in obstack.c. */ - -extern void _obstack_newchunk (struct obstack *, int); -extern int _obstack_begin (struct obstack *, int, int, - void *(*) (long), void (*) (void *)); -extern int _obstack_begin_1 (struct obstack *, int, int, - void *(*) (void *, long), - void (*) (void *, void *), void *); -extern int _obstack_memory_used (struct obstack *); - -/* The default name of the function for freeing a chunk is 'obstack_free', - but gnulib users can override this by defining '__obstack_free'. */ -#ifndef __obstack_free -# define __obstack_free obstack_free -#endif -extern void __obstack_free (struct obstack *obstack, void *block); - - -/* Error handler called when `obstack_chunk_alloc' failed to allocate - more memory. This can be set to a user defined function which - should either abort gracefully or use longjump - but shouldn't - return. The default action is to print a message and abort. */ -extern void (*obstack_alloc_failed_handler) (void); - -/* Exit value used when `print_and_abort' is used. */ -extern int obstack_exit_failure; - -/* Pointer to beginning of object being allocated or to be allocated next. - Note that this might not be the final address of the object - because a new chunk might be needed to hold the final size. */ - -#define obstack_base(h) ((void *) (h)->object_base) - -/* Size for allocating ordinary chunks. */ - -#define obstack_chunk_size(h) ((h)->chunk_size) - -/* Pointer to next byte not yet allocated in current chunk. */ - -#define obstack_next_free(h) ((h)->next_free) - -/* Mask specifying low bits that should be clear in address of an object. */ - -#define obstack_alignment_mask(h) ((h)->alignment_mask) - -/* To prevent prototype warnings provide complete argument list. */ -#define obstack_init(h) \ - _obstack_begin ((h), 0, 0, \ - (void *(*) (long)) obstack_chunk_alloc, \ - (void (*) (void *)) obstack_chunk_free) - -#define obstack_begin(h, size) \ - _obstack_begin ((h), (size), 0, \ - (void *(*) (long)) obstack_chunk_alloc, \ - (void (*) (void *)) obstack_chunk_free) - -#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ - _obstack_begin ((h), (size), (alignment), \ - (void *(*) (long)) (chunkfun), \ - (void (*) (void *)) (freefun)) - -#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ - _obstack_begin_1 ((h), (size), (alignment), \ - (void *(*) (void *, long)) (chunkfun), \ - (void (*) (void *, void *)) (freefun), (arg)) - -#define obstack_chunkfun(h, newchunkfun) \ - ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun)) - -#define obstack_freefun(h, newfreefun) \ - ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun)) - -#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = (achar)) - -#define obstack_blank_fast(h,n) ((h)->next_free += (n)) - -#define obstack_memory_used(h) _obstack_memory_used (h) - -#if defined __GNUC__ && defined __STDC__ && __STDC__ -/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and - does not implement __extension__. But that compiler doesn't define - __GNUC_MINOR__. */ -# if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__) -# define __extension__ -# endif - -/* For GNU C, if not -traditional, - we can define these macros to compute all args only once - without using a global variable. - Also, we can avoid using the `temp' slot, to make faster code. */ - -# define obstack_object_size(OBSTACK) \ - __extension__ \ - ({ struct obstack const *__o = (OBSTACK); \ - (unsigned) (__o->next_free - __o->object_base); }) - -# define obstack_room(OBSTACK) \ - __extension__ \ - ({ struct obstack const *__o = (OBSTACK); \ - (unsigned) (__o->chunk_limit - __o->next_free); }) - -# define obstack_make_room(OBSTACK,length) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - int __len = (length); \ - if (__o->chunk_limit - __o->next_free < __len) \ - _obstack_newchunk (__o, __len); \ - (void) 0; }) - -# define obstack_empty_p(OBSTACK) \ - __extension__ \ - ({ struct obstack const *__o = (OBSTACK); \ - (__o->chunk->prev == 0 \ - && __o->next_free == __PTR_ALIGN ((char *) __o->chunk, \ - __o->chunk->contents, \ - __o->alignment_mask)); }) - -# define obstack_grow(OBSTACK,where,length) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - int __len = (length); \ - if (__o->next_free + __len > __o->chunk_limit) \ - _obstack_newchunk (__o, __len); \ - memcpy (__o->next_free, where, __len); \ - __o->next_free += __len; \ - (void) 0; }) - -# define obstack_grow0(OBSTACK,where,length) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - int __len = (length); \ - if (__o->next_free + __len + 1 > __o->chunk_limit) \ - _obstack_newchunk (__o, __len + 1); \ - memcpy (__o->next_free, where, __len); \ - __o->next_free += __len; \ - *(__o->next_free)++ = 0; \ - (void) 0; }) - -# define obstack_1grow(OBSTACK,datum) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - if (__o->next_free + 1 > __o->chunk_limit) \ - _obstack_newchunk (__o, 1); \ - obstack_1grow_fast (__o, datum); \ - (void) 0; }) - -/* These assume that the obstack alignment is good enough for pointers - or ints, and that the data added so far to the current object - shares that much alignment. */ - -# define obstack_ptr_grow(OBSTACK,datum) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - if (__o->next_free + sizeof (void *) > __o->chunk_limit) \ - _obstack_newchunk (__o, sizeof (void *)); \ - obstack_ptr_grow_fast (__o, datum); }) \ - -# define obstack_int_grow(OBSTACK,datum) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - if (__o->next_free + sizeof (int) > __o->chunk_limit) \ - _obstack_newchunk (__o, sizeof (int)); \ - obstack_int_grow_fast (__o, datum); }) - -# define obstack_ptr_grow_fast(OBSTACK,aptr) \ -__extension__ \ -({ struct obstack *__o1 = (OBSTACK); \ - *(const void **) __o1->next_free = (aptr); \ - __o1->next_free += sizeof (const void *); \ - (void) 0; }) - -# define obstack_int_grow_fast(OBSTACK,aint) \ -__extension__ \ -({ struct obstack *__o1 = (OBSTACK); \ - *(int *) __o1->next_free = (aint); \ - __o1->next_free += sizeof (int); \ - (void) 0; }) - -# define obstack_blank(OBSTACK,length) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - int __len = (length); \ - if (__o->chunk_limit - __o->next_free < __len) \ - _obstack_newchunk (__o, __len); \ - obstack_blank_fast (__o, __len); \ - (void) 0; }) - -# define obstack_alloc(OBSTACK,length) \ -__extension__ \ -({ struct obstack *__h = (OBSTACK); \ - obstack_blank (__h, (length)); \ - obstack_finish (__h); }) - -# define obstack_copy(OBSTACK,where,length) \ -__extension__ \ -({ struct obstack *__h = (OBSTACK); \ - obstack_grow (__h, (where), (length)); \ - obstack_finish (__h); }) - -# define obstack_copy0(OBSTACK,where,length) \ -__extension__ \ -({ struct obstack *__h = (OBSTACK); \ - obstack_grow0 (__h, (where), (length)); \ - obstack_finish (__h); }) - -/* The local variable is named __o1 to avoid a name conflict - when obstack_blank is called. */ -# define obstack_finish(OBSTACK) \ -__extension__ \ -({ struct obstack *__o1 = (OBSTACK); \ - void *__value = (void *) __o1->object_base; \ - if (__o1->next_free == __value) \ - __o1->maybe_empty_object = 1; \ - __o1->next_free \ - = __PTR_ALIGN (__o1->object_base, __o1->next_free, \ - __o1->alignment_mask); \ - if (__o1->next_free - (char *)__o1->chunk \ - > __o1->chunk_limit - (char *)__o1->chunk) \ - __o1->next_free = __o1->chunk_limit; \ - __o1->object_base = __o1->next_free; \ - __value; }) - -# define obstack_free(OBSTACK, OBJ) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - void *__obj = (OBJ); \ - if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \ - __o->next_free = __o->object_base = (char *)__obj; \ - else (__obstack_free) (__o, __obj); }) - -#else /* not __GNUC__ or not __STDC__ */ - -# define obstack_object_size(h) \ - (unsigned) ((h)->next_free - (h)->object_base) - -# define obstack_room(h) \ - (unsigned) ((h)->chunk_limit - (h)->next_free) - -# define obstack_empty_p(h) \ - ((h)->chunk->prev == 0 \ - && (h)->next_free == __PTR_ALIGN ((char *) (h)->chunk, \ - (h)->chunk->contents, \ - (h)->alignment_mask)) - -/* Note that the call to _obstack_newchunk is enclosed in (..., 0) - so that we can avoid having void expressions - in the arms of the conditional expression. - Casting the third operand to void was tried before, - but some compilers won't accept it. */ - -# define obstack_make_room(h,length) \ -( (h)->temp.tempint = (length), \ - (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0)) - -# define obstack_grow(h,where,length) \ -( (h)->temp.tempint = (length), \ - (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \ - memcpy ((h)->next_free, where, (h)->temp.tempint), \ - (h)->next_free += (h)->temp.tempint) - -# define obstack_grow0(h,where,length) \ -( (h)->temp.tempint = (length), \ - (((h)->next_free + (h)->temp.tempint + 1 > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), (h)->temp.tempint + 1), 0) : 0), \ - memcpy ((h)->next_free, where, (h)->temp.tempint), \ - (h)->next_free += (h)->temp.tempint, \ - *((h)->next_free)++ = 0) - -# define obstack_1grow(h,datum) \ -( (((h)->next_free + 1 > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), 1), 0) : 0), \ - obstack_1grow_fast (h, datum)) - -# define obstack_ptr_grow(h,datum) \ -( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \ - obstack_ptr_grow_fast (h, datum)) - -# define obstack_int_grow(h,datum) \ -( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \ - obstack_int_grow_fast (h, datum)) - -# define obstack_ptr_grow_fast(h,aptr) \ - (((const void **) ((h)->next_free += sizeof (void *)))[-1] = (aptr)) - -# define obstack_int_grow_fast(h,aint) \ - (((int *) ((h)->next_free += sizeof (int)))[-1] = (aint)) - -# define obstack_blank(h,length) \ -( (h)->temp.tempint = (length), \ - (((h)->chunk_limit - (h)->next_free < (h)->temp.tempint) \ - ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \ - obstack_blank_fast (h, (h)->temp.tempint)) - -# define obstack_alloc(h,length) \ - (obstack_blank ((h), (length)), obstack_finish ((h))) - -# define obstack_copy(h,where,length) \ - (obstack_grow ((h), (where), (length)), obstack_finish ((h))) - -# define obstack_copy0(h,where,length) \ - (obstack_grow0 ((h), (where), (length)), obstack_finish ((h))) - -# define obstack_finish(h) \ -( ((h)->next_free == (h)->object_base \ - ? (((h)->maybe_empty_object = 1), 0) \ - : 0), \ - (h)->temp.tempptr = (h)->object_base, \ - (h)->next_free \ - = __PTR_ALIGN ((h)->object_base, (h)->next_free, \ - (h)->alignment_mask), \ - (((h)->next_free - (char *) (h)->chunk \ - > (h)->chunk_limit - (char *) (h)->chunk) \ - ? ((h)->next_free = (h)->chunk_limit) : 0), \ - (h)->object_base = (h)->next_free, \ - (h)->temp.tempptr) - -# define obstack_free(h,obj) \ -( (h)->temp.tempint = (char *) (obj) - (char *) (h)->chunk, \ - ((((h)->temp.tempint > 0 \ - && (h)->temp.tempint < (h)->chunk_limit - (char *) (h)->chunk)) \ - ? (int) ((h)->next_free = (h)->object_base \ - = (h)->temp.tempint + (char *) (h)->chunk) \ - : (((__obstack_free) ((h), (h)->temp.tempint + (char *) (h)->chunk), 0), 0))) - -#endif /* not __GNUC__ or not __STDC__ */ - -#ifdef __cplusplus -} /* C++ */ -#endif - -#endif /* obstack.h */ diff --git a/lib/offtostr.c b/lib/offtostr.c deleted file mode 100644 index 45196e2..0000000 --- a/lib/offtostr.c +++ /dev/null @@ -1,3 +0,0 @@ -#define inttostr offtostr -#define inttype off_t -#include "inttostr.c" diff --git a/lib/pathmax.h b/lib/pathmax.h deleted file mode 100644 index 6941e45..0000000 --- a/lib/pathmax.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Define PATH_MAX somehow. Requires sys/types.h. - Copyright (C) 1992, 1999, 2001, 2003, 2005 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _PATHMAX_H -# define _PATHMAX_H - -# include <unistd.h> - -# include <limits.h> - -# ifndef _POSIX_PATH_MAX -# define _POSIX_PATH_MAX 256 -# endif - -# if !defined PATH_MAX && defined _PC_PATH_MAX -# define PATH_MAX (pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 \ - : pathconf ("/", _PC_PATH_MAX)) -# endif - -/* Don't include sys/param.h if it already has been. */ -# if defined HAVE_SYS_PARAM_H && !defined PATH_MAX && !defined MAXPATHLEN -# include <sys/param.h> -# endif - -# if !defined PATH_MAX && defined MAXPATHLEN -# define PATH_MAX MAXPATHLEN -# endif - -# ifndef PATH_MAX -# define PATH_MAX _POSIX_PATH_MAX -# endif - -#endif /* _PATHMAX_H */ diff --git a/lib/quotearg.c b/lib/quotearg.c deleted file mode 100644 index c9e89bf..0000000 --- a/lib/quotearg.c +++ /dev/null @@ -1,698 +0,0 @@ -/* quotearg.c - quote arguments for output - - Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free - Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Paul Eggert <eggert@twinsun.com> */ - -#include <config.h> - -#include "quotearg.h" - -#include "xalloc.h" - -#include <ctype.h> -#include <errno.h> -#include <limits.h> -#include <stdbool.h> -#include <stdlib.h> -#include <string.h> -#include <wchar.h> - -#include "gettext.h" -#define _(msgid) gettext (msgid) -#define N_(msgid) msgid - -#if !HAVE_MBRTOWC -/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the - other macros are defined only for documentation and to satisfy C - syntax. */ -# undef MB_CUR_MAX -# define MB_CUR_MAX 1 -# undef mbstate_t -# define mbstate_t int -# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) -# define iswprint(wc) isprint ((unsigned char) (wc)) -# undef HAVE_MBSINIT -#endif - -#if !defined mbsinit && !HAVE_MBSINIT -# define mbsinit(ps) 1 -#endif - -#include <wctype.h> - -#ifndef SIZE_MAX -# define SIZE_MAX ((size_t) -1) -#endif - -#define INT_BITS (sizeof (int) * CHAR_BIT) - -struct quoting_options -{ - /* Basic quoting style. */ - enum quoting_style style; - - /* Quote the characters indicated by this bit vector even if the - quoting style would not normally require them to be quoted. */ - unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; -}; - -/* Names of quoting styles. */ -char const *const quoting_style_args[] = -{ - "literal", - "shell", - "shell-always", - "c", - "escape", - "locale", - "clocale", - 0 -}; - -/* Correspondences to quoting style names. */ -enum quoting_style const quoting_style_vals[] = -{ - literal_quoting_style, - shell_quoting_style, - shell_always_quoting_style, - c_quoting_style, - escape_quoting_style, - locale_quoting_style, - clocale_quoting_style -}; - -/* The default quoting options. */ -static struct quoting_options default_quoting_options; - -/* Allocate a new set of quoting options, with contents initially identical - to O if O is not null, or to the default if O is null. - It is the caller's responsibility to free the result. */ -struct quoting_options * -clone_quoting_options (struct quoting_options *o) -{ - int e = errno; - struct quoting_options *p = xmemdup (o ? o : &default_quoting_options, - sizeof *o); - errno = e; - return p; -} - -/* Get the value of O's quoting style. If O is null, use the default. */ -enum quoting_style -get_quoting_style (struct quoting_options *o) -{ - return (o ? o : &default_quoting_options)->style; -} - -/* In O (or in the default if O is null), - set the value of the quoting style to S. */ -void -set_quoting_style (struct quoting_options *o, enum quoting_style s) -{ - (o ? o : &default_quoting_options)->style = s; -} - -/* In O (or in the default if O is null), - set the value of the quoting options for character C to I. - Return the old value. Currently, the only values defined for I are - 0 (the default) and 1 (which means to quote the character even if - it would not otherwise be quoted). */ -int -set_char_quoting (struct quoting_options *o, char c, int i) -{ - unsigned char uc = c; - unsigned int *p = - (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; - int shift = uc % INT_BITS; - int r = (*p >> shift) & 1; - *p ^= ((i & 1) ^ r) << shift; - return r; -} - -/* MSGID approximates a quotation mark. Return its translation if it - has one; otherwise, return either it or "\"", depending on S. */ -static char const * -gettext_quote (char const *msgid, enum quoting_style s) -{ - char const *translation = _(msgid); - if (translation == msgid && s == clocale_quoting_style) - translation = "\""; - return translation; -} - -/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of - argument ARG (of size ARGSIZE), using QUOTING_STYLE and the - non-quoting-style part of O to control quoting. - Terminate the output with a null character, and return the written - size of the output, not counting the terminating null. - If BUFFERSIZE is too small to store the output string, return the - value that would have been returned had BUFFERSIZE been large enough. - If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. - - This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, - ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting - style specified by O, and O may not be null. */ - -static size_t -quotearg_buffer_restyled (char *buffer, size_t buffersize, - char const *arg, size_t argsize, - enum quoting_style quoting_style, - struct quoting_options const *o) -{ - size_t i; - size_t len = 0; - char const *quote_string = 0; - size_t quote_string_len = 0; - bool backslash_escapes = false; - bool unibyte_locale = MB_CUR_MAX == 1; - -#define STORE(c) \ - do \ - { \ - if (len < buffersize) \ - buffer[len] = (c); \ - len++; \ - } \ - while (0) - - switch (quoting_style) - { - case c_quoting_style: - STORE ('"'); - backslash_escapes = true; - quote_string = "\""; - quote_string_len = 1; - break; - - case escape_quoting_style: - backslash_escapes = true; - break; - - case locale_quoting_style: - case clocale_quoting_style: - { - /* TRANSLATORS: - Get translations for open and closing quotation marks. - - The message catalog should translate "`" to a left - quotation mark suitable for the locale, and similarly for - "'". If the catalog has no translation, - locale_quoting_style quotes `like this', and - clocale_quoting_style quotes "like this". - - For example, an American English Unicode locale should - translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and - should translate "'" to U+201D (RIGHT DOUBLE QUOTATION - MARK). A British English Unicode locale should instead - translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and - U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. - - If you don't know what to put here, please see - <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs> - and use glyphs suitable for your language. */ - - char const *left = gettext_quote (N_("`"), quoting_style); - char const *right = gettext_quote (N_("'"), quoting_style); - for (quote_string = left; *quote_string; quote_string++) - STORE (*quote_string); - backslash_escapes = true; - quote_string = right; - quote_string_len = strlen (quote_string); - } - break; - - case shell_always_quoting_style: - STORE ('\''); - quote_string = "'"; - quote_string_len = 1; - break; - - default: - break; - } - - for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++) - { - unsigned char c; - unsigned char esc; - - if (backslash_escapes - && quote_string_len - && i + quote_string_len <= argsize - && memcmp (arg + i, quote_string, quote_string_len) == 0) - STORE ('\\'); - - c = arg[i]; - switch (c) - { - case '\0': - if (backslash_escapes) - { - STORE ('\\'); - STORE ('0'); - STORE ('0'); - c = '0'; - } - break; - - case '?': - switch (quoting_style) - { - case shell_quoting_style: - goto use_shell_always_quoting_style; - - case c_quoting_style: - if (i + 2 < argsize && arg[i + 1] == '?') - switch (arg[i + 2]) - { - case '!': case '\'': - case '(': case ')': case '-': case '/': - case '<': case '=': case '>': - /* Escape the second '?' in what would otherwise be - a trigraph. */ - c = arg[i + 2]; - i += 2; - STORE ('?'); - STORE ('\\'); - STORE ('?'); - break; - - default: - break; - } - break; - - default: - break; - } - break; - - case '\a': esc = 'a'; goto c_escape; - case '\b': esc = 'b'; goto c_escape; - case '\f': esc = 'f'; goto c_escape; - case '\n': esc = 'n'; goto c_and_shell_escape; - case '\r': esc = 'r'; goto c_and_shell_escape; - case '\t': esc = 't'; goto c_and_shell_escape; - case '\v': esc = 'v'; goto c_escape; - case '\\': esc = c; goto c_and_shell_escape; - - c_and_shell_escape: - if (quoting_style == shell_quoting_style) - goto use_shell_always_quoting_style; - c_escape: - if (backslash_escapes) - { - c = esc; - goto store_escape; - } - break; - - case '{': case '}': /* sometimes special if isolated */ - if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) - break; - /* Fall through. */ - case '#': case '~': - if (i != 0) - break; - /* Fall through. */ - case ' ': - case '!': /* special in bash */ - case '"': case '$': case '&': - case '(': case ')': case '*': case ';': - case '<': - case '=': /* sometimes special in 0th or (with "set -k") later args */ - case '>': case '[': - case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ - case '`': case '|': - /* A shell special character. In theory, '$' and '`' could - be the first bytes of multibyte characters, which means - we should check them with mbrtowc, but in practice this - doesn't happen so it's not worth worrying about. */ - if (quoting_style == shell_quoting_style) - goto use_shell_always_quoting_style; - break; - - case '\'': - switch (quoting_style) - { - case shell_quoting_style: - goto use_shell_always_quoting_style; - - case shell_always_quoting_style: - STORE ('\''); - STORE ('\\'); - STORE ('\''); - break; - - default: - break; - } - break; - - case '%': case '+': case ',': case '-': case '.': case '/': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': case ':': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': - case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': - case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': - case 'o': case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - /* These characters don't cause problems, no matter what the - quoting style is. They cannot start multibyte sequences. */ - break; - - default: - /* If we have a multibyte sequence, copy it until we reach - its end, find an error, or come back to the initial shift - state. For C-like styles, if the sequence has - unprintable characters, escape the whole sequence, since - we can't easily escape single characters within it. */ - { - /* Length of multibyte sequence found so far. */ - size_t m; - - bool printable; - - if (unibyte_locale) - { - m = 1; - printable = isprint (c) != 0; - } - else - { - mbstate_t mbstate; - memset (&mbstate, 0, sizeof mbstate); - - m = 0; - printable = true; - if (argsize == SIZE_MAX) - argsize = strlen (arg); - - do - { - wchar_t w; - size_t bytes = mbrtowc (&w, &arg[i + m], - argsize - (i + m), &mbstate); - if (bytes == 0) - break; - else if (bytes == (size_t) -1) - { - printable = false; - break; - } - else if (bytes == (size_t) -2) - { - printable = false; - while (i + m < argsize && arg[i + m]) - m++; - break; - } - else - { - /* Work around a bug with older shells that "see" a '\' - that is really the 2nd byte of a multibyte character. - In practice the problem is limited to ASCII - chars >= '@' that are shell special chars. */ - if ('[' == 0x5b && quoting_style == shell_quoting_style) - { - size_t j; - for (j = 1; j < bytes; j++) - switch (arg[i + m + j]) - { - case '[': case '\\': case '^': - case '`': case '|': - goto use_shell_always_quoting_style; - - default: - break; - } - } - - if (! iswprint (w)) - printable = false; - m += bytes; - } - } - while (! mbsinit (&mbstate)); - } - - if (1 < m || (backslash_escapes && ! printable)) - { - /* Output a multibyte sequence, or an escaped - unprintable unibyte character. */ - size_t ilim = i + m; - - for (;;) - { - if (backslash_escapes && ! printable) - { - STORE ('\\'); - STORE ('0' + (c >> 6)); - STORE ('0' + ((c >> 3) & 7)); - c = '0' + (c & 7); - } - if (ilim <= i + 1) - break; - STORE (c); - c = arg[++i]; - } - - goto store_c; - } - } - } - - if (! (backslash_escapes - && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) - goto store_c; - - store_escape: - STORE ('\\'); - - store_c: - STORE (c); - } - - if (i == 0 && quoting_style == shell_quoting_style) - goto use_shell_always_quoting_style; - - if (quote_string) - for (; *quote_string; quote_string++) - STORE (*quote_string); - - if (len < buffersize) - buffer[len] = '\0'; - return len; - - use_shell_always_quoting_style: - return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, - shell_always_quoting_style, o); -} - -/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of - argument ARG (of size ARGSIZE), using O to control quoting. - If O is null, use the default. - Terminate the output with a null character, and return the written - size of the output, not counting the terminating null. - If BUFFERSIZE is too small to store the output string, return the - value that would have been returned had BUFFERSIZE been large enough. - If ARGSIZE is SIZE_MAX, use the string length of the argument for - ARGSIZE. */ -size_t -quotearg_buffer (char *buffer, size_t buffersize, - char const *arg, size_t argsize, - struct quoting_options const *o) -{ - struct quoting_options const *p = o ? o : &default_quoting_options; - int e = errno; - size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, - p->style, p); - errno = e; - return r; -} - -/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly - allocated storage containing the quoted string. */ -char * -quotearg_alloc (char const *arg, size_t argsize, - struct quoting_options const *o) -{ - int e = errno; - size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1; - char *buf = xcharalloc (bufsize); - quotearg_buffer (buf, bufsize, arg, argsize, o); - errno = e; - return buf; -} - -/* A storage slot with size and pointer to a value. */ -struct slotvec -{ - size_t size; - char *val; -}; - -/* Preallocate a slot 0 buffer, so that the caller can always quote - one small component of a "memory exhausted" message in slot 0. */ -static char slot0[256]; -static unsigned int nslots = 1; -static struct slotvec slotvec0 = {sizeof slot0, slot0}; -static struct slotvec *slotvec = &slotvec0; - -void -quotearg_free (void) -{ - struct slotvec *sv = slotvec; - unsigned int i; - for (i = 1; i < nslots; i++) - free (sv[i].val); - if (sv[0].val != slot0) - { - free (sv[0].val); - slotvec0.size = sizeof slot0; - slotvec0.val = slot0; - } - if (sv != &slotvec0) - { - free (sv); - slotvec = &slotvec0; - } - nslots = 1; -} - -/* Use storage slot N to return a quoted version of argument ARG. - ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a - null-terminated string. - OPTIONS specifies the quoting options. - The returned value points to static storage that can be - reused by the next call to this function with the same value of N. - N must be nonnegative. N is deliberately declared with type "int" - to allow for future extensions (using negative values). */ -static char * -quotearg_n_options (int n, char const *arg, size_t argsize, - struct quoting_options const *options) -{ - int e = errno; - - unsigned int n0 = n; - struct slotvec *sv = slotvec; - - if (n < 0) - abort (); - - if (nslots <= n0) - { - /* FIXME: technically, the type of n1 should be `unsigned int', - but that evokes an unsuppressible warning from gcc-4.0.1 and - older. If gcc ever provides an option to suppress that warning, - revert to the original type, so that the test in xalloc_oversized - is once again performed only at compile time. */ - size_t n1 = n0 + 1; - bool preallocated = (sv == &slotvec0); - - if (xalloc_oversized (n1, sizeof *sv)) - xalloc_die (); - - slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv); - if (preallocated) - *sv = slotvec0; - memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv); - nslots = n1; - } - - { - size_t size = sv[n].size; - char *val = sv[n].val; - size_t qsize = quotearg_buffer (val, size, arg, argsize, options); - - if (size <= qsize) - { - sv[n].size = size = qsize + 1; - if (val != slot0) - free (val); - sv[n].val = val = xcharalloc (size); - quotearg_buffer (val, size, arg, argsize, options); - } - - errno = e; - return val; - } -} - -char * -quotearg_n (int n, char const *arg) -{ - return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options); -} - -char * -quotearg (char const *arg) -{ - return quotearg_n (0, arg); -} - -/* Return quoting options for STYLE, with no extra quoting. */ -static struct quoting_options -quoting_options_from_style (enum quoting_style style) -{ - struct quoting_options o; - o.style = style; - memset (o.quote_these_too, 0, sizeof o.quote_these_too); - return o; -} - -char * -quotearg_n_style (int n, enum quoting_style s, char const *arg) -{ - struct quoting_options const o = quoting_options_from_style (s); - return quotearg_n_options (n, arg, SIZE_MAX, &o); -} - -char * -quotearg_n_style_mem (int n, enum quoting_style s, - char const *arg, size_t argsize) -{ - struct quoting_options const o = quoting_options_from_style (s); - return quotearg_n_options (n, arg, argsize, &o); -} - -char * -quotearg_style (enum quoting_style s, char const *arg) -{ - return quotearg_n_style (0, s, arg); -} - -char * -quotearg_char (char const *arg, char ch) -{ - struct quoting_options options; - options = default_quoting_options; - set_char_quoting (&options, ch, 1); - return quotearg_n_options (0, arg, SIZE_MAX, &options); -} - -char * -quotearg_colon (char const *arg) -{ - return quotearg_char (arg, ':'); -} diff --git a/lib/quotearg.h b/lib/quotearg.h deleted file mode 100644 index 4887df3..0000000 --- a/lib/quotearg.h +++ /dev/null @@ -1,140 +0,0 @@ -/* quotearg.h - quote arguments for output - - Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2006 Free - Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Paul Eggert <eggert@twinsun.com> */ - -#ifndef QUOTEARG_H_ -# define QUOTEARG_H_ 1 - -# include <stddef.h> - -/* Basic quoting styles. */ -enum quoting_style - { - /* Output names as-is (ls --quoting-style=literal). */ - literal_quoting_style, - - /* Quote names for the shell if they contain shell metacharacters - or would cause ambiguous output (ls --quoting-style=shell). */ - shell_quoting_style, - - /* Quote names for the shell, even if they would normally not - require quoting (ls --quoting-style=shell-always). */ - shell_always_quoting_style, - - /* Quote names as for a C language string (ls --quoting-style=c). */ - c_quoting_style, - - /* Like c_quoting_style except omit the surrounding double-quote - characters (ls --quoting-style=escape). */ - escape_quoting_style, - - /* Like clocale_quoting_style, but quote `like this' instead of - "like this" in the default C locale (ls --quoting-style=locale). */ - locale_quoting_style, - - /* Like c_quoting_style except use quotation marks appropriate for - the locale (ls --quoting-style=clocale). */ - clocale_quoting_style - }; - -/* For now, --quoting-style=literal is the default, but this may change. */ -# ifndef DEFAULT_QUOTING_STYLE -# define DEFAULT_QUOTING_STYLE literal_quoting_style -# endif - -/* Names of quoting styles and their corresponding values. */ -extern char const *const quoting_style_args[]; -extern enum quoting_style const quoting_style_vals[]; - -struct quoting_options; - -/* The functions listed below set and use a hidden variable - that contains the default quoting style options. */ - -/* Allocate a new set of quoting options, with contents initially identical - to O if O is not null, or to the default if O is null. - It is the caller's responsibility to free the result. */ -struct quoting_options *clone_quoting_options (struct quoting_options *o); - -/* Get the value of O's quoting style. If O is null, use the default. */ -enum quoting_style get_quoting_style (struct quoting_options *o); - -/* In O (or in the default if O is null), - set the value of the quoting style to S. */ -void set_quoting_style (struct quoting_options *o, enum quoting_style s); - -/* In O (or in the default if O is null), - set the value of the quoting options for character C to I. - Return the old value. Currently, the only values defined for I are - 0 (the default) and 1 (which means to quote the character even if - it would not otherwise be quoted). */ -int set_char_quoting (struct quoting_options *o, char c, int i); - -/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of - argument ARG (of size ARGSIZE), using O to control quoting. - If O is null, use the default. - Terminate the output with a null character, and return the written - size of the output, not counting the terminating null. - If BUFFERSIZE is too small to store the output string, return the - value that would have been returned had BUFFERSIZE been large enough. - If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */ -size_t quotearg_buffer (char *buffer, size_t buffersize, - char const *arg, size_t argsize, - struct quoting_options const *o); - -/* Like quotearg_buffer, except return the result in a newly allocated - buffer. It is the caller's responsibility to free the result. */ -char *quotearg_alloc (char const *arg, size_t argsize, - struct quoting_options const *o); - -/* Use storage slot N to return a quoted version of the string ARG. - Use the default quoting options. - The returned value points to static storage that can be - reused by the next call to this function with the same value of N. - N must be nonnegative. */ -char *quotearg_n (int n, char const *arg); - -/* Equivalent to quotearg_n (0, ARG). */ -char *quotearg (char const *arg); - -/* Use style S and storage slot N to return a quoted version of the string ARG. - This is like quotearg_n (N, ARG), except that it uses S with no other - options to specify the quoting method. */ -char *quotearg_n_style (int n, enum quoting_style s, char const *arg); - -/* Use style S and storage slot N to return a quoted version of the - argument ARG of size ARGSIZE. This is like quotearg_n_style - (N, S, ARG), except it can quote null bytes. */ -char *quotearg_n_style_mem (int n, enum quoting_style s, - char const *arg, size_t argsize); - -/* Equivalent to quotearg_n_style (0, S, ARG). */ -char *quotearg_style (enum quoting_style s, char const *arg); - -/* Like quotearg (ARG), except also quote any instances of CH. */ -char *quotearg_char (char const *arg, char ch); - -/* Equivalent to quotearg_char (ARG, ':'). */ -char *quotearg_colon (char const *arg); - -/* Free any dynamically allocated memory. */ -void quotearg_free (void); - -#endif /* !QUOTEARG_H_ */ diff --git a/lib/realloc.c b/lib/realloc.c deleted file mode 100644 index c1fe9e5..0000000 --- a/lib/realloc.c +++ /dev/null @@ -1,45 +0,0 @@ -/* realloc() function that is glibc compatible. - - Copyright (C) 1997, 2003, 2004, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* written by Jim Meyering */ - -#include <config.h> -#undef realloc - -#include <stdlib.h> - -/* Change the size of an allocated block of memory P to N bytes, - with error checking. If N is zero, change it to 1. If P is NULL, - use malloc. */ - -void * -rpl_realloc (void *p, size_t n) -{ - if (n == 0) - { - n = 1; - - /* In theory realloc might fail, so don't rely on it to free. */ - free (p); - p = NULL; - } - - if (p == NULL) - return malloc (n); - return realloc (p, n); -} diff --git a/lib/regcomp.c b/lib/regcomp.c deleted file mode 100644 index 0209bb1..0000000 --- a/lib/regcomp.c +++ /dev/null @@ -1,3850 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002,2003,2004,2005,2006,2007 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, - size_t length, reg_syntax_t syntax); -static void re_compile_fastmap_iter (regex_t *bufp, - const re_dfastate_t *init_state, - char *fastmap); -static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); -#ifdef RE_ENABLE_I18N -static void free_charset (re_charset_t *cset); -#endif /* RE_ENABLE_I18N */ -static void free_workarea_compile (regex_t *preg); -static reg_errcode_t create_initial_state (re_dfa_t *dfa); -#ifdef RE_ENABLE_I18N -static void optimize_utf8 (re_dfa_t *dfa); -#endif -static reg_errcode_t analyze (regex_t *preg); -static reg_errcode_t preorder (bin_tree_t *root, - reg_errcode_t (fn (void *, bin_tree_t *)), - void *extra); -static reg_errcode_t postorder (bin_tree_t *root, - reg_errcode_t (fn (void *, bin_tree_t *)), - void *extra); -static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); -static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); -static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, - bin_tree_t *node); -static reg_errcode_t calc_first (void *extra, bin_tree_t *node); -static reg_errcode_t calc_next (void *extra, bin_tree_t *node); -static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); -static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint); -static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node, - unsigned int constraint); -static reg_errcode_t calc_eclosure (re_dfa_t *dfa); -static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, - Idx node, bool root); -static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); -static Idx fetch_number (re_string_t *input, re_token_t *token, - reg_syntax_t syntax); -static int peek_token (re_token_t *token, re_string_t *input, - reg_syntax_t syntax) internal_function; -static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, - reg_syntax_t syntax, reg_errcode_t *err); -static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - Idx nest, reg_errcode_t *err); -static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - Idx nest, reg_errcode_t *err); -static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - Idx nest, reg_errcode_t *err); -static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - Idx nest, reg_errcode_t *err); -static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, - re_dfa_t *dfa, re_token_t *token, - reg_syntax_t syntax, reg_errcode_t *err); -static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, - re_token_t *token, reg_syntax_t syntax, - reg_errcode_t *err); -static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, - re_string_t *regexp, - re_token_t *token, int token_len, - re_dfa_t *dfa, - reg_syntax_t syntax, - bool accept_hyphen); -static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, - re_string_t *regexp, - re_token_t *token); -#ifdef RE_ENABLE_I18N -static reg_errcode_t build_equiv_class (bitset_t sbcset, - re_charset_t *mbcset, - Idx *equiv_class_alloc, - const unsigned char *name); -static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, - bitset_t sbcset, - re_charset_t *mbcset, - Idx *char_class_alloc, - const unsigned char *class_name, - reg_syntax_t syntax); -#else /* not RE_ENABLE_I18N */ -static reg_errcode_t build_equiv_class (bitset_t sbcset, - const unsigned char *name); -static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, - bitset_t sbcset, - const unsigned char *class_name, - reg_syntax_t syntax); -#endif /* not RE_ENABLE_I18N */ -static bin_tree_t *build_charclass_op (re_dfa_t *dfa, - RE_TRANSLATE_TYPE trans, - const unsigned char *class_name, - const unsigned char *extra, - bool non_match, reg_errcode_t *err); -static bin_tree_t *create_tree (re_dfa_t *dfa, - bin_tree_t *left, bin_tree_t *right, - re_token_type_t type); -static bin_tree_t *create_token_tree (re_dfa_t *dfa, - bin_tree_t *left, bin_tree_t *right, - const re_token_t *token); -static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); -static void free_token (re_token_t *node); -static reg_errcode_t free_tree (void *extra, bin_tree_t *node); -static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. - POSIX doesn't require that we do anything for REG_NOERROR, - but why not be nice? */ - -static const char __re_error_msgid[] = - { -#define REG_NOERROR_IDX 0 - gettext_noop ("Success") /* REG_NOERROR */ - "\0" -#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") - gettext_noop ("No match") /* REG_NOMATCH */ - "\0" -#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") - gettext_noop ("Invalid regular expression") /* REG_BADPAT */ - "\0" -#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") - gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ - "\0" -#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") - gettext_noop ("Invalid character class name") /* REG_ECTYPE */ - "\0" -#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") - gettext_noop ("Trailing backslash") /* REG_EESCAPE */ - "\0" -#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") - gettext_noop ("Invalid back reference") /* REG_ESUBREG */ - "\0" -#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") - gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ - "\0" -#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") - gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ - "\0" -#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") - gettext_noop ("Unmatched \\{") /* REG_EBRACE */ - "\0" -#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") - gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ - "\0" -#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") - gettext_noop ("Invalid range end") /* REG_ERANGE */ - "\0" -#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") - gettext_noop ("Memory exhausted") /* REG_ESPACE */ - "\0" -#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") - gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ - "\0" -#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") - gettext_noop ("Premature end of regular expression") /* REG_EEND */ - "\0" -#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") - gettext_noop ("Regular expression too big") /* REG_ESIZE */ - "\0" -#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") - gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ - }; - -static const size_t __re_error_msgid_idx[] = - { - REG_NOERROR_IDX, - REG_NOMATCH_IDX, - REG_BADPAT_IDX, - REG_ECOLLATE_IDX, - REG_ECTYPE_IDX, - REG_EESCAPE_IDX, - REG_ESUBREG_IDX, - REG_EBRACK_IDX, - REG_EPAREN_IDX, - REG_EBRACE_IDX, - REG_BADBR_IDX, - REG_ERANGE_IDX, - REG_ESPACE_IDX, - REG_BADRPT_IDX, - REG_EEND_IDX, - REG_ESIZE_IDX, - REG_ERPAREN_IDX - }; - -/* Entry points for GNU code. */ - -/* re_compile_pattern is the GNU regular expression compiler: it - compiles PATTERN (of length LENGTH) and puts the result in BUFP. - Returns 0 if the pattern was valid, otherwise an error string. - - Assumes the `allocated' (and perhaps `buffer') and `translate' fields - are set in BUFP on entry. */ - -#ifdef _LIBC -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - size_t length; - struct re_pattern_buffer *bufp; -#else /* size_t might promote */ -const char * -re_compile_pattern (const char *pattern, size_t length, - struct re_pattern_buffer *bufp) -#endif -{ - reg_errcode_t ret; - - /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by - setting no_sub, unless RE_NO_SUB is set. */ - bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); - - /* Match anchors at newline. */ - bufp->newline_anchor = 1; - - ret = re_compile_internal (bufp, pattern, length, re_syntax_options); - - if (!ret) - return NULL; - return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); -} -#ifdef _LIBC -weak_alias (__re_compile_pattern, re_compile_pattern) -#endif - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -/* This has no initializer because initialized variables in Emacs - become read-only after dumping. */ -reg_syntax_t re_syntax_options; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; - return ret; -} -#ifdef _LIBC -weak_alias (__re_set_syntax, re_set_syntax) -#endif - -int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; -{ - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; - char *fastmap = bufp->fastmap; - - memset (fastmap, '\0', sizeof (char) * SBC_MAX); - re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); - if (dfa->init_state != dfa->init_state_word) - re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); - if (dfa->init_state != dfa->init_state_nl) - re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); - if (dfa->init_state != dfa->init_state_begbuf) - re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); - bufp->fastmap_accurate = 1; - return 0; -} -#ifdef _LIBC -weak_alias (__re_compile_fastmap, re_compile_fastmap) -#endif - -static inline void -__attribute ((always_inline)) -re_set_fastmap (char *fastmap, bool icase, int ch) -{ - fastmap[ch] = 1; - if (icase) - fastmap[tolower (ch)] = 1; -} - -/* Helper function for re_compile_fastmap. - Compile fastmap for the initial_state INIT_STATE. */ - -static void -re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, - char *fastmap) -{ - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; - Idx node_cnt; - bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); - for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) - { - Idx node = init_state->nodes.elems[node_cnt]; - re_token_type_t type = dfa->nodes[node].type; - - if (type == CHARACTER) - { - re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); -#ifdef RE_ENABLE_I18N - if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) - { - unsigned char buf[MB_LEN_MAX]; - unsigned char *p; - wchar_t wc; - mbstate_t state; - - p = buf; - *p++ = dfa->nodes[node].opr.c; - while (++node < dfa->nodes_len - && dfa->nodes[node].type == CHARACTER - && dfa->nodes[node].mb_partial) - *p++ = dfa->nodes[node].opr.c; - memset (&state, '\0', sizeof (state)); - if (mbrtowc (&wc, (const char *) buf, p - buf, - &state) == p - buf - && (__wcrtomb ((char *) buf, towlower (wc), &state) - != (size_t) -1)) - re_set_fastmap (fastmap, false, buf[0]); - } -#endif - } - else if (type == SIMPLE_BRACKET) - { - int i, ch; - for (i = 0, ch = 0; i < BITSET_WORDS; ++i) - { - int j; - bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; - for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) - if (w & ((bitset_word_t) 1 << j)) - re_set_fastmap (fastmap, icase, ch); - } - } -#ifdef RE_ENABLE_I18N - else if (type == COMPLEX_BRACKET) - { - Idx i; - re_charset_t *cset = dfa->nodes[node].opr.mbcset; - if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes - || cset->nranges || cset->nchar_classes) - { -# ifdef _LIBC - if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) - { - /* In this case we want to catch the bytes which are - the first byte of any collation elements. - e.g. In da_DK, we want to catch 'a' since "aa" - is a valid collation element, and don't catch - 'b' since 'b' is the only collation element - which starts from 'b'. */ - const int32_t *table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - for (i = 0; i < SBC_MAX; ++i) - if (table[i] < 0) - re_set_fastmap (fastmap, icase, i); - } -# else - if (dfa->mb_cur_max > 1) - for (i = 0; i < SBC_MAX; ++i) - if (__btowc (i) == WEOF) - re_set_fastmap (fastmap, icase, i); -# endif /* not _LIBC */ - } - for (i = 0; i < cset->nmbchars; ++i) - { - char buf[256]; - mbstate_t state; - memset (&state, '\0', sizeof (state)); - if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) - re_set_fastmap (fastmap, icase, *(unsigned char *) buf); - if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) - { - if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) - != (size_t) -1) - re_set_fastmap (fastmap, false, *(unsigned char *) buf); - } - } - } -#endif /* RE_ENABLE_I18N */ - else if (type == OP_PERIOD -#ifdef RE_ENABLE_I18N - || type == OP_UTF8_PERIOD -#endif /* RE_ENABLE_I18N */ - || type == END_OF_RE) - { - memset (fastmap, '\1', sizeof (char) * SBC_MAX); - if (type == END_OF_RE) - bufp->can_be_null = 1; - return; - } - } -} - -/* Entry point for POSIX code. */ -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' to an allocated space for the fastmap; - `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. - - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. - - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for - the return codes and their meanings.) */ - -int -regcomp (preg, pattern, cflags) - regex_t *_Restrict_ preg; - const char *_Restrict_ pattern; - int cflags; -{ - reg_errcode_t ret; - reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED - : RE_SYNTAX_POSIX_BASIC); - - preg->buffer = NULL; - preg->allocated = 0; - preg->used = 0; - - /* Try to allocate space for the fastmap. */ - preg->fastmap = re_malloc (char, SBC_MAX); - if (BE (preg->fastmap == NULL, 0)) - return REG_ESPACE; - - syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - /* It also changes the matching behavior. */ - preg->newline_anchor = 1; - } - else - preg->newline_anchor = 0; - preg->no_sub = !!(cflags & REG_NOSUB); - preg->translate = NULL; - - ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) - ret = REG_EPAREN; - - /* We have already checked preg->fastmap != NULL. */ - if (BE (ret == REG_NOERROR, 1)) - /* Compute the fastmap now, since regexec cannot modify the pattern - buffer. This function never fails in this implementation. */ - (void) re_compile_fastmap (preg); - else - { - /* Some error occurred while compiling the expression. */ - re_free (preg->fastmap); - preg->fastmap = NULL; - } - - return (int) ret; -} -#ifdef _LIBC -weak_alias (__regcomp, regcomp) -#endif - -/* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. We don't use PREG here. */ - -#ifdef _LIBC -size_t -regerror (errcode, preg, errbuf, errbuf_size) - int errcode; - const regex_t *_Restrict_ preg; - char *_Restrict_ errbuf; - size_t errbuf_size; -#else /* size_t might promote */ -size_t -regerror (int errcode, const regex_t *_Restrict_ preg, - char *_Restrict_ errbuf, size_t errbuf_size) -#endif -{ - const char *msg; - size_t msg_size; - - if (BE (errcode < 0 - || errcode >= (int) (sizeof (__re_error_msgid_idx) - / sizeof (__re_error_msgid_idx[0])), 0)) - /* Only error codes returned by the rest of the code should be passed - to this routine. If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); - - msg_size = strlen (msg) + 1; /* Includes the null. */ - - if (BE (errbuf_size != 0, 1)) - { - size_t cpy_size = msg_size; - if (BE (msg_size > errbuf_size, 0)) - { - cpy_size = errbuf_size - 1; - errbuf[cpy_size] = '\0'; - } - memcpy (errbuf, msg, cpy_size); - } - - return msg_size; -} -#ifdef _LIBC -weak_alias (__regerror, regerror) -#endif - - -#ifdef RE_ENABLE_I18N -/* This static array is used for the map to single-byte characters when - UTF-8 is used. Otherwise we would allocate memory just to initialize - it the same all the time. UTF-8 is the preferred encoding so this is - a worthwhile optimization. */ -static const bitset_t utf8_sb_map = -{ - /* Set the first 128 bits. */ -# if 4 * BITSET_WORD_BITS < ASCII_CHARS -# error "bitset_word_t is narrower than 32 bits" -# elif 3 * BITSET_WORD_BITS < ASCII_CHARS - BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, -# elif 2 * BITSET_WORD_BITS < ASCII_CHARS - BITSET_WORD_MAX, BITSET_WORD_MAX, -# elif 1 * BITSET_WORD_BITS < ASCII_CHARS - BITSET_WORD_MAX, -# endif - (BITSET_WORD_MAX - >> (SBC_MAX % BITSET_WORD_BITS == 0 - ? 0 - : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS)) -}; -#endif - - -static void -free_dfa_content (re_dfa_t *dfa) -{ - Idx i, j; - - if (dfa->nodes) - for (i = 0; i < dfa->nodes_len; ++i) - free_token (dfa->nodes + i); - re_free (dfa->nexts); - for (i = 0; i < dfa->nodes_len; ++i) - { - if (dfa->eclosures != NULL) - re_node_set_free (dfa->eclosures + i); - if (dfa->inveclosures != NULL) - re_node_set_free (dfa->inveclosures + i); - if (dfa->edests != NULL) - re_node_set_free (dfa->edests + i); - } - re_free (dfa->edests); - re_free (dfa->eclosures); - re_free (dfa->inveclosures); - re_free (dfa->nodes); - - if (dfa->state_table) - for (i = 0; i <= dfa->state_hash_mask; ++i) - { - struct re_state_table_entry *entry = dfa->state_table + i; - for (j = 0; j < entry->num; ++j) - { - re_dfastate_t *state = entry->array[j]; - free_state (state); - } - re_free (entry->array); - } - re_free (dfa->state_table); -#ifdef RE_ENABLE_I18N - if (dfa->sb_char != utf8_sb_map) - re_free (dfa->sb_char); -#endif - re_free (dfa->subexp_map); -#ifdef DEBUG - re_free (dfa->re_str); -#endif - - re_free (dfa); -} - - -/* Free dynamically allocated space used by PREG. */ - -void -regfree (preg) - regex_t *preg; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - if (BE (dfa != NULL, 1)) - free_dfa_content (dfa); - preg->buffer = NULL; - preg->allocated = 0; - - re_free (preg->fastmap); - preg->fastmap = NULL; - - re_free (preg->translate); - preg->translate = NULL; -} -#ifdef _LIBC -weak_alias (__regfree, regfree) -#endif - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them unless specifically requested. */ - -#if defined _REGEX_RE_COMP || defined _LIBC - -/* BSD has one and only one pattern buffer. */ -static struct re_pattern_buffer re_comp_buf; - -char * -# ifdef _LIBC -/* Make these definitions weak in libc, so POSIX programs can redefine - these names if they don't use our functions, and still use - regcomp/regexec above without link errors. */ -weak_function -# endif -re_comp (s) - const char *s; -{ - reg_errcode_t ret; - char *fastmap; - - if (!s) - { - if (!re_comp_buf.buffer) - return gettext ("No previous regular expression"); - return 0; - } - - if (re_comp_buf.buffer) - { - fastmap = re_comp_buf.fastmap; - re_comp_buf.fastmap = NULL; - __regfree (&re_comp_buf); - memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); - re_comp_buf.fastmap = fastmap; - } - - if (re_comp_buf.fastmap == NULL) - { - re_comp_buf.fastmap = (char *) malloc (SBC_MAX); - if (re_comp_buf.fastmap == NULL) - return (char *) gettext (__re_error_msgid - + __re_error_msgid_idx[(int) REG_ESPACE]); - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - /* Match anchors at newlines. */ - re_comp_buf.newline_anchor = 1; - - ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); - - if (!ret) - return NULL; - - /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ - return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); -} - -#ifdef _LIBC -libc_freeres_fn (free_mem) -{ - __regfree (&re_comp_buf); -} -#endif - -#endif /* _REGEX_RE_COMP */ - -/* Internal entry point. - Compile the regular expression PATTERN, whose length is LENGTH. - SYNTAX indicate regular expression's syntax. */ - -static reg_errcode_t -re_compile_internal (regex_t *preg, const char * pattern, size_t length, - reg_syntax_t syntax) -{ - reg_errcode_t err = REG_NOERROR; - re_dfa_t *dfa; - re_string_t regexp; - - /* Initialize the pattern buffer. */ - preg->fastmap_accurate = 0; - preg->syntax = syntax; - preg->not_bol = preg->not_eol = 0; - preg->used = 0; - preg->re_nsub = 0; - preg->can_be_null = 0; - preg->regs_allocated = REGS_UNALLOCATED; - - /* Initialize the dfa. */ - dfa = (re_dfa_t *) preg->buffer; - if (BE (preg->allocated < sizeof (re_dfa_t), 0)) - { - /* If zero allocated, but buffer is non-null, try to realloc - enough space. This loses if buffer's address is bogus, but - that is the user's responsibility. If ->buffer is NULL this - is a simple allocation. */ - dfa = re_realloc (preg->buffer, re_dfa_t, 1); - if (dfa == NULL) - return REG_ESPACE; - preg->allocated = sizeof (re_dfa_t); - preg->buffer = (unsigned char *) dfa; - } - preg->used = sizeof (re_dfa_t); - - err = init_dfa (dfa, length); - if (BE (err != REG_NOERROR, 0)) - { - free_dfa_content (dfa); - preg->buffer = NULL; - preg->allocated = 0; - return err; - } -#ifdef DEBUG - /* Note: length+1 will not overflow since it is checked in init_dfa. */ - dfa->re_str = re_malloc (char, length + 1); - strncpy (dfa->re_str, pattern, length + 1); -#endif - - __libc_lock_init (dfa->lock); - - err = re_string_construct (®exp, pattern, length, preg->translate, - syntax & RE_ICASE, dfa); - if (BE (err != REG_NOERROR, 0)) - { - re_compile_internal_free_return: - free_workarea_compile (preg); - re_string_destruct (®exp); - free_dfa_content (dfa); - preg->buffer = NULL; - preg->allocated = 0; - return err; - } - - /* Parse the regular expression, and build a structure tree. */ - preg->re_nsub = 0; - dfa->str_tree = parse (®exp, preg, syntax, &err); - if (BE (dfa->str_tree == NULL, 0)) - goto re_compile_internal_free_return; - - /* Analyze the tree and create the nfa. */ - err = analyze (preg); - if (BE (err != REG_NOERROR, 0)) - goto re_compile_internal_free_return; - -#ifdef RE_ENABLE_I18N - /* If possible, do searching in single byte encoding to speed things up. */ - if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) - optimize_utf8 (dfa); -#endif - - /* Then create the initial state of the dfa. */ - err = create_initial_state (dfa); - - /* Release work areas. */ - free_workarea_compile (preg); - re_string_destruct (®exp); - - if (BE (err != REG_NOERROR, 0)) - { - free_dfa_content (dfa); - preg->buffer = NULL; - preg->allocated = 0; - } - - return err; -} - -/* Initialize DFA. We use the length of the regular expression PAT_LEN - as the initial length of some arrays. */ - -static reg_errcode_t -init_dfa (re_dfa_t *dfa, size_t pat_len) -{ - __re_size_t table_size; -#ifndef _LIBC - char *codeset_name; -#endif -#ifdef RE_ENABLE_I18N - size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); -#else - size_t max_i18n_object_size = 0; -#endif - size_t max_object_size = - MAX (sizeof (struct re_state_table_entry), - MAX (sizeof (re_token_t), - MAX (sizeof (re_node_set), - MAX (sizeof (regmatch_t), - max_i18n_object_size)))); - - memset (dfa, '\0', sizeof (re_dfa_t)); - - /* Force allocation of str_tree_storage the first time. */ - dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; - - /* Avoid overflows. The extra "/ 2" is for the table_size doubling - calculation below, and for similar doubling calculations - elsewhere. And it's <= rather than <, because some of the - doubling calculations add 1 afterwards. */ - if (BE (SIZE_MAX / max_object_size / 2 <= pat_len, 0)) - return REG_ESPACE; - - dfa->nodes_alloc = pat_len + 1; - dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); - - /* table_size = 2 ^ ceil(log pat_len) */ - for (table_size = 1; ; table_size <<= 1) - if (table_size > pat_len) - break; - - dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); - dfa->state_hash_mask = table_size - 1; - - dfa->mb_cur_max = MB_CUR_MAX; -#ifdef _LIBC - if (dfa->mb_cur_max == 6 - && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) - dfa->is_utf8 = 1; - dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) - != 0); -#else -# ifdef HAVE_LANGINFO_CODESET - codeset_name = nl_langinfo (CODESET); -# else - codeset_name = getenv ("LC_ALL"); - if (codeset_name == NULL || codeset_name[0] == '\0') - codeset_name = getenv ("LC_CTYPE"); - if (codeset_name == NULL || codeset_name[0] == '\0') - codeset_name = getenv ("LANG"); - if (codeset_name == NULL) - codeset_name = ""; - else if (strchr (codeset_name, '.') != NULL) - codeset_name = strchr (codeset_name, '.') + 1; -# endif - - if (strcasecmp (codeset_name, "UTF-8") == 0 - || strcasecmp (codeset_name, "UTF8") == 0) - dfa->is_utf8 = 1; - - /* We check exhaustively in the loop below if this charset is a - superset of ASCII. */ - dfa->map_notascii = 0; -#endif - -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - { - if (dfa->is_utf8) - dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; - else - { - int i, j, ch; - - dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); - if (BE (dfa->sb_char == NULL, 0)) - return REG_ESPACE; - - /* Set the bits corresponding to single byte chars. */ - for (i = 0, ch = 0; i < BITSET_WORDS; ++i) - for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) - { - wint_t wch = __btowc (ch); - if (wch != WEOF) - dfa->sb_char[i] |= (bitset_word_t) 1 << j; -# ifndef _LIBC - if (isascii (ch) && wch != ch) - dfa->map_notascii = 1; -# endif - } - } - } -#endif - - if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) - return REG_ESPACE; - return REG_NOERROR; -} - -/* Initialize WORD_CHAR table, which indicate which character is - "word". In this case "word" means that it is the word construction - character used by some operators like "\<", "\>", etc. */ - -static void -internal_function -init_word_char (re_dfa_t *dfa) -{ - int i, j, ch; - dfa->word_ops_used = 1; - for (i = 0, ch = 0; i < BITSET_WORDS; ++i) - for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) - if (isalnum (ch) || ch == '_') - dfa->word_char[i] |= (bitset_word_t) 1 << j; -} - -/* Free the work area which are only used while compiling. */ - -static void -free_workarea_compile (regex_t *preg) -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_storage_t *storage, *next; - for (storage = dfa->str_tree_storage; storage; storage = next) - { - next = storage->next; - re_free (storage); - } - dfa->str_tree_storage = NULL; - dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; - dfa->str_tree = NULL; - re_free (dfa->org_indices); - dfa->org_indices = NULL; -} - -/* Create initial states for all contexts. */ - -static reg_errcode_t -create_initial_state (re_dfa_t *dfa) -{ - Idx first, i; - reg_errcode_t err; - re_node_set init_nodes; - - /* Initial states have the epsilon closure of the node which is - the first node of the regular expression. */ - first = dfa->str_tree->first->node_idx; - dfa->init_node = first; - err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* The back-references which are in initial states can epsilon transit, - since in this case all of the subexpressions can be null. - Then we add epsilon closures of the nodes which are the next nodes of - the back-references. */ - if (dfa->nbackref > 0) - for (i = 0; i < init_nodes.nelem; ++i) - { - Idx node_idx = init_nodes.elems[i]; - re_token_type_t type = dfa->nodes[node_idx].type; - - Idx clexp_idx; - if (type != OP_BACK_REF) - continue; - for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) - { - re_token_t *clexp_node; - clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; - if (clexp_node->type == OP_CLOSE_SUBEXP - && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) - break; - } - if (clexp_idx == init_nodes.nelem) - continue; - - if (type == OP_BACK_REF) - { - Idx dest_idx = dfa->edests[node_idx].elems[0]; - if (!re_node_set_contains (&init_nodes, dest_idx)) - { - re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); - i = 0; - } - } - } - - /* It must be the first time to invoke acquire_state. */ - dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); - /* We don't check ERR here, since the initial state must not be NULL. */ - if (BE (dfa->init_state == NULL, 0)) - return err; - if (dfa->init_state->has_constraint) - { - dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, - CONTEXT_WORD); - dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, - CONTEXT_NEWLINE); - dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, - &init_nodes, - CONTEXT_NEWLINE - | CONTEXT_BEGBUF); - if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL - || dfa->init_state_begbuf == NULL, 0)) - return err; - } - else - dfa->init_state_word = dfa->init_state_nl - = dfa->init_state_begbuf = dfa->init_state; - - re_node_set_free (&init_nodes); - return REG_NOERROR; -} - -#ifdef RE_ENABLE_I18N -/* If it is possible to do searching in single byte encoding instead of UTF-8 - to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change - DFA nodes where needed. */ - -static void -optimize_utf8 (re_dfa_t *dfa) -{ - Idx node; - int i; - bool mb_chars = false; - bool has_period = false; - - for (node = 0; node < dfa->nodes_len; ++node) - switch (dfa->nodes[node].type) - { - case CHARACTER: - if (dfa->nodes[node].opr.c >= ASCII_CHARS) - mb_chars = true; - break; - case ANCHOR: - switch (dfa->nodes[node].opr.idx) - { - case LINE_FIRST: - case LINE_LAST: - case BUF_FIRST: - case BUF_LAST: - break; - default: - /* Word anchors etc. cannot be handled. */ - return; - } - break; - case OP_PERIOD: - has_period = true; - break; - case OP_BACK_REF: - case OP_ALT: - case END_OF_RE: - case OP_DUP_ASTERISK: - case OP_OPEN_SUBEXP: - case OP_CLOSE_SUBEXP: - break; - case COMPLEX_BRACKET: - return; - case SIMPLE_BRACKET: - /* Just double check. */ - { - int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0 - ? 0 - : BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS); - for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) - { - if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0) - return; - rshift = 0; - } - } - break; - default: - abort (); - } - - if (mb_chars || has_period) - for (node = 0; node < dfa->nodes_len; ++node) - { - if (dfa->nodes[node].type == CHARACTER - && dfa->nodes[node].opr.c >= ASCII_CHARS) - dfa->nodes[node].mb_partial = 0; - else if (dfa->nodes[node].type == OP_PERIOD) - dfa->nodes[node].type = OP_UTF8_PERIOD; - } - - /* The search can be in single byte locale. */ - dfa->mb_cur_max = 1; - dfa->is_utf8 = 0; - dfa->has_mb_node = dfa->nbackref > 0 || has_period; -} -#endif - -/* Analyze the structure tree, and calculate "first", "next", "edest", - "eclosure", and "inveclosure". */ - -static reg_errcode_t -analyze (regex_t *preg) -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - reg_errcode_t ret; - - /* Allocate arrays. */ - dfa->nexts = re_malloc (Idx, dfa->nodes_alloc); - dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc); - dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); - dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); - if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL - || dfa->eclosures == NULL, 0)) - return REG_ESPACE; - - dfa->subexp_map = re_malloc (Idx, preg->re_nsub); - if (dfa->subexp_map != NULL) - { - Idx i; - for (i = 0; i < preg->re_nsub; i++) - dfa->subexp_map[i] = i; - preorder (dfa->str_tree, optimize_subexps, dfa); - for (i = 0; i < preg->re_nsub; i++) - if (dfa->subexp_map[i] != i) - break; - if (i == preg->re_nsub) - { - free (dfa->subexp_map); - dfa->subexp_map = NULL; - } - } - - ret = postorder (dfa->str_tree, lower_subexps, preg); - if (BE (ret != REG_NOERROR, 0)) - return ret; - ret = postorder (dfa->str_tree, calc_first, dfa); - if (BE (ret != REG_NOERROR, 0)) - return ret; - preorder (dfa->str_tree, calc_next, dfa); - ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); - if (BE (ret != REG_NOERROR, 0)) - return ret; - ret = calc_eclosure (dfa); - if (BE (ret != REG_NOERROR, 0)) - return ret; - - /* We only need this during the prune_impossible_nodes pass in regexec.c; - skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ - if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) - || dfa->nbackref) - { - dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); - if (BE (dfa->inveclosures == NULL, 0)) - return REG_ESPACE; - ret = calc_inveclosure (dfa); - } - - return ret; -} - -/* Our parse trees are very unbalanced, so we cannot use a stack to - implement parse tree visits. Instead, we use parent pointers and - some hairy code in these two functions. */ -static reg_errcode_t -postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), - void *extra) -{ - bin_tree_t *node, *prev; - - for (node = root; ; ) - { - /* Descend down the tree, preferably to the left (or to the right - if that's the only child). */ - while (node->left || node->right) - if (node->left) - node = node->left; - else - node = node->right; - - do - { - reg_errcode_t err = fn (extra, node); - if (BE (err != REG_NOERROR, 0)) - return err; - if (node->parent == NULL) - return REG_NOERROR; - prev = node; - node = node->parent; - } - /* Go up while we have a node that is reached from the right. */ - while (node->right == prev || node->right == NULL); - node = node->right; - } -} - -static reg_errcode_t -preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), - void *extra) -{ - bin_tree_t *node; - - for (node = root; ; ) - { - reg_errcode_t err = fn (extra, node); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* Go to the left node, or up and to the right. */ - if (node->left) - node = node->left; - else - { - bin_tree_t *prev = NULL; - while (node->right == prev || node->right == NULL) - { - prev = node; - node = node->parent; - if (!node) - return REG_NOERROR; - } - node = node->right; - } - } -} - -/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell - re_search_internal to map the inner one's opr.idx to this one's. Adjust - backreferences as well. Requires a preorder visit. */ -static reg_errcode_t -optimize_subexps (void *extra, bin_tree_t *node) -{ - re_dfa_t *dfa = (re_dfa_t *) extra; - - if (node->token.type == OP_BACK_REF && dfa->subexp_map) - { - int idx = node->token.opr.idx; - node->token.opr.idx = dfa->subexp_map[idx]; - dfa->used_bkref_map |= 1 << node->token.opr.idx; - } - - else if (node->token.type == SUBEXP - && node->left && node->left->token.type == SUBEXP) - { - Idx other_idx = node->left->token.opr.idx; - - node->left = node->left->left; - if (node->left) - node->left->parent = node; - - dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; - if (other_idx < BITSET_WORD_BITS) - dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); - } - - return REG_NOERROR; -} - -/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation - of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */ -static reg_errcode_t -lower_subexps (void *extra, bin_tree_t *node) -{ - regex_t *preg = (regex_t *) extra; - reg_errcode_t err = REG_NOERROR; - - if (node->left && node->left->token.type == SUBEXP) - { - node->left = lower_subexp (&err, preg, node->left); - if (node->left) - node->left->parent = node; - } - if (node->right && node->right->token.type == SUBEXP) - { - node->right = lower_subexp (&err, preg, node->right); - if (node->right) - node->right->parent = node; - } - - return err; -} - -static bin_tree_t * -lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *body = node->left; - bin_tree_t *op, *cls, *tree1, *tree; - - if (preg->no_sub - /* We do not optimize empty subexpressions, because otherwise we may - have bad CONCAT nodes with NULL children. This is obviously not - very common, so we do not lose much. An example that triggers - this case is the sed "script" /\(\)/x. */ - && node->left != NULL - && (node->token.opr.idx >= BITSET_WORD_BITS - || !(dfa->used_bkref_map - & ((bitset_word_t) 1 << node->token.opr.idx)))) - return node->left; - - /* Convert the SUBEXP node to the concatenation of an - OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ - op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); - cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); - tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; - tree = create_tree (dfa, op, tree1, CONCAT); - if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - - op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; - op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; - return tree; -} - -/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton - nodes. Requires a postorder visit. */ -static reg_errcode_t -calc_first (void *extra, bin_tree_t *node) -{ - re_dfa_t *dfa = (re_dfa_t *) extra; - if (node->token.type == CONCAT) - { - node->first = node->left->first; - node->node_idx = node->left->node_idx; - } - else - { - node->first = node; - node->node_idx = re_dfa_add_node (dfa, node->token); - if (BE (node->node_idx == REG_MISSING, 0)) - return REG_ESPACE; - } - return REG_NOERROR; -} - -/* Pass 2: compute NEXT on the tree. Preorder visit. */ -static reg_errcode_t -calc_next (void *extra, bin_tree_t *node) -{ - switch (node->token.type) - { - case OP_DUP_ASTERISK: - node->left->next = node; - break; - case CONCAT: - node->left->next = node->right->first; - node->right->next = node->next; - break; - default: - if (node->left) - node->left->next = node->next; - if (node->right) - node->right->next = node->next; - break; - } - return REG_NOERROR; -} - -/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */ -static reg_errcode_t -link_nfa_nodes (void *extra, bin_tree_t *node) -{ - re_dfa_t *dfa = (re_dfa_t *) extra; - Idx idx = node->node_idx; - reg_errcode_t err = REG_NOERROR; - - switch (node->token.type) - { - case CONCAT: - break; - - case END_OF_RE: - assert (node->next == NULL); - break; - - case OP_DUP_ASTERISK: - case OP_ALT: - { - Idx left, right; - dfa->has_plural_match = 1; - if (node->left != NULL) - left = node->left->first->node_idx; - else - left = node->next->node_idx; - if (node->right != NULL) - right = node->right->first->node_idx; - else - right = node->next->node_idx; - assert (REG_VALID_INDEX (left)); - assert (REG_VALID_INDEX (right)); - err = re_node_set_init_2 (dfa->edests + idx, left, right); - } - break; - - case ANCHOR: - case OP_OPEN_SUBEXP: - case OP_CLOSE_SUBEXP: - err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); - break; - - case OP_BACK_REF: - dfa->nexts[idx] = node->next->node_idx; - if (node->token.type == OP_BACK_REF) - re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); - break; - - default: - assert (!IS_EPSILON_NODE (node->token.type)); - dfa->nexts[idx] = node->next->node_idx; - break; - } - - return err; -} - -/* Duplicate the epsilon closure of the node ROOT_NODE. - Note that duplicated nodes have constraint INIT_CONSTRAINT in addition - to their own constraint. */ - -static reg_errcode_t -internal_function -duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, - Idx root_node, unsigned int init_constraint) -{ - Idx org_node, clone_node; - bool ok; - unsigned int constraint = init_constraint; - for (org_node = top_org_node, clone_node = top_clone_node;;) - { - Idx org_dest, clone_dest; - if (dfa->nodes[org_node].type == OP_BACK_REF) - { - /* If the back reference epsilon-transit, its destination must - also have the constraint. Then duplicate the epsilon closure - of the destination of the back reference, and store it in - edests of the back reference. */ - org_dest = dfa->nexts[org_node]; - re_node_set_empty (dfa->edests + clone_node); - clone_dest = duplicate_node (dfa, org_dest, constraint); - if (BE (clone_dest == REG_MISSING, 0)) - return REG_ESPACE; - dfa->nexts[clone_node] = dfa->nexts[org_node]; - ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) - return REG_ESPACE; - } - else if (dfa->edests[org_node].nelem == 0) - { - /* In case of the node can't epsilon-transit, don't duplicate the - destination and store the original destination as the - destination of the node. */ - dfa->nexts[clone_node] = dfa->nexts[org_node]; - break; - } - else if (dfa->edests[org_node].nelem == 1) - { - /* In case of the node can epsilon-transit, and it has only one - destination. */ - org_dest = dfa->edests[org_node].elems[0]; - re_node_set_empty (dfa->edests + clone_node); - if (dfa->nodes[org_node].type == ANCHOR) - { - /* In case of the node has another constraint, append it. */ - if (org_node == root_node && clone_node != org_node) - { - /* ...but if the node is root_node itself, it means the - epsilon closure have a loop, then tie it to the - destination of the root_node. */ - ok = re_node_set_insert (dfa->edests + clone_node, org_dest); - if (BE (! ok, 0)) - return REG_ESPACE; - break; - } - constraint |= dfa->nodes[org_node].opr.ctx_type; - } - clone_dest = duplicate_node (dfa, org_dest, constraint); - if (BE (clone_dest == REG_MISSING, 0)) - return REG_ESPACE; - ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) - return REG_ESPACE; - } - else /* dfa->edests[org_node].nelem == 2 */ - { - /* In case of the node can epsilon-transit, and it has two - destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ - org_dest = dfa->edests[org_node].elems[0]; - re_node_set_empty (dfa->edests + clone_node); - /* Search for a duplicated node which satisfies the constraint. */ - clone_dest = search_duplicated_node (dfa, org_dest, constraint); - if (clone_dest == REG_MISSING) - { - /* There are no such a duplicated node, create a new one. */ - reg_errcode_t err; - clone_dest = duplicate_node (dfa, org_dest, constraint); - if (BE (clone_dest == REG_MISSING, 0)) - return REG_ESPACE; - ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) - return REG_ESPACE; - err = duplicate_node_closure (dfa, org_dest, clone_dest, - root_node, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - } - else - { - /* There are a duplicated node which satisfy the constraint, - use it to avoid infinite loop. */ - ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) - return REG_ESPACE; - } - - org_dest = dfa->edests[org_node].elems[1]; - clone_dest = duplicate_node (dfa, org_dest, constraint); - if (BE (clone_dest == REG_MISSING, 0)) - return REG_ESPACE; - ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) - return REG_ESPACE; - } - org_node = org_dest; - clone_node = clone_dest; - } - return REG_NOERROR; -} - -/* Search for a node which is duplicated from the node ORG_NODE, and - satisfies the constraint CONSTRAINT. */ - -static Idx -search_duplicated_node (const re_dfa_t *dfa, Idx org_node, - unsigned int constraint) -{ - Idx idx; - for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) - { - if (org_node == dfa->org_indices[idx] - && constraint == dfa->nodes[idx].constraint) - return idx; /* Found. */ - } - return REG_MISSING; /* Not found. */ -} - -/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. - Return the index of the new node, or REG_MISSING if insufficient storage is - available. */ - -static Idx -duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint) -{ - Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); - if (BE (dup_idx != REG_MISSING, 1)) - { - dfa->nodes[dup_idx].constraint = constraint; - if (dfa->nodes[org_idx].type == ANCHOR) - dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; - dfa->nodes[dup_idx].duplicated = 1; - - /* Store the index of the original node. */ - dfa->org_indices[dup_idx] = org_idx; - } - return dup_idx; -} - -static reg_errcode_t -calc_inveclosure (re_dfa_t *dfa) -{ - Idx src, idx; - bool ok; - for (idx = 0; idx < dfa->nodes_len; ++idx) - re_node_set_init_empty (dfa->inveclosures + idx); - - for (src = 0; src < dfa->nodes_len; ++src) - { - Idx *elems = dfa->eclosures[src].elems; - for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) - { - ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); - if (BE (! ok, 0)) - return REG_ESPACE; - } - } - - return REG_NOERROR; -} - -/* Calculate "eclosure" for all the node in DFA. */ - -static reg_errcode_t -calc_eclosure (re_dfa_t *dfa) -{ - Idx node_idx; - bool incomplete; -#ifdef DEBUG - assert (dfa->nodes_len > 0); -#endif - incomplete = false; - /* For each nodes, calculate epsilon closure. */ - for (node_idx = 0; ; ++node_idx) - { - reg_errcode_t err; - re_node_set eclosure_elem; - if (node_idx == dfa->nodes_len) - { - if (!incomplete) - break; - incomplete = false; - node_idx = 0; - } - -#ifdef DEBUG - assert (dfa->eclosures[node_idx].nelem != REG_MISSING); -#endif - - /* If we have already calculated, skip it. */ - if (dfa->eclosures[node_idx].nelem != 0) - continue; - /* Calculate epsilon closure of `node_idx'. */ - err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true); - if (BE (err != REG_NOERROR, 0)) - return err; - - if (dfa->eclosures[node_idx].nelem == 0) - { - incomplete = true; - re_node_set_free (&eclosure_elem); - } - } - return REG_NOERROR; -} - -/* Calculate epsilon closure of NODE. */ - -static reg_errcode_t -calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) -{ - reg_errcode_t err; - unsigned int constraint; - Idx i; - bool incomplete; - bool ok; - re_node_set eclosure; - incomplete = false; - err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* This indicates that we are calculating this node now. - We reference this value to avoid infinite loop. */ - dfa->eclosures[node].nelem = REG_MISSING; - - constraint = ((dfa->nodes[node].type == ANCHOR) - ? dfa->nodes[node].opr.ctx_type : 0); - /* If the current node has constraints, duplicate all nodes. - Since they must inherit the constraints. */ - if (constraint - && dfa->edests[node].nelem - && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) - { - err = duplicate_node_closure (dfa, node, node, node, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - /* Expand each epsilon destination nodes. */ - if (IS_EPSILON_NODE(dfa->nodes[node].type)) - for (i = 0; i < dfa->edests[node].nelem; ++i) - { - re_node_set eclosure_elem; - Idx edest = dfa->edests[node].elems[i]; - /* If calculating the epsilon closure of `edest' is in progress, - return intermediate result. */ - if (dfa->eclosures[edest].nelem == REG_MISSING) - { - incomplete = true; - continue; - } - /* If we haven't calculated the epsilon closure of `edest' yet, - calculate now. Otherwise use calculated epsilon closure. */ - if (dfa->eclosures[edest].nelem == 0) - { - err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false); - if (BE (err != REG_NOERROR, 0)) - return err; - } - else - eclosure_elem = dfa->eclosures[edest]; - /* Merge the epsilon closure of `edest'. */ - re_node_set_merge (&eclosure, &eclosure_elem); - /* If the epsilon closure of `edest' is incomplete, - the epsilon closure of this node is also incomplete. */ - if (dfa->eclosures[edest].nelem == 0) - { - incomplete = true; - re_node_set_free (&eclosure_elem); - } - } - - /* Epsilon closures include itself. */ - ok = re_node_set_insert (&eclosure, node); - if (BE (! ok, 0)) - return REG_ESPACE; - if (incomplete && !root) - dfa->eclosures[node].nelem = 0; - else - dfa->eclosures[node] = eclosure; - *new_set = eclosure; - return REG_NOERROR; -} - -/* Functions for token which are used in the parser. */ - -/* Fetch a token from INPUT. - We must not use this function inside bracket expressions. */ - -static void -internal_function -fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) -{ - re_string_skip_bytes (input, peek_token (result, input, syntax)); -} - -/* Peek a token from INPUT, and return the length of the token. - We must not use this function inside bracket expressions. */ - -static int -internal_function -peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) -{ - unsigned char c; - - if (re_string_eoi (input)) - { - token->type = END_OF_RE; - return 0; - } - - c = re_string_peek_byte (input, 0); - token->opr.c = c; - - token->word_char = 0; -#ifdef RE_ENABLE_I18N - token->mb_partial = 0; - if (input->mb_cur_max > 1 && - !re_string_first_byte (input, re_string_cur_idx (input))) - { - token->type = CHARACTER; - token->mb_partial = 1; - return 1; - } -#endif - if (c == '\\') - { - unsigned char c2; - if (re_string_cur_idx (input) + 1 >= re_string_length (input)) - { - token->type = BACK_SLASH; - return 1; - } - - c2 = re_string_peek_byte_case (input, 1); - token->opr.c = c2; - token->type = CHARACTER; -#ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1) - { - wint_t wc = re_string_wchar_at (input, - re_string_cur_idx (input) + 1); - token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; - } - else -#endif - token->word_char = IS_WORD_CHAR (c2) != 0; - - switch (c2) - { - case '|': - if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) - token->type = OP_ALT; - break; - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - if (!(syntax & RE_NO_BK_REFS)) - { - token->type = OP_BACK_REF; - token->opr.idx = c2 - '1'; - } - break; - case '<': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = WORD_FIRST; - } - break; - case '>': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = WORD_LAST; - } - break; - case 'b': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = WORD_DELIM; - } - break; - case 'B': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = NOT_WORD_DELIM; - } - break; - case 'w': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_WORD; - break; - case 'W': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_NOTWORD; - break; - case 's': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_SPACE; - break; - case 'S': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_NOTSPACE; - break; - case '`': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = BUF_FIRST; - } - break; - case '\'': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = BUF_LAST; - } - break; - case '(': - if (!(syntax & RE_NO_BK_PARENS)) - token->type = OP_OPEN_SUBEXP; - break; - case ')': - if (!(syntax & RE_NO_BK_PARENS)) - token->type = OP_CLOSE_SUBEXP; - break; - case '+': - if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_PLUS; - break; - case '?': - if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_QUESTION; - break; - case '{': - if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) - token->type = OP_OPEN_DUP_NUM; - break; - case '}': - if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) - token->type = OP_CLOSE_DUP_NUM; - break; - default: - break; - } - return 2; - } - - token->type = CHARACTER; -#ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1) - { - wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); - token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; - } - else -#endif - token->word_char = IS_WORD_CHAR (token->opr.c); - - switch (c) - { - case '\n': - if (syntax & RE_NEWLINE_ALT) - token->type = OP_ALT; - break; - case '|': - if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) - token->type = OP_ALT; - break; - case '*': - token->type = OP_DUP_ASTERISK; - break; - case '+': - if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_PLUS; - break; - case '?': - if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_QUESTION; - break; - case '{': - if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - token->type = OP_OPEN_DUP_NUM; - break; - case '}': - if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - token->type = OP_CLOSE_DUP_NUM; - break; - case '(': - if (syntax & RE_NO_BK_PARENS) - token->type = OP_OPEN_SUBEXP; - break; - case ')': - if (syntax & RE_NO_BK_PARENS) - token->type = OP_CLOSE_SUBEXP; - break; - case '[': - token->type = OP_OPEN_BRACKET; - break; - case '.': - token->type = OP_PERIOD; - break; - case '^': - if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && - re_string_cur_idx (input) != 0) - { - char prev = re_string_peek_byte (input, -1); - if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') - break; - } - token->type = ANCHOR; - token->opr.ctx_type = LINE_FIRST; - break; - case '$': - if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && - re_string_cur_idx (input) + 1 != re_string_length (input)) - { - re_token_t next; - re_string_skip_bytes (input, 1); - peek_token (&next, input, syntax); - re_string_skip_bytes (input, -1); - if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) - break; - } - token->type = ANCHOR; - token->opr.ctx_type = LINE_LAST; - break; - default: - break; - } - return 1; -} - -/* Peek a token from INPUT, and return the length of the token. - We must not use this function out of bracket expressions. */ - -static int -internal_function -peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) -{ - unsigned char c; - if (re_string_eoi (input)) - { - token->type = END_OF_RE; - return 0; - } - c = re_string_peek_byte (input, 0); - token->opr.c = c; - -#ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1 && - !re_string_first_byte (input, re_string_cur_idx (input))) - { - token->type = CHARACTER; - return 1; - } -#endif /* RE_ENABLE_I18N */ - - if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) - && re_string_cur_idx (input) + 1 < re_string_length (input)) - { - /* In this case, '\' escape a character. */ - unsigned char c2; - re_string_skip_bytes (input, 1); - c2 = re_string_peek_byte (input, 0); - token->opr.c = c2; - token->type = CHARACTER; - return 1; - } - if (c == '[') /* '[' is a special char in a bracket exps. */ - { - unsigned char c2; - int token_len; - if (re_string_cur_idx (input) + 1 < re_string_length (input)) - c2 = re_string_peek_byte (input, 1); - else - c2 = 0; - token->opr.c = c2; - token_len = 2; - switch (c2) - { - case '.': - token->type = OP_OPEN_COLL_ELEM; - break; - case '=': - token->type = OP_OPEN_EQUIV_CLASS; - break; - case ':': - if (syntax & RE_CHAR_CLASSES) - { - token->type = OP_OPEN_CHAR_CLASS; - break; - } - /* else fall through. */ - default: - token->type = CHARACTER; - token->opr.c = c; - token_len = 1; - break; - } - return token_len; - } - switch (c) - { - case '-': - token->type = OP_CHARSET_RANGE; - break; - case ']': - token->type = OP_CLOSE_BRACKET; - break; - case '^': - token->type = OP_NON_MATCH_LIST; - break; - default: - token->type = CHARACTER; - } - return 1; -} - -/* Functions for parser. */ - -/* Entry point of the parser. - Parse the regular expression REGEXP and return the structure tree. - If an error is occured, ERR is set by error code, and return NULL. - This function build the following tree, from regular expression <reg_exp>: - CAT - / \ - / \ - <reg_exp> EOR - - CAT means concatenation. - EOR means end of regular expression. */ - -static bin_tree_t * -parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, - reg_errcode_t *err) -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree, *eor, *root; - re_token_t current_token; - dfa->syntax = syntax; - fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); - tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - eor = create_tree (dfa, NULL, NULL, END_OF_RE); - if (tree != NULL) - root = create_tree (dfa, tree, eor, CONCAT); - else - root = eor; - if (BE (eor == NULL || root == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - return root; -} - -/* This function build the following tree, from regular expression - <branch1>|<branch2>: - ALT - / \ - / \ - <branch1> <branch2> - - ALT means alternative, which represents the operator `|'. */ - -static bin_tree_t * -parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, - reg_syntax_t syntax, Idx nest, reg_errcode_t *err) -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree, *branch = NULL; - tree = parse_branch (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - - while (token->type == OP_ALT) - { - fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); - if (token->type != OP_ALT && token->type != END_OF_RE - && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) - { - branch = parse_branch (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && branch == NULL, 0)) - return NULL; - } - else - branch = NULL; - tree = create_tree (dfa, tree, branch, OP_ALT); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - return tree; -} - -/* This function build the following tree, from regular expression - <exp1><exp2>: - CAT - / \ - / \ - <exp1> <exp2> - - CAT means concatenation. */ - -static bin_tree_t * -parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, - reg_syntax_t syntax, Idx nest, reg_errcode_t *err) -{ - bin_tree_t *tree, *expr; - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - tree = parse_expression (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - - while (token->type != OP_ALT && token->type != END_OF_RE - && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) - { - expr = parse_expression (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && expr == NULL, 0)) - { - return NULL; - } - if (tree != NULL && expr != NULL) - { - tree = create_tree (dfa, tree, expr, CONCAT); - if (tree == NULL) - { - *err = REG_ESPACE; - return NULL; - } - } - else if (tree == NULL) - tree = expr; - /* Otherwise expr == NULL, we don't need to create new tree. */ - } - return tree; -} - -/* This function build the following tree, from regular expression a*: - * - | - a -*/ - -static bin_tree_t * -parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, - reg_syntax_t syntax, Idx nest, reg_errcode_t *err) -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree; - switch (token->type) - { - case CHARACTER: - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - { - while (!re_string_eoi (regexp) - && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) - { - bin_tree_t *mbc_remain; - fetch_token (token, regexp, syntax); - mbc_remain = create_token_tree (dfa, NULL, NULL, token); - tree = create_tree (dfa, tree, mbc_remain, CONCAT); - if (BE (mbc_remain == NULL || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - } -#endif - break; - case OP_OPEN_SUBEXP: - tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_OPEN_BRACKET: - tree = parse_bracket_exp (regexp, dfa, token, syntax, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_BACK_REF: - if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) - { - *err = REG_ESUBREG; - return NULL; - } - dfa->used_bkref_map |= 1 << token->opr.idx; - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - ++dfa->nbackref; - dfa->has_mb_node = 1; - break; - case OP_OPEN_DUP_NUM: - if (syntax & RE_CONTEXT_INVALID_DUP) - { - *err = REG_BADRPT; - return NULL; - } - /* FALLTHROUGH */ - case OP_DUP_ASTERISK: - case OP_DUP_PLUS: - case OP_DUP_QUESTION: - if (syntax & RE_CONTEXT_INVALID_OPS) - { - *err = REG_BADRPT; - return NULL; - } - else if (syntax & RE_CONTEXT_INDEP_OPS) - { - fetch_token (token, regexp, syntax); - return parse_expression (regexp, preg, token, syntax, nest, err); - } - /* else fall through */ - case OP_CLOSE_SUBEXP: - if ((token->type == OP_CLOSE_SUBEXP) && - !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) - { - *err = REG_ERPAREN; - return NULL; - } - /* else fall through */ - case OP_CLOSE_DUP_NUM: - /* We treat it as a normal character. */ - - /* Then we can these characters as normal characters. */ - token->type = CHARACTER; - /* mb_partial and word_char bits should be initialized already - by peek_token. */ - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - break; - case ANCHOR: - if ((token->opr.ctx_type - & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) - && dfa->word_ops_used == 0) - init_word_char (dfa); - if (token->opr.ctx_type == WORD_DELIM - || token->opr.ctx_type == NOT_WORD_DELIM) - { - bin_tree_t *tree_first, *tree_last; - if (token->opr.ctx_type == WORD_DELIM) - { - token->opr.ctx_type = WORD_FIRST; - tree_first = create_token_tree (dfa, NULL, NULL, token); - token->opr.ctx_type = WORD_LAST; - } - else - { - token->opr.ctx_type = INSIDE_WORD; - tree_first = create_token_tree (dfa, NULL, NULL, token); - token->opr.ctx_type = INSIDE_NOTWORD; - } - tree_last = create_token_tree (dfa, NULL, NULL, token); - tree = create_tree (dfa, tree_first, tree_last, OP_ALT); - if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - else - { - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - /* We must return here, since ANCHORs can't be followed - by repetition operators. - eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>", - it must not be "<ANCHOR(^)><REPEAT(*)>". */ - fetch_token (token, regexp, syntax); - return tree; - case OP_PERIOD: - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - if (dfa->mb_cur_max > 1) - dfa->has_mb_node = 1; - break; - case OP_WORD: - case OP_NOTWORD: - tree = build_charclass_op (dfa, regexp->trans, - (const unsigned char *) "alnum", - (const unsigned char *) "_", - token->type == OP_NOTWORD, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_SPACE: - case OP_NOTSPACE: - tree = build_charclass_op (dfa, regexp->trans, - (const unsigned char *) "space", - (const unsigned char *) "", - token->type == OP_NOTSPACE, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_ALT: - case END_OF_RE: - return NULL; - case BACK_SLASH: - *err = REG_EESCAPE; - return NULL; - default: - /* Must not happen? */ -#ifdef DEBUG - assert (0); -#endif - return NULL; - } - fetch_token (token, regexp, syntax); - - while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS - || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) - { - tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - /* In BRE consecutive duplications are not allowed. */ - if ((syntax & RE_CONTEXT_INVALID_DUP) - && (token->type == OP_DUP_ASTERISK - || token->type == OP_OPEN_DUP_NUM)) - { - *err = REG_BADRPT; - return NULL; - } - } - - return tree; -} - -/* This function build the following tree, from regular expression - (<reg_exp>): - SUBEXP - | - <reg_exp> -*/ - -static bin_tree_t * -parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, - reg_syntax_t syntax, Idx nest, reg_errcode_t *err) -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree; - size_t cur_nsub; - cur_nsub = preg->re_nsub++; - - fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); - - /* The subexpression may be a null string. */ - if (token->type == OP_CLOSE_SUBEXP) - tree = NULL; - else - { - tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); - if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) - *err = REG_EPAREN; - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } - - if (cur_nsub <= '9' - '1') - dfa->completed_bkref_map |= 1 << cur_nsub; - - tree = create_tree (dfa, tree, NULL, SUBEXP); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - tree->token.opr.idx = cur_nsub; - return tree; -} - -/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ - -static bin_tree_t * -parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, - re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) -{ - bin_tree_t *tree = NULL, *old_tree = NULL; - Idx i, start, end, start_idx = re_string_cur_idx (regexp); - re_token_t start_token = *token; - - if (token->type == OP_OPEN_DUP_NUM) - { - end = 0; - start = fetch_number (regexp, token, syntax); - if (start == REG_MISSING) - { - if (token->type == CHARACTER && token->opr.c == ',') - start = 0; /* We treat "{,m}" as "{0,m}". */ - else - { - *err = REG_BADBR; /* <re>{} is invalid. */ - return NULL; - } - } - if (BE (start != REG_ERROR, 1)) - { - /* We treat "{n}" as "{n,n}". */ - end = ((token->type == OP_CLOSE_DUP_NUM) ? start - : ((token->type == CHARACTER && token->opr.c == ',') - ? fetch_number (regexp, token, syntax) : REG_ERROR)); - } - if (BE (start == REG_ERROR || end == REG_ERROR, 0)) - { - /* Invalid sequence. */ - if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) - { - if (token->type == END_OF_RE) - *err = REG_EBRACE; - else - *err = REG_BADBR; - - return NULL; - } - - /* If the syntax bit is set, rollback. */ - re_string_set_index (regexp, start_idx); - *token = start_token; - token->type = CHARACTER; - /* mb_partial and word_char bits should be already initialized by - peek_token. */ - return elem; - } - - if (BE (end != REG_MISSING && start > end, 0)) - { - /* First number greater than second. */ - *err = REG_BADBR; - return NULL; - } - } - else - { - start = (token->type == OP_DUP_PLUS) ? 1 : 0; - end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING; - } - - fetch_token (token, regexp, syntax); - - if (BE (elem == NULL, 0)) - return NULL; - if (BE (start == 0 && end == 0, 0)) - { - postorder (elem, free_tree, NULL); - return NULL; - } - - /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ - if (BE (start > 0, 0)) - { - tree = elem; - for (i = 2; i <= start; ++i) - { - elem = duplicate_tree (elem, dfa); - tree = create_tree (dfa, tree, elem, CONCAT); - if (BE (elem == NULL || tree == NULL, 0)) - goto parse_dup_op_espace; - } - - if (start == end) - return tree; - - /* Duplicate ELEM before it is marked optional. */ - elem = duplicate_tree (elem, dfa); - old_tree = tree; - } - else - old_tree = NULL; - - if (elem->token.type == SUBEXP) - postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); - - tree = create_tree (dfa, elem, NULL, - (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT)); - if (BE (tree == NULL, 0)) - goto parse_dup_op_espace; - - /* This loop is actually executed only when end != REG_MISSING, - to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have - already created the start+1-th copy. */ - if ((Idx) -1 < 0 || end != REG_MISSING) - for (i = start + 2; i <= end; ++i) - { - elem = duplicate_tree (elem, dfa); - tree = create_tree (dfa, tree, elem, CONCAT); - if (BE (elem == NULL || tree == NULL, 0)) - goto parse_dup_op_espace; - - tree = create_tree (dfa, tree, NULL, OP_ALT); - if (BE (tree == NULL, 0)) - goto parse_dup_op_espace; - } - - if (old_tree) - tree = create_tree (dfa, old_tree, tree, CONCAT); - - return tree; - - parse_dup_op_espace: - *err = REG_ESPACE; - return NULL; -} - -/* Size of the names for collating symbol/equivalence_class/character_class. - I'm not sure, but maybe enough. */ -#define BRACKET_NAME_BUF_SIZE 32 - -#ifndef _LIBC - /* Local function for parse_bracket_exp only used in case of NOT _LIBC. - Build the range expression which starts from START_ELEM, and ends - at END_ELEM. The result are written to MBCSET and SBCSET. - RANGE_ALLOC is the allocated size of mbcset->range_starts, and - mbcset->range_ends, is a pointer argument sinse we may - update it. */ - -static reg_errcode_t -internal_function -# ifdef RE_ENABLE_I18N -build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc, - bracket_elem_t *start_elem, bracket_elem_t *end_elem) -# else /* not RE_ENABLE_I18N */ -build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, - bracket_elem_t *end_elem) -# endif /* not RE_ENABLE_I18N */ -{ - unsigned int start_ch, end_ch; - /* Equivalence Classes and Character Classes can't be a range start/end. */ - if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS - || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, - 0)) - return REG_ERANGE; - - /* We can handle no multi character collating elements without libc - support. */ - if (BE ((start_elem->type == COLL_SYM - && strlen ((char *) start_elem->opr.name) > 1) - || (end_elem->type == COLL_SYM - && strlen ((char *) end_elem->opr.name) > 1), 0)) - return REG_ECOLLATE; - -# ifdef RE_ENABLE_I18N - { - wchar_t wc; - wint_t start_wc; - wint_t end_wc; - wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - - start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch - : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] - : 0)); - end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch - : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] - : 0)); - start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) - ? __btowc (start_ch) : start_elem->opr.wch); - end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) - ? __btowc (end_ch) : end_elem->opr.wch); - if (start_wc == WEOF || end_wc == WEOF) - return REG_ECOLLATE; - cmp_buf[0] = start_wc; - cmp_buf[4] = end_wc; - if (wcscoll (cmp_buf, cmp_buf + 4) > 0) - return REG_ERANGE; - - /* Got valid collation sequence values, add them as a new entry. - However, for !_LIBC we have no collation elements: if the - character set is single byte, the single byte character set - that we build below suffices. parse_bracket_exp passes - no MBCSET if dfa->mb_cur_max == 1. */ - if (mbcset) - { - /* Check the space of the arrays. */ - if (BE (*range_alloc == mbcset->nranges, 0)) - { - /* There is not enough space, need realloc. */ - wchar_t *new_array_start, *new_array_end; - Idx new_nranges; - - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; - /* Use realloc since mbcset->range_starts and mbcset->range_ends - are NULL if *range_alloc == 0. */ - new_array_start = re_realloc (mbcset->range_starts, wchar_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, wchar_t, - new_nranges); - - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; - - mbcset->range_starts = new_array_start; - mbcset->range_ends = new_array_end; - *range_alloc = new_nranges; - } - - mbcset->range_starts[mbcset->nranges] = start_wc; - mbcset->range_ends[mbcset->nranges++] = end_wc; - } - - /* Build the table for single byte characters. */ - for (wc = 0; wc < SBC_MAX; ++wc) - { - cmp_buf[2] = wc; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) - bitset_set (sbcset, wc); - } - } -# else /* not RE_ENABLE_I18N */ - { - unsigned int ch; - start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch - : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] - : 0)); - end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch - : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] - : 0)); - if (start_ch > end_ch) - return REG_ERANGE; - /* Build the table for single byte characters. */ - for (ch = 0; ch < SBC_MAX; ++ch) - if (start_ch <= ch && ch <= end_ch) - bitset_set (sbcset, ch); - } -# endif /* not RE_ENABLE_I18N */ - return REG_NOERROR; -} -#endif /* not _LIBC */ - -#ifndef _LIBC -/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. - Build the collating element which is represented by NAME. - The result are written to MBCSET and SBCSET. - COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a - pointer argument since we may update it. */ - -static reg_errcode_t -internal_function -build_collating_symbol (bitset_t sbcset, -# ifdef RE_ENABLE_I18N - re_charset_t *mbcset, Idx *coll_sym_alloc, -# endif - const unsigned char *name) -{ - size_t name_len = strlen ((const char *) name); - if (BE (name_len != 1, 0)) - return REG_ECOLLATE; - else - { - bitset_set (sbcset, name[0]); - return REG_NOERROR; - } -} -#endif /* not _LIBC */ - -/* This function parse bracket expression like "[abc]", "[a-c]", - "[[.a-a.]]" etc. */ - -static bin_tree_t * -parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, - reg_syntax_t syntax, reg_errcode_t *err) -{ -#ifdef _LIBC - const unsigned char *collseqmb; - const char *collseqwc; - uint32_t nrules; - int32_t table_size; - const int32_t *symb_table; - const unsigned char *extra; - - /* Local function for parse_bracket_exp used in _LIBC environement. - Seek the collating symbol entry correspondings to NAME. - Return the index of the symbol in the SYMB_TABLE. */ - - auto inline int32_t - __attribute ((always_inline)) - seek_collating_symbol_entry (name, name_len) - const unsigned char *name; - size_t name_len; - { - int32_t hash = elem_hash ((const char *) name, name_len); - int32_t elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - int32_t second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - /* Compare the length of the name. */ - && name_len == extra[symb_table[2 * elem + 1]] - /* Compare the name. */ - && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], - name_len) == 0) - { - /* Yep, this is the entry. */ - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - return elem; - } - - /* Local function for parse_bracket_exp used in _LIBC environement. - Look up the collation sequence value of BR_ELEM. - Return the value if succeeded, UINT_MAX otherwise. */ - - auto inline unsigned int - __attribute ((always_inline)) - lookup_collation_sequence_value (br_elem) - bracket_elem_t *br_elem; - { - if (br_elem->type == SB_CHAR) - { - /* - if (MB_CUR_MAX == 1) - */ - if (nrules == 0) - return collseqmb[br_elem->opr.ch]; - else - { - wint_t wc = __btowc (br_elem->opr.ch); - return __collseq_table_lookup (collseqwc, wc); - } - } - else if (br_elem->type == MB_CHAR) - { - return __collseq_table_lookup (collseqwc, br_elem->opr.wch); - } - else if (br_elem->type == COLL_SYM) - { - size_t sym_name_len = strlen ((char *) br_elem->opr.name); - if (nrules != 0) - { - int32_t elem, idx; - elem = seek_collating_symbol_entry (br_elem->opr.name, - sym_name_len); - if (symb_table[2 * elem] != 0) - { - /* We found the entry. */ - idx = symb_table[2 * elem + 1]; - /* Skip the name of collating element name. */ - idx += 1 + extra[idx]; - /* Skip the byte sequence of the collating element. */ - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - /* Skip the multibyte collation sequence value. */ - idx += sizeof (unsigned int); - /* Skip the wide char sequence of the collating element. */ - idx += sizeof (unsigned int) * - (1 + *(unsigned int *) (extra + idx)); - /* Return the collation sequence value. */ - return *(unsigned int *) (extra + idx); - } - else if (symb_table[2 * elem] == 0 && sym_name_len == 1) - { - /* No valid character. Match it as a single byte - character. */ - return collseqmb[br_elem->opr.name[0]]; - } - } - else if (sym_name_len == 1) - return collseqmb[br_elem->opr.name[0]]; - } - return UINT_MAX; - } - - /* Local function for parse_bracket_exp used in _LIBC environement. - Build the range expression which starts from START_ELEM, and ends - at END_ELEM. The result are written to MBCSET and SBCSET. - RANGE_ALLOC is the allocated size of mbcset->range_starts, and - mbcset->range_ends, is a pointer argument sinse we may - update it. */ - - auto inline reg_errcode_t - __attribute ((always_inline)) - build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) - re_charset_t *mbcset; - Idx *range_alloc; - bitset_t sbcset; - bracket_elem_t *start_elem, *end_elem; - { - unsigned int ch; - uint32_t start_collseq; - uint32_t end_collseq; - - /* Equivalence Classes and Character Classes can't be a range - start/end. */ - if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS - || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, - 0)) - return REG_ERANGE; - - start_collseq = lookup_collation_sequence_value (start_elem); - end_collseq = lookup_collation_sequence_value (end_elem); - /* Check start/end collation sequence values. */ - if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) - return REG_ECOLLATE; - if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) - return REG_ERANGE; - - /* Got valid collation sequence values, add them as a new entry. - However, if we have no collation elements, and the character set - is single byte, the single byte character set that we - build below suffices. */ - if (nrules > 0 || dfa->mb_cur_max > 1) - { - /* Check the space of the arrays. */ - if (BE (*range_alloc == mbcset->nranges, 0)) - { - /* There is not enough space, need realloc. */ - uint32_t *new_array_start; - uint32_t *new_array_end; - Idx new_nranges; - - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; - new_array_start = re_realloc (mbcset->range_starts, uint32_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, uint32_t, - new_nranges); - - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; - - mbcset->range_starts = new_array_start; - mbcset->range_ends = new_array_end; - *range_alloc = new_nranges; - } - - mbcset->range_starts[mbcset->nranges] = start_collseq; - mbcset->range_ends[mbcset->nranges++] = end_collseq; - } - - /* Build the table for single byte characters. */ - for (ch = 0; ch < SBC_MAX; ch++) - { - uint32_t ch_collseq; - /* - if (MB_CUR_MAX == 1) - */ - if (nrules == 0) - ch_collseq = collseqmb[ch]; - else - ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); - if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) - bitset_set (sbcset, ch); - } - return REG_NOERROR; - } - - /* Local function for parse_bracket_exp used in _LIBC environement. - Build the collating element which is represented by NAME. - The result are written to MBCSET and SBCSET. - COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a - pointer argument sinse we may update it. */ - - auto inline reg_errcode_t - __attribute ((always_inline)) - build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) - re_charset_t *mbcset; - Idx *coll_sym_alloc; - bitset_t sbcset; - const unsigned char *name; - { - int32_t elem, idx; - size_t name_len = strlen ((const char *) name); - if (nrules != 0) - { - elem = seek_collating_symbol_entry (name, name_len); - if (symb_table[2 * elem] != 0) - { - /* We found the entry. */ - idx = symb_table[2 * elem + 1]; - /* Skip the name of collating element name. */ - idx += 1 + extra[idx]; - } - else if (symb_table[2 * elem] == 0 && name_len == 1) - { - /* No valid character, treat it as a normal - character. */ - bitset_set (sbcset, name[0]); - return REG_NOERROR; - } - else - return REG_ECOLLATE; - - /* Got valid collation sequence, add it as a new entry. */ - /* Check the space of the arrays. */ - if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->ncoll_syms is 0. */ - Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; - /* Use realloc since mbcset->coll_syms is NULL - if *alloc == 0. */ - int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, - new_coll_sym_alloc); - if (BE (new_coll_syms == NULL, 0)) - return REG_ESPACE; - mbcset->coll_syms = new_coll_syms; - *coll_sym_alloc = new_coll_sym_alloc; - } - mbcset->coll_syms[mbcset->ncoll_syms++] = idx; - return REG_NOERROR; - } - else - { - if (BE (name_len != 1, 0)) - return REG_ECOLLATE; - else - { - bitset_set (sbcset, name[0]); - return REG_NOERROR; - } - } - } -#endif - - re_token_t br_token; - re_bitset_ptr_t sbcset; -#ifdef RE_ENABLE_I18N - re_charset_t *mbcset; - Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; - Idx equiv_class_alloc = 0, char_class_alloc = 0; -#endif /* not RE_ENABLE_I18N */ - bool non_match = false; - bin_tree_t *work_tree; - int token_len; - bool first_round = true; -#ifdef _LIBC - collseqmb = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); - nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules) - { - /* - if (MB_CUR_MAX > 1) - */ - collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); - table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); - symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_TABLEMB); - extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_EXTRAMB); - } -#endif - sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); -#ifdef RE_ENABLE_I18N - mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); -#endif /* RE_ENABLE_I18N */ -#ifdef RE_ENABLE_I18N - if (BE (sbcset == NULL || mbcset == NULL, 0)) -#else - if (BE (sbcset == NULL, 0)) -#endif /* RE_ENABLE_I18N */ - { - *err = REG_ESPACE; - return NULL; - } - - token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_BADPAT; - goto parse_bracket_exp_free_return; - } - if (token->type == OP_NON_MATCH_LIST) - { -#ifdef RE_ENABLE_I18N - mbcset->non_match = 1; -#endif /* not RE_ENABLE_I18N */ - non_match = true; - if (syntax & RE_HAT_LISTS_NOT_NEWLINE) - bitset_set (sbcset, '\n'); - re_string_skip_bytes (regexp, token_len); /* Skip a token. */ - token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_BADPAT; - goto parse_bracket_exp_free_return; - } - } - - /* We treat the first ']' as a normal character. */ - if (token->type == OP_CLOSE_BRACKET) - token->type = CHARACTER; - - while (1) - { - bracket_elem_t start_elem, end_elem; - unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; - unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; - reg_errcode_t ret; - int token_len2 = 0; - bool is_range_exp = false; - re_token_t token2; - - start_elem.opr.name = start_name_buf; - ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, - syntax, first_round); - if (BE (ret != REG_NOERROR, 0)) - { - *err = ret; - goto parse_bracket_exp_free_return; - } - first_round = false; - - /* Get information about the next token. We need it in any case. */ - token_len = peek_token_bracket (token, regexp, syntax); - - /* Do not check for ranges if we know they are not allowed. */ - if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) - { - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_EBRACK; - goto parse_bracket_exp_free_return; - } - if (token->type == OP_CHARSET_RANGE) - { - re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ - token_len2 = peek_token_bracket (&token2, regexp, syntax); - if (BE (token2.type == END_OF_RE, 0)) - { - *err = REG_EBRACK; - goto parse_bracket_exp_free_return; - } - if (token2.type == OP_CLOSE_BRACKET) - { - /* We treat the last '-' as a normal character. */ - re_string_skip_bytes (regexp, -token_len); - token->type = CHARACTER; - } - else - is_range_exp = true; - } - } - - if (is_range_exp == true) - { - end_elem.opr.name = end_name_buf; - ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, - dfa, syntax, true); - if (BE (ret != REG_NOERROR, 0)) - { - *err = ret; - goto parse_bracket_exp_free_return; - } - - token_len = peek_token_bracket (token, regexp, syntax); - -#ifdef _LIBC - *err = build_range_exp (sbcset, mbcset, &range_alloc, - &start_elem, &end_elem); -#else -# ifdef RE_ENABLE_I18N - *err = build_range_exp (sbcset, - dfa->mb_cur_max > 1 ? mbcset : NULL, - &range_alloc, &start_elem, &end_elem); -# else - *err = build_range_exp (sbcset, &start_elem, &end_elem); -# endif -#endif /* RE_ENABLE_I18N */ - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - } - else - { - switch (start_elem.type) - { - case SB_CHAR: - bitset_set (sbcset, start_elem.opr.ch); - break; -#ifdef RE_ENABLE_I18N - case MB_CHAR: - /* Check whether the array has enough space. */ - if (BE (mbchar_alloc == mbcset->nmbchars, 0)) - { - wchar_t *new_mbchars; - /* Not enough, realloc it. */ - /* +1 in case of mbcset->nmbchars is 0. */ - mbchar_alloc = 2 * mbcset->nmbchars + 1; - /* Use realloc since array is NULL if *alloc == 0. */ - new_mbchars = re_realloc (mbcset->mbchars, wchar_t, - mbchar_alloc); - if (BE (new_mbchars == NULL, 0)) - goto parse_bracket_exp_espace; - mbcset->mbchars = new_mbchars; - } - mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; - break; -#endif /* RE_ENABLE_I18N */ - case EQUIV_CLASS: - *err = build_equiv_class (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &equiv_class_alloc, -#endif /* RE_ENABLE_I18N */ - start_elem.opr.name); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - break; - case COLL_SYM: - *err = build_collating_symbol (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &coll_sym_alloc, -#endif /* RE_ENABLE_I18N */ - start_elem.opr.name); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - break; - case CHAR_CLASS: - *err = build_charclass (regexp->trans, sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &char_class_alloc, -#endif /* RE_ENABLE_I18N */ - start_elem.opr.name, syntax); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - break; - default: - assert (0); - break; - } - } - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_EBRACK; - goto parse_bracket_exp_free_return; - } - if (token->type == OP_CLOSE_BRACKET) - break; - } - - re_string_skip_bytes (regexp, token_len); /* Skip a token. */ - - /* If it is non-matching list. */ - if (non_match) - bitset_not (sbcset); - -#ifdef RE_ENABLE_I18N - /* Ensure only single byte characters are set. */ - if (dfa->mb_cur_max > 1) - bitset_mask (sbcset, dfa->sb_char); - - if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes - || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes - || mbcset->non_match))) - { - bin_tree_t *mbc_tree; - int sbc_idx; - /* Build a tree for complex bracket. */ - dfa->has_mb_node = 1; - br_token.type = COMPLEX_BRACKET; - br_token.opr.mbcset = mbcset; - mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (mbc_tree == NULL, 0)) - goto parse_bracket_exp_espace; - for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) - if (sbcset[sbc_idx]) - break; - /* If there are no bits set in sbcset, there is no point - of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ - if (sbc_idx < BITSET_WORDS) - { - /* Build a tree for simple bracket. */ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; - work_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (work_tree == NULL, 0)) - goto parse_bracket_exp_espace; - - /* Then join them by ALT node. */ - work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); - if (BE (work_tree == NULL, 0)) - goto parse_bracket_exp_espace; - } - else - { - re_free (sbcset); - work_tree = mbc_tree; - } - } - else -#endif /* not RE_ENABLE_I18N */ - { -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif - /* Build a tree for simple bracket. */ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; - work_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (work_tree == NULL, 0)) - goto parse_bracket_exp_espace; - } - return work_tree; - - parse_bracket_exp_espace: - *err = REG_ESPACE; - parse_bracket_exp_free_return: - re_free (sbcset); -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif /* RE_ENABLE_I18N */ - return NULL; -} - -/* Parse an element in the bracket expression. */ - -static reg_errcode_t -parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, - re_token_t *token, int token_len, re_dfa_t *dfa, - reg_syntax_t syntax, bool accept_hyphen) -{ -#ifdef RE_ENABLE_I18N - int cur_char_size; - cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); - if (cur_char_size > 1) - { - elem->type = MB_CHAR; - elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); - re_string_skip_bytes (regexp, cur_char_size); - return REG_NOERROR; - } -#endif /* RE_ENABLE_I18N */ - re_string_skip_bytes (regexp, token_len); /* Skip a token. */ - if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS - || token->type == OP_OPEN_EQUIV_CLASS) - return parse_bracket_symbol (elem, regexp, token); - if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) - { - /* A '-' must only appear as anything but a range indicator before - the closing bracket. Everything else is an error. */ - re_token_t token2; - (void) peek_token_bracket (&token2, regexp, syntax); - if (token2.type != OP_CLOSE_BRACKET) - /* The actual error value is not standardized since this whole - case is undefined. But ERANGE makes good sense. */ - return REG_ERANGE; - } - elem->type = SB_CHAR; - elem->opr.ch = token->opr.c; - return REG_NOERROR; -} - -/* Parse a bracket symbol in the bracket expression. Bracket symbols are - such as [:<character_class>:], [.<collating_element>.], and - [=<equivalent_class>=]. */ - -static reg_errcode_t -parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, - re_token_t *token) -{ - unsigned char ch, delim = token->opr.c; - int i = 0; - if (re_string_eoi(regexp)) - return REG_EBRACK; - for (;; ++i) - { - if (i >= BRACKET_NAME_BUF_SIZE) - return REG_EBRACK; - if (token->type == OP_OPEN_CHAR_CLASS) - ch = re_string_fetch_byte_case (regexp); - else - ch = re_string_fetch_byte (regexp); - if (re_string_eoi(regexp)) - return REG_EBRACK; - if (ch == delim && re_string_peek_byte (regexp, 0) == ']') - break; - elem->opr.name[i] = ch; - } - re_string_skip_bytes (regexp, 1); - elem->opr.name[i] = '\0'; - switch (token->type) - { - case OP_OPEN_COLL_ELEM: - elem->type = COLL_SYM; - break; - case OP_OPEN_EQUIV_CLASS: - elem->type = EQUIV_CLASS; - break; - case OP_OPEN_CHAR_CLASS: - elem->type = CHAR_CLASS; - break; - default: - break; - } - return REG_NOERROR; -} - - /* Helper function for parse_bracket_exp. - Build the equivalence class which is represented by NAME. - The result are written to MBCSET and SBCSET. - EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, - is a pointer argument sinse we may update it. */ - -static reg_errcode_t -#ifdef RE_ENABLE_I18N -build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, - Idx *equiv_class_alloc, const unsigned char *name) -#else /* not RE_ENABLE_I18N */ -build_equiv_class (bitset_t sbcset, const unsigned char *name) -#endif /* not RE_ENABLE_I18N */ -{ -#ifdef _LIBC - uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules != 0) - { - const int32_t *table, *indirect; - const unsigned char *weights, *extra, *cp; - unsigned char char_buf[2]; - int32_t idx1, idx2; - unsigned int ch; - size_t len; - /* This #include defines a local function! */ -# include <locale/weight.h> - /* Calculate the index for equivalence class. */ - cp = name; - table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_WEIGHTMB); - extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_INDIRECTMB); - idx1 = findidx (&cp); - if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) - /* This isn't a valid character. */ - return REG_ECOLLATE; - - /* Build single byte matcing table for this equivalence class. */ - char_buf[1] = (unsigned char) '\0'; - len = weights[idx1]; - for (ch = 0; ch < SBC_MAX; ++ch) - { - char_buf[0] = ch; - cp = char_buf; - idx2 = findidx (&cp); -/* - idx2 = table[ch]; -*/ - if (idx2 == 0) - /* This isn't a valid character. */ - continue; - if (len == weights[idx2]) - { - int cnt = 0; - while (cnt <= len && - weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) - ++cnt; - - if (cnt > len) - bitset_set (sbcset, ch); - } - } - /* Check whether the array has enough space. */ - if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->nequiv_classes is 0. */ - Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; - /* Use realloc since the array is NULL if *alloc == 0. */ - int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, - int32_t, - new_equiv_class_alloc); - if (BE (new_equiv_classes == NULL, 0)) - return REG_ESPACE; - mbcset->equiv_classes = new_equiv_classes; - *equiv_class_alloc = new_equiv_class_alloc; - } - mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; - } - else -#endif /* _LIBC */ - { - if (BE (strlen ((const char *) name) != 1, 0)) - return REG_ECOLLATE; - bitset_set (sbcset, *name); - } - return REG_NOERROR; -} - - /* Helper function for parse_bracket_exp. - Build the character class which is represented by NAME. - The result are written to MBCSET and SBCSET. - CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, - is a pointer argument sinse we may update it. */ - -static reg_errcode_t -#ifdef RE_ENABLE_I18N -build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, - re_charset_t *mbcset, Idx *char_class_alloc, - const unsigned char *class_name, reg_syntax_t syntax) -#else /* not RE_ENABLE_I18N */ -build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, - const unsigned char *class_name, reg_syntax_t syntax) -#endif /* not RE_ENABLE_I18N */ -{ - int i; - const char *name = (const char *) class_name; - - /* In case of REG_ICASE "upper" and "lower" match the both of - upper and lower cases. */ - if ((syntax & RE_ICASE) - && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) - name = "alpha"; - -#ifdef RE_ENABLE_I18N - /* Check the space of the arrays. */ - if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->nchar_classes is 0. */ - Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1; - /* Use realloc since array is NULL if *alloc == 0. */ - wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, - new_char_class_alloc); - if (BE (new_char_classes == NULL, 0)) - return REG_ESPACE; - mbcset->char_classes = new_char_classes; - *char_class_alloc = new_char_class_alloc; - } - mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); -#endif /* RE_ENABLE_I18N */ - -#define BUILD_CHARCLASS_LOOP(ctype_func) \ - do { \ - if (BE (trans != NULL, 0)) \ - { \ - for (i = 0; i < SBC_MAX; ++i) \ - if (ctype_func (i)) \ - bitset_set (sbcset, trans[i]); \ - } \ - else \ - { \ - for (i = 0; i < SBC_MAX; ++i) \ - if (ctype_func (i)) \ - bitset_set (sbcset, i); \ - } \ - } while (0) - - if (strcmp (name, "alnum") == 0) - BUILD_CHARCLASS_LOOP (isalnum); - else if (strcmp (name, "cntrl") == 0) - BUILD_CHARCLASS_LOOP (iscntrl); - else if (strcmp (name, "lower") == 0) - BUILD_CHARCLASS_LOOP (islower); - else if (strcmp (name, "space") == 0) - BUILD_CHARCLASS_LOOP (isspace); - else if (strcmp (name, "alpha") == 0) - BUILD_CHARCLASS_LOOP (isalpha); - else if (strcmp (name, "digit") == 0) - BUILD_CHARCLASS_LOOP (isdigit); - else if (strcmp (name, "print") == 0) - BUILD_CHARCLASS_LOOP (isprint); - else if (strcmp (name, "upper") == 0) - BUILD_CHARCLASS_LOOP (isupper); - else if (strcmp (name, "blank") == 0) - BUILD_CHARCLASS_LOOP (isblank); - else if (strcmp (name, "graph") == 0) - BUILD_CHARCLASS_LOOP (isgraph); - else if (strcmp (name, "punct") == 0) - BUILD_CHARCLASS_LOOP (ispunct); - else if (strcmp (name, "xdigit") == 0) - BUILD_CHARCLASS_LOOP (isxdigit); - else - return REG_ECTYPE; - - return REG_NOERROR; -} - -static bin_tree_t * -build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, - const unsigned char *class_name, - const unsigned char *extra, bool non_match, - reg_errcode_t *err) -{ - re_bitset_ptr_t sbcset; -#ifdef RE_ENABLE_I18N - re_charset_t *mbcset; - Idx alloc = 0; -#endif /* not RE_ENABLE_I18N */ - reg_errcode_t ret; - re_token_t br_token; - bin_tree_t *tree; - - sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); -#ifdef RE_ENABLE_I18N - mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); -#endif /* RE_ENABLE_I18N */ - -#ifdef RE_ENABLE_I18N - if (BE (sbcset == NULL || mbcset == NULL, 0)) -#else /* not RE_ENABLE_I18N */ - if (BE (sbcset == NULL, 0)) -#endif /* not RE_ENABLE_I18N */ - { - *err = REG_ESPACE; - return NULL; - } - - if (non_match) - { -#ifdef RE_ENABLE_I18N - mbcset->non_match = 1; -#endif /* not RE_ENABLE_I18N */ - } - - /* We don't care the syntax in this case. */ - ret = build_charclass (trans, sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &alloc, -#endif /* RE_ENABLE_I18N */ - class_name, 0); - - if (BE (ret != REG_NOERROR, 0)) - { - re_free (sbcset); -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif /* RE_ENABLE_I18N */ - *err = ret; - return NULL; - } - /* \w match '_' also. */ - for (; *extra; extra++) - bitset_set (sbcset, *extra); - - /* If it is non-matching list. */ - if (non_match) - bitset_not (sbcset); - -#ifdef RE_ENABLE_I18N - /* Ensure only single byte characters are set. */ - if (dfa->mb_cur_max > 1) - bitset_mask (sbcset, dfa->sb_char); -#endif - - /* Build a tree for simple bracket. */ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; - tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (tree == NULL, 0)) - goto build_word_op_espace; - -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - { - bin_tree_t *mbc_tree; - /* Build a tree for complex bracket. */ - br_token.type = COMPLEX_BRACKET; - br_token.opr.mbcset = mbcset; - dfa->has_mb_node = 1; - mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (mbc_tree == NULL, 0)) - goto build_word_op_espace; - /* Then join them by ALT node. */ - tree = create_tree (dfa, tree, mbc_tree, OP_ALT); - if (BE (mbc_tree != NULL, 1)) - return tree; - } - else - { - free_charset (mbcset); - return tree; - } -#else /* not RE_ENABLE_I18N */ - return tree; -#endif /* not RE_ENABLE_I18N */ - - build_word_op_espace: - re_free (sbcset); -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif /* RE_ENABLE_I18N */ - *err = REG_ESPACE; - return NULL; -} - -/* This is intended for the expressions like "a{1,3}". - Fetch a number from `input', and return the number. - Return REG_MISSING if the number field is empty like "{,1}". - Return REG_ERROR if an error occurred. */ - -static Idx -fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) -{ - Idx num = REG_MISSING; - unsigned char c; - while (1) - { - fetch_token (token, input, syntax); - c = token->opr.c; - if (BE (token->type == END_OF_RE, 0)) - return REG_ERROR; - if (token->type == OP_CLOSE_DUP_NUM || c == ',') - break; - num = ((token->type != CHARACTER || c < '0' || '9' < c - || num == REG_ERROR) - ? REG_ERROR - : ((num == REG_MISSING) ? c - '0' : num * 10 + c - '0')); - num = (num > RE_DUP_MAX) ? REG_ERROR : num; - } - return num; -} - -#ifdef RE_ENABLE_I18N -static void -free_charset (re_charset_t *cset) -{ - re_free (cset->mbchars); -# ifdef _LIBC - re_free (cset->coll_syms); - re_free (cset->equiv_classes); - re_free (cset->range_starts); - re_free (cset->range_ends); -# endif - re_free (cset->char_classes); - re_free (cset); -} -#endif /* RE_ENABLE_I18N */ - -/* Functions for binary tree operation. */ - -/* Create a tree node. */ - -static bin_tree_t * -create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, - re_token_type_t type) -{ - re_token_t t; - t.type = type; - return create_token_tree (dfa, left, right, &t); -} - -static bin_tree_t * -create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, - const re_token_t *token) -{ - bin_tree_t *tree; - if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) - { - bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); - - if (storage == NULL) - return NULL; - storage->next = dfa->str_tree_storage; - dfa->str_tree_storage = storage; - dfa->str_tree_storage_idx = 0; - } - tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; - - tree->parent = NULL; - tree->left = left; - tree->right = right; - tree->token = *token; - tree->token.duplicated = 0; - tree->token.opt_subexp = 0; - tree->first = NULL; - tree->next = NULL; - tree->node_idx = REG_MISSING; - - if (left != NULL) - left->parent = tree; - if (right != NULL) - right->parent = tree; - return tree; -} - -/* Mark the tree SRC as an optional subexpression. - To be called from preorder or postorder. */ - -static reg_errcode_t -mark_opt_subexp (void *extra, bin_tree_t *node) -{ - Idx idx = (Idx) (long) extra; - if (node->token.type == SUBEXP && node->token.opr.idx == idx) - node->token.opt_subexp = 1; - - return REG_NOERROR; -} - -/* Free the allocated memory inside NODE. */ - -static void -free_token (re_token_t *node) -{ -#ifdef RE_ENABLE_I18N - if (node->type == COMPLEX_BRACKET && node->duplicated == 0) - free_charset (node->opr.mbcset); - else -#endif /* RE_ENABLE_I18N */ - if (node->type == SIMPLE_BRACKET && node->duplicated == 0) - re_free (node->opr.sbcset); -} - -/* Worker function for tree walking. Free the allocated memory inside NODE - and its children. */ - -static reg_errcode_t -free_tree (void *extra, bin_tree_t *node) -{ - free_token (&node->token); - return REG_NOERROR; -} - - -/* Duplicate the node SRC, and return new node. This is a preorder - visit similar to the one implemented by the generic visitor, but - we need more infrastructure to maintain two parallel trees --- so, - it's easier to duplicate. */ - -static bin_tree_t * -duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) -{ - const bin_tree_t *node; - bin_tree_t *dup_root; - bin_tree_t **p_new = &dup_root, *dup_node = root->parent; - - for (node = root; ; ) - { - /* Create a new tree and link it back to the current parent. */ - *p_new = create_token_tree (dfa, NULL, NULL, &node->token); - if (*p_new == NULL) - return NULL; - (*p_new)->parent = dup_node; - (*p_new)->token.duplicated = 1; - dup_node = *p_new; - - /* Go to the left node, or up and to the right. */ - if (node->left) - { - node = node->left; - p_new = &dup_node->left; - } - else - { - const bin_tree_t *prev = NULL; - while (node->right == prev || node->right == NULL) - { - prev = node; - node = node->parent; - dup_node = dup_node->parent; - if (!node) - return dup_root; - } - node = node->right; - p_new = &dup_node->right; - } - } -} diff --git a/lib/regex.c b/lib/regex.c deleted file mode 100644 index d4eb726..0000000 --- a/lib/regex.c +++ /dev/null @@ -1,71 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2005, 2006 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -/* Make sure noone compiles this code with a C++ compiler. */ -#if defined __cplusplus && defined _LIBC -# error "This is C code, use a C compiler" -#endif - -#ifdef _LIBC -/* We have to keep the namespace clean. */ -# define regfree(preg) __regfree (preg) -# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) -# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) -# define regerror(errcode, preg, errbuf, errbuf_size) \ - __regerror(errcode, preg, errbuf, errbuf_size) -# define re_set_registers(bu, re, nu, st, en) \ - __re_set_registers (bu, re, nu, st, en) -# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ - __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) -# define re_match(bufp, string, size, pos, regs) \ - __re_match (bufp, string, size, pos, regs) -# define re_search(bufp, string, size, startpos, range, regs) \ - __re_search (bufp, string, size, startpos, range, regs) -# define re_compile_pattern(pattern, length, bufp) \ - __re_compile_pattern (pattern, length, bufp) -# define re_set_syntax(syntax) __re_set_syntax (syntax) -# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ - __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) -# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) - -# include "../locale/localeinfo.h" -#endif - -/* On some systems, limits.h sets RE_DUP_MAX to a lower value than - GNU regex allows. Include it before <regex.h>, which correctly - #undefs RE_DUP_MAX and sets it to the right value. */ -#include <limits.h> - -#include <regex.h> -#include "regex_internal.h" - -#include "regex_internal.c" -#include "regcomp.c" -#include "regexec.c" - -/* Binary backward compatibility. */ -#if _LIBC -# include <shlib-compat.h> -# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) -link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") -int re_max_failures = 2000; -# endif -#endif diff --git a/lib/regex.h b/lib/regex.h deleted file mode 100644 index 7a79ca3..0000000 --- a/lib/regex.h +++ /dev/null @@ -1,675 +0,0 @@ -/* Definitions for data structures and routines for the regular - expression library. - Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _REGEX_H -#define _REGEX_H 1 - -#include <sys/types.h> - -/* Allow the use in C++ code. */ -#ifdef __cplusplus -extern "C" { -#endif - -/* Define __USE_GNU_REGEX to declare GNU extensions that violate the - POSIX name space rules. */ -#undef __USE_GNU_REGEX -#if (defined _GNU_SOURCE \ - || (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \ - && !defined _XOPEN_SOURCE)) -# define __USE_GNU_REGEX 1 -#endif - -#ifdef _REGEX_LARGE_OFFSETS - -/* Use types and values that are wide enough to represent signed and - unsigned byte offsets in memory. This currently works only when - the regex code is used outside of the GNU C library; it is not yet - supported within glibc itself, and glibc users should not define - _REGEX_LARGE_OFFSETS. */ - -/* The type of the offset of a byte within a string. - For historical reasons POSIX 1003.1-2004 requires that regoff_t be - at least as wide as off_t. However, many common POSIX platforms set - regoff_t to the more-sensible ssize_t and the Open Group has - signalled its intention to change the requirement to be that - regoff_t be at least as wide as ptrdiff_t and ssize_t; see XBD ERN - 60 (2005-08-25). We don't know of any hosts where ssize_t or - ptrdiff_t is wider than ssize_t, so ssize_t is safe. */ -typedef ssize_t regoff_t; - -/* The type of nonnegative object indexes. Traditionally, GNU regex - uses 'int' for these. Code that uses __re_idx_t should work - regardless of whether the type is signed. */ -typedef size_t __re_idx_t; - -/* The type of object sizes. */ -typedef size_t __re_size_t; - -/* The type of object sizes, in places where the traditional code - uses unsigned long int. */ -typedef size_t __re_long_size_t; - -#else - -/* Use types that are binary-compatible with the traditional GNU regex - implementation, which mishandles strings longer than INT_MAX. */ - -typedef int regoff_t; -typedef int __re_idx_t; -typedef unsigned int __re_size_t; -typedef unsigned long int __re_long_size_t; - -#endif - -/* The following two types have to be signed and unsigned integer type - wide enough to hold a value of a pointer. For most ANSI compilers - ptrdiff_t and size_t should be likely OK. Still size of these two - types is 2 for Microsoft C. Ugh... */ -typedef long int s_reg_t; -typedef unsigned long int active_reg_t; - -/* The following bits are used to determine the regexp syntax we - recognize. The set/not-set meanings are chosen so that Emacs syntax - remains the value 0. The bits are given in alphabetical order, and - the definitions shifted by one from the previous bit; thus, when we - add or remove a bit, only one other definition need change. */ -typedef unsigned long int reg_syntax_t; - -#ifdef __USE_GNU_REGEX - -/* If this bit is not set, then \ inside a bracket expression is literal. - If set, then such a \ quotes the following character. */ -# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) - -/* If this bit is not set, then + and ? are operators, and \+ and \? are - literals. - If set, then \+ and \? are operators and + and ? are literals. */ -# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) - -/* If this bit is set, then character classes are supported. They are: - [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], - [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. - If not set, then character classes are not supported. */ -# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) - -/* If this bit is set, then ^ and $ are always anchors (outside bracket - expressions, of course). - If this bit is not set, then it depends: - ^ is an anchor if it is at the beginning of a regular - expression or after an open-group or an alternation operator; - $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. - - This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because - POSIX draft 11.2 says that * etc. in leading positions is undefined. - We already implemented a previous draft which made those constructs - invalid, though, so we haven't changed the code back. */ -# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) - -/* If this bit is set, then special characters are always special - regardless of where they are in the pattern. - If this bit is not set, then special characters are special only in - some contexts; otherwise they are ordinary. Specifically, - * + ? and intervals are only special when not after the beginning, - open-group, or alternation operator. */ -# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) - -/* If this bit is set, then *, +, ?, and { cannot be first in an re or - immediately after an alternation or begin-group operator. */ -# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) - -/* If this bit is set, then . matches newline. - If not set, then it doesn't. */ -# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) - -/* If this bit is set, then . doesn't match NUL. - If not set, then it does. */ -# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) - -/* If this bit is set, nonmatching lists [^...] do not match newline. - If not set, they do. */ -# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) - -/* If this bit is set, either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. - If not set, \{, \}, {, and } are literals. */ -# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -/* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ -# define RE_LIMITED_OPS (RE_INTERVALS << 1) - -/* If this bit is set, newline is an alternation operator. - If not set, newline is literal. */ -# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) - -/* If this bit is set, then `{...}' defines an interval, and \{ and \} - are literals. - If not set, then `\{...\}' defines an interval. */ -# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) - -/* If this bit is set, (...) defines a group, and \( and \) are literals. - If not set, \(...\) defines a group, and ( and ) are literals. */ -# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) - -/* If this bit is set, then \<digit> matches <digit>. - If not set, then \<digit> is a back-reference. */ -# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) - -/* If this bit is set, then | is an alternation operator, and \| is literal. - If not set, then \| is an alternation operator, and | is literal. */ -# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) - -/* If this bit is set, then an ending range point collating higher - than the starting range point, as in [z-a], is invalid. - If not set, then when ending range point collates higher than the - starting range point, the range is ignored. */ -# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) - -/* If this bit is set, then an unmatched ) is ordinary. - If not set, then an unmatched ) is invalid. */ -# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) - -/* If this bit is set, succeed as soon as we match the whole pattern, - without further backtracking. */ -# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) - -/* If this bit is set, do not process the GNU regex operators. - If not set, then the GNU regex operators are recognized. */ -# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) - -/* If this bit is set, turn on internal regex debugging. - If not set, and debugging was on, turn it off. - This only works if regex.c is compiled -DDEBUG. - We define this bit always, so that all that's needed to turn on - debugging is to recompile regex.c; the calling code can always have - this bit set, and it won't affect anything in the normal case. */ -# define RE_DEBUG (RE_NO_GNU_OPS << 1) - -/* If this bit is set, a syntactically invalid interval is treated as - a string of ordinary characters. For example, the ERE 'a{1' is - treated as 'a\{1'. */ -# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) - -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) - -/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only - for ^, because it is difficult to scan the regex backwards to find - whether ^ should be special. */ -# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) - -/* If this bit is set, then \{ cannot be first in an bre or - immediately after an alternation or begin-group operator. */ -# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) - -/* If this bit is set, then no_sub will be set to 1 during - re_compile_pattern. */ -# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) - -#endif /* defined __USE_GNU_REGEX */ - -/* This global variable defines the particular regexp syntax to use (for - some interfaces). When a regexp is compiled, the syntax used is - stored in the pattern buffer, so changing this does not affect - already-compiled regexps. */ -extern reg_syntax_t re_syntax_options; - -#ifdef __USE_GNU_REGEX -/* Define combinations of the above bits for the standard possibilities. - (The [[[ comments delimit what gets put into the Texinfo file, so - don't delete them!) */ -/* [[[begin syntaxes]]] */ -# define RE_SYNTAX_EMACS 0 - -# define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ - | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) - -# define RE_SYNTAX_GNU_AWK \ - ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ - & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ - | RE_CONTEXT_INVALID_OPS )) - -# define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ - | RE_INTERVALS | RE_NO_GNU_OPS) - -# define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -# define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -# define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ - | RE_INVALID_INTERVAL_ORD) - -/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -# define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - -# define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - -/* Syntax bits common to both basic and extended POSIX regex syntax. */ -# define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - -# define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) - -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ -# define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -# define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is - removed and RE_NO_BK_REFS is added. */ -# define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) -/* [[[end syntaxes]]] */ - -#endif /* defined __USE_GNU_REGEX */ - -#ifdef __USE_GNU_REGEX - -/* Maximum number of duplicates an interval can allow. POSIX-conforming - systems might define this in <limits.h>, but we want our - value, so remove any previous define. */ -# ifdef RE_DUP_MAX -# undef RE_DUP_MAX -# endif - -/* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored - the counter as a 2-byte signed integer. This is no longer true, so - RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to - ((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined. - However, there would be a huge performance problem if someone - actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains - its historical value. */ -# define RE_DUP_MAX (0x7fff) - -#endif /* defined __USE_GNU_REGEX */ - - -/* POSIX `cflags' bits (i.e., information for `regcomp'). */ - -/* If this bit is set, then use extended regular expression syntax. - If not set, then use basic regular expression syntax. */ -#define REG_EXTENDED 1 - -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -#define REG_ICASE (1 << 1) - -/* If this bit is set, then anchors do not match at newline - characters in the string. - If not set, then anchors do match at newlines. */ -#define REG_NEWLINE (1 << 2) - -/* If this bit is set, then report only success or fail in regexec. - If not set, then returns differ between not matching and errors. */ -#define REG_NOSUB (1 << 3) - - -/* POSIX `eflags' bits (i.e., information for regexec). */ - -/* If this bit is set, then the beginning-of-line operator doesn't match - the beginning of the string (presumably because it's not the - beginning of a line). - If not set, then the beginning-of-line operator does match the - beginning of the string. */ -#define REG_NOTBOL 1 - -/* Like REG_NOTBOL, except for the end-of-line. */ -#define REG_NOTEOL (1 << 1) - -/* Use PMATCH[0] to delimit the start and end of the search in the - buffer. */ -#define REG_STARTEND (1 << 2) - - -/* If any error codes are removed, changed, or added, update the - `__re_error_msgid' table in regcomp.c. */ - -typedef enum -{ - _REG_ENOSYS = -1, /* This will never happen for this implementation. */ - _REG_NOERROR = 0, /* Success. */ - _REG_NOMATCH, /* Didn't find a match (for regexec). */ - - /* POSIX regcomp return error codes. (In the order listed in the - standard.) */ - _REG_BADPAT, /* Invalid pattern. */ - _REG_ECOLLATE, /* Invalid collating element. */ - _REG_ECTYPE, /* Invalid character class name. */ - _REG_EESCAPE, /* Trailing backslash. */ - _REG_ESUBREG, /* Invalid back reference. */ - _REG_EBRACK, /* Unmatched left bracket. */ - _REG_EPAREN, /* Parenthesis imbalance. */ - _REG_EBRACE, /* Unmatched \{. */ - _REG_BADBR, /* Invalid contents of \{\}. */ - _REG_ERANGE, /* Invalid range end. */ - _REG_ESPACE, /* Ran out of memory. */ - _REG_BADRPT, /* No preceding re for repetition op. */ - - /* Error codes we've added. */ - _REG_EEND, /* Premature end. */ - _REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ - _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ -} reg_errcode_t; - -#ifdef _XOPEN_SOURCE -# define REG_ENOSYS _REG_ENOSYS -#endif -#define REG_NOERROR _REG_NOERROR -#define REG_NOMATCH _REG_NOMATCH -#define REG_BADPAT _REG_BADPAT -#define REG_ECOLLATE _REG_ECOLLATE -#define REG_ECTYPE _REG_ECTYPE -#define REG_EESCAPE _REG_EESCAPE -#define REG_ESUBREG _REG_ESUBREG -#define REG_EBRACK _REG_EBRACK -#define REG_EPAREN _REG_EPAREN -#define REG_EBRACE _REG_EBRACE -#define REG_BADBR _REG_BADBR -#define REG_ERANGE _REG_ERANGE -#define REG_ESPACE _REG_ESPACE -#define REG_BADRPT _REG_BADRPT -#define REG_EEND _REG_EEND -#define REG_ESIZE _REG_ESIZE -#define REG_ERPAREN _REG_ERPAREN - -/* struct re_pattern_buffer normally uses member names like `buffer' - that POSIX does not allow. In POSIX mode these members have names - with leading `re_' (e.g., `re_buffer'). */ -#ifdef __USE_GNU_REGEX -# define _REG_RE_NAME(id) id -# define _REG_RM_NAME(id) id -#else -# define _REG_RE_NAME(id) re_##id -# define _REG_RM_NAME(id) rm_##id -#endif - -/* The user can specify the type of the re_translate member by - defining the macro RE_TRANSLATE_TYPE, which defaults to unsigned - char *. This pollutes the POSIX name space, so in POSIX mode just - use unsigned char *. */ -#ifdef __USE_GNU_REGEX -# ifndef RE_TRANSLATE_TYPE -# define RE_TRANSLATE_TYPE unsigned char * -# endif -# define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE -#else -# define REG_TRANSLATE_TYPE unsigned char * -#endif - -/* This data structure represents a compiled pattern. Before calling - the pattern compiler, the fields `buffer', `allocated', `fastmap', - `translate', and `no_sub' can be set. After the pattern has been - compiled, the `re_nsub' field is available. All other fields are - private to the regex routines. */ - -struct re_pattern_buffer -{ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are sometimes used as - array indexes. */ - unsigned char *_REG_RE_NAME (buffer); - - /* Number of bytes to which `buffer' points. */ - __re_long_size_t _REG_RE_NAME (allocated); - - /* Number of bytes actually used in `buffer'. */ - __re_long_size_t _REG_RE_NAME (used); - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t _REG_RE_NAME (syntax); - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses the - fastmap, if there is one, to skip over impossible starting points - for matches. */ - char *_REG_RE_NAME (fastmap); - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation is - applied to a pattern when it is compiled and to a string when it - is matched. */ - REG_TRANSLATE_TYPE _REG_RE_NAME (translate); - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see whether or - not we should use the fastmap, so we don't set this absolutely - perfectly; see `re_compile_fastmap' (the `duplicate' case). */ - unsigned int _REG_RE_NAME (can_be_null) : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#ifdef __USE_GNU_REGEX -# define REGS_UNALLOCATED 0 -# define REGS_REALLOCATE 1 -# define REGS_FIXED 2 -#endif - unsigned int _REG_RE_NAME (regs_allocated) : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned int _REG_RE_NAME (fastmap_accurate) : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned int _REG_RE_NAME (no_sub) : 1; - - /* If set, a beginning-of-line anchor doesn't match at the beginning - of the string. */ - unsigned int _REG_RE_NAME (not_bol) : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned int _REG_RE_NAME (not_eol) : 1; - - /* If true, an anchor at a newline matches. */ - unsigned int _REG_RE_NAME (newline_anchor) : 1; - -/* [[[end pattern_buffer]]] */ -}; - -typedef struct re_pattern_buffer regex_t; - -/* This is the structure we store register match data in. See - regex.texinfo for a full description of what registers match. */ -struct re_registers -{ - __re_size_t _REG_RM_NAME (num_regs); - regoff_t *_REG_RM_NAME (start); - regoff_t *_REG_RM_NAME (end); -}; - - -/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, - `re_match_2' returns information about at least this many registers - the first time a `regs' structure is passed. */ -#if !defined RE_NREGS && defined __USE_GNU_REGEX -# define RE_NREGS 30 -#endif - - -/* POSIX specification for registers. Aside from the different names than - `re_registers', POSIX uses an array of structures, instead of a - structure of arrays. */ -typedef struct -{ - regoff_t rm_so; /* Byte offset from string's start to substring's start. */ - regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ -} regmatch_t; - -/* Declarations for routines. */ - -/* Sets the current default syntax to SYNTAX, and return the old syntax. - You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); - -/* Compile the regular expression PATTERN, with length LENGTH - and syntax given by the global `re_syntax_options', into the buffer - BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern (const char *__pattern, size_t __length, - struct re_pattern_buffer *__buffer); - - -/* Compile a fastmap for the compiled pattern in BUFFER; used to - accelerate searches. Return 0 if successful and -2 if was an - internal error. */ -extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); - - -/* Search in the string STRING (with length LENGTH) for the pattern - compiled into BUFFER. Start searching at position START, for RANGE - characters. Return the starting position of the match, -1 for no - match, or -2 for an internal error. Also return register - information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern regoff_t re_search (struct re_pattern_buffer *__buffer, - const char *__string, __re_idx_t __length, - __re_idx_t __start, regoff_t __range, - struct re_registers *__regs); - - -/* Like `re_search', but search in the concatenation of STRING1 and - STRING2. Also, stop searching at index START + STOP. */ -extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, - const char *__string1, __re_idx_t __length1, - const char *__string2, __re_idx_t __length2, - __re_idx_t __start, regoff_t __range, - struct re_registers *__regs, - __re_idx_t __stop); - - -/* Like `re_search', but return how many characters in STRING the regexp - in BUFFER matched, starting at position START. */ -extern regoff_t re_match (struct re_pattern_buffer *__buffer, - const char *__string, __re_idx_t __length, - __re_idx_t __start, struct re_registers *__regs); - - -/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, - const char *__string1, __re_idx_t __length1, - const char *__string2, __re_idx_t __length2, - __re_idx_t __start, struct re_registers *__regs, - __re_idx_t __stop); - - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using BUFFER and REGS will use this memory - for recording register information. STARTS and ENDS must be - allocated with malloc, and must each be at least `NUM_REGS * sizeof - (regoff_t)' bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ -extern void re_set_registers (struct re_pattern_buffer *__buffer, - struct re_registers *__regs, - __re_size_t __num_regs, - regoff_t *__starts, regoff_t *__ends); - -#if defined _REGEX_RE_COMP || defined _LIBC -# ifndef _CRAY -/* 4.2 bsd compatibility. */ -extern char *re_comp (const char *); -extern int re_exec (const char *); -# endif -#endif - -/* GCC 2.95 and later have "__restrict"; C99 compilers have - "restrict", and "configure" may have defined "restrict". - Other compilers use __restrict, __restrict__, and _Restrict, and - 'configure' might #define 'restrict' to those words, so pick a - different name. */ -#ifndef _Restrict_ -# if 199901L <= __STDC_VERSION__ -# define _Restrict_ restrict -# elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__) -# define _Restrict_ __restrict -# else -# define _Restrict_ -# endif -#endif -/* gcc 3.1 and up support the [restrict] syntax. Don't trust - sys/cdefs.h's definition of __restrict_arr, though, as it - mishandles gcc -ansi -pedantic. */ -#ifndef _Restrict_arr_ -# if ((199901L <= __STDC_VERSION__ \ - || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \ - && !__STRICT_ANSI__)) \ - && !defined __GNUG__) -# define _Restrict_arr_ _Restrict_ -# else -# define _Restrict_arr_ -# endif -#endif - -/* POSIX compatibility. */ -extern int regcomp (regex_t *_Restrict_ __preg, - const char *_Restrict_ __pattern, - int __cflags); - -extern int regexec (const regex_t *_Restrict_ __preg, - const char *_Restrict_ __string, size_t __nmatch, - regmatch_t __pmatch[_Restrict_arr_], - int __eflags); - -extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg, - char *_Restrict_ __errbuf, size_t __errbuf_size); - -extern void regfree (regex_t *__preg); - - -#ifdef __cplusplus -} -#endif /* C++ */ - -#endif /* regex.h */ diff --git a/lib/regex_internal.c b/lib/regex_internal.c deleted file mode 100644 index 2129888..0000000 --- a/lib/regex_internal.c +++ /dev/null @@ -1,1741 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software - Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -static void re_string_construct_common (const char *str, Idx len, - re_string_t *pstr, - RE_TRANSLATE_TYPE trans, bool icase, - const re_dfa_t *dfa) internal_function; -static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, - const re_node_set *nodes, - re_hashval_t hash) internal_function; -static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int context, - re_hashval_t hash) internal_function; - -/* Functions for string operation. */ - -/* This function allocate the buffers. It is necessary to call - re_string_reconstruct before using the object. */ - -static reg_errcode_t -internal_function -re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len, - RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) -{ - reg_errcode_t ret; - Idx init_buf_len; - - /* Ensure at least one character fits into the buffers. */ - if (init_len < dfa->mb_cur_max) - init_len = dfa->mb_cur_max; - init_buf_len = (len + 1 < init_len) ? len + 1: init_len; - re_string_construct_common (str, len, pstr, trans, icase, dfa); - - ret = re_string_realloc_buffers (pstr, init_buf_len); - if (BE (ret != REG_NOERROR, 0)) - return ret; - - pstr->word_char = dfa->word_char; - pstr->word_ops_used = dfa->word_ops_used; - pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; - pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; - pstr->valid_raw_len = pstr->valid_len; - return REG_NOERROR; -} - -/* This function allocate the buffers, and initialize them. */ - -static reg_errcode_t -internal_function -re_string_construct (re_string_t *pstr, const char *str, Idx len, - RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) -{ - reg_errcode_t ret; - memset (pstr, '\0', sizeof (re_string_t)); - re_string_construct_common (str, len, pstr, trans, icase, dfa); - - if (len > 0) - { - ret = re_string_realloc_buffers (pstr, len + 1); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; - - if (icase) - { -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - { - while (1) - { - ret = build_wcs_upper_buffer (pstr); - if (BE (ret != REG_NOERROR, 0)) - return ret; - if (pstr->valid_raw_len >= len) - break; - if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) - break; - ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - } - else -#endif /* RE_ENABLE_I18N */ - build_upper_buffer (pstr); - } - else - { -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - build_wcs_buffer (pstr); - else -#endif /* RE_ENABLE_I18N */ - { - if (trans != NULL) - re_string_translate_buffer (pstr); - else - { - pstr->valid_len = pstr->bufs_len; - pstr->valid_raw_len = pstr->bufs_len; - } - } - } - - return REG_NOERROR; -} - -/* Helper functions for re_string_allocate, and re_string_construct. */ - -static reg_errcode_t -internal_function -re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) -{ -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - { - wint_t *new_wcs; - - /* Avoid overflow. */ - size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx)); - if (BE (SIZE_MAX / max_object_size < new_buf_len, 0)) - return REG_ESPACE; - - new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); - if (BE (new_wcs == NULL, 0)) - return REG_ESPACE; - pstr->wcs = new_wcs; - if (pstr->offsets != NULL) - { - Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len); - if (BE (new_offsets == NULL, 0)) - return REG_ESPACE; - pstr->offsets = new_offsets; - } - } -#endif /* RE_ENABLE_I18N */ - if (pstr->mbs_allocated) - { - unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, - new_buf_len); - if (BE (new_mbs == NULL, 0)) - return REG_ESPACE; - pstr->mbs = new_mbs; - } - pstr->bufs_len = new_buf_len; - return REG_NOERROR; -} - - -static void -internal_function -re_string_construct_common (const char *str, Idx len, re_string_t *pstr, - RE_TRANSLATE_TYPE trans, bool icase, - const re_dfa_t *dfa) -{ - pstr->raw_mbs = (const unsigned char *) str; - pstr->len = len; - pstr->raw_len = len; - pstr->trans = trans; - pstr->icase = icase; - pstr->mbs_allocated = (trans != NULL || icase); - pstr->mb_cur_max = dfa->mb_cur_max; - pstr->is_utf8 = dfa->is_utf8; - pstr->map_notascii = dfa->map_notascii; - pstr->stop = pstr->len; - pstr->raw_stop = pstr->stop; -} - -#ifdef RE_ENABLE_I18N - -/* Build wide character buffer PSTR->WCS. - If the byte sequence of the string are: - <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3> - Then wide character buffer will be: - <wc1> , WEOF , <wc2> , WEOF , <wc3> - We use WEOF for padding, they indicate that the position isn't - a first byte of a multibyte character. - - Note that this function assumes PSTR->VALID_LEN elements are already - built and starts from PSTR->VALID_LEN. */ - -static void -internal_function -build_wcs_buffer (re_string_t *pstr) -{ -#ifdef _LIBC - unsigned char buf[MB_LEN_MAX]; - assert (MB_LEN_MAX >= pstr->mb_cur_max); -#else - unsigned char buf[64]; -#endif - mbstate_t prev_st; - Idx byte_idx, end_idx, remain_len; - size_t mbclen; - - /* Build the buffers from pstr->valid_len to either pstr->len or - pstr->bufs_len. */ - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - for (byte_idx = pstr->valid_len; byte_idx < end_idx;) - { - wchar_t wc; - const char *p; - - remain_len = end_idx - byte_idx; - prev_st = pstr->cur_state; - /* Apply the translation if we need. */ - if (BE (pstr->trans != NULL, 0)) - { - int i, ch; - - for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) - { - ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; - buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; - } - p = (const char *) buf; - } - else - p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; - mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2, 0)) - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) - { - /* We treat these cases as a singlebyte character. */ - mbclen = 1; - wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - if (BE (pstr->trans != NULL, 0)) - wc = pstr->trans[wc]; - pstr->cur_state = prev_st; - } - - /* Write wide character and padding. */ - pstr->wcs[byte_idx++] = wc; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; - } - pstr->valid_len = byte_idx; - pstr->valid_raw_len = byte_idx; -} - -/* Build wide character buffer PSTR->WCS like build_wcs_buffer, - but for REG_ICASE. */ - -static reg_errcode_t -internal_function -build_wcs_upper_buffer (re_string_t *pstr) -{ - mbstate_t prev_st; - Idx src_idx, byte_idx, end_idx, remain_len; - size_t mbclen; -#ifdef _LIBC - char buf[MB_LEN_MAX]; - assert (MB_LEN_MAX >= pstr->mb_cur_max); -#else - char buf[64]; -#endif - - byte_idx = pstr->valid_len; - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - - /* The following optimization assumes that ASCII characters can be - mapped to wide characters with a simple cast. */ - if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) - { - while (byte_idx < end_idx) - { - wchar_t wc; - - if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) - && mbsinit (&pstr->cur_state)) - { - /* In case of a singlebyte character. */ - pstr->mbs[byte_idx] - = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); - /* The next step uses the assumption that wchar_t is encoded - ASCII-safe: all ASCII values can be converted like this. */ - pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; - ++byte_idx; - continue; - } - - remain_len = end_idx - byte_idx; - prev_st = pstr->cur_state; - mbclen = mbrtowc (&wc, - ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx - + byte_idx), remain_len, &pstr->cur_state); - if (BE (mbclen < (size_t) -2, 1)) - { - wchar_t wcu = wc; - if (iswlower (wc)) - { - size_t mbcdlen; - - wcu = towupper (wc); - mbcdlen = wcrtomb (buf, wcu, &prev_st); - if (BE (mbclen == mbcdlen, 1)) - memcpy (pstr->mbs + byte_idx, buf, mbclen); - else - { - src_idx = byte_idx; - goto offsets_needed; - } - } - else - memcpy (pstr->mbs + byte_idx, - pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); - pstr->wcs[byte_idx++] = wcu; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; - } - else if (mbclen == (size_t) -1 || mbclen == 0) - { - /* It is an invalid character or '\0'. Just use the byte. */ - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - pstr->mbs[byte_idx] = ch; - /* And also cast it to wide char. */ - pstr->wcs[byte_idx++] = (wchar_t) ch; - if (BE (mbclen == (size_t) -1, 0)) - pstr->cur_state = prev_st; - } - else - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - } - pstr->valid_len = byte_idx; - pstr->valid_raw_len = byte_idx; - return REG_NOERROR; - } - else - for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;) - { - wchar_t wc; - const char *p; - offsets_needed: - remain_len = end_idx - byte_idx; - prev_st = pstr->cur_state; - if (BE (pstr->trans != NULL, 0)) - { - int i, ch; - - for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) - { - ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; - buf[i] = pstr->trans[ch]; - } - p = (const char *) buf; - } - else - p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; - mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); - if (BE (mbclen < (size_t) -2, 1)) - { - wchar_t wcu = wc; - if (iswlower (wc)) - { - size_t mbcdlen; - - wcu = towupper (wc); - mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); - if (BE (mbclen == mbcdlen, 1)) - memcpy (pstr->mbs + byte_idx, buf, mbclen); - else if (mbcdlen != (size_t) -1) - { - size_t i; - - if (byte_idx + mbcdlen > pstr->bufs_len) - { - pstr->cur_state = prev_st; - break; - } - - if (pstr->offsets == NULL) - { - pstr->offsets = re_malloc (Idx, pstr->bufs_len); - - if (pstr->offsets == NULL) - return REG_ESPACE; - } - if (!pstr->offsets_needed) - { - for (i = 0; i < (size_t) byte_idx; ++i) - pstr->offsets[i] = i; - pstr->offsets_needed = 1; - } - - memcpy (pstr->mbs + byte_idx, buf, mbcdlen); - pstr->wcs[byte_idx] = wcu; - pstr->offsets[byte_idx] = src_idx; - for (i = 1; i < mbcdlen; ++i) - { - pstr->offsets[byte_idx + i] - = src_idx + (i < mbclen ? i : mbclen - 1); - pstr->wcs[byte_idx + i] = WEOF; - } - pstr->len += mbcdlen - mbclen; - if (pstr->raw_stop > src_idx) - pstr->stop += mbcdlen - mbclen; - end_idx = (pstr->bufs_len > pstr->len) - ? pstr->len : pstr->bufs_len; - byte_idx += mbcdlen; - src_idx += mbclen; - continue; - } - else - memcpy (pstr->mbs + byte_idx, p, mbclen); - } - else - memcpy (pstr->mbs + byte_idx, p, mbclen); - - if (BE (pstr->offsets_needed != 0, 0)) - { - size_t i; - for (i = 0; i < mbclen; ++i) - pstr->offsets[byte_idx + i] = src_idx + i; - } - src_idx += mbclen; - - pstr->wcs[byte_idx++] = wcu; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; - } - else if (mbclen == (size_t) -1 || mbclen == 0) - { - /* It is an invalid character or '\0'. Just use the byte. */ - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; - - if (BE (pstr->trans != NULL, 0)) - ch = pstr->trans [ch]; - pstr->mbs[byte_idx] = ch; - - if (BE (pstr->offsets_needed != 0, 0)) - pstr->offsets[byte_idx] = src_idx; - ++src_idx; - - /* And also cast it to wide char. */ - pstr->wcs[byte_idx++] = (wchar_t) ch; - if (BE (mbclen == (size_t) -1, 0)) - pstr->cur_state = prev_st; - } - else - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - } - pstr->valid_len = byte_idx; - pstr->valid_raw_len = src_idx; - return REG_NOERROR; -} - -/* Skip characters until the index becomes greater than NEW_RAW_IDX. - Return the index. */ - -static Idx -internal_function -re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc) -{ - mbstate_t prev_st; - Idx rawbuf_idx; - size_t mbclen; - wint_t wc = WEOF; - - /* Skip the characters which are not necessary to check. */ - for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len; - rawbuf_idx < new_raw_idx;) - { - wchar_t wc2; - Idx remain_len; - remain_len = pstr->len - rawbuf_idx; - prev_st = pstr->cur_state; - mbclen = mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, - remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) - { - /* We treat these cases as a single byte character. */ - if (mbclen == 0 || remain_len == 0) - wc = L'\0'; - else - wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); - mbclen = 1; - pstr->cur_state = prev_st; - } - else - wc = wc2; - /* Then proceed the next character. */ - rawbuf_idx += mbclen; - } - *last_wc = wc; - return rawbuf_idx; -} -#endif /* RE_ENABLE_I18N */ - -/* Build the buffer PSTR->MBS, and apply the translation if we need. - This function is used in case of REG_ICASE. */ - -static void -internal_function -build_upper_buffer (re_string_t *pstr) -{ - Idx char_idx, end_idx; - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - - for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) - { - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; - if (BE (pstr->trans != NULL, 0)) - ch = pstr->trans[ch]; - if (islower (ch)) - pstr->mbs[char_idx] = toupper (ch); - else - pstr->mbs[char_idx] = ch; - } - pstr->valid_len = char_idx; - pstr->valid_raw_len = char_idx; -} - -/* Apply TRANS to the buffer in PSTR. */ - -static void -internal_function -re_string_translate_buffer (re_string_t *pstr) -{ - Idx buf_idx, end_idx; - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - - for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) - { - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; - pstr->mbs[buf_idx] = pstr->trans[ch]; - } - - pstr->valid_len = buf_idx; - pstr->valid_raw_len = buf_idx; -} - -/* This function re-construct the buffers. - Concretely, convert to wide character in case of pstr->mb_cur_max > 1, - convert to upper case in case of REG_ICASE, apply translation. */ - -static reg_errcode_t -internal_function -re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) -{ - Idx offset; - - if (BE (pstr->raw_mbs_idx <= idx, 0)) - offset = idx - pstr->raw_mbs_idx; - else - { - /* Reset buffer. */ -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); -#endif /* RE_ENABLE_I18N */ - pstr->len = pstr->raw_len; - pstr->stop = pstr->raw_stop; - pstr->valid_len = 0; - pstr->raw_mbs_idx = 0; - pstr->valid_raw_len = 0; - pstr->offsets_needed = 0; - pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF - : CONTEXT_NEWLINE | CONTEXT_BEGBUF); - if (!pstr->mbs_allocated) - pstr->mbs = (unsigned char *) pstr->raw_mbs; - offset = idx; - } - - if (BE (offset != 0, 1)) - { - /* Should the already checked characters be kept? */ - if (BE (offset < pstr->valid_raw_len, 1)) - { - /* Yes, move them to the front of the buffer. */ -#ifdef RE_ENABLE_I18N - if (BE (pstr->offsets_needed, 0)) - { - Idx low = 0, high = pstr->valid_len, mid; - do - { - mid = (high + low) / 2; - if (pstr->offsets[mid] > offset) - high = mid; - else if (pstr->offsets[mid] < offset) - low = mid + 1; - else - break; - } - while (low < high); - if (pstr->offsets[mid] < offset) - ++mid; - pstr->tip_context = re_string_context_at (pstr, mid - 1, - eflags); - /* This can be quite complicated, so handle specially - only the common and easy case where the character with - different length representation of lower and upper - case is present at or after offset. */ - if (pstr->valid_len > offset - && mid == offset && pstr->offsets[mid] == offset) - { - memmove (pstr->wcs, pstr->wcs + offset, - (pstr->valid_len - offset) * sizeof (wint_t)); - memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); - pstr->valid_len -= offset; - pstr->valid_raw_len -= offset; - for (low = 0; low < pstr->valid_len; low++) - pstr->offsets[low] = pstr->offsets[low + offset] - offset; - } - else - { - /* Otherwise, just find out how long the partial multibyte - character at offset is and fill it with WEOF/255. */ - pstr->len = pstr->raw_len - idx + offset; - pstr->stop = pstr->raw_stop - idx + offset; - pstr->offsets_needed = 0; - while (mid > 0 && pstr->offsets[mid - 1] == offset) - --mid; - while (mid < pstr->valid_len) - if (pstr->wcs[mid] != WEOF) - break; - else - ++mid; - if (mid == pstr->valid_len) - pstr->valid_len = 0; - else - { - pstr->valid_len = pstr->offsets[mid] - offset; - if (pstr->valid_len) - { - for (low = 0; low < pstr->valid_len; ++low) - pstr->wcs[low] = WEOF; - memset (pstr->mbs, 255, pstr->valid_len); - } - } - pstr->valid_raw_len = pstr->valid_len; - } - } - else -#endif - { - pstr->tip_context = re_string_context_at (pstr, offset - 1, - eflags); -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - memmove (pstr->wcs, pstr->wcs + offset, - (pstr->valid_len - offset) * sizeof (wint_t)); -#endif /* RE_ENABLE_I18N */ - if (BE (pstr->mbs_allocated, 0)) - memmove (pstr->mbs, pstr->mbs + offset, - pstr->valid_len - offset); - pstr->valid_len -= offset; - pstr->valid_raw_len -= offset; -#if DEBUG - assert (pstr->valid_len > 0); -#endif - } - } - else - { - /* No, skip all characters until IDX. */ - Idx prev_valid_len = pstr->valid_len; - -#ifdef RE_ENABLE_I18N - if (BE (pstr->offsets_needed, 0)) - { - pstr->len = pstr->raw_len - idx + offset; - pstr->stop = pstr->raw_stop - idx + offset; - pstr->offsets_needed = 0; - } -#endif - pstr->valid_len = 0; -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - { - Idx wcs_idx; - wint_t wc = WEOF; - - if (pstr->is_utf8) - { - const unsigned char *raw, *p, *end; - - /* Special case UTF-8. Multi-byte chars start with any - byte other than 0x80 - 0xbf. */ - raw = pstr->raw_mbs + pstr->raw_mbs_idx; - end = raw + (offset - pstr->mb_cur_max); - if (end < pstr->raw_mbs) - end = pstr->raw_mbs; - p = raw + offset - 1; -#ifdef _LIBC - /* We know the wchar_t encoding is UCS4, so for the simple - case, ASCII characters, skip the conversion step. */ - if (isascii (*p) && BE (pstr->trans == NULL, 1)) - { - memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); - /* pstr->valid_len = 0; */ - wc = (wchar_t) *p; - } - else -#endif - for (; p >= end; --p) - if ((*p & 0xc0) != 0x80) - { - mbstate_t cur_state; - wchar_t wc2; - Idx mlen = raw + pstr->len - p; - unsigned char buf[6]; - size_t mbclen; - - if (BE (pstr->trans != NULL, 0)) - { - int i = mlen < 6 ? mlen : 6; - while (--i >= 0) - buf[i] = pstr->trans[p[i]]; - } - /* XXX Don't use mbrtowc, we know which conversion - to use (UTF-8 -> UCS4). */ - memset (&cur_state, 0, sizeof (cur_state)); - mbclen = mbrtowc (&wc2, (const char *) p, mlen, - &cur_state); - if (raw + offset - p <= mbclen - && mbclen < (size_t) -2) - { - memset (&pstr->cur_state, '\0', - sizeof (mbstate_t)); - pstr->valid_len = mbclen - (raw + offset - p); - wc = wc2; - } - break; - } - } - - if (wc == WEOF) - pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; - if (wc == WEOF) - pstr->tip_context - = re_string_context_at (pstr, prev_valid_len - 1, eflags); - else - pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) - && IS_WIDE_WORD_CHAR (wc)) - ? CONTEXT_WORD - : ((IS_WIDE_NEWLINE (wc) - && pstr->newline_anchor) - ? CONTEXT_NEWLINE : 0)); - if (BE (pstr->valid_len, 0)) - { - for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) - pstr->wcs[wcs_idx] = WEOF; - if (pstr->mbs_allocated) - memset (pstr->mbs, 255, pstr->valid_len); - } - pstr->valid_raw_len = pstr->valid_len; - } - else -#endif /* RE_ENABLE_I18N */ - { - int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; - pstr->valid_raw_len = 0; - if (pstr->trans) - c = pstr->trans[c]; - pstr->tip_context = (bitset_contain (pstr->word_char, c) - ? CONTEXT_WORD - : ((IS_NEWLINE (c) && pstr->newline_anchor) - ? CONTEXT_NEWLINE : 0)); - } - } - if (!BE (pstr->mbs_allocated, 0)) - pstr->mbs += offset; - } - pstr->raw_mbs_idx = idx; - pstr->len -= offset; - pstr->stop -= offset; - - /* Then build the buffers. */ -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - { - if (pstr->icase) - { - reg_errcode_t ret = build_wcs_upper_buffer (pstr); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - else - build_wcs_buffer (pstr); - } - else -#endif /* RE_ENABLE_I18N */ - if (BE (pstr->mbs_allocated, 0)) - { - if (pstr->icase) - build_upper_buffer (pstr); - else if (pstr->trans != NULL) - re_string_translate_buffer (pstr); - } - else - pstr->valid_len = pstr->len; - - pstr->cur_idx = 0; - return REG_NOERROR; -} - -static unsigned char -internal_function __attribute ((pure)) -re_string_peek_byte_case (const re_string_t *pstr, Idx idx) -{ - int ch; - Idx off; - - /* Handle the common (easiest) cases first. */ - if (BE (!pstr->mbs_allocated, 1)) - return re_string_peek_byte (pstr, idx); - -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1 - && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) - return re_string_peek_byte (pstr, idx); -#endif - - off = pstr->cur_idx + idx; -#ifdef RE_ENABLE_I18N - if (pstr->offsets_needed) - off = pstr->offsets[off]; -#endif - - ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; - -#ifdef RE_ENABLE_I18N - /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I - this function returns CAPITAL LETTER I instead of first byte of - DOTLESS SMALL LETTER I. The latter would confuse the parser, - since peek_byte_case doesn't advance cur_idx in any way. */ - if (pstr->offsets_needed && !isascii (ch)) - return re_string_peek_byte (pstr, idx); -#endif - - return ch; -} - -static unsigned char -internal_function __attribute ((pure)) -re_string_fetch_byte_case (re_string_t *pstr) -{ - if (BE (!pstr->mbs_allocated, 1)) - return re_string_fetch_byte (pstr); - -#ifdef RE_ENABLE_I18N - if (pstr->offsets_needed) - { - Idx off; - int ch; - - /* For tr_TR.UTF-8 [[:islower:]] there is - [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip - in that case the whole multi-byte character and return - the original letter. On the other side, with - [[: DOTLESS SMALL LETTER I return [[:I, as doing - anything else would complicate things too much. */ - - if (!re_string_first_byte (pstr, pstr->cur_idx)) - return re_string_fetch_byte (pstr); - - off = pstr->offsets[pstr->cur_idx]; - ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; - - if (! isascii (ch)) - return re_string_fetch_byte (pstr); - - re_string_skip_bytes (pstr, - re_string_char_size_at (pstr, pstr->cur_idx)); - return ch; - } -#endif - - return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; -} - -static void -internal_function -re_string_destruct (re_string_t *pstr) -{ -#ifdef RE_ENABLE_I18N - re_free (pstr->wcs); - re_free (pstr->offsets); -#endif /* RE_ENABLE_I18N */ - if (pstr->mbs_allocated) - re_free (pstr->mbs); -} - -/* Return the context at IDX in INPUT. */ - -static unsigned int -internal_function -re_string_context_at (const re_string_t *input, Idx idx, int eflags) -{ - int c; - if (BE (! REG_VALID_INDEX (idx), 0)) - /* In this case, we use the value stored in input->tip_context, - since we can't know the character in input->mbs[-1] here. */ - return input->tip_context; - if (BE (idx == input->len, 0)) - return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF - : CONTEXT_NEWLINE | CONTEXT_ENDBUF); -#ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1) - { - wint_t wc; - Idx wc_idx = idx; - while(input->wcs[wc_idx] == WEOF) - { -#ifdef DEBUG - /* It must not happen. */ - assert (REG_VALID_INDEX (wc_idx)); -#endif - --wc_idx; - if (! REG_VALID_INDEX (wc_idx)) - return input->tip_context; - } - wc = input->wcs[wc_idx]; - if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) - return CONTEXT_WORD; - return (IS_WIDE_NEWLINE (wc) && input->newline_anchor - ? CONTEXT_NEWLINE : 0); - } - else -#endif - { - c = re_string_byte_at (input, idx); - if (bitset_contain (input->word_char, c)) - return CONTEXT_WORD; - return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; - } -} - -/* Functions for set operation. */ - -static reg_errcode_t -internal_function -re_node_set_alloc (re_node_set *set, Idx size) -{ - set->alloc = size; - set->nelem = 0; - set->elems = re_malloc (Idx, size); - if (BE (set->elems == NULL, 0)) - return REG_ESPACE; - return REG_NOERROR; -} - -static reg_errcode_t -internal_function -re_node_set_init_1 (re_node_set *set, Idx elem) -{ - set->alloc = 1; - set->nelem = 1; - set->elems = re_malloc (Idx, 1); - if (BE (set->elems == NULL, 0)) - { - set->alloc = set->nelem = 0; - return REG_ESPACE; - } - set->elems[0] = elem; - return REG_NOERROR; -} - -static reg_errcode_t -internal_function -re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2) -{ - set->alloc = 2; - set->elems = re_malloc (Idx, 2); - if (BE (set->elems == NULL, 0)) - return REG_ESPACE; - if (elem1 == elem2) - { - set->nelem = 1; - set->elems[0] = elem1; - } - else - { - set->nelem = 2; - if (elem1 < elem2) - { - set->elems[0] = elem1; - set->elems[1] = elem2; - } - else - { - set->elems[0] = elem2; - set->elems[1] = elem1; - } - } - return REG_NOERROR; -} - -static reg_errcode_t -internal_function -re_node_set_init_copy (re_node_set *dest, const re_node_set *src) -{ - dest->nelem = src->nelem; - if (src->nelem > 0) - { - dest->alloc = dest->nelem; - dest->elems = re_malloc (Idx, dest->alloc); - if (BE (dest->elems == NULL, 0)) - { - dest->alloc = dest->nelem = 0; - return REG_ESPACE; - } - memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); - } - else - re_node_set_init_empty (dest); - return REG_NOERROR; -} - -/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to - DEST. Return value indicate the error code or REG_NOERROR if succeeded. - Note: We assume dest->elems is NULL, when dest->alloc is 0. */ - -static reg_errcode_t -internal_function -re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, - const re_node_set *src2) -{ - Idx i1, i2, is, id, delta, sbase; - if (src1->nelem == 0 || src2->nelem == 0) - return REG_NOERROR; - - /* We need dest->nelem + 2 * elems_in_intersection; this is a - conservative estimate. */ - if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) - { - Idx new_alloc = src1->nelem + src2->nelem + dest->alloc; - Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc); - if (BE (new_elems == NULL, 0)) - return REG_ESPACE; - dest->elems = new_elems; - dest->alloc = new_alloc; - } - - /* Find the items in the intersection of SRC1 and SRC2, and copy - into the top of DEST those that are not already in DEST itself. */ - sbase = dest->nelem + src1->nelem + src2->nelem; - i1 = src1->nelem - 1; - i2 = src2->nelem - 1; - id = dest->nelem - 1; - for (;;) - { - if (src1->elems[i1] == src2->elems[i2]) - { - /* Try to find the item in DEST. Maybe we could binary search? */ - while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1]) - --id; - - if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1]) - dest->elems[--sbase] = src1->elems[i1]; - - if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2)) - break; - } - - /* Lower the highest of the two items. */ - else if (src1->elems[i1] < src2->elems[i2]) - { - if (! REG_VALID_INDEX (--i2)) - break; - } - else - { - if (! REG_VALID_INDEX (--i1)) - break; - } - } - - id = dest->nelem - 1; - is = dest->nelem + src1->nelem + src2->nelem - 1; - delta = is - sbase + 1; - - /* Now copy. When DELTA becomes zero, the remaining - DEST elements are already in place; this is more or - less the same loop that is in re_node_set_merge. */ - dest->nelem += delta; - if (delta > 0 && REG_VALID_INDEX (id)) - for (;;) - { - if (dest->elems[is] > dest->elems[id]) - { - /* Copy from the top. */ - dest->elems[id + delta--] = dest->elems[is--]; - if (delta == 0) - break; - } - else - { - /* Slide from the bottom. */ - dest->elems[id + delta] = dest->elems[id]; - if (! REG_VALID_INDEX (--id)) - break; - } - } - - /* Copy remaining SRC elements. */ - memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx)); - - return REG_NOERROR; -} - -/* Calculate the union set of the sets SRC1 and SRC2. And store it to - DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ - -static reg_errcode_t -internal_function -re_node_set_init_union (re_node_set *dest, const re_node_set *src1, - const re_node_set *src2) -{ - Idx i1, i2, id; - if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) - { - dest->alloc = src1->nelem + src2->nelem; - dest->elems = re_malloc (Idx, dest->alloc); - if (BE (dest->elems == NULL, 0)) - return REG_ESPACE; - } - else - { - if (src1 != NULL && src1->nelem > 0) - return re_node_set_init_copy (dest, src1); - else if (src2 != NULL && src2->nelem > 0) - return re_node_set_init_copy (dest, src2); - else - re_node_set_init_empty (dest); - return REG_NOERROR; - } - for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) - { - if (src1->elems[i1] > src2->elems[i2]) - { - dest->elems[id++] = src2->elems[i2++]; - continue; - } - if (src1->elems[i1] == src2->elems[i2]) - ++i2; - dest->elems[id++] = src1->elems[i1++]; - } - if (i1 < src1->nelem) - { - memcpy (dest->elems + id, src1->elems + i1, - (src1->nelem - i1) * sizeof (Idx)); - id += src1->nelem - i1; - } - else if (i2 < src2->nelem) - { - memcpy (dest->elems + id, src2->elems + i2, - (src2->nelem - i2) * sizeof (Idx)); - id += src2->nelem - i2; - } - dest->nelem = id; - return REG_NOERROR; -} - -/* Calculate the union set of the sets DEST and SRC. And store it to - DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ - -static reg_errcode_t -internal_function -re_node_set_merge (re_node_set *dest, const re_node_set *src) -{ - Idx is, id, sbase, delta; - if (src == NULL || src->nelem == 0) - return REG_NOERROR; - if (dest->alloc < 2 * src->nelem + dest->nelem) - { - Idx new_alloc = 2 * (src->nelem + dest->alloc); - Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc); - if (BE (new_buffer == NULL, 0)) - return REG_ESPACE; - dest->elems = new_buffer; - dest->alloc = new_alloc; - } - - if (BE (dest->nelem == 0, 0)) - { - dest->nelem = src->nelem; - memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); - return REG_NOERROR; - } - - /* Copy into the top of DEST the items of SRC that are not - found in DEST. Maybe we could binary search in DEST? */ - for (sbase = dest->nelem + 2 * src->nelem, - is = src->nelem - 1, id = dest->nelem - 1; - REG_VALID_INDEX (is) && REG_VALID_INDEX (id); ) - { - if (dest->elems[id] == src->elems[is]) - is--, id--; - else if (dest->elems[id] < src->elems[is]) - dest->elems[--sbase] = src->elems[is--]; - else /* if (dest->elems[id] > src->elems[is]) */ - --id; - } - - if (REG_VALID_INDEX (is)) - { - /* If DEST is exhausted, the remaining items of SRC must be unique. */ - sbase -= is + 1; - memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx)); - } - - id = dest->nelem - 1; - is = dest->nelem + 2 * src->nelem - 1; - delta = is - sbase + 1; - if (delta == 0) - return REG_NOERROR; - - /* Now copy. When DELTA becomes zero, the remaining - DEST elements are already in place. */ - dest->nelem += delta; - for (;;) - { - if (dest->elems[is] > dest->elems[id]) - { - /* Copy from the top. */ - dest->elems[id + delta--] = dest->elems[is--]; - if (delta == 0) - break; - } - else - { - /* Slide from the bottom. */ - dest->elems[id + delta] = dest->elems[id]; - if (! REG_VALID_INDEX (--id)) - { - /* Copy remaining SRC elements. */ - memcpy (dest->elems, dest->elems + sbase, - delta * sizeof (Idx)); - break; - } - } - } - - return REG_NOERROR; -} - -/* Insert the new element ELEM to the re_node_set* SET. - SET should not already have ELEM. - Return true if successful. */ - -static bool -internal_function -re_node_set_insert (re_node_set *set, Idx elem) -{ - Idx idx; - /* In case the set is empty. */ - if (set->alloc == 0) - return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1); - - if (BE (set->nelem, 0) == 0) - { - /* We already guaranteed above that set->alloc != 0. */ - set->elems[0] = elem; - ++set->nelem; - return true; - } - - /* Realloc if we need. */ - if (set->alloc == set->nelem) - { - Idx *new_elems; - set->alloc = set->alloc * 2; - new_elems = re_realloc (set->elems, Idx, set->alloc); - if (BE (new_elems == NULL, 0)) - return false; - set->elems = new_elems; - } - - /* Move the elements which follows the new element. Test the - first element separately to skip a check in the inner loop. */ - if (elem < set->elems[0]) - { - idx = 0; - for (idx = set->nelem; idx > 0; idx--) - set->elems[idx] = set->elems[idx - 1]; - } - else - { - for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) - set->elems[idx] = set->elems[idx - 1]; - } - - /* Insert the new element. */ - set->elems[idx] = elem; - ++set->nelem; - return true; -} - -/* Insert the new element ELEM to the re_node_set* SET. - SET should not already have any element greater than or equal to ELEM. - Return true if successful. */ - -static bool -internal_function -re_node_set_insert_last (re_node_set *set, Idx elem) -{ - /* Realloc if we need. */ - if (set->alloc == set->nelem) - { - Idx *new_elems; - set->alloc = (set->alloc + 1) * 2; - new_elems = re_realloc (set->elems, Idx, set->alloc); - if (BE (new_elems == NULL, 0)) - return false; - set->elems = new_elems; - } - - /* Insert the new element. */ - set->elems[set->nelem++] = elem; - return true; -} - -/* Compare two node sets SET1 and SET2. - Return true if SET1 and SET2 are equivalent. */ - -static bool -internal_function __attribute ((pure)) -re_node_set_compare (const re_node_set *set1, const re_node_set *set2) -{ - Idx i; - if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) - return false; - for (i = set1->nelem ; REG_VALID_INDEX (--i) ; ) - if (set1->elems[i] != set2->elems[i]) - return false; - return true; -} - -/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ - -static Idx -internal_function __attribute ((pure)) -re_node_set_contains (const re_node_set *set, Idx elem) -{ - __re_size_t idx, right, mid; - if (! REG_VALID_NONZERO_INDEX (set->nelem)) - return 0; - - /* Binary search the element. */ - idx = 0; - right = set->nelem - 1; - while (idx < right) - { - mid = (idx + right) / 2; - if (set->elems[mid] < elem) - idx = mid + 1; - else - right = mid; - } - return set->elems[idx] == elem ? idx + 1 : 0; -} - -static void -internal_function -re_node_set_remove_at (re_node_set *set, Idx idx) -{ - if (idx < 0 || idx >= set->nelem) - return; - --set->nelem; - for (; idx < set->nelem; idx++) - set->elems[idx] = set->elems[idx + 1]; -} - - -/* Add the token TOKEN to dfa->nodes, and return the index of the token. - Or return REG_MISSING if an error occurred. */ - -static Idx -internal_function -re_dfa_add_node (re_dfa_t *dfa, re_token_t token) -{ - if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) - { - size_t new_nodes_alloc = dfa->nodes_alloc * 2; - Idx *new_nexts, *new_indices; - re_node_set *new_edests, *new_eclosures; - re_token_t *new_nodes; - size_t max_object_size = - MAX (sizeof (re_token_t), - MAX (sizeof (re_node_set), - sizeof (Idx))); - - /* Avoid overflows. */ - if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0)) - return REG_MISSING; - - new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); - if (BE (new_nodes == NULL, 0)) - return REG_MISSING; - dfa->nodes = new_nodes; - new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc); - new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc); - new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); - new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); - if (BE (new_nexts == NULL || new_indices == NULL - || new_edests == NULL || new_eclosures == NULL, 0)) - return REG_MISSING; - dfa->nexts = new_nexts; - dfa->org_indices = new_indices; - dfa->edests = new_edests; - dfa->eclosures = new_eclosures; - dfa->nodes_alloc = new_nodes_alloc; - } - dfa->nodes[dfa->nodes_len] = token; - dfa->nodes[dfa->nodes_len].constraint = 0; -#ifdef RE_ENABLE_I18N - { - int type = token.type; - dfa->nodes[dfa->nodes_len].accept_mb = - (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; - } -#endif - dfa->nexts[dfa->nodes_len] = REG_MISSING; - re_node_set_init_empty (dfa->edests + dfa->nodes_len); - re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); - return dfa->nodes_len++; -} - -static inline re_hashval_t -internal_function -calc_state_hash (const re_node_set *nodes, unsigned int context) -{ - re_hashval_t hash = nodes->nelem + context; - Idx i; - for (i = 0 ; i < nodes->nelem ; i++) - hash += nodes->elems[i]; - return hash; -} - -/* Search for the state whose node_set is equivalent to NODES. - Return the pointer to the state, if we found it in the DFA. - Otherwise create the new one and return it. In case of an error - return NULL and set the error code in ERR. - Note: - We assume NULL as the invalid state, then it is possible that - return value is NULL and ERR is REG_NOERROR. - - We never return non-NULL value in case of any errors, it is for - optimization. */ - -static re_dfastate_t * -internal_function -re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, - const re_node_set *nodes) -{ - re_hashval_t hash; - re_dfastate_t *new_state; - struct re_state_table_entry *spot; - Idx i; -#ifdef lint - /* Suppress bogus uninitialized-variable warnings. */ - *err = REG_NOERROR; -#endif - if (BE (nodes->nelem == 0, 0)) - { - *err = REG_NOERROR; - return NULL; - } - hash = calc_state_hash (nodes, 0); - spot = dfa->state_table + (hash & dfa->state_hash_mask); - - for (i = 0 ; i < spot->num ; i++) - { - re_dfastate_t *state = spot->array[i]; - if (hash != state->hash) - continue; - if (re_node_set_compare (&state->nodes, nodes)) - return state; - } - - /* There are no appropriate state in the dfa, create the new one. */ - new_state = create_ci_newstate (dfa, nodes, hash); - if (BE (new_state == NULL, 0)) - *err = REG_ESPACE; - - return new_state; -} - -/* Search for the state whose node_set is equivalent to NODES and - whose context is equivalent to CONTEXT. - Return the pointer to the state, if we found it in the DFA. - Otherwise create the new one and return it. In case of an error - return NULL and set the error code in ERR. - Note: - We assume NULL as the invalid state, then it is possible that - return value is NULL and ERR is REG_NOERROR. - - We never return non-NULL value in case of any errors, it is for - optimization. */ - -static re_dfastate_t * -internal_function -re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, - const re_node_set *nodes, unsigned int context) -{ - re_hashval_t hash; - re_dfastate_t *new_state; - struct re_state_table_entry *spot; - Idx i; -#ifdef lint - /* Suppress bogus uninitialized-variable warnings. */ - *err = REG_NOERROR; -#endif - if (nodes->nelem == 0) - { - *err = REG_NOERROR; - return NULL; - } - hash = calc_state_hash (nodes, context); - spot = dfa->state_table + (hash & dfa->state_hash_mask); - - for (i = 0 ; i < spot->num ; i++) - { - re_dfastate_t *state = spot->array[i]; - if (state->hash == hash - && state->context == context - && re_node_set_compare (state->entrance_nodes, nodes)) - return state; - } - /* There are no appropriate state in `dfa', create the new one. */ - new_state = create_cd_newstate (dfa, nodes, context, hash); - if (BE (new_state == NULL, 0)) - *err = REG_ESPACE; - - return new_state; -} - -/* Finish initialization of the new state NEWSTATE, and using its hash value - HASH put in the appropriate bucket of DFA's state table. Return value - indicates the error code if failed. */ - -static reg_errcode_t -register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, - re_hashval_t hash) -{ - struct re_state_table_entry *spot; - reg_errcode_t err; - Idx i; - - newstate->hash = hash; - err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem); - if (BE (err != REG_NOERROR, 0)) - return REG_ESPACE; - for (i = 0; i < newstate->nodes.nelem; i++) - { - Idx elem = newstate->nodes.elems[i]; - if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) - if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0)) - return REG_ESPACE; - } - - spot = dfa->state_table + (hash & dfa->state_hash_mask); - if (BE (spot->alloc <= spot->num, 0)) - { - Idx new_alloc = 2 * spot->num + 2; - re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *, - new_alloc); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - spot->array = new_array; - spot->alloc = new_alloc; - } - spot->array[spot->num++] = newstate; - return REG_NOERROR; -} - -static void -free_state (re_dfastate_t *state) -{ - re_node_set_free (&state->non_eps_nodes); - re_node_set_free (&state->inveclosure); - if (state->entrance_nodes != &state->nodes) - { - re_node_set_free (state->entrance_nodes); - re_free (state->entrance_nodes); - } - re_node_set_free (&state->nodes); - re_free (state->word_trtable); - re_free (state->trtable); - re_free (state); -} - -/* Create the new state which is independ of contexts. - Return the new state if succeeded, otherwise return NULL. */ - -static re_dfastate_t * -internal_function -create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, - re_hashval_t hash) -{ - Idx i; - reg_errcode_t err; - re_dfastate_t *newstate; - - newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); - if (BE (newstate == NULL, 0)) - return NULL; - err = re_node_set_init_copy (&newstate->nodes, nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_free (newstate); - return NULL; - } - - newstate->entrance_nodes = &newstate->nodes; - for (i = 0 ; i < nodes->nelem ; i++) - { - re_token_t *node = dfa->nodes + nodes->elems[i]; - re_token_type_t type = node->type; - if (type == CHARACTER && !node->constraint) - continue; -#ifdef RE_ENABLE_I18N - newstate->accept_mb |= node->accept_mb; -#endif /* RE_ENABLE_I18N */ - - /* If the state has the halt node, the state is a halt state. */ - if (type == END_OF_RE) - newstate->halt = 1; - else if (type == OP_BACK_REF) - newstate->has_backref = 1; - else if (type == ANCHOR || node->constraint) - newstate->has_constraint = 1; - } - err = register_state (dfa, newstate, hash); - if (BE (err != REG_NOERROR, 0)) - { - free_state (newstate); - newstate = NULL; - } - return newstate; -} - -/* Create the new state which is depend on the context CONTEXT. - Return the new state if succeeded, otherwise return NULL. */ - -static re_dfastate_t * -internal_function -create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, - unsigned int context, re_hashval_t hash) -{ - Idx i, nctx_nodes = 0; - reg_errcode_t err; - re_dfastate_t *newstate; - - newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); - if (BE (newstate == NULL, 0)) - return NULL; - err = re_node_set_init_copy (&newstate->nodes, nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_free (newstate); - return NULL; - } - - newstate->context = context; - newstate->entrance_nodes = &newstate->nodes; - - for (i = 0 ; i < nodes->nelem ; i++) - { - unsigned int constraint = 0; - re_token_t *node = dfa->nodes + nodes->elems[i]; - re_token_type_t type = node->type; - if (node->constraint) - constraint = node->constraint; - - if (type == CHARACTER && !constraint) - continue; -#ifdef RE_ENABLE_I18N - newstate->accept_mb |= node->accept_mb; -#endif /* RE_ENABLE_I18N */ - - /* If the state has the halt node, the state is a halt state. */ - if (type == END_OF_RE) - newstate->halt = 1; - else if (type == OP_BACK_REF) - newstate->has_backref = 1; - else if (type == ANCHOR) - constraint = node->opr.ctx_type; - - if (constraint) - { - if (newstate->entrance_nodes == &newstate->nodes) - { - newstate->entrance_nodes = re_malloc (re_node_set, 1); - if (BE (newstate->entrance_nodes == NULL, 0)) - { - free_state (newstate); - return NULL; - } - re_node_set_init_copy (newstate->entrance_nodes, nodes); - nctx_nodes = 0; - newstate->has_constraint = 1; - } - - if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) - { - re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); - ++nctx_nodes; - } - } - } - err = register_state (dfa, newstate, hash); - if (BE (err != REG_NOERROR, 0)) - { - free_state (newstate); - newstate = NULL; - } - return newstate; -} diff --git a/lib/regex_internal.h b/lib/regex_internal.h deleted file mode 100644 index b7a8d22..0000000 --- a/lib/regex_internal.h +++ /dev/null @@ -1,864 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _REGEX_INTERNAL_H -#define _REGEX_INTERNAL_H 1 - -#include <assert.h> -#include <ctype.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC -# include <langinfo.h> -#endif -#if defined HAVE_LOCALE_H || defined _LIBC -# include <locale.h> -#endif - -#include <wchar.h> -#include <wctype.h> -#include <stdint.h> -#if defined _LIBC -# include <bits/libc-lock.h> -#else -# if defined __libc_lock_init -# undef __libc_lock_init -# endif -# define __libc_lock_init(NAME) do { } while (0) -# if defined __libc_lock_lock -# undef __libc_lock_lock -# endif -# define __libc_lock_lock(NAME) do { } while (0) -# if defined __libc_lock_unlock -# undef __libc_lock_unlock -# endif -# define __libc_lock_unlock(NAME) do { } while (0) -#endif - -/* In case that the system doesn't have isblank(). */ -#if !defined _LIBC && !HAVE_DECL_ISBLANK && !defined isblank -# define isblank(ch) ((ch) == ' ' || (ch) == '\t') -#endif - -#ifdef _LIBC -# ifndef _RE_DEFINE_LOCALE_FUNCTIONS -# define _RE_DEFINE_LOCALE_FUNCTIONS 1 -# include <locale/localeinfo.h> -# include <locale/elem-hash.h> -# include <locale/coll-lookup.h> -# endif -#endif - -/* This is for other GNU distributions with internationalized messages. */ -#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC -# include <libintl.h> -# ifdef _LIBC -# undef gettext -# define gettext(msgid) \ - INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) -# endif -#else -# define gettext(msgid) (msgid) -#endif - -#ifndef gettext_noop -/* This define is so xgettext can find the internationalizable - strings. */ -# define gettext_noop(String) String -#endif - -/* For loser systems without the definition. */ -#ifndef SIZE_MAX -# define SIZE_MAX ((size_t) -1) -#endif - -#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC -# define RE_ENABLE_I18N -#endif - -#if __GNUC__ >= 3 -# define BE(expr, val) __builtin_expect (expr, val) -#else -# define BE(expr, val) (expr) -# ifdef _LIBC -# define inline -# endif -#endif - -/* Number of ASCII characters. */ -#define ASCII_CHARS 0x80 - -/* Number of single byte characters. */ -#define SBC_MAX (UCHAR_MAX + 1) - -#define COLL_ELEM_LEN_MAX 8 - -/* The character which represents newline. */ -#define NEWLINE_CHAR '\n' -#define WIDE_NEWLINE_CHAR L'\n' - -/* Rename to standard API for using out of glibc. */ -#ifndef _LIBC -# define __wctype wctype -# define __iswctype iswctype -# define __btowc btowc -# define __wcrtomb wcrtomb -# define __regfree regfree -# define attribute_hidden -#endif /* not _LIBC */ - -#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) -# define __attribute(arg) __attribute__ (arg) -#else -# define __attribute(arg) -#endif - -typedef __re_idx_t Idx; - -/* Special return value for failure to match. */ -#define REG_MISSING ((Idx) -1) - -/* Special return value for internal error. */ -#define REG_ERROR ((Idx) -2) - -/* Test whether N is a valid index, and is not one of the above. */ -#ifdef _REGEX_LARGE_OFFSETS -# define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR) -#else -# define REG_VALID_INDEX(n) (0 <= (n)) -#endif - -/* Test whether N is a valid nonzero index. */ -#ifdef _REGEX_LARGE_OFFSETS -# define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1)) -#else -# define REG_VALID_NONZERO_INDEX(n) (0 < (n)) -#endif - -/* A hash value, suitable for computing hash tables. */ -typedef __re_size_t re_hashval_t; - -/* An integer used to represent a set of bits. It must be unsigned, - and must be at least as wide as unsigned int. */ -typedef unsigned long int bitset_word_t; -/* All bits set in a bitset_word_t. */ -#define BITSET_WORD_MAX ULONG_MAX - -/* Number of bits in a bitset_word_t. For portability to hosts with - padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)'; - instead, deduce it directly from BITSET_WORD_MAX. Avoid - greater-than-32-bit integers and unconditional shifts by more than - 31 bits, as they're not portable. */ -#if BITSET_WORD_MAX == 0xffffffff -# define BITSET_WORD_BITS 32 -#elif BITSET_WORD_MAX >> 31 >> 5 == 1 -# define BITSET_WORD_BITS 36 -#elif BITSET_WORD_MAX >> 31 >> 16 == 1 -# define BITSET_WORD_BITS 48 -#elif BITSET_WORD_MAX >> 31 >> 28 == 1 -# define BITSET_WORD_BITS 60 -#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1 -# define BITSET_WORD_BITS 64 -#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1 -# define BITSET_WORD_BITS 72 -#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1 -# define BITSET_WORD_BITS 128 -#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1 -# define BITSET_WORD_BITS 256 -#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1 -# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */ -# if BITSET_WORD_BITS <= SBC_MAX -# error "Invalid SBC_MAX" -# endif -#elif BITSET_WORD_MAX == (0xffffffff + 2) * 0xffffffff -/* Work around a bug in 64-bit PGC (before version 6.1-2), where the - preprocessor mishandles large unsigned values as if they were signed. */ -# define BITSET_WORD_BITS 64 -#else -# error "Add case for new bitset_word_t size" -#endif - -/* Number of bitset_word_t values in a bitset_t. */ -#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS) - -typedef bitset_word_t bitset_t[BITSET_WORDS]; -typedef bitset_word_t *re_bitset_ptr_t; -typedef const bitset_word_t *re_const_bitset_ptr_t; - -#define PREV_WORD_CONSTRAINT 0x0001 -#define PREV_NOTWORD_CONSTRAINT 0x0002 -#define NEXT_WORD_CONSTRAINT 0x0004 -#define NEXT_NOTWORD_CONSTRAINT 0x0008 -#define PREV_NEWLINE_CONSTRAINT 0x0010 -#define NEXT_NEWLINE_CONSTRAINT 0x0020 -#define PREV_BEGBUF_CONSTRAINT 0x0040 -#define NEXT_ENDBUF_CONSTRAINT 0x0080 -#define WORD_DELIM_CONSTRAINT 0x0100 -#define NOT_WORD_DELIM_CONSTRAINT 0x0200 - -typedef enum -{ - INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, - WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, - WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, - INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, - LINE_FIRST = PREV_NEWLINE_CONSTRAINT, - LINE_LAST = NEXT_NEWLINE_CONSTRAINT, - BUF_FIRST = PREV_BEGBUF_CONSTRAINT, - BUF_LAST = NEXT_ENDBUF_CONSTRAINT, - WORD_DELIM = WORD_DELIM_CONSTRAINT, - NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT -} re_context_type; - -typedef struct -{ - Idx alloc; - Idx nelem; - Idx *elems; -} re_node_set; - -typedef enum -{ - NON_TYPE = 0, - - /* Node type, These are used by token, node, tree. */ - CHARACTER = 1, - END_OF_RE = 2, - SIMPLE_BRACKET = 3, - OP_BACK_REF = 4, - OP_PERIOD = 5, -#ifdef RE_ENABLE_I18N - COMPLEX_BRACKET = 6, - OP_UTF8_PERIOD = 7, -#endif /* RE_ENABLE_I18N */ - - /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used - when the debugger shows values of this enum type. */ -#define EPSILON_BIT 8 - OP_OPEN_SUBEXP = EPSILON_BIT | 0, - OP_CLOSE_SUBEXP = EPSILON_BIT | 1, - OP_ALT = EPSILON_BIT | 2, - OP_DUP_ASTERISK = EPSILON_BIT | 3, - ANCHOR = EPSILON_BIT | 4, - - /* Tree type, these are used only by tree. */ - CONCAT = 16, - SUBEXP = 17, - - /* Token type, these are used only by token. */ - OP_DUP_PLUS = 18, - OP_DUP_QUESTION, - OP_OPEN_BRACKET, - OP_CLOSE_BRACKET, - OP_CHARSET_RANGE, - OP_OPEN_DUP_NUM, - OP_CLOSE_DUP_NUM, - OP_NON_MATCH_LIST, - OP_OPEN_COLL_ELEM, - OP_CLOSE_COLL_ELEM, - OP_OPEN_EQUIV_CLASS, - OP_CLOSE_EQUIV_CLASS, - OP_OPEN_CHAR_CLASS, - OP_CLOSE_CHAR_CLASS, - OP_WORD, - OP_NOTWORD, - OP_SPACE, - OP_NOTSPACE, - BACK_SLASH - -} re_token_type_t; - -#ifdef RE_ENABLE_I18N -typedef struct -{ - /* Multibyte characters. */ - wchar_t *mbchars; - - /* Collating symbols. */ -# ifdef _LIBC - int32_t *coll_syms; -# endif - - /* Equivalence classes. */ -# ifdef _LIBC - int32_t *equiv_classes; -# endif - - /* Range expressions. */ -# ifdef _LIBC - uint32_t *range_starts; - uint32_t *range_ends; -# else /* not _LIBC */ - wchar_t *range_starts; - wchar_t *range_ends; -# endif /* not _LIBC */ - - /* Character classes. */ - wctype_t *char_classes; - - /* If this character set is the non-matching list. */ - unsigned int non_match : 1; - - /* # of multibyte characters. */ - Idx nmbchars; - - /* # of collating symbols. */ - Idx ncoll_syms; - - /* # of equivalence classes. */ - Idx nequiv_classes; - - /* # of range expressions. */ - Idx nranges; - - /* # of character classes. */ - Idx nchar_classes; -} re_charset_t; -#endif /* RE_ENABLE_I18N */ - -typedef struct -{ - union - { - unsigned char c; /* for CHARACTER */ - re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ -#ifdef RE_ENABLE_I18N - re_charset_t *mbcset; /* for COMPLEX_BRACKET */ -#endif /* RE_ENABLE_I18N */ - Idx idx; /* for BACK_REF */ - re_context_type ctx_type; /* for ANCHOR */ - } opr; -#if __GNUC__ >= 2 && !__STRICT_ANSI__ - re_token_type_t type : 8; -#else - re_token_type_t type; -#endif - unsigned int constraint : 10; /* context constraint */ - unsigned int duplicated : 1; - unsigned int opt_subexp : 1; -#ifdef RE_ENABLE_I18N - unsigned int accept_mb : 1; - /* These 2 bits can be moved into the union if needed (e.g. if running out - of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ - unsigned int mb_partial : 1; -#endif - unsigned int word_char : 1; -} re_token_t; - -#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) - -struct re_string_t -{ - /* Indicate the raw buffer which is the original string passed as an - argument of regexec(), re_search(), etc.. */ - const unsigned char *raw_mbs; - /* Store the multibyte string. In case of "case insensitive mode" like - REG_ICASE, upper cases of the string are stored, otherwise MBS points - the same address that RAW_MBS points. */ - unsigned char *mbs; -#ifdef RE_ENABLE_I18N - /* Store the wide character string which is corresponding to MBS. */ - wint_t *wcs; - Idx *offsets; - mbstate_t cur_state; -#endif - /* Index in RAW_MBS. Each character mbs[i] corresponds to - raw_mbs[raw_mbs_idx + i]. */ - Idx raw_mbs_idx; - /* The length of the valid characters in the buffers. */ - Idx valid_len; - /* The corresponding number of bytes in raw_mbs array. */ - Idx valid_raw_len; - /* The length of the buffers MBS and WCS. */ - Idx bufs_len; - /* The index in MBS, which is updated by re_string_fetch_byte. */ - Idx cur_idx; - /* length of RAW_MBS array. */ - Idx raw_len; - /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ - Idx len; - /* End of the buffer may be shorter than its length in the cases such - as re_match_2, re_search_2. Then, we use STOP for end of the buffer - instead of LEN. */ - Idx raw_stop; - /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ - Idx stop; - - /* The context of mbs[0]. We store the context independently, since - the context of mbs[0] may be different from raw_mbs[0], which is - the beginning of the input string. */ - unsigned int tip_context; - /* The translation passed as a part of an argument of re_compile_pattern. */ - RE_TRANSLATE_TYPE trans; - /* Copy of re_dfa_t's word_char. */ - re_const_bitset_ptr_t word_char; - /* true if REG_ICASE. */ - unsigned char icase; - unsigned char is_utf8; - unsigned char map_notascii; - unsigned char mbs_allocated; - unsigned char offsets_needed; - unsigned char newline_anchor; - unsigned char word_ops_used; - int mb_cur_max; -}; -typedef struct re_string_t re_string_t; - - -struct re_dfa_t; -typedef struct re_dfa_t re_dfa_t; - -#ifndef _LIBC -# ifdef __i386__ -# define internal_function __attribute ((regparm (3), stdcall)) -# else -# define internal_function -# endif -#endif - -static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, - Idx new_buf_len) - internal_function; -#ifdef RE_ENABLE_I18N -static void build_wcs_buffer (re_string_t *pstr) internal_function; -static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr) - internal_function; -#endif /* RE_ENABLE_I18N */ -static void build_upper_buffer (re_string_t *pstr) internal_function; -static void re_string_translate_buffer (re_string_t *pstr) internal_function; -static unsigned int re_string_context_at (const re_string_t *input, Idx idx, - int eflags) - internal_function __attribute ((pure)); -#define re_string_peek_byte(pstr, offset) \ - ((pstr)->mbs[(pstr)->cur_idx + offset]) -#define re_string_fetch_byte(pstr) \ - ((pstr)->mbs[(pstr)->cur_idx++]) -#define re_string_first_byte(pstr, idx) \ - ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF) -#define re_string_is_single_byte_char(pstr, idx) \ - ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \ - || (pstr)->wcs[(idx) + 1] != WEOF)) -#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) -#define re_string_cur_idx(pstr) ((pstr)->cur_idx) -#define re_string_get_buffer(pstr) ((pstr)->mbs) -#define re_string_length(pstr) ((pstr)->len) -#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) -#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) -#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) - -#include <alloca.h> - -#ifndef _LIBC -# if HAVE_ALLOCA -/* The OS usually guarantees only one guard page at the bottom of the stack, - and a page size can be as small as 4096 bytes. So we cannot safely - allocate anything larger than 4096 bytes. Also care for the possibility - of a few compiler-allocated temporary stack slots. */ -# define __libc_use_alloca(n) ((n) < 4032) -# else -/* alloca is implemented with malloc, so just use malloc. */ -# define __libc_use_alloca(n) 0 -# endif -#endif - -#ifndef MAX -# define MAX(a,b) ((a) < (b) ? (b) : (a)) -#endif - -#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) -#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) -#define re_free(p) free (p) - -struct bin_tree_t -{ - struct bin_tree_t *parent; - struct bin_tree_t *left; - struct bin_tree_t *right; - struct bin_tree_t *first; - struct bin_tree_t *next; - - re_token_t token; - - /* `node_idx' is the index in dfa->nodes, if `type' == 0. - Otherwise `type' indicate the type of this node. */ - Idx node_idx; -}; -typedef struct bin_tree_t bin_tree_t; - -#define BIN_TREE_STORAGE_SIZE \ - ((1024 - sizeof (void *)) / sizeof (bin_tree_t)) - -struct bin_tree_storage_t -{ - struct bin_tree_storage_t *next; - bin_tree_t data[BIN_TREE_STORAGE_SIZE]; -}; -typedef struct bin_tree_storage_t bin_tree_storage_t; - -#define CONTEXT_WORD 1 -#define CONTEXT_NEWLINE (CONTEXT_WORD << 1) -#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1) -#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1) - -#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD) -#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE) -#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF) -#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF) -#define IS_ORDINARY_CONTEXT(c) ((c) == 0) - -#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') -#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) -#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') -#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) - -#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ - ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ - || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ - || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\ - || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context))) - -#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \ - ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ - || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ - || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \ - || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context))) - -struct re_dfastate_t -{ - re_hashval_t hash; - re_node_set nodes; - re_node_set non_eps_nodes; - re_node_set inveclosure; - re_node_set *entrance_nodes; - struct re_dfastate_t **trtable, **word_trtable; - unsigned int context : 4; - unsigned int halt : 1; - /* If this state can accept `multi byte'. - Note that we refer to multibyte characters, and multi character - collating elements as `multi byte'. */ - unsigned int accept_mb : 1; - /* If this state has backreference node(s). */ - unsigned int has_backref : 1; - unsigned int has_constraint : 1; -}; -typedef struct re_dfastate_t re_dfastate_t; - -struct re_state_table_entry -{ - Idx num; - Idx alloc; - re_dfastate_t **array; -}; - -/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ - -typedef struct -{ - Idx next_idx; - Idx alloc; - re_dfastate_t **array; -} state_array_t; - -/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ - -typedef struct -{ - Idx node; - Idx str_idx; /* The position NODE match at. */ - state_array_t path; -} re_sub_match_last_t; - -/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. - And information about the node, whose type is OP_CLOSE_SUBEXP, - corresponding to NODE is stored in LASTS. */ - -typedef struct -{ - Idx str_idx; - Idx node; - state_array_t *path; - Idx alasts; /* Allocation size of LASTS. */ - Idx nlasts; /* The number of LASTS. */ - re_sub_match_last_t **lasts; -} re_sub_match_top_t; - -struct re_backref_cache_entry -{ - Idx node; - Idx str_idx; - Idx subexp_from; - Idx subexp_to; - char more; - char unused; - unsigned short int eps_reachable_subexps_map; -}; - -typedef struct -{ - /* The string object corresponding to the input string. */ - re_string_t input; -#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) - const re_dfa_t *const dfa; -#else - const re_dfa_t *dfa; -#endif - /* EFLAGS of the argument of regexec. */ - int eflags; - /* Where the matching ends. */ - Idx match_last; - Idx last_node; - /* The state log used by the matcher. */ - re_dfastate_t **state_log; - Idx state_log_top; - /* Back reference cache. */ - Idx nbkref_ents; - Idx abkref_ents; - struct re_backref_cache_entry *bkref_ents; - int max_mb_elem_len; - Idx nsub_tops; - Idx asub_tops; - re_sub_match_top_t **sub_tops; -} re_match_context_t; - -typedef struct -{ - re_dfastate_t **sifted_states; - re_dfastate_t **limited_states; - Idx last_node; - Idx last_str_idx; - re_node_set limits; -} re_sift_context_t; - -struct re_fail_stack_ent_t -{ - Idx idx; - Idx node; - regmatch_t *regs; - re_node_set eps_via_nodes; -}; - -struct re_fail_stack_t -{ - Idx num; - Idx alloc; - struct re_fail_stack_ent_t *stack; -}; - -struct re_dfa_t -{ - re_token_t *nodes; - size_t nodes_alloc; - size_t nodes_len; - Idx *nexts; - Idx *org_indices; - re_node_set *edests; - re_node_set *eclosures; - re_node_set *inveclosures; - struct re_state_table_entry *state_table; - re_dfastate_t *init_state; - re_dfastate_t *init_state_word; - re_dfastate_t *init_state_nl; - re_dfastate_t *init_state_begbuf; - bin_tree_t *str_tree; - bin_tree_storage_t *str_tree_storage; - re_bitset_ptr_t sb_char; - int str_tree_storage_idx; - - /* number of subexpressions `re_nsub' is in regex_t. */ - re_hashval_t state_hash_mask; - Idx init_node; - Idx nbackref; /* The number of backreference in this dfa. */ - - /* Bitmap expressing which backreference is used. */ - bitset_word_t used_bkref_map; - bitset_word_t completed_bkref_map; - - unsigned int has_plural_match : 1; - /* If this dfa has "multibyte node", which is a backreference or - a node which can accept multibyte character or multi character - collating element. */ - unsigned int has_mb_node : 1; - unsigned int is_utf8 : 1; - unsigned int map_notascii : 1; - unsigned int word_ops_used : 1; - int mb_cur_max; - bitset_t word_char; - reg_syntax_t syntax; - Idx *subexp_map; -#ifdef DEBUG - char* re_str; -#endif -#ifdef _LIBC - __libc_lock_define (, lock) -#endif -}; - -#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) -#define re_node_set_remove(set,id) \ - (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) -#define re_node_set_empty(p) ((p)->nelem = 0) -#define re_node_set_free(set) re_free ((set)->elems) - - -typedef enum -{ - SB_CHAR, - MB_CHAR, - EQUIV_CLASS, - COLL_SYM, - CHAR_CLASS -} bracket_elem_type; - -typedef struct -{ - bracket_elem_type type; - union - { - unsigned char ch; - unsigned char *name; - wchar_t wch; - } opr; -} bracket_elem_t; - - -/* Inline functions for bitset_t operation. */ - -static inline void -bitset_set (bitset_t set, Idx i) -{ - set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS; -} - -static inline void -bitset_clear (bitset_t set, Idx i) -{ - set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS); -} - -static inline bool -bitset_contain (const bitset_t set, Idx i) -{ - return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1; -} - -static inline void -bitset_empty (bitset_t set) -{ - memset (set, '\0', sizeof (bitset_t)); -} - -static inline void -bitset_set_all (bitset_t set) -{ - memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS)); - if (SBC_MAX % BITSET_WORD_BITS != 0) - set[BITSET_WORDS - 1] = - ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1; -} - -static inline void -bitset_copy (bitset_t dest, const bitset_t src) -{ - memcpy (dest, src, sizeof (bitset_t)); -} - -static inline void -bitset_not (bitset_t set) -{ - int bitset_i; - for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i) - set[bitset_i] = ~set[bitset_i]; - if (SBC_MAX % BITSET_WORD_BITS != 0) - set[BITSET_WORDS - 1] = - ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1) - & ~set[BITSET_WORDS - 1]); -} - -static inline void -bitset_merge (bitset_t dest, const bitset_t src) -{ - int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) - dest[bitset_i] |= src[bitset_i]; -} - -static inline void -bitset_mask (bitset_t dest, const bitset_t src) -{ - int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) - dest[bitset_i] &= src[bitset_i]; -} - -#ifdef RE_ENABLE_I18N -/* Inline functions for re_string. */ -static inline int -internal_function __attribute ((pure)) -re_string_char_size_at (const re_string_t *pstr, Idx idx) -{ - int byte_idx; - if (pstr->mb_cur_max == 1) - return 1; - for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx) - if (pstr->wcs[idx + byte_idx] != WEOF) - break; - return byte_idx; -} - -static inline wint_t -internal_function __attribute ((pure)) -re_string_wchar_at (const re_string_t *pstr, Idx idx) -{ - if (pstr->mb_cur_max == 1) - return (wint_t) pstr->mbs[idx]; - return (wint_t) pstr->wcs[idx]; -} - -static int -internal_function __attribute ((pure)) -re_string_elem_size_at (const re_string_t *pstr, Idx idx) -{ -# ifdef _LIBC - const unsigned char *p, *extra; - const int32_t *table, *indirect; - int32_t tmp; -# include <locale/weight.h> - uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - - if (nrules != 0) - { - table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_INDIRECTMB); - p = pstr->mbs + idx; - tmp = findidx (&p); - return p - pstr->mbs - idx; - } - else -# endif /* _LIBC */ - return 1; -} -#endif /* RE_ENABLE_I18N */ - -#endif /* _REGEX_INTERNAL_H */ diff --git a/lib/regexec.c b/lib/regexec.c deleted file mode 100644 index cba5ea2..0000000 --- a/lib/regexec.c +++ /dev/null @@ -1,4398 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, - Idx n) internal_function; -static void match_ctx_clean (re_match_context_t *mctx) internal_function; -static void match_ctx_free (re_match_context_t *cache) internal_function; -static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node, - Idx str_idx, Idx from, Idx to) - internal_function; -static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) - internal_function; -static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node, - Idx str_idx) internal_function; -static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, - Idx node, Idx str_idx) - internal_function; -static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, - re_dfastate_t **limited_sts, Idx last_node, - Idx last_str_idx) - internal_function; -static reg_errcode_t re_search_internal (const regex_t *preg, - const char *string, Idx length, - Idx start, Idx last_start, Idx stop, - size_t nmatch, regmatch_t pmatch[], - int eflags) internal_function; -static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, - const char *string1, Idx length1, - const char *string2, Idx length2, - Idx start, regoff_t range, - struct re_registers *regs, - Idx stop, bool ret_len) internal_function; -static regoff_t re_search_stub (struct re_pattern_buffer *bufp, - const char *string, Idx length, Idx start, - regoff_t range, Idx stop, - struct re_registers *regs, - bool ret_len) internal_function; -static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, - Idx nregs, int regs_allocated) - internal_function; -static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) - internal_function; -static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, - Idx *p_match_first) internal_function; -static Idx check_halt_state_context (const re_match_context_t *mctx, - const re_dfastate_t *state, Idx idx) - internal_function; -static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, - regmatch_t *prev_idx_match, Idx cur_node, - Idx cur_idx, Idx nmatch) internal_function; -static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, - Idx str_idx, Idx dest_node, Idx nregs, - regmatch_t *regs, - re_node_set *eps_via_nodes) - internal_function; -static reg_errcode_t set_regs (const regex_t *preg, - const re_match_context_t *mctx, - size_t nmatch, regmatch_t *pmatch, - bool fl_backtrack) internal_function; -static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) - internal_function; - -#ifdef RE_ENABLE_I18N -static int sift_states_iter_mb (const re_match_context_t *mctx, - re_sift_context_t *sctx, - Idx node_idx, Idx str_idx, Idx max_str_idx) - internal_function; -#endif /* RE_ENABLE_I18N */ -static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, - re_sift_context_t *sctx) - internal_function; -static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, - re_sift_context_t *sctx, Idx str_idx, - re_node_set *cur_dest) - internal_function; -static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, - re_sift_context_t *sctx, - Idx str_idx, - re_node_set *dest_nodes) - internal_function; -static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, - re_node_set *dest_nodes, - const re_node_set *candidates) - internal_function; -static bool check_dst_limits (const re_match_context_t *mctx, - const re_node_set *limits, - Idx dst_node, Idx dst_idx, Idx src_node, - Idx src_idx) internal_function; -static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, - int boundaries, Idx subexp_idx, - Idx from_node, Idx bkref_idx) - internal_function; -static int check_dst_limits_calc_pos (const re_match_context_t *mctx, - Idx limit, Idx subexp_idx, - Idx node, Idx str_idx, - Idx bkref_idx) internal_function; -static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, - re_node_set *dest_nodes, - const re_node_set *candidates, - re_node_set *limits, - struct re_backref_cache_entry *bkref_ents, - Idx str_idx) internal_function; -static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, - re_sift_context_t *sctx, - Idx str_idx, const re_node_set *candidates) - internal_function; -static reg_errcode_t merge_state_array (const re_dfa_t *dfa, - re_dfastate_t **dst, - re_dfastate_t **src, Idx num) - internal_function; -static re_dfastate_t *find_recover_state (reg_errcode_t *err, - re_match_context_t *mctx) internal_function; -static re_dfastate_t *transit_state (reg_errcode_t *err, - re_match_context_t *mctx, - re_dfastate_t *state) internal_function; -static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, - re_match_context_t *mctx, - re_dfastate_t *next_state) - internal_function; -static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, - re_node_set *cur_nodes, - Idx str_idx) internal_function; -#if 0 -static re_dfastate_t *transit_state_sb (reg_errcode_t *err, - re_match_context_t *mctx, - re_dfastate_t *pstate) - internal_function; -#endif -#ifdef RE_ENABLE_I18N -static reg_errcode_t transit_state_mb (re_match_context_t *mctx, - re_dfastate_t *pstate) - internal_function; -#endif /* RE_ENABLE_I18N */ -static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, - const re_node_set *nodes) - internal_function; -static reg_errcode_t get_subexp (re_match_context_t *mctx, - Idx bkref_node, Idx bkref_str_idx) - internal_function; -static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, - const re_sub_match_top_t *sub_top, - re_sub_match_last_t *sub_last, - Idx bkref_node, Idx bkref_str) - internal_function; -static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, - Idx subexp_idx, int type) internal_function; -static reg_errcode_t check_arrival (re_match_context_t *mctx, - state_array_t *path, Idx top_node, - Idx top_str, Idx last_node, Idx last_str, - int type) internal_function; -static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, - Idx str_idx, - re_node_set *cur_nodes, - re_node_set *next_nodes) - internal_function; -static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, - re_node_set *cur_nodes, - Idx ex_subexp, int type) - internal_function; -static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, - re_node_set *dst_nodes, - Idx target, Idx ex_subexp, - int type) internal_function; -static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, - re_node_set *cur_nodes, Idx cur_str, - Idx subexp_num, int type) - internal_function; -static bool build_trtable (const re_dfa_t *dfa, - re_dfastate_t *state) internal_function; -#ifdef RE_ENABLE_I18N -static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, - const re_string_t *input, Idx idx) - internal_function; -# ifdef _LIBC -static unsigned int find_collation_sequence_value (const unsigned char *mbs, - size_t name_len) - internal_function; -# endif /* _LIBC */ -#endif /* RE_ENABLE_I18N */ -static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, - const re_dfastate_t *state, - re_node_set *states_node, - bitset_t *states_ch) internal_function; -static bool check_node_accept (const re_match_context_t *mctx, - const re_token_t *node, Idx idx) - internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) - internal_function; - -/* Entry point for POSIX code. */ - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *_Restrict_ preg; - const char *_Restrict_ string; - size_t nmatch; - regmatch_t pmatch[_Restrict_arr_]; - int eflags; -{ - reg_errcode_t err; - Idx start, length; -#ifdef _LIBC - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; -#endif - - if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) - return REG_BADPAT; - - if (eflags & REG_STARTEND) - { - start = pmatch[0].rm_so; - length = pmatch[0].rm_eo; - } - else - { - start = 0; - length = strlen (string); - } - - __libc_lock_lock (dfa->lock); - if (preg->no_sub) - err = re_search_internal (preg, string, length, start, length, - length, 0, NULL, eflags); - else - err = re_search_internal (preg, string, length, start, length, - length, nmatch, pmatch, eflags); - __libc_lock_unlock (dfa->lock); - return err != REG_NOERROR; -} - -#ifdef _LIBC -# include <shlib-compat.h> -versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); - -# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) -__typeof__ (__regexec) __compat_regexec; - -int -attribute_compat_text_section -__compat_regexec (const regex_t *_Restrict_ preg, - const char *_Restrict_ string, size_t nmatch, - regmatch_t pmatch[], int eflags) -{ - return regexec (preg, string, nmatch, pmatch, - eflags & (REG_NOTBOL | REG_NOTEOL)); -} -compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); -# endif -#endif - -/* Entry points for GNU code. */ - -/* re_match, re_search, re_match_2, re_search_2 - - The former two functions operate on STRING with length LENGTH, - while the later two operate on concatenation of STRING1 and STRING2 - with lengths LENGTH1 and LENGTH2, respectively. - - re_match() matches the compiled pattern in BUFP against the string, - starting at index START. - - re_search() first tries matching at index START, then it tries to match - starting from index START + 1, and so on. The last start position tried - is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same - way as re_match().) - - The parameter STOP of re_{match,search}_2 specifies that no match exceeding - the first STOP characters of the concatenation of the strings should be - concerned. - - If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match - and all groups is stored in REGS. (For the "_2" variants, the offsets are - computed relative to the concatenation, not relative to the individual - strings.) - - On success, re_match* functions return the length of the match, re_search* - return the position of the start of the match. Return value -1 means no - match was found and -2 indicates an internal error. */ - -regoff_t -re_match (bufp, string, length, start, regs) - struct re_pattern_buffer *bufp; - const char *string; - Idx length, start; - struct re_registers *regs; -{ - return re_search_stub (bufp, string, length, start, 0, length, regs, true); -} -#ifdef _LIBC -weak_alias (__re_match, re_match) -#endif - -regoff_t -re_search (bufp, string, length, start, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - Idx length, start; - regoff_t range; - struct re_registers *regs; -{ - return re_search_stub (bufp, string, length, start, range, length, regs, - false); -} -#ifdef _LIBC -weak_alias (__re_search, re_search) -#endif - -regoff_t -re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - Idx length1, length2, start, stop; - struct re_registers *regs; -{ - return re_search_2_stub (bufp, string1, length1, string2, length2, - start, 0, regs, stop, true); -} -#ifdef _LIBC -weak_alias (__re_match_2, re_match_2) -#endif - -regoff_t -re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - Idx length1, length2, start, stop; - regoff_t range; - struct re_registers *regs; -{ - return re_search_2_stub (bufp, string1, length1, string2, length2, - start, range, regs, stop, false); -} -#ifdef _LIBC -weak_alias (__re_search_2, re_search_2) -#endif - -static regoff_t -internal_function -re_search_2_stub (struct re_pattern_buffer *bufp, - const char *string1, Idx length1, - const char *string2, Idx length2, - Idx start, regoff_t range, struct re_registers *regs, - Idx stop, bool ret_len) -{ - const char *str; - regoff_t rval; - Idx len = length1 + length2; - char *s = NULL; - - if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0)) - return -2; - - /* Concatenate the strings. */ - if (length2 > 0) - if (length1 > 0) - { - s = re_malloc (char, len); - - if (BE (s == NULL, 0)) - return -2; -#ifdef _LIBC - memcpy (__mempcpy (s, string1, length1), string2, length2); -#else - memcpy (s, string1, length1); - memcpy (s + length1, string2, length2); -#endif - str = s; - } - else - str = string2; - else - str = string1; - - rval = re_search_stub (bufp, str, len, start, range, stop, regs, - ret_len); - re_free (s); - return rval; -} - -/* The parameters have the same meaning as those of re_search. - Additional parameters: - If RET_LEN is true the length of the match is returned (re_match style); - otherwise the position of the match is returned. */ - -static regoff_t -internal_function -re_search_stub (struct re_pattern_buffer *bufp, - const char *string, Idx length, - Idx start, regoff_t range, Idx stop, struct re_registers *regs, - bool ret_len) -{ - reg_errcode_t result; - regmatch_t *pmatch; - Idx nregs; - regoff_t rval; - int eflags = 0; -#ifdef _LIBC - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; -#endif - Idx last_start = start + range; - - /* Check for out-of-range. */ - if (BE (start < 0 || start > length, 0)) - return -1; - if (BE (length < last_start || (0 <= range && last_start < start), 0)) - last_start = length; - else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) - last_start = 0; - - __libc_lock_lock (dfa->lock); - - eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; - eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; - - /* Compile fastmap if we haven't yet. */ - if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate) - re_compile_fastmap (bufp); - - if (BE (bufp->no_sub, 0)) - regs = NULL; - - /* We need at least 1 register. */ - if (regs == NULL) - nregs = 1; - else if (BE (bufp->regs_allocated == REGS_FIXED - && regs->num_regs <= bufp->re_nsub, 0)) - { - nregs = regs->num_regs; - if (BE (nregs < 1, 0)) - { - /* Nothing can be copied to regs. */ - regs = NULL; - nregs = 1; - } - } - else - nregs = bufp->re_nsub + 1; - pmatch = re_malloc (regmatch_t, nregs); - if (BE (pmatch == NULL, 0)) - { - rval = -2; - goto out; - } - - result = re_search_internal (bufp, string, length, start, last_start, stop, - nregs, pmatch, eflags); - - rval = 0; - - /* I hope we needn't fill ther regs with -1's when no match was found. */ - if (result != REG_NOERROR) - rval = -1; - else if (regs != NULL) - { - /* If caller wants register contents data back, copy them. */ - bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, - bufp->regs_allocated); - if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) - rval = -2; - } - - if (BE (rval == 0, 1)) - { - if (ret_len) - { - assert (pmatch[0].rm_so == start); - rval = pmatch[0].rm_eo - start; - } - else - rval = pmatch[0].rm_so; - } - re_free (pmatch); - out: - __libc_lock_unlock (dfa->lock); - return rval; -} - -static unsigned int -internal_function -re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, - int regs_allocated) -{ - int rval = REGS_REALLOCATE; - Idx i; - Idx need_regs = nregs + 1; - /* We need one extra element beyond `num_regs' for the `-1' marker GNU code - uses. */ - - /* Have the register data arrays been allocated? */ - if (regs_allocated == REGS_UNALLOCATED) - { /* No. So allocate them with malloc. */ - regs->start = re_malloc (regoff_t, need_regs); - if (BE (regs->start == NULL, 0)) - return REGS_UNALLOCATED; - regs->end = re_malloc (regoff_t, need_regs); - if (BE (regs->end == NULL, 0)) - { - re_free (regs->start); - return REGS_UNALLOCATED; - } - regs->num_regs = need_regs; - } - else if (regs_allocated == REGS_REALLOCATE) - { /* Yes. If we need more elements than were already - allocated, reallocate them. If we need fewer, just - leave it alone. */ - if (BE (need_regs > regs->num_regs, 0)) - { - regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); - regoff_t *new_end; - if (BE (new_start == NULL, 0)) - return REGS_UNALLOCATED; - new_end = re_realloc (regs->end, regoff_t, need_regs); - if (BE (new_end == NULL, 0)) - { - re_free (new_start); - return REGS_UNALLOCATED; - } - regs->start = new_start; - regs->end = new_end; - regs->num_regs = need_regs; - } - } - else - { - assert (regs_allocated == REGS_FIXED); - /* This function may not be called with REGS_FIXED and nregs too big. */ - assert (regs->num_regs >= nregs); - rval = REGS_FIXED; - } - - /* Copy the regs. */ - for (i = 0; i < nregs; ++i) - { - regs->start[i] = pmatch[i].rm_so; - regs->end[i] = pmatch[i].rm_eo; - } - for ( ; i < regs->num_regs; ++i) - regs->start[i] = regs->end[i] = -1; - - return rval; -} - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use - this memory for recording register information. STARTS and ENDS - must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ - -void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - __re_size_t num_regs; - regoff_t *starts, *ends; -{ - if (num_regs) - { - bufp->regs_allocated = REGS_REALLOCATE; - regs->num_regs = num_regs; - regs->start = starts; - regs->end = ends; - } - else - { - bufp->regs_allocated = REGS_UNALLOCATED; - regs->num_regs = 0; - regs->start = regs->end = NULL; - } -} -#ifdef _LIBC -weak_alias (__re_set_registers, re_set_registers) -#endif - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them unless specifically requested. */ - -#if defined _REGEX_RE_COMP || defined _LIBC -int -# ifdef _LIBC -weak_function -# endif -re_exec (s) - const char *s; -{ - return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); -} -#endif /* _REGEX_RE_COMP */ - -/* Internal entry point. */ - -/* Searches for a compiled pattern PREG in the string STRING, whose - length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same - meaning as with regexec. LAST_START is START + RANGE, where - START and RANGE have the same meaning as with re_search. - Return REG_NOERROR if we find a match, and REG_NOMATCH if not, - otherwise return the error code. - Note: We assume front end functions already check ranges. - (0 <= LAST_START && LAST_START <= LENGTH) */ - -static reg_errcode_t -internal_function -re_search_internal (const regex_t *preg, - const char *string, Idx length, - Idx start, Idx last_start, Idx stop, - size_t nmatch, regmatch_t pmatch[], - int eflags) -{ - reg_errcode_t err; - const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; - Idx left_lim, right_lim; - int incr; - bool fl_longest_match; - int match_kind; - Idx match_first; - Idx match_last = REG_MISSING; - Idx extra_nmatch; - bool sb; - int ch; -#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) - re_match_context_t mctx = { .dfa = dfa }; -#else - re_match_context_t mctx; -#endif - char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate - && start != last_start && !preg->can_be_null) - ? preg->fastmap : NULL); - RE_TRANSLATE_TYPE t = preg->translate; - -#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) - memset (&mctx, '\0', sizeof (re_match_context_t)); - mctx.dfa = dfa; -#endif - - extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; - nmatch -= extra_nmatch; - - /* Check if the DFA haven't been compiled. */ - if (BE (preg->used == 0 || dfa->init_state == NULL - || dfa->init_state_word == NULL || dfa->init_state_nl == NULL - || dfa->init_state_begbuf == NULL, 0)) - return REG_NOMATCH; - -#ifdef DEBUG - /* We assume front-end functions already check them. */ - assert (0 <= last_start && last_start <= length); -#endif - - /* If initial states with non-begbuf contexts have no elements, - the regex must be anchored. If preg->newline_anchor is set, - we'll never use init_state_nl, so do not check it. */ - if (dfa->init_state->nodes.nelem == 0 - && dfa->init_state_word->nodes.nelem == 0 - && (dfa->init_state_nl->nodes.nelem == 0 - || !preg->newline_anchor)) - { - if (start != 0 && last_start != 0) - return REG_NOMATCH; - start = last_start = 0; - } - - /* We must check the longest matching, if nmatch > 0. */ - fl_longest_match = (nmatch != 0 || dfa->nbackref); - - err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, - preg->translate, preg->syntax & RE_ICASE, dfa); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - mctx.input.stop = stop; - mctx.input.raw_stop = stop; - mctx.input.newline_anchor = preg->newline_anchor; - - err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - /* We will log all the DFA states through which the dfa pass, - if nmatch > 1, or this dfa has "multibyte node", which is a - back-reference or a node which can accept multibyte character or - multi character collating element. */ - if (nmatch > 1 || dfa->has_mb_node) - { - /* Avoid overflow. */ - if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) - { - err = REG_ESPACE; - goto free_return; - } - - mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); - if (BE (mctx.state_log == NULL, 0)) - { - err = REG_ESPACE; - goto free_return; - } - } - else - mctx.state_log = NULL; - - match_first = start; - mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF - : CONTEXT_NEWLINE | CONTEXT_BEGBUF; - - /* Check incrementally whether of not the input string match. */ - incr = (last_start < start) ? -1 : 1; - left_lim = (last_start < start) ? last_start : start; - right_lim = (last_start < start) ? start : last_start; - sb = dfa->mb_cur_max == 1; - match_kind = - (fastmap - ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) - | (start <= last_start ? 2 : 0) - | (t != NULL ? 1 : 0)) - : 8); - - for (;; match_first += incr) - { - err = REG_NOMATCH; - if (match_first < left_lim || right_lim < match_first) - goto free_return; - - /* Advance as rapidly as possible through the string, until we - find a plausible place to start matching. This may be done - with varying efficiency, so there are various possibilities: - only the most common of them are specialized, in order to - save on code size. We use a switch statement for speed. */ - switch (match_kind) - { - case 8: - /* No fastmap. */ - break; - - case 7: - /* Fastmap with single-byte translation, match forward. */ - while (BE (match_first < right_lim, 1) - && !fastmap[t[(unsigned char) string[match_first]]]) - ++match_first; - goto forward_match_found_start_or_reached_end; - - case 6: - /* Fastmap without translation, match forward. */ - while (BE (match_first < right_lim, 1) - && !fastmap[(unsigned char) string[match_first]]) - ++match_first; - - forward_match_found_start_or_reached_end: - if (BE (match_first == right_lim, 0)) - { - ch = match_first >= length - ? 0 : (unsigned char) string[match_first]; - if (!fastmap[t ? t[ch] : ch]) - goto free_return; - } - break; - - case 4: - case 5: - /* Fastmap without multi-byte translation, match backwards. */ - while (match_first >= left_lim) - { - ch = match_first >= length - ? 0 : (unsigned char) string[match_first]; - if (fastmap[t ? t[ch] : ch]) - break; - --match_first; - } - if (match_first < left_lim) - goto free_return; - break; - - default: - /* In this case, we can't determine easily the current byte, - since it might be a component byte of a multibyte - character. Then we use the constructed buffer instead. */ - for (;;) - { - /* If MATCH_FIRST is out of the valid range, reconstruct the - buffers. */ - __re_size_t offset = match_first - mctx.input.raw_mbs_idx; - if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0)) - { - err = re_string_reconstruct (&mctx.input, match_first, - eflags); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - offset = match_first - mctx.input.raw_mbs_idx; - } - /* If MATCH_FIRST is out of the buffer, leave it as '\0'. - Note that MATCH_FIRST must not be smaller than 0. */ - ch = (match_first >= length - ? 0 : re_string_byte_at (&mctx.input, offset)); - if (fastmap[ch]) - break; - match_first += incr; - if (match_first < left_lim || match_first > right_lim) - { - err = REG_NOMATCH; - goto free_return; - } - } - break; - } - - /* Reconstruct the buffers so that the matcher can assume that - the matching starts from the beginning of the buffer. */ - err = re_string_reconstruct (&mctx.input, match_first, eflags); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - -#ifdef RE_ENABLE_I18N - /* Don't consider this char as a possible match start if it part, - yet isn't the head, of a multibyte character. */ - if (!sb && !re_string_first_byte (&mctx.input, 0)) - continue; -#endif - - /* It seems to be appropriate one, then use the matcher. */ - /* We assume that the matching starts from 0. */ - mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; - match_last = check_matching (&mctx, fl_longest_match, - start <= last_start ? &match_first : NULL); - if (match_last != REG_MISSING) - { - if (BE (match_last == REG_ERROR, 0)) - { - err = REG_ESPACE; - goto free_return; - } - else - { - mctx.match_last = match_last; - if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) - { - re_dfastate_t *pstate = mctx.state_log[match_last]; - mctx.last_node = check_halt_state_context (&mctx, pstate, - match_last); - } - if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) - || dfa->nbackref) - { - err = prune_impossible_nodes (&mctx); - if (err == REG_NOERROR) - break; - if (BE (err != REG_NOMATCH, 0)) - goto free_return; - match_last = REG_MISSING; - } - else - break; /* We found a match. */ - } - } - - match_ctx_clean (&mctx); - } - -#ifdef DEBUG - assert (match_last != REG_MISSING); - assert (err == REG_NOERROR); -#endif - - /* Set pmatch[] if we need. */ - if (nmatch > 0) - { - Idx reg_idx; - - /* Initialize registers. */ - for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) - pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; - - /* Set the points where matching start/end. */ - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = mctx.match_last; - /* FIXME: This function should fail if mctx.match_last exceeds - the maximum possible regoff_t value. We need a new error - code REG_OVERFLOW. */ - - if (!preg->no_sub && nmatch > 1) - { - err = set_regs (preg, &mctx, nmatch, pmatch, - dfa->has_plural_match && dfa->nbackref > 0); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - - /* At last, add the offset to the each registers, since we slided - the buffers so that we could assume that the matching starts - from 0. */ - for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) - if (pmatch[reg_idx].rm_so != -1) - { -#ifdef RE_ENABLE_I18N - if (BE (mctx.input.offsets_needed != 0, 0)) - { - pmatch[reg_idx].rm_so = - (pmatch[reg_idx].rm_so == mctx.input.valid_len - ? mctx.input.valid_raw_len - : mctx.input.offsets[pmatch[reg_idx].rm_so]); - pmatch[reg_idx].rm_eo = - (pmatch[reg_idx].rm_eo == mctx.input.valid_len - ? mctx.input.valid_raw_len - : mctx.input.offsets[pmatch[reg_idx].rm_eo]); - } -#else - assert (mctx.input.offsets_needed == 0); -#endif - pmatch[reg_idx].rm_so += match_first; - pmatch[reg_idx].rm_eo += match_first; - } - for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) - { - pmatch[nmatch + reg_idx].rm_so = -1; - pmatch[nmatch + reg_idx].rm_eo = -1; - } - - if (dfa->subexp_map) - for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) - if (dfa->subexp_map[reg_idx] != reg_idx) - { - pmatch[reg_idx + 1].rm_so - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; - pmatch[reg_idx + 1].rm_eo - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; - } - } - - free_return: - re_free (mctx.state_log); - if (dfa->nbackref) - match_ctx_free (&mctx); - re_string_destruct (&mctx.input); - return err; -} - -static reg_errcode_t -internal_function -prune_impossible_nodes (re_match_context_t *mctx) -{ - const re_dfa_t *const dfa = mctx->dfa; - Idx halt_node, match_last; - reg_errcode_t ret; - re_dfastate_t **sifted_states; - re_dfastate_t **lim_states = NULL; - re_sift_context_t sctx; -#ifdef DEBUG - assert (mctx->state_log != NULL); -#endif - match_last = mctx->match_last; - halt_node = mctx->last_node; - - /* Avoid overflow. */ - if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)) - return REG_ESPACE; - - sifted_states = re_malloc (re_dfastate_t *, match_last + 1); - if (BE (sifted_states == NULL, 0)) - { - ret = REG_ESPACE; - goto free_return; - } - if (dfa->nbackref) - { - lim_states = re_malloc (re_dfastate_t *, match_last + 1); - if (BE (lim_states == NULL, 0)) - { - ret = REG_ESPACE; - goto free_return; - } - while (1) - { - memset (lim_states, '\0', - sizeof (re_dfastate_t *) * (match_last + 1)); - sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, - match_last); - ret = sift_states_backward (mctx, &sctx); - re_node_set_free (&sctx.limits); - if (BE (ret != REG_NOERROR, 0)) - goto free_return; - if (sifted_states[0] != NULL || lim_states[0] != NULL) - break; - do - { - --match_last; - if (! REG_VALID_INDEX (match_last)) - { - ret = REG_NOMATCH; - goto free_return; - } - } while (mctx->state_log[match_last] == NULL - || !mctx->state_log[match_last]->halt); - halt_node = check_halt_state_context (mctx, - mctx->state_log[match_last], - match_last); - } - ret = merge_state_array (dfa, sifted_states, lim_states, - match_last + 1); - re_free (lim_states); - lim_states = NULL; - if (BE (ret != REG_NOERROR, 0)) - goto free_return; - } - else - { - sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); - ret = sift_states_backward (mctx, &sctx); - re_node_set_free (&sctx.limits); - if (BE (ret != REG_NOERROR, 0)) - goto free_return; - } - re_free (mctx->state_log); - mctx->state_log = sifted_states; - sifted_states = NULL; - mctx->last_node = halt_node; - mctx->match_last = match_last; - ret = REG_NOERROR; - free_return: - re_free (sifted_states); - re_free (lim_states); - return ret; -} - -/* Acquire an initial state and return it. - We must select appropriate initial state depending on the context, - since initial states may have constraints like "\<", "^", etc.. */ - -static inline re_dfastate_t * -__attribute ((always_inline)) internal_function -acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, - Idx idx) -{ - const re_dfa_t *const dfa = mctx->dfa; - if (dfa->init_state->has_constraint) - { - unsigned int context; - context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); - if (IS_WORD_CONTEXT (context)) - return dfa->init_state_word; - else if (IS_ORDINARY_CONTEXT (context)) - return dfa->init_state; - else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) - return dfa->init_state_begbuf; - else if (IS_NEWLINE_CONTEXT (context)) - return dfa->init_state_nl; - else if (IS_BEGBUF_CONTEXT (context)) - { - /* It is relatively rare case, then calculate on demand. */ - return re_acquire_state_context (err, dfa, - dfa->init_state->entrance_nodes, - context); - } - else - /* Must not happen? */ - return dfa->init_state; - } - else - return dfa->init_state; -} - -/* Check whether the regular expression match input string INPUT or not, - and return the index where the matching end. Return REG_MISSING if - there is no match, and return REG_ERROR in case of an error. - FL_LONGEST_MATCH means we want the POSIX longest matching. - If P_MATCH_FIRST is not NULL, and the match fails, it is set to the - next place where we may want to try matching. - Note that the matcher assume that the maching starts from the current - index of the buffer. */ - -static Idx -internal_function -check_matching (re_match_context_t *mctx, bool fl_longest_match, - Idx *p_match_first) -{ - const re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - Idx match = 0; - Idx match_last = REG_MISSING; - Idx cur_str_idx = re_string_cur_idx (&mctx->input); - re_dfastate_t *cur_state; - bool at_init_state = p_match_first != NULL; - Idx next_start_idx = cur_str_idx; - - err = REG_NOERROR; - cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); - /* An initial state must not be NULL (invalid). */ - if (BE (cur_state == NULL, 0)) - { - assert (err == REG_ESPACE); - return REG_ERROR; - } - - if (mctx->state_log != NULL) - { - mctx->state_log[cur_str_idx] = cur_state; - - /* Check OP_OPEN_SUBEXP in the initial state in case that we use them - later. E.g. Processing back references. */ - if (BE (dfa->nbackref, 0)) - { - at_init_state = false; - err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); - if (BE (err != REG_NOERROR, 0)) - return err; - - if (cur_state->has_backref) - { - err = transit_state_bkref (mctx, &cur_state->nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - } - - /* If the RE accepts NULL string. */ - if (BE (cur_state->halt, 0)) - { - if (!cur_state->has_constraint - || check_halt_state_context (mctx, cur_state, cur_str_idx)) - { - if (!fl_longest_match) - return cur_str_idx; - else - { - match_last = cur_str_idx; - match = 1; - } - } - } - - while (!re_string_eoi (&mctx->input)) - { - re_dfastate_t *old_state = cur_state; - Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; - - if (BE (next_char_idx >= mctx->input.bufs_len, 0) - || (BE (next_char_idx >= mctx->input.valid_len, 0) - && mctx->input.valid_len < mctx->input.len)) - { - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) - { - assert (err == REG_ESPACE); - return REG_ERROR; - } - } - - cur_state = transit_state (&err, mctx, cur_state); - if (mctx->state_log != NULL) - cur_state = merge_state_with_log (&err, mctx, cur_state); - - if (cur_state == NULL) - { - /* Reached the invalid state or an error. Try to recover a valid - state using the state log, if available and if we have not - already found a valid (even if not the longest) match. */ - if (BE (err != REG_NOERROR, 0)) - return REG_ERROR; - - if (mctx->state_log == NULL - || (match && !fl_longest_match) - || (cur_state = find_recover_state (&err, mctx)) == NULL) - break; - } - - if (BE (at_init_state, 0)) - { - if (old_state == cur_state) - next_start_idx = next_char_idx; - else - at_init_state = false; - } - - if (cur_state->halt) - { - /* Reached a halt state. - Check the halt state can satisfy the current context. */ - if (!cur_state->has_constraint - || check_halt_state_context (mctx, cur_state, - re_string_cur_idx (&mctx->input))) - { - /* We found an appropriate halt state. */ - match_last = re_string_cur_idx (&mctx->input); - match = 1; - - /* We found a match, do not modify match_first below. */ - p_match_first = NULL; - if (!fl_longest_match) - break; - } - } - } - - if (p_match_first) - *p_match_first += next_start_idx; - - return match_last; -} - -/* Check NODE match the current context. */ - -static bool -internal_function -check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) -{ - re_token_type_t type = dfa->nodes[node].type; - unsigned int constraint = dfa->nodes[node].constraint; - if (type != END_OF_RE) - return false; - if (!constraint) - return true; - if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) - return false; - return true; -} - -/* Check the halt state STATE match the current context. - Return 0 if not match, if the node, STATE has, is a halt node and - match the context, return the node. */ - -static Idx -internal_function -check_halt_state_context (const re_match_context_t *mctx, - const re_dfastate_t *state, Idx idx) -{ - Idx i; - unsigned int context; -#ifdef DEBUG - assert (state->halt); -#endif - context = re_string_context_at (&mctx->input, idx, mctx->eflags); - for (i = 0; i < state->nodes.nelem; ++i) - if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) - return state->nodes.elems[i]; - return 0; -} - -/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA - corresponding to the DFA). - Return the destination node, and update EPS_VIA_NODES; - return REG_MISSING in case of errors. */ - -static Idx -internal_function -proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, - Idx *pidx, Idx node, re_node_set *eps_via_nodes, - struct re_fail_stack_t *fs) -{ - const re_dfa_t *const dfa = mctx->dfa; - Idx i; - bool ok; - if (IS_EPSILON_NODE (dfa->nodes[node].type)) - { - re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; - re_node_set *edests = &dfa->edests[node]; - Idx dest_node; - ok = re_node_set_insert (eps_via_nodes, node); - if (BE (! ok, 0)) - return REG_ERROR; - /* Pick up a valid destination, or return REG_MISSING if none - is found. */ - for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i) - { - Idx candidate = edests->elems[i]; - if (!re_node_set_contains (cur_nodes, candidate)) - continue; - if (dest_node == REG_MISSING) - dest_node = candidate; - - else - { - /* In order to avoid infinite loop like "(a*)*", return the second - epsilon-transition if the first was already considered. */ - if (re_node_set_contains (eps_via_nodes, dest_node)) - return candidate; - - /* Otherwise, push the second epsilon-transition on the fail stack. */ - else if (fs != NULL - && push_fail_stack (fs, *pidx, candidate, nregs, regs, - eps_via_nodes)) - return REG_ERROR; - - /* We know we are going to exit. */ - break; - } - } - return dest_node; - } - else - { - Idx naccepted = 0; - re_token_type_t type = dfa->nodes[node].type; - -#ifdef RE_ENABLE_I18N - if (dfa->nodes[node].accept_mb) - naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); - else -#endif /* RE_ENABLE_I18N */ - if (type == OP_BACK_REF) - { - Idx subexp_idx = dfa->nodes[node].opr.idx + 1; - naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; - if (fs != NULL) - { - if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) - return REG_MISSING; - else if (naccepted) - { - char *buf = (char *) re_string_get_buffer (&mctx->input); - if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, - naccepted) != 0) - return REG_MISSING; - } - } - - if (naccepted == 0) - { - Idx dest_node; - ok = re_node_set_insert (eps_via_nodes, node); - if (BE (! ok, 0)) - return REG_ERROR; - dest_node = dfa->edests[node].elems[0]; - if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, - dest_node)) - return dest_node; - } - } - - if (naccepted != 0 - || check_node_accept (mctx, dfa->nodes + node, *pidx)) - { - Idx dest_node = dfa->nexts[node]; - *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; - if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL - || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, - dest_node))) - return REG_MISSING; - re_node_set_empty (eps_via_nodes); - return dest_node; - } - } - return REG_MISSING; -} - -static reg_errcode_t -internal_function -push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, - Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) -{ - reg_errcode_t err; - Idx num = fs->num++; - if (fs->num == fs->alloc) - { - struct re_fail_stack_ent_t *new_array; - new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) - * fs->alloc * 2)); - if (new_array == NULL) - return REG_ESPACE; - fs->alloc *= 2; - fs->stack = new_array; - } - fs->stack[num].idx = str_idx; - fs->stack[num].node = dest_node; - fs->stack[num].regs = re_malloc (regmatch_t, nregs); - if (fs->stack[num].regs == NULL) - return REG_ESPACE; - memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); - err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); - return err; -} - -static Idx -internal_function -pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, - regmatch_t *regs, re_node_set *eps_via_nodes) -{ - Idx num = --fs->num; - assert (REG_VALID_INDEX (num)); - *pidx = fs->stack[num].idx; - memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); - re_node_set_free (eps_via_nodes); - re_free (fs->stack[num].regs); - *eps_via_nodes = fs->stack[num].eps_via_nodes; - return fs->stack[num].node; -} - -/* Set the positions where the subexpressions are starts/ends to registers - PMATCH. - Note: We assume that pmatch[0] is already set, and - pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ - -static reg_errcode_t -internal_function -set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, - regmatch_t *pmatch, bool fl_backtrack) -{ - const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; - Idx idx, cur_node; - re_node_set eps_via_nodes; - struct re_fail_stack_t *fs; - struct re_fail_stack_t fs_body = { 0, 2, NULL }; - regmatch_t *prev_idx_match; - bool prev_idx_match_malloced = false; - -#ifdef DEBUG - assert (nmatch > 1); - assert (mctx->state_log != NULL); -#endif - if (fl_backtrack) - { - fs = &fs_body; - fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); - if (fs->stack == NULL) - return REG_ESPACE; - } - else - fs = NULL; - - cur_node = dfa->init_node; - re_node_set_init_empty (&eps_via_nodes); - - if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) - prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); - else - { - prev_idx_match = re_malloc (regmatch_t, nmatch); - if (prev_idx_match == NULL) - { - free_fail_stack_return (fs); - return REG_ESPACE; - } - prev_idx_match_malloced = true; - } - memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); - - for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) - { - update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); - - if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) - { - Idx reg_idx; - if (fs) - { - for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) - if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) - break; - if (reg_idx == nmatch) - { - re_node_set_free (&eps_via_nodes); - if (prev_idx_match_malloced) - re_free (prev_idx_match); - return free_fail_stack_return (fs); - } - cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, - &eps_via_nodes); - } - else - { - re_node_set_free (&eps_via_nodes); - if (prev_idx_match_malloced) - re_free (prev_idx_match); - return REG_NOERROR; - } - } - - /* Proceed to next node. */ - cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, - &eps_via_nodes, fs); - - if (BE (! REG_VALID_INDEX (cur_node), 0)) - { - if (BE (cur_node == REG_ERROR, 0)) - { - re_node_set_free (&eps_via_nodes); - if (prev_idx_match_malloced) - re_free (prev_idx_match); - free_fail_stack_return (fs); - return REG_ESPACE; - } - if (fs) - cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, - &eps_via_nodes); - else - { - re_node_set_free (&eps_via_nodes); - if (prev_idx_match_malloced) - re_free (prev_idx_match); - return REG_NOMATCH; - } - } - } - re_node_set_free (&eps_via_nodes); - if (prev_idx_match_malloced) - re_free (prev_idx_match); - return free_fail_stack_return (fs); -} - -static reg_errcode_t -internal_function -free_fail_stack_return (struct re_fail_stack_t *fs) -{ - if (fs) - { - Idx fs_idx; - for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) - { - re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); - re_free (fs->stack[fs_idx].regs); - } - re_free (fs->stack); - } - return REG_NOERROR; -} - -static void -internal_function -update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, - regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch) -{ - int type = dfa->nodes[cur_node].type; - if (type == OP_OPEN_SUBEXP) - { - Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; - - /* We are at the first node of this sub expression. */ - if (reg_num < nmatch) - { - pmatch[reg_num].rm_so = cur_idx; - pmatch[reg_num].rm_eo = -1; - } - } - else if (type == OP_CLOSE_SUBEXP) - { - Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; - if (reg_num < nmatch) - { - /* We are at the last node of this sub expression. */ - if (pmatch[reg_num].rm_so < cur_idx) - { - pmatch[reg_num].rm_eo = cur_idx; - /* This is a non-empty match or we are not inside an optional - subexpression. Accept this right away. */ - memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); - } - else - { - if (dfa->nodes[cur_node].opt_subexp - && prev_idx_match[reg_num].rm_so != -1) - /* We transited through an empty match for an optional - subexpression, like (a?)*, and this is not the subexp's - first match. Copy back the old content of the registers - so that matches of an inner subexpression are undone as - well, like in ((a?))*. */ - memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); - else - /* We completed a subexpression, but it may be part of - an optional one, so do not update PREV_IDX_MATCH. */ - pmatch[reg_num].rm_eo = cur_idx; - } - } - } -} - -/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 - and sift the nodes in each states according to the following rules. - Updated state_log will be wrote to STATE_LOG. - - Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... - 1. When STR_IDX == MATCH_LAST(the last index in the state_log): - If `a' isn't the LAST_NODE and `a' can't epsilon transit to - the LAST_NODE, we throw away the node `a'. - 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts - string `s' and transit to `b': - i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw - away the node `a'. - ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is - thrown away, we throw away the node `a'. - 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': - i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the - node `a'. - ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, - we throw away the node `a'. */ - -#define STATE_NODE_CONTAINS(state,node) \ - ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) - -static reg_errcode_t -internal_function -sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) -{ - reg_errcode_t err; - int null_cnt = 0; - Idx str_idx = sctx->last_str_idx; - re_node_set cur_dest; - -#ifdef DEBUG - assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); -#endif - - /* Build sifted state_log[str_idx]. It has the nodes which can epsilon - transit to the last_node and the last_node itself. */ - err = re_node_set_init_1 (&cur_dest, sctx->last_node); - if (BE (err != REG_NOERROR, 0)) - return err; - err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - /* Then check each states in the state_log. */ - while (str_idx > 0) - { - /* Update counters. */ - null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; - if (null_cnt > mctx->max_mb_elem_len) - { - memset (sctx->sifted_states, '\0', - sizeof (re_dfastate_t *) * str_idx); - re_node_set_free (&cur_dest); - return REG_NOERROR; - } - re_node_set_empty (&cur_dest); - --str_idx; - - if (mctx->state_log[str_idx]) - { - err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - - /* Add all the nodes which satisfy the following conditions: - - It can epsilon transit to a node in CUR_DEST. - - It is in CUR_SRC. - And update state_log. */ - err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - err = REG_NOERROR; - free_return: - re_node_set_free (&cur_dest); - return err; -} - -static reg_errcode_t -internal_function -build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, - Idx str_idx, re_node_set *cur_dest) -{ - const re_dfa_t *const dfa = mctx->dfa; - const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; - Idx i; - - /* Then build the next sifted state. - We build the next sifted state on `cur_dest', and update - `sifted_states[str_idx]' with `cur_dest'. - Note: - `cur_dest' is the sifted state from `state_log[str_idx + 1]'. - `cur_src' points the node_set of the old `state_log[str_idx]' - (with the epsilon nodes pre-filtered out). */ - for (i = 0; i < cur_src->nelem; i++) - { - Idx prev_node = cur_src->elems[i]; - int naccepted = 0; - bool ok; - -#ifdef DEBUG - re_token_type_t type = dfa->nodes[prev_node].type; - assert (!IS_EPSILON_NODE (type)); -#endif -#ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ - if (dfa->nodes[prev_node].accept_mb) - naccepted = sift_states_iter_mb (mctx, sctx, prev_node, - str_idx, sctx->last_str_idx); -#endif /* RE_ENABLE_I18N */ - - /* We don't check backreferences here. - See update_cur_sifted_state(). */ - if (!naccepted - && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) - && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], - dfa->nexts[prev_node])) - naccepted = 1; - - if (naccepted == 0) - continue; - - if (sctx->limits.nelem) - { - Idx to_idx = str_idx + naccepted; - if (check_dst_limits (mctx, &sctx->limits, - dfa->nexts[prev_node], to_idx, - prev_node, str_idx)) - continue; - } - ok = re_node_set_insert (cur_dest, prev_node); - if (BE (! ok, 0)) - return REG_ESPACE; - } - - return REG_NOERROR; -} - -/* Helper functions. */ - -static reg_errcode_t -internal_function -clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) -{ - Idx top = mctx->state_log_top; - - if (next_state_log_idx >= mctx->input.bufs_len - || (next_state_log_idx >= mctx->input.valid_len - && mctx->input.valid_len < mctx->input.len)) - { - reg_errcode_t err; - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - if (top < next_state_log_idx) - { - memset (mctx->state_log + top + 1, '\0', - sizeof (re_dfastate_t *) * (next_state_log_idx - top)); - mctx->state_log_top = next_state_log_idx; - } - return REG_NOERROR; -} - -static reg_errcode_t -internal_function -merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, - re_dfastate_t **src, Idx num) -{ - Idx st_idx; - reg_errcode_t err; - for (st_idx = 0; st_idx < num; ++st_idx) - { - if (dst[st_idx] == NULL) - dst[st_idx] = src[st_idx]; - else if (src[st_idx] != NULL) - { - re_node_set merged_set; - err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, - &src[st_idx]->nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); - re_node_set_free (&merged_set); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - return REG_NOERROR; -} - -static reg_errcode_t -internal_function -update_cur_sifted_state (const re_match_context_t *mctx, - re_sift_context_t *sctx, Idx str_idx, - re_node_set *dest_nodes) -{ - const re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err = REG_NOERROR; - const re_node_set *candidates; - candidates = ((mctx->state_log[str_idx] == NULL) ? NULL - : &mctx->state_log[str_idx]->nodes); - - if (dest_nodes->nelem == 0) - sctx->sifted_states[str_idx] = NULL; - else - { - if (candidates) - { - /* At first, add the nodes which can epsilon transit to a node in - DEST_NODE. */ - err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* Then, check the limitations in the current sift_context. */ - if (sctx->limits.nelem) - { - err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, - mctx->bkref_ents, str_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - - sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - if (candidates && mctx->state_log[str_idx]->has_backref) - { - err = sift_states_bkref (mctx, sctx, str_idx, candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } - return REG_NOERROR; -} - -static reg_errcode_t -internal_function -add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, - const re_node_set *candidates) -{ - reg_errcode_t err = REG_NOERROR; - Idx i; - - re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - - if (!state->inveclosure.alloc) - { - err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); - if (BE (err != REG_NOERROR, 0)) - return REG_ESPACE; - for (i = 0; i < dest_nodes->nelem; i++) - re_node_set_merge (&state->inveclosure, - dfa->inveclosures + dest_nodes->elems[i]); - } - return re_node_set_add_intersect (dest_nodes, candidates, - &state->inveclosure); -} - -static reg_errcode_t -internal_function -sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, - const re_node_set *candidates) -{ - Idx ecl_idx; - reg_errcode_t err; - re_node_set *inv_eclosure = dfa->inveclosures + node; - re_node_set except_nodes; - re_node_set_init_empty (&except_nodes); - for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) - { - Idx cur_node = inv_eclosure->elems[ecl_idx]; - if (cur_node == node) - continue; - if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) - { - Idx edst1 = dfa->edests[cur_node].elems[0]; - Idx edst2 = ((dfa->edests[cur_node].nelem > 1) - ? dfa->edests[cur_node].elems[1] : REG_MISSING); - if ((!re_node_set_contains (inv_eclosure, edst1) - && re_node_set_contains (dest_nodes, edst1)) - || (REG_VALID_NONZERO_INDEX (edst2) - && !re_node_set_contains (inv_eclosure, edst2) - && re_node_set_contains (dest_nodes, edst2))) - { - err = re_node_set_add_intersect (&except_nodes, candidates, - dfa->inveclosures + cur_node); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&except_nodes); - return err; - } - } - } - } - for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) - { - Idx cur_node = inv_eclosure->elems[ecl_idx]; - if (!re_node_set_contains (&except_nodes, cur_node)) - { - Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1; - re_node_set_remove_at (dest_nodes, idx); - } - } - re_node_set_free (&except_nodes); - return REG_NOERROR; -} - -static bool -internal_function -check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, - Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx) -{ - const re_dfa_t *const dfa = mctx->dfa; - Idx lim_idx, src_pos, dst_pos; - - Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); - Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); - for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) - { - Idx subexp_idx; - struct re_backref_cache_entry *ent; - ent = mctx->bkref_ents + limits->elems[lim_idx]; - subexp_idx = dfa->nodes[ent->node].opr.idx; - - dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], - subexp_idx, dst_node, dst_idx, - dst_bkref_idx); - src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], - subexp_idx, src_node, src_idx, - src_bkref_idx); - - /* In case of: - <src> <dst> ( <subexp> ) - ( <subexp> ) <src> <dst> - ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */ - if (src_pos == dst_pos) - continue; /* This is unrelated limitation. */ - else - return true; - } - return false; -} - -static int -internal_function -check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, - Idx subexp_idx, Idx from_node, Idx bkref_idx) -{ - const re_dfa_t *const dfa = mctx->dfa; - const re_node_set *eclosures = dfa->eclosures + from_node; - Idx node_idx; - - /* Else, we are on the boundary: examine the nodes on the epsilon - closure. */ - for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) - { - Idx node = eclosures->elems[node_idx]; - switch (dfa->nodes[node].type) - { - case OP_BACK_REF: - if (bkref_idx != REG_MISSING) - { - struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; - do - { - Idx dst; - int cpos; - - if (ent->node != node) - continue; - - if (subexp_idx < BITSET_WORD_BITS - && !(ent->eps_reachable_subexps_map - & ((bitset_word_t) 1 << subexp_idx))) - continue; - - /* Recurse trying to reach the OP_OPEN_SUBEXP and - OP_CLOSE_SUBEXP cases below. But, if the - destination node is the same node as the source - node, don't recurse because it would cause an - infinite loop: a regex that exhibits this behavior - is ()\1*\1* */ - dst = dfa->edests[node].elems[0]; - if (dst == from_node) - { - if (boundaries & 1) - return -1; - else /* if (boundaries & 2) */ - return 0; - } - - cpos = - check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, - dst, bkref_idx); - if (cpos == -1 /* && (boundaries & 1) */) - return -1; - if (cpos == 0 && (boundaries & 2)) - return 0; - - if (subexp_idx < BITSET_WORD_BITS) - ent->eps_reachable_subexps_map - &= ~((bitset_word_t) 1 << subexp_idx); - } - while (ent++->more); - } - break; - - case OP_OPEN_SUBEXP: - if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) - return -1; - break; - - case OP_CLOSE_SUBEXP: - if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) - return 0; - break; - - default: - break; - } - } - - return (boundaries & 2) ? 1 : 0; -} - -static int -internal_function -check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, - Idx subexp_idx, Idx from_node, Idx str_idx, - Idx bkref_idx) -{ - struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; - int boundaries; - - /* If we are outside the range of the subexpression, return -1 or 1. */ - if (str_idx < lim->subexp_from) - return -1; - - if (lim->subexp_to < str_idx) - return 1; - - /* If we are within the subexpression, return 0. */ - boundaries = (str_idx == lim->subexp_from); - boundaries |= (str_idx == lim->subexp_to) << 1; - if (boundaries == 0) - return 0; - - /* Else, examine epsilon closure. */ - return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, - from_node, bkref_idx); -} - -/* Check the limitations of sub expressions LIMITS, and remove the nodes - which are against limitations from DEST_NODES. */ - -static reg_errcode_t -internal_function -check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, - const re_node_set *candidates, re_node_set *limits, - struct re_backref_cache_entry *bkref_ents, Idx str_idx) -{ - reg_errcode_t err; - Idx node_idx, lim_idx; - - for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) - { - Idx subexp_idx; - struct re_backref_cache_entry *ent; - ent = bkref_ents + limits->elems[lim_idx]; - - if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) - continue; /* This is unrelated limitation. */ - - subexp_idx = dfa->nodes[ent->node].opr.idx; - if (ent->subexp_to == str_idx) - { - Idx ops_node = REG_MISSING; - Idx cls_node = REG_MISSING; - for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) - { - Idx node = dest_nodes->elems[node_idx]; - re_token_type_t type = dfa->nodes[node].type; - if (type == OP_OPEN_SUBEXP - && subexp_idx == dfa->nodes[node].opr.idx) - ops_node = node; - else if (type == OP_CLOSE_SUBEXP - && subexp_idx == dfa->nodes[node].opr.idx) - cls_node = node; - } - - /* Check the limitation of the open subexpression. */ - /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ - if (REG_VALID_INDEX (ops_node)) - { - err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - /* Check the limitation of the close subexpression. */ - if (REG_VALID_INDEX (cls_node)) - for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) - { - Idx node = dest_nodes->elems[node_idx]; - if (!re_node_set_contains (dfa->inveclosures + node, - cls_node) - && !re_node_set_contains (dfa->eclosures + node, - cls_node)) - { - /* It is against this limitation. - Remove it form the current sifted state. */ - err = sub_epsilon_src_nodes (dfa, node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - --node_idx; - } - } - } - else /* (ent->subexp_to != str_idx) */ - { - for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) - { - Idx node = dest_nodes->elems[node_idx]; - re_token_type_t type = dfa->nodes[node].type; - if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) - { - if (subexp_idx != dfa->nodes[node].opr.idx) - continue; - /* It is against this limitation. - Remove it form the current sifted state. */ - err = sub_epsilon_src_nodes (dfa, node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - } - } - return REG_NOERROR; -} - -static reg_errcode_t -internal_function -sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, - Idx str_idx, const re_node_set *candidates) -{ - const re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - Idx node_idx, node; - re_sift_context_t local_sctx; - Idx first_idx = search_cur_bkref_entry (mctx, str_idx); - - if (first_idx == REG_MISSING) - return REG_NOERROR; - - local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ - - for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) - { - Idx enabled_idx; - re_token_type_t type; - struct re_backref_cache_entry *entry; - node = candidates->elems[node_idx]; - type = dfa->nodes[node].type; - /* Avoid infinite loop for the REs like "()\1+". */ - if (node == sctx->last_node && str_idx == sctx->last_str_idx) - continue; - if (type != OP_BACK_REF) - continue; - - entry = mctx->bkref_ents + first_idx; - enabled_idx = first_idx; - do - { - Idx subexp_len; - Idx to_idx; - Idx dst_node; - bool ok; - re_dfastate_t *cur_state; - - if (entry->node != node) - continue; - subexp_len = entry->subexp_to - entry->subexp_from; - to_idx = str_idx + subexp_len; - dst_node = (subexp_len ? dfa->nexts[node] - : dfa->edests[node].elems[0]); - - if (to_idx > sctx->last_str_idx - || sctx->sifted_states[to_idx] == NULL - || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) - || check_dst_limits (mctx, &sctx->limits, node, - str_idx, dst_node, to_idx)) - continue; - - if (local_sctx.sifted_states == NULL) - { - local_sctx = *sctx; - err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - local_sctx.last_node = node; - local_sctx.last_str_idx = str_idx; - ok = re_node_set_insert (&local_sctx.limits, enabled_idx); - if (BE (! ok, 0)) - { - err = REG_ESPACE; - goto free_return; - } - cur_state = local_sctx.sifted_states[str_idx]; - err = sift_states_backward (mctx, &local_sctx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - if (sctx->limited_states != NULL) - { - err = merge_state_array (dfa, sctx->limited_states, - local_sctx.sifted_states, - str_idx + 1); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - local_sctx.sifted_states[str_idx] = cur_state; - re_node_set_remove (&local_sctx.limits, enabled_idx); - - /* mctx->bkref_ents may have changed, reload the pointer. */ - entry = mctx->bkref_ents + enabled_idx; - } - while (enabled_idx++, entry++->more); - } - err = REG_NOERROR; - free_return: - if (local_sctx.sifted_states != NULL) - { - re_node_set_free (&local_sctx.limits); - } - - return err; -} - - -#ifdef RE_ENABLE_I18N -static int -internal_function -sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, - Idx node_idx, Idx str_idx, Idx max_str_idx) -{ - const re_dfa_t *const dfa = mctx->dfa; - int naccepted; - /* Check the node can accept `multi byte'. */ - naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); - if (naccepted > 0 && str_idx + naccepted <= max_str_idx && - !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], - dfa->nexts[node_idx])) - /* The node can't accept the `multi byte', or the - destination was already thrown away, then the node - could't accept the current input `multi byte'. */ - naccepted = 0; - /* Otherwise, it is sure that the node could accept - `naccepted' bytes input. */ - return naccepted; -} -#endif /* RE_ENABLE_I18N */ - - -/* Functions for state transition. */ - -/* Return the next state to which the current state STATE will transit by - accepting the current input byte, and update STATE_LOG if necessary. - If STATE can accept a multibyte char/collating element/back reference - update the destination of STATE_LOG. */ - -static re_dfastate_t * -internal_function -transit_state (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *state) -{ - re_dfastate_t **trtable; - unsigned char ch; - -#ifdef RE_ENABLE_I18N - /* If the current state can accept multibyte. */ - if (BE (state->accept_mb, 0)) - { - *err = transit_state_mb (mctx, state); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } -#endif /* RE_ENABLE_I18N */ - - /* Then decide the next state with the single byte. */ -#if 0 - if (0) - /* don't use transition table */ - return transit_state_sb (err, mctx, state); -#endif - - /* Use transition table */ - ch = re_string_fetch_byte (&mctx->input); - for (;;) - { - trtable = state->trtable; - if (BE (trtable != NULL, 1)) - return trtable[ch]; - - trtable = state->word_trtable; - if (BE (trtable != NULL, 1)) - { - unsigned int context; - context - = re_string_context_at (&mctx->input, - re_string_cur_idx (&mctx->input) - 1, - mctx->eflags); - if (IS_WORD_CONTEXT (context)) - return trtable[ch + SBC_MAX]; - else - return trtable[ch]; - } - - if (!build_trtable (mctx->dfa, state)) - { - *err = REG_ESPACE; - return NULL; - } - - /* Retry, we now have a transition table. */ - } -} - -/* Update the state_log if we need */ -re_dfastate_t * -internal_function -merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *next_state) -{ - const re_dfa_t *const dfa = mctx->dfa; - Idx cur_idx = re_string_cur_idx (&mctx->input); - - if (cur_idx > mctx->state_log_top) - { - mctx->state_log[cur_idx] = next_state; - mctx->state_log_top = cur_idx; - } - else if (mctx->state_log[cur_idx] == 0) - { - mctx->state_log[cur_idx] = next_state; - } - else - { - re_dfastate_t *pstate; - unsigned int context; - re_node_set next_nodes, *log_nodes, *table_nodes = NULL; - /* If (state_log[cur_idx] != 0), it implies that cur_idx is - the destination of a multibyte char/collating element/ - back reference. Then the next state is the union set of - these destinations and the results of the transition table. */ - pstate = mctx->state_log[cur_idx]; - log_nodes = pstate->entrance_nodes; - if (next_state != NULL) - { - table_nodes = next_state->entrance_nodes; - *err = re_node_set_init_union (&next_nodes, table_nodes, - log_nodes); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } - else - next_nodes = *log_nodes; - /* Note: We already add the nodes of the initial state, - then we don't need to add them here. */ - - context = re_string_context_at (&mctx->input, - re_string_cur_idx (&mctx->input) - 1, - mctx->eflags); - next_state = mctx->state_log[cur_idx] - = re_acquire_state_context (err, dfa, &next_nodes, context); - /* We don't need to check errors here, since the return value of - this function is next_state and ERR is already set. */ - - if (table_nodes != NULL) - re_node_set_free (&next_nodes); - } - - if (BE (dfa->nbackref, 0) && next_state != NULL) - { - /* Check OP_OPEN_SUBEXP in the current state in case that we use them - later. We must check them here, since the back references in the - next state might use them. */ - *err = check_subexp_matching_top (mctx, &next_state->nodes, - cur_idx); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - - /* If the next state has back references. */ - if (next_state->has_backref) - { - *err = transit_state_bkref (mctx, &next_state->nodes); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - next_state = mctx->state_log[cur_idx]; - } - } - - return next_state; -} - -/* Skip bytes in the input that correspond to part of a - multi-byte match, then look in the log for a state - from which to restart matching. */ -static re_dfastate_t * -internal_function -find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) -{ - re_dfastate_t *cur_state; - do - { - Idx max = mctx->state_log_top; - Idx cur_str_idx = re_string_cur_idx (&mctx->input); - - do - { - if (++cur_str_idx > max) - return NULL; - re_string_skip_bytes (&mctx->input, 1); - } - while (mctx->state_log[cur_str_idx] == NULL); - - cur_state = merge_state_with_log (err, mctx, NULL); - } - while (*err == REG_NOERROR && cur_state == NULL); - return cur_state; -} - -/* Helper functions for transit_state. */ - -/* From the node set CUR_NODES, pick up the nodes whose types are - OP_OPEN_SUBEXP and which have corresponding back references in the regular - expression. And register them to use them later for evaluating the - correspoding back references. */ - -static reg_errcode_t -internal_function -check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, - Idx str_idx) -{ - const re_dfa_t *const dfa = mctx->dfa; - Idx node_idx; - reg_errcode_t err; - - /* TODO: This isn't efficient. - Because there might be more than one nodes whose types are - OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all - nodes. - E.g. RE: (a){2} */ - for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) - { - Idx node = cur_nodes->elems[node_idx]; - if (dfa->nodes[node].type == OP_OPEN_SUBEXP - && dfa->nodes[node].opr.idx < BITSET_WORD_BITS - && (dfa->used_bkref_map - & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) - { - err = match_ctx_add_subtop (mctx, node, str_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - return REG_NOERROR; -} - -#if 0 -/* Return the next state to which the current state STATE will transit by - accepting the current input byte. */ - -static re_dfastate_t * -transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *state) -{ - const re_dfa_t *const dfa = mctx->dfa; - re_node_set next_nodes; - re_dfastate_t *next_state; - Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); - unsigned int context; - - *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) - { - Idx cur_node = state->nodes.elems[node_cnt]; - if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) - { - *err = re_node_set_merge (&next_nodes, - dfa->eclosures + dfa->nexts[cur_node]); - if (BE (*err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return NULL; - } - } - } - context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); - next_state = re_acquire_state_context (err, dfa, &next_nodes, context); - /* We don't need to check errors here, since the return value of - this function is next_state and ERR is already set. */ - - re_node_set_free (&next_nodes); - re_string_skip_bytes (&mctx->input, 1); - return next_state; -} -#endif - -#ifdef RE_ENABLE_I18N -static reg_errcode_t -internal_function -transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) -{ - const re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - Idx i; - - for (i = 0; i < pstate->nodes.nelem; ++i) - { - re_node_set dest_nodes, *new_nodes; - Idx cur_node_idx = pstate->nodes.elems[i]; - int naccepted; - Idx dest_idx; - unsigned int context; - re_dfastate_t *dest_state; - - if (!dfa->nodes[cur_node_idx].accept_mb) - continue; - - if (dfa->nodes[cur_node_idx].constraint) - { - context = re_string_context_at (&mctx->input, - re_string_cur_idx (&mctx->input), - mctx->eflags); - if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, - context)) - continue; - } - - /* How many bytes the node can accept? */ - naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, - re_string_cur_idx (&mctx->input)); - if (naccepted == 0) - continue; - - /* The node can accepts `naccepted' bytes. */ - dest_idx = re_string_cur_idx (&mctx->input) + naccepted; - mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted - : mctx->max_mb_elem_len); - err = clean_state_log_if_needed (mctx, dest_idx); - if (BE (err != REG_NOERROR, 0)) - return err; -#ifdef DEBUG - assert (dfa->nexts[cur_node_idx] != REG_MISSING); -#endif - new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; - - dest_state = mctx->state_log[dest_idx]; - if (dest_state == NULL) - dest_nodes = *new_nodes; - else - { - err = re_node_set_init_union (&dest_nodes, - dest_state->entrance_nodes, new_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - context = re_string_context_at (&mctx->input, dest_idx - 1, - mctx->eflags); - mctx->state_log[dest_idx] - = re_acquire_state_context (&err, dfa, &dest_nodes, context); - if (dest_state != NULL) - re_node_set_free (&dest_nodes); - if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) - return err; - } - return REG_NOERROR; -} -#endif /* RE_ENABLE_I18N */ - -static reg_errcode_t -internal_function -transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) -{ - const re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - Idx i; - Idx cur_str_idx = re_string_cur_idx (&mctx->input); - - for (i = 0; i < nodes->nelem; ++i) - { - Idx dest_str_idx, prev_nelem, bkc_idx; - Idx node_idx = nodes->elems[i]; - unsigned int context; - const re_token_t *node = dfa->nodes + node_idx; - re_node_set *new_dest_nodes; - - /* Check whether `node' is a backreference or not. */ - if (node->type != OP_BACK_REF) - continue; - - if (node->constraint) - { - context = re_string_context_at (&mctx->input, cur_str_idx, - mctx->eflags); - if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) - continue; - } - - /* `node' is a backreference. - Check the substring which the substring matched. */ - bkc_idx = mctx->nbkref_ents; - err = get_subexp (mctx, node_idx, cur_str_idx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - /* And add the epsilon closures (which is `new_dest_nodes') of - the backreference to appropriate state_log. */ -#ifdef DEBUG - assert (dfa->nexts[node_idx] != REG_MISSING); -#endif - for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) - { - Idx subexp_len; - re_dfastate_t *dest_state; - struct re_backref_cache_entry *bkref_ent; - bkref_ent = mctx->bkref_ents + bkc_idx; - if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) - continue; - subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; - new_dest_nodes = (subexp_len == 0 - ? dfa->eclosures + dfa->edests[node_idx].elems[0] - : dfa->eclosures + dfa->nexts[node_idx]); - dest_str_idx = (cur_str_idx + bkref_ent->subexp_to - - bkref_ent->subexp_from); - context = re_string_context_at (&mctx->input, dest_str_idx - 1, - mctx->eflags); - dest_state = mctx->state_log[dest_str_idx]; - prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 - : mctx->state_log[cur_str_idx]->nodes.nelem); - /* Add `new_dest_node' to state_log. */ - if (dest_state == NULL) - { - mctx->state_log[dest_str_idx] - = re_acquire_state_context (&err, dfa, new_dest_nodes, - context); - if (BE (mctx->state_log[dest_str_idx] == NULL - && err != REG_NOERROR, 0)) - goto free_return; - } - else - { - re_node_set dest_nodes; - err = re_node_set_init_union (&dest_nodes, - dest_state->entrance_nodes, - new_dest_nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&dest_nodes); - goto free_return; - } - mctx->state_log[dest_str_idx] - = re_acquire_state_context (&err, dfa, &dest_nodes, context); - re_node_set_free (&dest_nodes); - if (BE (mctx->state_log[dest_str_idx] == NULL - && err != REG_NOERROR, 0)) - goto free_return; - } - /* We need to check recursively if the backreference can epsilon - transit. */ - if (subexp_len == 0 - && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) - { - err = check_subexp_matching_top (mctx, new_dest_nodes, - cur_str_idx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - err = transit_state_bkref (mctx, new_dest_nodes); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - } - } - err = REG_NOERROR; - free_return: - return err; -} - -/* Enumerate all the candidates which the backreference BKREF_NODE can match - at BKREF_STR_IDX, and register them by match_ctx_add_entry(). - Note that we might collect inappropriate candidates here. - However, the cost of checking them strictly here is too high, then we - delay these checking for prune_impossible_nodes(). */ - -static reg_errcode_t -internal_function -get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) -{ - const re_dfa_t *const dfa = mctx->dfa; - Idx subexp_num, sub_top_idx; - const char *buf = (const char *) re_string_get_buffer (&mctx->input); - /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ - Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); - if (cache_idx != REG_MISSING) - { - const struct re_backref_cache_entry *entry - = mctx->bkref_ents + cache_idx; - do - if (entry->node == bkref_node) - return REG_NOERROR; /* We already checked it. */ - while (entry++->more); - } - - subexp_num = dfa->nodes[bkref_node].opr.idx; - - /* For each sub expression */ - for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) - { - reg_errcode_t err; - re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; - re_sub_match_last_t *sub_last; - Idx sub_last_idx, sl_str, bkref_str_off; - - if (dfa->nodes[sub_top->node].opr.idx != subexp_num) - continue; /* It isn't related. */ - - sl_str = sub_top->str_idx; - bkref_str_off = bkref_str_idx; - /* At first, check the last node of sub expressions we already - evaluated. */ - for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) - { - regoff_t sl_str_diff; - sub_last = sub_top->lasts[sub_last_idx]; - sl_str_diff = sub_last->str_idx - sl_str; - /* The matched string by the sub expression match with the substring - at the back reference? */ - if (sl_str_diff > 0) - { - if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) - { - /* Not enough chars for a successful match. */ - if (bkref_str_off + sl_str_diff > mctx->input.len) - break; - - err = clean_state_log_if_needed (mctx, - bkref_str_off - + sl_str_diff); - if (BE (err != REG_NOERROR, 0)) - return err; - buf = (const char *) re_string_get_buffer (&mctx->input); - } - if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) - /* We don't need to search this sub expression any more. */ - break; - } - bkref_str_off += sl_str_diff; - sl_str += sl_str_diff; - err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, - bkref_str_idx); - - /* Reload buf, since the preceding call might have reallocated - the buffer. */ - buf = (const char *) re_string_get_buffer (&mctx->input); - - if (err == REG_NOMATCH) - continue; - if (BE (err != REG_NOERROR, 0)) - return err; - } - - if (sub_last_idx < sub_top->nlasts) - continue; - if (sub_last_idx > 0) - ++sl_str; - /* Then, search for the other last nodes of the sub expression. */ - for (; sl_str <= bkref_str_idx; ++sl_str) - { - Idx cls_node; - regoff_t sl_str_off; - const re_node_set *nodes; - sl_str_off = sl_str - sub_top->str_idx; - /* The matched string by the sub expression match with the substring - at the back reference? */ - if (sl_str_off > 0) - { - if (BE (bkref_str_off >= mctx->input.valid_len, 0)) - { - /* If we are at the end of the input, we cannot match. */ - if (bkref_str_off >= mctx->input.len) - break; - - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) - return err; - - buf = (const char *) re_string_get_buffer (&mctx->input); - } - if (buf [bkref_str_off++] != buf[sl_str - 1]) - break; /* We don't need to search this sub expression - any more. */ - } - if (mctx->state_log[sl_str] == NULL) - continue; - /* Does this state have a ')' of the sub expression? */ - nodes = &mctx->state_log[sl_str]->nodes; - cls_node = find_subexp_node (dfa, nodes, subexp_num, - OP_CLOSE_SUBEXP); - if (cls_node == REG_MISSING) - continue; /* No. */ - if (sub_top->path == NULL) - { - sub_top->path = calloc (sizeof (state_array_t), - sl_str - sub_top->str_idx + 1); - if (sub_top->path == NULL) - return REG_ESPACE; - } - /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node - in the current context? */ - err = check_arrival (mctx, sub_top->path, sub_top->node, - sub_top->str_idx, cls_node, sl_str, - OP_CLOSE_SUBEXP); - if (err == REG_NOMATCH) - continue; - if (BE (err != REG_NOERROR, 0)) - return err; - sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); - if (BE (sub_last == NULL, 0)) - return REG_ESPACE; - err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, - bkref_str_idx); - if (err == REG_NOMATCH) - continue; - } - } - return REG_NOERROR; -} - -/* Helper functions for get_subexp(). */ - -/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. - If it can arrive, register the sub expression expressed with SUB_TOP - and SUB_LAST. */ - -static reg_errcode_t -internal_function -get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, - re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str) -{ - reg_errcode_t err; - Idx to_idx; - /* Can the subexpression arrive the back reference? */ - err = check_arrival (mctx, &sub_last->path, sub_last->node, - sub_last->str_idx, bkref_node, bkref_str, - OP_OPEN_SUBEXP); - if (err != REG_NOERROR) - return err; - err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, - sub_last->str_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; - return clean_state_log_if_needed (mctx, to_idx); -} - -/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. - Search '(' if FL_OPEN, or search ')' otherwise. - TODO: This function isn't efficient... - Because there might be more than one nodes whose types are - OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all - nodes. - E.g. RE: (a){2} */ - -static Idx -internal_function -find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, - Idx subexp_idx, int type) -{ - Idx cls_idx; - for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) - { - Idx cls_node = nodes->elems[cls_idx]; - const re_token_t *node = dfa->nodes + cls_node; - if (node->type == type - && node->opr.idx == subexp_idx) - return cls_node; - } - return REG_MISSING; -} - -/* Check whether the node TOP_NODE at TOP_STR can arrive to the node - LAST_NODE at LAST_STR. We record the path onto PATH since it will be - heavily reused. - Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ - -static reg_errcode_t -internal_function -check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, - Idx top_str, Idx last_node, Idx last_str, int type) -{ - const re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err = REG_NOERROR; - Idx subexp_num, backup_cur_idx, str_idx, null_cnt; - re_dfastate_t *cur_state = NULL; - re_node_set *cur_nodes, next_nodes; - re_dfastate_t **backup_state_log; - unsigned int context; - - subexp_num = dfa->nodes[top_node].opr.idx; - /* Extend the buffer if we need. */ - if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) - { - re_dfastate_t **new_array; - Idx old_alloc = path->alloc; - Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1; - if (BE (new_alloc < old_alloc, 0) - || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) - return REG_ESPACE; - new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - path->array = new_array; - path->alloc = new_alloc; - memset (new_array + old_alloc, '\0', - sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); - } - - str_idx = path->next_idx ? path->next_idx : top_str; - - /* Temporary modify MCTX. */ - backup_state_log = mctx->state_log; - backup_cur_idx = mctx->input.cur_idx; - mctx->state_log = path->array; - mctx->input.cur_idx = str_idx; - - /* Setup initial node set. */ - context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); - if (str_idx == top_str) - { - err = re_node_set_init_1 (&next_nodes, top_node); - if (BE (err != REG_NOERROR, 0)) - return err; - err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - else - { - cur_state = mctx->state_log[str_idx]; - if (cur_state && cur_state->has_backref) - { - err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - else - re_node_set_init_empty (&next_nodes); - } - if (str_idx == top_str || (cur_state && cur_state->has_backref)) - { - if (next_nodes.nelem) - { - err = expand_bkref_cache (mctx, &next_nodes, str_idx, - subexp_num, type); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); - if (BE (cur_state == NULL && err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - mctx->state_log[str_idx] = cur_state; - } - - for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) - { - re_node_set_empty (&next_nodes); - if (mctx->state_log[str_idx + 1]) - { - err = re_node_set_merge (&next_nodes, - &mctx->state_log[str_idx + 1]->nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - if (cur_state) - { - err = check_arrival_add_next_nodes (mctx, str_idx, - &cur_state->non_eps_nodes, - &next_nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - ++str_idx; - if (next_nodes.nelem) - { - err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - err = expand_bkref_cache (mctx, &next_nodes, str_idx, - subexp_num, type); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); - cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); - if (BE (cur_state == NULL && err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - mctx->state_log[str_idx] = cur_state; - null_cnt = cur_state == NULL ? null_cnt + 1 : 0; - } - re_node_set_free (&next_nodes); - cur_nodes = (mctx->state_log[last_str] == NULL ? NULL - : &mctx->state_log[last_str]->nodes); - path->next_idx = str_idx; - - /* Fix MCTX. */ - mctx->state_log = backup_state_log; - mctx->input.cur_idx = backup_cur_idx; - - /* Then check the current node set has the node LAST_NODE. */ - if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) - return REG_NOERROR; - - return REG_NOMATCH; -} - -/* Helper functions for check_arrival. */ - -/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them - to NEXT_NODES. - TODO: This function is similar to the functions transit_state*(), - however this function has many additional works. - Can't we unify them? */ - -static reg_errcode_t -internal_function -check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, - re_node_set *cur_nodes, re_node_set *next_nodes) -{ - const re_dfa_t *const dfa = mctx->dfa; - bool ok; - Idx cur_idx; - reg_errcode_t err = REG_NOERROR; - re_node_set union_set; - re_node_set_init_empty (&union_set); - for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) - { - int naccepted = 0; - Idx cur_node = cur_nodes->elems[cur_idx]; -#ifdef DEBUG - re_token_type_t type = dfa->nodes[cur_node].type; - assert (!IS_EPSILON_NODE (type)); -#endif -#ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ - if (dfa->nodes[cur_node].accept_mb) - { - naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, - str_idx); - if (naccepted > 1) - { - re_dfastate_t *dest_state; - Idx next_node = dfa->nexts[cur_node]; - Idx next_idx = str_idx + naccepted; - dest_state = mctx->state_log[next_idx]; - re_node_set_empty (&union_set); - if (dest_state) - { - err = re_node_set_merge (&union_set, &dest_state->nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&union_set); - return err; - } - } - ok = re_node_set_insert (&union_set, next_node); - if (BE (! ok, 0)) - { - re_node_set_free (&union_set); - return REG_ESPACE; - } - mctx->state_log[next_idx] = re_acquire_state (&err, dfa, - &union_set); - if (BE (mctx->state_log[next_idx] == NULL - && err != REG_NOERROR, 0)) - { - re_node_set_free (&union_set); - return err; - } - } - } -#endif /* RE_ENABLE_I18N */ - if (naccepted - || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) - { - ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); - if (BE (! ok, 0)) - { - re_node_set_free (&union_set); - return REG_ESPACE; - } - } - } - re_node_set_free (&union_set); - return REG_NOERROR; -} - -/* For all the nodes in CUR_NODES, add the epsilon closures of them to - CUR_NODES, however exclude the nodes which are: - - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN. - - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. -*/ - -static reg_errcode_t -internal_function -check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, - Idx ex_subexp, int type) -{ - reg_errcode_t err; - Idx idx, outside_node; - re_node_set new_nodes; -#ifdef DEBUG - assert (cur_nodes->nelem); -#endif - err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); - if (BE (err != REG_NOERROR, 0)) - return err; - /* Create a new node set NEW_NODES with the nodes which are epsilon - closures of the node in CUR_NODES. */ - - for (idx = 0; idx < cur_nodes->nelem; ++idx) - { - Idx cur_node = cur_nodes->elems[idx]; - const re_node_set *eclosure = dfa->eclosures + cur_node; - outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); - if (outside_node == REG_MISSING) - { - /* There are no problematic nodes, just merge them. */ - err = re_node_set_merge (&new_nodes, eclosure); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&new_nodes); - return err; - } - } - else - { - /* There are problematic nodes, re-calculate incrementally. */ - err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, - ex_subexp, type); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&new_nodes); - return err; - } - } - } - re_node_set_free (cur_nodes); - *cur_nodes = new_nodes; - return REG_NOERROR; -} - -/* Helper function for check_arrival_expand_ecl. - Check incrementally the epsilon closure of TARGET, and if it isn't - problematic append it to DST_NODES. */ - -static reg_errcode_t -internal_function -check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, - Idx target, Idx ex_subexp, int type) -{ - Idx cur_node; - for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) - { - bool ok; - - if (dfa->nodes[cur_node].type == type - && dfa->nodes[cur_node].opr.idx == ex_subexp) - { - if (type == OP_CLOSE_SUBEXP) - { - ok = re_node_set_insert (dst_nodes, cur_node); - if (BE (! ok, 0)) - return REG_ESPACE; - } - break; - } - ok = re_node_set_insert (dst_nodes, cur_node); - if (BE (! ok, 0)) - return REG_ESPACE; - if (dfa->edests[cur_node].nelem == 0) - break; - if (dfa->edests[cur_node].nelem == 2) - { - reg_errcode_t err; - err = check_arrival_expand_ecl_sub (dfa, dst_nodes, - dfa->edests[cur_node].elems[1], - ex_subexp, type); - if (BE (err != REG_NOERROR, 0)) - return err; - } - cur_node = dfa->edests[cur_node].elems[0]; - } - return REG_NOERROR; -} - - -/* For all the back references in the current state, calculate the - destination of the back references by the appropriate entry - in MCTX->BKREF_ENTS. */ - -static reg_errcode_t -internal_function -expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, - Idx cur_str, Idx subexp_num, int type) -{ - const re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); - struct re_backref_cache_entry *ent; - - if (cache_idx_start == REG_MISSING) - return REG_NOERROR; - - restart: - ent = mctx->bkref_ents + cache_idx_start; - do - { - Idx to_idx, next_node; - - /* Is this entry ENT is appropriate? */ - if (!re_node_set_contains (cur_nodes, ent->node)) - continue; /* No. */ - - to_idx = cur_str + ent->subexp_to - ent->subexp_from; - /* Calculate the destination of the back reference, and append it - to MCTX->STATE_LOG. */ - if (to_idx == cur_str) - { - /* The backreference did epsilon transit, we must re-check all the - node in the current state. */ - re_node_set new_dests; - reg_errcode_t err2, err3; - next_node = dfa->edests[ent->node].elems[0]; - if (re_node_set_contains (cur_nodes, next_node)) - continue; - err = re_node_set_init_1 (&new_dests, next_node); - err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); - err3 = re_node_set_merge (cur_nodes, &new_dests); - re_node_set_free (&new_dests); - if (BE (err != REG_NOERROR || err2 != REG_NOERROR - || err3 != REG_NOERROR, 0)) - { - err = (err != REG_NOERROR ? err - : (err2 != REG_NOERROR ? err2 : err3)); - return err; - } - /* TODO: It is still inefficient... */ - goto restart; - } - else - { - re_node_set union_set; - next_node = dfa->nexts[ent->node]; - if (mctx->state_log[to_idx]) - { - bool ok; - if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, - next_node)) - continue; - err = re_node_set_init_copy (&union_set, - &mctx->state_log[to_idx]->nodes); - ok = re_node_set_insert (&union_set, next_node); - if (BE (err != REG_NOERROR || ! ok, 0)) - { - re_node_set_free (&union_set); - err = err != REG_NOERROR ? err : REG_ESPACE; - return err; - } - } - else - { - err = re_node_set_init_1 (&union_set, next_node); - if (BE (err != REG_NOERROR, 0)) - return err; - } - mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); - re_node_set_free (&union_set); - if (BE (mctx->state_log[to_idx] == NULL - && err != REG_NOERROR, 0)) - return err; - } - } - while (ent++->more); - return REG_NOERROR; -} - -/* Build transition table for the state. - Return true if successful. */ - -static bool -internal_function -build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) -{ - reg_errcode_t err; - Idx i, j; - int ch; - bool need_word_trtable = false; - bitset_word_t elem, mask; - bool dests_node_malloced = false; - bool dest_states_malloced = false; - Idx ndests; /* Number of the destination states from `state'. */ - re_dfastate_t **trtable; - re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; - re_node_set follows, *dests_node; - bitset_t *dests_ch; - bitset_t acceptable; - - struct dests_alloc - { - re_node_set dests_node[SBC_MAX]; - bitset_t dests_ch[SBC_MAX]; - } *dests_alloc; - - /* We build DFA states which corresponds to the destination nodes - from `state'. `dests_node[i]' represents the nodes which i-th - destination state contains, and `dests_ch[i]' represents the - characters which i-th destination state accepts. */ - if (__libc_use_alloca (sizeof (struct dests_alloc))) - dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); - else - { - dests_alloc = re_malloc (struct dests_alloc, 1); - if (BE (dests_alloc == NULL, 0)) - return false; - dests_node_malloced = true; - } - dests_node = dests_alloc->dests_node; - dests_ch = dests_alloc->dests_ch; - - /* Initialize transiton table. */ - state->word_trtable = state->trtable = NULL; - - /* At first, group all nodes belonging to `state' into several - destinations. */ - ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); - if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) - { - if (dests_node_malloced) - free (dests_alloc); - if (ndests == 0) - { - state->trtable = (re_dfastate_t **) - calloc (sizeof (re_dfastate_t *), SBC_MAX); - return true; - } - return false; - } - - err = re_node_set_alloc (&follows, ndests + 1); - if (BE (err != REG_NOERROR, 0)) - goto out_free; - - /* Avoid arithmetic overflow in size calculation. */ - if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX) - / (3 * sizeof (re_dfastate_t *))) - < ndests), - 0)) - goto out_free; - - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX - + ndests * 3 * sizeof (re_dfastate_t *))) - dest_states = (re_dfastate_t **) - alloca (ndests * 3 * sizeof (re_dfastate_t *)); - else - { - dest_states = (re_dfastate_t **) - malloc (ndests * 3 * sizeof (re_dfastate_t *)); - if (BE (dest_states == NULL, 0)) - { -out_free: - if (dest_states_malloced) - free (dest_states); - re_node_set_free (&follows); - for (i = 0; i < ndests; ++i) - re_node_set_free (dests_node + i); - if (dests_node_malloced) - free (dests_alloc); - return false; - } - dest_states_malloced = true; - } - dest_states_word = dest_states + ndests; - dest_states_nl = dest_states_word + ndests; - bitset_empty (acceptable); - - /* Then build the states for all destinations. */ - for (i = 0; i < ndests; ++i) - { - Idx next_node; - re_node_set_empty (&follows); - /* Merge the follows of this destination states. */ - for (j = 0; j < dests_node[i].nelem; ++j) - { - next_node = dfa->nexts[dests_node[i].elems[j]]; - if (next_node != REG_MISSING) - { - err = re_node_set_merge (&follows, dfa->eclosures + next_node); - if (BE (err != REG_NOERROR, 0)) - goto out_free; - } - } - dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); - if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) - goto out_free; - /* If the new state has context constraint, - build appropriate states for these contexts. */ - if (dest_states[i]->has_constraint) - { - dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, - CONTEXT_WORD); - if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) - goto out_free; - - if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) - need_word_trtable = true; - - dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, - CONTEXT_NEWLINE); - if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) - goto out_free; - } - else - { - dest_states_word[i] = dest_states[i]; - dest_states_nl[i] = dest_states[i]; - } - bitset_merge (acceptable, dests_ch[i]); - } - - if (!BE (need_word_trtable, 0)) - { - /* We don't care about whether the following character is a word - character, or we are in a single-byte character set so we can - discern by looking at the character code: allocate a - 256-entry transition table. */ - trtable = state->trtable = - (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); - if (BE (trtable == NULL, 0)) - goto out_free; - - /* For all characters ch...: */ - for (i = 0; i < BITSET_WORDS; ++i) - for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; - elem; - mask <<= 1, elem >>= 1, ++ch) - if (BE (elem & 1, 0)) - { - /* There must be exactly one destination which accepts - character ch. See group_nodes_into_DFAstates. */ - for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) - ; - - /* j-th destination accepts the word character ch. */ - if (dfa->word_char[i] & mask) - trtable[ch] = dest_states_word[j]; - else - trtable[ch] = dest_states[j]; - } - } - else - { - /* We care about whether the following character is a word - character, and we are in a multi-byte character set: discern - by looking at the character code: build two 256-entry - transition tables, one starting at trtable[0] and one - starting at trtable[SBC_MAX]. */ - trtable = state->word_trtable = - (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); - if (BE (trtable == NULL, 0)) - goto out_free; - - /* For all characters ch...: */ - for (i = 0; i < BITSET_WORDS; ++i) - for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; - elem; - mask <<= 1, elem >>= 1, ++ch) - if (BE (elem & 1, 0)) - { - /* There must be exactly one destination which accepts - character ch. See group_nodes_into_DFAstates. */ - for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) - ; - - /* j-th destination accepts the word character ch. */ - trtable[ch] = dest_states[j]; - trtable[ch + SBC_MAX] = dest_states_word[j]; - } - } - - /* new line */ - if (bitset_contain (acceptable, NEWLINE_CHAR)) - { - /* The current state accepts newline character. */ - for (j = 0; j < ndests; ++j) - if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) - { - /* k-th destination accepts newline character. */ - trtable[NEWLINE_CHAR] = dest_states_nl[j]; - if (need_word_trtable) - trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; - /* There must be only one destination which accepts - newline. See group_nodes_into_DFAstates. */ - break; - } - } - - if (dest_states_malloced) - free (dest_states); - - re_node_set_free (&follows); - for (i = 0; i < ndests; ++i) - re_node_set_free (dests_node + i); - - if (dests_node_malloced) - free (dests_alloc); - - return true; -} - -/* Group all nodes belonging to STATE into several destinations. - Then for all destinations, set the nodes belonging to the destination - to DESTS_NODE[i] and set the characters accepted by the destination - to DEST_CH[i]. This function return the number of destinations. */ - -static Idx -internal_function -group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, - re_node_set *dests_node, bitset_t *dests_ch) -{ - reg_errcode_t err; - bool ok; - Idx i, j, k; - Idx ndests; /* Number of the destinations from `state'. */ - bitset_t accepts; /* Characters a node can accept. */ - const re_node_set *cur_nodes = &state->nodes; - bitset_empty (accepts); - ndests = 0; - - /* For all the nodes belonging to `state', */ - for (i = 0; i < cur_nodes->nelem; ++i) - { - re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; - re_token_type_t type = node->type; - unsigned int constraint = node->constraint; - - /* Enumerate all single byte character this node can accept. */ - if (type == CHARACTER) - bitset_set (accepts, node->opr.c); - else if (type == SIMPLE_BRACKET) - { - bitset_merge (accepts, node->opr.sbcset); - } - else if (type == OP_PERIOD) - { -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - bitset_merge (accepts, dfa->sb_char); - else -#endif - bitset_set_all (accepts); - if (!(dfa->syntax & RE_DOT_NEWLINE)) - bitset_clear (accepts, '\n'); - if (dfa->syntax & RE_DOT_NOT_NULL) - bitset_clear (accepts, '\0'); - } -#ifdef RE_ENABLE_I18N - else if (type == OP_UTF8_PERIOD) - { - if (ASCII_CHARS % BITSET_WORD_BITS == 0) - memset (accepts, -1, ASCII_CHARS / CHAR_BIT); - else - bitset_merge (accepts, utf8_sb_map); - if (!(dfa->syntax & RE_DOT_NEWLINE)) - bitset_clear (accepts, '\n'); - if (dfa->syntax & RE_DOT_NOT_NULL) - bitset_clear (accepts, '\0'); - } -#endif - else - continue; - - /* Check the `accepts' and sift the characters which are not - match it the context. */ - if (constraint) - { - if (constraint & NEXT_NEWLINE_CONSTRAINT) - { - bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); - bitset_empty (accepts); - if (accepts_newline) - bitset_set (accepts, NEWLINE_CHAR); - else - continue; - } - if (constraint & NEXT_ENDBUF_CONSTRAINT) - { - bitset_empty (accepts); - continue; - } - - if (constraint & NEXT_WORD_CONSTRAINT) - { - bitset_word_t any_set = 0; - if (type == CHARACTER && !node->word_char) - { - bitset_empty (accepts); - continue; - } -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_WORDS; ++j) - any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); - else -#endif - for (j = 0; j < BITSET_WORDS; ++j) - any_set |= (accepts[j] &= dfa->word_char[j]); - if (!any_set) - continue; - } - if (constraint & NEXT_NOTWORD_CONSTRAINT) - { - bitset_word_t any_set = 0; - if (type == CHARACTER && node->word_char) - { - bitset_empty (accepts); - continue; - } -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_WORDS; ++j) - any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); - else -#endif - for (j = 0; j < BITSET_WORDS; ++j) - any_set |= (accepts[j] &= ~dfa->word_char[j]); - if (!any_set) - continue; - } - } - - /* Then divide `accepts' into DFA states, or create a new - state. Above, we make sure that accepts is not empty. */ - for (j = 0; j < ndests; ++j) - { - bitset_t intersec; /* Intersection sets, see below. */ - bitset_t remains; - /* Flags, see below. */ - bitset_word_t has_intersec, not_subset, not_consumed; - - /* Optimization, skip if this state doesn't accept the character. */ - if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) - continue; - - /* Enumerate the intersection set of this state and `accepts'. */ - has_intersec = 0; - for (k = 0; k < BITSET_WORDS; ++k) - has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; - /* And skip if the intersection set is empty. */ - if (!has_intersec) - continue; - - /* Then check if this state is a subset of `accepts'. */ - not_subset = not_consumed = 0; - for (k = 0; k < BITSET_WORDS; ++k) - { - not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; - not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; - } - - /* If this state isn't a subset of `accepts', create a - new group state, which has the `remains'. */ - if (not_subset) - { - bitset_copy (dests_ch[ndests], remains); - bitset_copy (dests_ch[j], intersec); - err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); - if (BE (err != REG_NOERROR, 0)) - goto error_return; - ++ndests; - } - - /* Put the position in the current group. */ - ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); - if (BE (! ok, 0)) - goto error_return; - - /* If all characters are consumed, go to next node. */ - if (!not_consumed) - break; - } - /* Some characters remain, create a new group. */ - if (j == ndests) - { - bitset_copy (dests_ch[ndests], accepts); - err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); - if (BE (err != REG_NOERROR, 0)) - goto error_return; - ++ndests; - bitset_empty (accepts); - } - } - return ndests; - error_return: - for (j = 0; j < ndests; ++j) - re_node_set_free (dests_node + j); - return REG_MISSING; -} - -#ifdef RE_ENABLE_I18N -/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. - Return the number of the bytes the node accepts. - STR_IDX is the current index of the input string. - - This function handles the nodes which can accept one character, or - one collating element like '.', '[a-z]', opposite to the other nodes - can only accept one byte. */ - -static int -internal_function -check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, - const re_string_t *input, Idx str_idx) -{ - const re_token_t *node = dfa->nodes + node_idx; - int char_len, elem_len; - Idx i; - - if (BE (node->type == OP_UTF8_PERIOD, 0)) - { - unsigned char c = re_string_byte_at (input, str_idx), d; - if (BE (c < 0xc2, 1)) - return 0; - - if (str_idx + 2 > input->len) - return 0; - - d = re_string_byte_at (input, str_idx + 1); - if (c < 0xe0) - return (d < 0x80 || d > 0xbf) ? 0 : 2; - else if (c < 0xf0) - { - char_len = 3; - if (c == 0xe0 && d < 0xa0) - return 0; - } - else if (c < 0xf8) - { - char_len = 4; - if (c == 0xf0 && d < 0x90) - return 0; - } - else if (c < 0xfc) - { - char_len = 5; - if (c == 0xf8 && d < 0x88) - return 0; - } - else if (c < 0xfe) - { - char_len = 6; - if (c == 0xfc && d < 0x84) - return 0; - } - else - return 0; - - if (str_idx + char_len > input->len) - return 0; - - for (i = 1; i < char_len; ++i) - { - d = re_string_byte_at (input, str_idx + i); - if (d < 0x80 || d > 0xbf) - return 0; - } - return char_len; - } - - char_len = re_string_char_size_at (input, str_idx); - if (node->type == OP_PERIOD) - { - if (char_len <= 1) - return 0; - /* FIXME: I don't think this if is needed, as both '\n' - and '\0' are char_len == 1. */ - /* '.' accepts any one character except the following two cases. */ - if ((!(dfa->syntax & RE_DOT_NEWLINE) && - re_string_byte_at (input, str_idx) == '\n') || - ((dfa->syntax & RE_DOT_NOT_NULL) && - re_string_byte_at (input, str_idx) == '\0')) - return 0; - return char_len; - } - - elem_len = re_string_elem_size_at (input, str_idx); - if ((elem_len <= 1 && char_len <= 1) || char_len == 0) - return 0; - - if (node->type == COMPLEX_BRACKET) - { - const re_charset_t *cset = node->opr.mbcset; -# ifdef _LIBC - const unsigned char *pin - = ((const unsigned char *) re_string_get_buffer (input) + str_idx); - Idx j; - uint32_t nrules; -# endif /* _LIBC */ - int match_len = 0; - wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) - ? re_string_wchar_at (input, str_idx) : 0); - - /* match with multibyte character? */ - for (i = 0; i < cset->nmbchars; ++i) - if (wc == cset->mbchars[i]) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - /* match with character_class? */ - for (i = 0; i < cset->nchar_classes; ++i) - { - wctype_t wt = cset->char_classes[i]; - if (__iswctype (wc, wt)) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - } - -# ifdef _LIBC - nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules != 0) - { - unsigned int in_collseq = 0; - const int32_t *table, *indirect; - const unsigned char *weights, *extra; - const char *collseqwc; - int32_t idx; - /* This #include defines a local function! */ -# include <locale/weight.h> - - /* match with collating_symbol? */ - if (cset->ncoll_syms) - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - for (i = 0; i < cset->ncoll_syms; ++i) - { - const unsigned char *coll_sym = extra + cset->coll_syms[i]; - /* Compare the length of input collating element and - the length of current collating element. */ - if (*coll_sym != elem_len) - continue; - /* Compare each bytes. */ - for (j = 0; j < *coll_sym; j++) - if (pin[j] != coll_sym[1 + j]) - break; - if (j == *coll_sym) - { - /* Match if every bytes is equal. */ - match_len = j; - goto check_node_accept_bytes_match; - } - } - - if (cset->nranges) - { - if (elem_len <= char_len) - { - collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); - in_collseq = __collseq_table_lookup (collseqwc, wc); - } - else - in_collseq = find_collation_sequence_value (pin, elem_len); - } - /* match with range expression? */ - for (i = 0; i < cset->nranges; ++i) - if (cset->range_starts[i] <= in_collseq - && in_collseq <= cset->range_ends[i]) - { - match_len = elem_len; - goto check_node_accept_bytes_match; - } - - /* match with equivalence_class? */ - if (cset->nequiv_classes) - { - const unsigned char *cp = pin; - table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - weights = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - idx = findidx (&cp); - if (idx > 0) - for (i = 0; i < cset->nequiv_classes; ++i) - { - int32_t equiv_class_idx = cset->equiv_classes[i]; - size_t weight_len = weights[idx]; - if (weight_len == weights[equiv_class_idx]) - { - Idx cnt = 0; - while (cnt <= weight_len - && (weights[equiv_class_idx + 1 + cnt] - == weights[idx + 1 + cnt])) - ++cnt; - if (cnt > weight_len) - { - match_len = elem_len; - goto check_node_accept_bytes_match; - } - } - } - } - } - else -# endif /* _LIBC */ - { - /* match with range expression? */ -#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__) - wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; -#else - wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - cmp_buf[2] = wc; -#endif - for (i = 0; i < cset->nranges; ++i) - { - cmp_buf[0] = cset->range_starts[i]; - cmp_buf[4] = cset->range_ends[i]; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - } - } - check_node_accept_bytes_match: - if (!cset->non_match) - return match_len; - else - { - if (match_len > 0) - return 0; - else - return (elem_len > char_len) ? elem_len : char_len; - } - } - return 0; -} - -# ifdef _LIBC -static unsigned int -internal_function -find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) -{ - uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules == 0) - { - if (mbs_len == 1) - { - /* No valid character. Match it as a single byte character. */ - const unsigned char *collseq = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); - return collseq[mbs[0]]; - } - return UINT_MAX; - } - else - { - int32_t idx; - const unsigned char *extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - int32_t extrasize = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; - - for (idx = 0; idx < extrasize;) - { - int mbs_cnt; - bool found = false; - int32_t elem_mbs_len; - /* Skip the name of collating element name. */ - idx = idx + extra[idx] + 1; - elem_mbs_len = extra[idx++]; - if (mbs_len == elem_mbs_len) - { - for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) - if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) - break; - if (mbs_cnt == elem_mbs_len) - /* Found the entry. */ - found = true; - } - /* Skip the byte sequence of the collating element. */ - idx += elem_mbs_len; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - /* Skip the collation sequence value. */ - idx += sizeof (uint32_t); - /* Skip the wide char sequence of the collating element. */ - idx = idx + sizeof (uint32_t) * (extra[idx] + 1); - /* If we found the entry, return the sequence value. */ - if (found) - return *(uint32_t *) (extra + idx); - /* Skip the collation sequence value. */ - idx += sizeof (uint32_t); - } - return UINT_MAX; - } -} -# endif /* _LIBC */ -#endif /* RE_ENABLE_I18N */ - -/* Check whether the node accepts the byte which is IDX-th - byte of the INPUT. */ - -static bool -internal_function -check_node_accept (const re_match_context_t *mctx, const re_token_t *node, - Idx idx) -{ - unsigned char ch; - ch = re_string_byte_at (&mctx->input, idx); - switch (node->type) - { - case CHARACTER: - if (node->opr.c != ch) - return false; - break; - - case SIMPLE_BRACKET: - if (!bitset_contain (node->opr.sbcset, ch)) - return false; - break; - -#ifdef RE_ENABLE_I18N - case OP_UTF8_PERIOD: - if (ch >= ASCII_CHARS) - return false; - /* FALLTHROUGH */ -#endif - case OP_PERIOD: - if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) - || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) - return false; - break; - - default: - return false; - } - - if (node->constraint) - { - /* The node has constraints. Check whether the current context - satisfies the constraints. */ - unsigned int context = re_string_context_at (&mctx->input, idx, - mctx->eflags); - if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) - return false; - } - - return true; -} - -/* Extend the buffers, if the buffers have run out. */ - -static reg_errcode_t -internal_function -extend_buffers (re_match_context_t *mctx) -{ - reg_errcode_t ret; - re_string_t *pstr = &mctx->input; - - /* Avoid overflow. */ - if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) - return REG_ESPACE; - - /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); - if (BE (ret != REG_NOERROR, 0)) - return ret; - - if (mctx->state_log != NULL) - { - /* And double the length of state_log. */ - /* XXX We have no indication of the size of this buffer. If this - allocation fail we have no indication that the state_log array - does not have the right size. */ - re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, - pstr->bufs_len + 1); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - mctx->state_log = new_array; - } - - /* Then reconstruct the buffers. */ - if (pstr->icase) - { -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - { - ret = build_wcs_upper_buffer (pstr); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - else -#endif /* RE_ENABLE_I18N */ - build_upper_buffer (pstr); - } - else - { -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - build_wcs_buffer (pstr); - else -#endif /* RE_ENABLE_I18N */ - { - if (pstr->trans != NULL) - re_string_translate_buffer (pstr); - } - } - return REG_NOERROR; -} - - -/* Functions for matching context. */ - -/* Initialize MCTX. */ - -static reg_errcode_t -internal_function -match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) -{ - mctx->eflags = eflags; - mctx->match_last = REG_MISSING; - if (n > 0) - { - /* Avoid overflow. */ - size_t max_object_size = - MAX (sizeof (struct re_backref_cache_entry), - sizeof (re_sub_match_top_t *)); - if (BE (SIZE_MAX / max_object_size < n, 0)) - return REG_ESPACE; - - mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); - mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); - if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) - return REG_ESPACE; - } - /* Already zero-ed by the caller. - else - mctx->bkref_ents = NULL; - mctx->nbkref_ents = 0; - mctx->nsub_tops = 0; */ - mctx->abkref_ents = n; - mctx->max_mb_elem_len = 1; - mctx->asub_tops = n; - return REG_NOERROR; -} - -/* Clean the entries which depend on the current input in MCTX. - This function must be invoked when the matcher changes the start index - of the input, or changes the input string. */ - -static void -internal_function -match_ctx_clean (re_match_context_t *mctx) -{ - Idx st_idx; - for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) - { - Idx sl_idx; - re_sub_match_top_t *top = mctx->sub_tops[st_idx]; - for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) - { - re_sub_match_last_t *last = top->lasts[sl_idx]; - re_free (last->path.array); - re_free (last); - } - re_free (top->lasts); - if (top->path) - { - re_free (top->path->array); - re_free (top->path); - } - free (top); - } - - mctx->nsub_tops = 0; - mctx->nbkref_ents = 0; -} - -/* Free all the memory associated with MCTX. */ - -static void -internal_function -match_ctx_free (re_match_context_t *mctx) -{ - /* First, free all the memory associated with MCTX->SUB_TOPS. */ - match_ctx_clean (mctx); - re_free (mctx->sub_tops); - re_free (mctx->bkref_ents); -} - -/* Add a new backreference entry to MCTX. - Note that we assume that caller never call this function with duplicate - entry, and call with STR_IDX which isn't smaller than any existing entry. -*/ - -static reg_errcode_t -internal_function -match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, - Idx to) -{ - if (mctx->nbkref_ents >= mctx->abkref_ents) - { - struct re_backref_cache_entry* new_entry; - new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, - mctx->abkref_ents * 2); - if (BE (new_entry == NULL, 0)) - { - re_free (mctx->bkref_ents); - return REG_ESPACE; - } - mctx->bkref_ents = new_entry; - memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', - sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); - mctx->abkref_ents *= 2; - } - if (mctx->nbkref_ents > 0 - && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) - mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; - - mctx->bkref_ents[mctx->nbkref_ents].node = node; - mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; - mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; - mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; - - /* This is a cache that saves negative results of check_dst_limits_calc_pos. - If bit N is clear, means that this entry won't epsilon-transition to - an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If - it is set, check_dst_limits_calc_pos_1 will recurse and try to find one - such node. - - A backreference does not epsilon-transition unless it is empty, so set - to all zeros if FROM != TO. */ - mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map - = (from == to ? -1 : 0); - - mctx->bkref_ents[mctx->nbkref_ents++].more = 0; - if (mctx->max_mb_elem_len < to - from) - mctx->max_mb_elem_len = to - from; - return REG_NOERROR; -} - -/* Return the first entry with the same str_idx, or REG_MISSING if none is - found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ - -static Idx -internal_function -search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) -{ - Idx left, right, mid, last; - last = right = mctx->nbkref_ents; - for (left = 0; left < right;) - { - mid = (left + right) / 2; - if (mctx->bkref_ents[mid].str_idx < str_idx) - left = mid + 1; - else - right = mid; - } - if (left < last && mctx->bkref_ents[left].str_idx == str_idx) - return left; - else - return REG_MISSING; -} - -/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches - at STR_IDX. */ - -static reg_errcode_t -internal_function -match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) -{ -#ifdef DEBUG - assert (mctx->sub_tops != NULL); - assert (mctx->asub_tops > 0); -#endif - if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) - { - Idx new_asub_tops = mctx->asub_tops * 2; - re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, - re_sub_match_top_t *, - new_asub_tops); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - mctx->sub_tops = new_array; - mctx->asub_tops = new_asub_tops; - } - mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); - if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) - return REG_ESPACE; - mctx->sub_tops[mctx->nsub_tops]->node = node; - mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; - return REG_NOERROR; -} - -/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches - at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ - -static re_sub_match_last_t * -internal_function -match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) -{ - re_sub_match_last_t *new_entry; - if (BE (subtop->nlasts == subtop->alasts, 0)) - { - Idx new_alasts = 2 * subtop->alasts + 1; - re_sub_match_last_t **new_array = re_realloc (subtop->lasts, - re_sub_match_last_t *, - new_alasts); - if (BE (new_array == NULL, 0)) - return NULL; - subtop->lasts = new_array; - subtop->alasts = new_alasts; - } - new_entry = calloc (1, sizeof (re_sub_match_last_t)); - if (BE (new_entry != NULL, 1)) - { - subtop->lasts[subtop->nlasts] = new_entry; - new_entry->node = node; - new_entry->str_idx = str_idx; - ++subtop->nlasts; - } - return new_entry; -} - -static void -internal_function -sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, - re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx) -{ - sctx->sifted_states = sifted_sts; - sctx->limited_states = limited_sts; - sctx->last_node = last_node; - sctx->last_str_idx = last_str_idx; - re_node_set_init_empty (&sctx->limits); -} diff --git a/lib/stat-macros.h b/lib/stat-macros.h deleted file mode 100644 index 690216c..0000000 --- a/lib/stat-macros.h +++ /dev/null @@ -1,3 +0,0 @@ -/* All the mode bits that can be affected by chmod. */ -#define CHMOD_MODE_BITS \ - (S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO) diff --git a/lib/stdbool_.h b/lib/stdbool_.h deleted file mode 100644 index 8525f0f..0000000 --- a/lib/stdbool_.h +++ /dev/null @@ -1,118 +0,0 @@ -/* Copyright (C) 2001, 2002, 2003, 2006, 2007 Free Software Foundation, Inc. - Written by Bruno Haible <haible@clisp.cons.org>, 2001. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _STDBOOL_H -#define _STDBOOL_H - -/* ISO C 99 <stdbool.h> for platforms that lack it. */ - -/* Usage suggestions: - - Programs that use <stdbool.h> should be aware of some limitations - and standards compliance issues. - - Standards compliance: - - - <stdbool.h> must be #included before 'bool', 'false', 'true' - can be used. - - - You cannot assume that sizeof (bool) == 1. - - - Programs should not undefine the macros bool, true, and false, - as C99 lists that as an "obsolescent feature". - - Limitations of this substitute, when used in a C89 environment: - - - <stdbool.h> must be #included before the '_Bool' type can be used. - - - You cannot assume that _Bool is a typedef; it might be a macro. - - - Bit-fields of type 'bool' are not supported. Portable code - should use 'unsigned int foo : 1;' rather than 'bool foo : 1;'. - - - In C99, casts and automatic conversions to '_Bool' or 'bool' are - performed in such a way that every nonzero value gets converted - to 'true', and zero gets converted to 'false'. This doesn't work - with this substitute. With this substitute, only the values 0 and 1 - give the expected result when converted to _Bool' or 'bool'. - - Also, it is suggested that programs use 'bool' rather than '_Bool'; - this isn't required, but 'bool' is more common. */ - - -/* 7.16. Boolean type and values */ - -/* BeOS <sys/socket.h> already #defines false 0, true 1. We use the same - definitions below, but temporarily we have to #undef them. */ -#ifdef __BEOS__ -# include <OS.h> /* defines bool but not _Bool */ -# undef false -# undef true -#endif - -/* For the sake of symbolic names in gdb, we define true and false as - enum constants, not only as macros. - It is tempting to write - typedef enum { false = 0, true = 1 } _Bool; - so that gdb prints values of type 'bool' symbolically. But if we do - this, values of type '_Bool' may promote to 'int' or 'unsigned int' - (see ISO C 99 6.7.2.2.(4)); however, '_Bool' must promote to 'int' - (see ISO C 99 6.3.1.1.(2)). So we add a negative value to the - enum; this ensures that '_Bool' promotes to 'int'. */ -#if defined __cplusplus || defined __BEOS__ - /* A compiler known to have 'bool'. */ - /* If the compiler already has both 'bool' and '_Bool', we can assume they - are the same types. */ -# if !@HAVE__BOOL@ -typedef bool _Bool; -# endif -#else -# if !defined __GNUC__ - /* If @HAVE__BOOL@: - Some HP-UX cc and AIX IBM C compiler versions have compiler bugs when - the built-in _Bool type is used. See - http://gcc.gnu.org/ml/gcc-patches/2003-12/msg02303.html - http://lists.gnu.org/archive/html/bug-coreutils/2005-11/msg00161.html - http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html - Similar bugs are likely with other compilers as well; this file - wouldn't be used if <stdbool.h> was working. - So we override the _Bool type. - If !@HAVE__BOOL@: - Need to define _Bool ourselves. As 'signed char' or as an enum type? - Use of a typedef, with SunPRO C, leads to a stupid - "warning: _Bool is a keyword in ISO C99". - Use of an enum type, with IRIX cc, leads to a stupid - "warning(1185): enumerated type mixed with another type". - The only benefit of the enum type, debuggability, is not important - with these compilers. So use 'signed char' and no typedef. */ -# define _Bool signed char -enum { false = 0, true = 1 }; -# else - /* With this compiler, trust the _Bool type if the compiler has it. */ -# if !@HAVE__BOOL@ -typedef enum { _Bool_must_promote_to_int = -1, false = 0, true = 1 } _Bool; -# endif -# endif -#endif -#define bool _Bool - -/* The other macros must be usable in preprocessor directives. */ -#define false 0 -#define true 1 -#define __bool_true_false_are_defined 1 - -#endif /* _STDBOOL_H */ diff --git a/lib/strcase.h b/lib/strcase.h deleted file mode 100644 index e420798..0000000 --- a/lib/strcase.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Case-insensitive string comparison functions. - Copyright (C) 1995-1996, 2001, 2003, 2005 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _STRCASE_H -#define _STRCASE_H - -#include <stddef.h> - - -#ifdef __cplusplus -extern "C" { -#endif - - -/* Compare strings S1 and S2, ignoring case, returning less than, equal to or - greater than zero if S1 is lexicographically less than, equal to or greater - than S2. - Note: This function may, in multibyte locales, return 0 for strings of - different lengths! */ -extern int strcasecmp (const char *s1, const char *s2); - -/* Compare no more than N characters of strings S1 and S2, ignoring case, - returning less than, equal to or greater than zero if S1 is - lexicographically less than, equal to or greater than S2. - Note: This function can not work correctly in multibyte locales. */ -extern int strncasecmp (const char *s1, const char *s2, size_t n); - - -#ifdef __cplusplus -} -#endif - - -#endif /* _STRCASE_H */ diff --git a/lib/strcasecmp.c b/lib/strcasecmp.c deleted file mode 100644 index c605fb0..0000000 --- a/lib/strcasecmp.c +++ /dev/null @@ -1,63 +0,0 @@ -/* Case-insensitive string comparison function. - Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -/* Specification. */ -#include <string.h> - -#include <ctype.h> -#include <limits.h> - -#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) - -/* Compare strings S1 and S2, ignoring case, returning less than, equal to or - greater than zero if S1 is lexicographically less than, equal to or greater - than S2. - Note: This function does not work with multibyte strings! */ - -int -strcasecmp (const char *s1, const char *s2) -{ - const unsigned char *p1 = (const unsigned char *) s1; - const unsigned char *p2 = (const unsigned char *) s2; - unsigned char c1, c2; - - if (p1 == p2) - return 0; - - do - { - c1 = TOLOWER (*p1); - c2 = TOLOWER (*p2); - - if (c1 == '\0') - break; - - ++p1; - ++p2; - } - while (c1 == c2); - - if (UCHAR_MAX <= INT_MAX) - return c1 - c2; - else - /* On machines where 'char' and 'int' are types of the same size, the - difference of two 'unsigned char' values - including the sign bit - - doesn't fit in an 'int'. */ - return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); -} diff --git a/lib/strcspn.c b/lib/strcspn.c deleted file mode 100644 index 5a8d6f9..0000000 --- a/lib/strcspn.c +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (C) 1991, 1994, 1996-1997, 2002-2003, 2005-2006 Free Software Foundation, Inc. - - NOTE: The canonical source of this file is maintained with the GNU C Library. - Bugs can be reported to bug-glibc@gnu.org. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include <stddef.h> -#include <string.h> - -#undef strcspn - -/* Return the length of the maximum initial segment of S - which contains no characters from REJECT. */ -size_t -strcspn (const char *s, const char *reject) -{ - size_t count = 0; - - while (*s != '\0') - if (strchr (reject, *s++) == NULL) - ++count; - else - return count; - - return count; -} diff --git a/lib/strdup.c b/lib/strdup.c deleted file mode 100644 index c614108..0000000 --- a/lib/strdup.c +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (C) 1991, 1996, 1997, 1998, 2002, 2003, 2004, 2006, 2007 Free - Software Foundation, Inc. - - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _LIBC -# include <config.h> -#endif - -/* Get specification. */ -#include <string.h> - -#include <stdlib.h> - -#undef __strdup -#ifdef _LIBC -# undef strdup -#endif - -#ifndef weak_alias -# define __strdup strdup -#endif - -/* Duplicate S, returning an identical malloc'd string. */ -char * -__strdup (const char *s) -{ - size_t len = strlen (s) + 1; - void *new = malloc (len); - - if (new == NULL) - return NULL; - - return (char *) memcpy (new, s, len); -} -#ifdef libc_hidden_def -libc_hidden_def (__strdup) -#endif -#ifdef weak_alias -weak_alias (__strdup, strdup) -#endif diff --git a/lib/strdup.h b/lib/strdup.h deleted file mode 100644 index a0d5fb9..0000000 --- a/lib/strdup.h +++ /dev/null @@ -1,29 +0,0 @@ -/* strdup.h -- duplicate a string - Copyright (C) 2004 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef STRDUP_H_ -#define STRDUP_H_ - -/* Get strdup declaration, if available. */ -#include <string.h> - -#if defined HAVE_DECL_STRDUP && !HAVE_DECL_STRDUP && !defined strdup -/* Duplicate S, returning an identical malloc'd string. */ -extern char *strdup (const char *s); -#endif - -#endif /* STRDUP_H_ */ diff --git a/lib/stripslash.c b/lib/stripslash.c deleted file mode 100644 index 342d497..0000000 --- a/lib/stripslash.c +++ /dev/null @@ -1,45 +0,0 @@ -/* stripslash.c -- remove redundant trailing slashes from a file name - - Copyright (C) 1990, 2001, 2003-2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include "dirname.h" - -/* Remove trailing slashes from FILE. Return true if a trailing slash - was removed. This is useful when using file name completion from a - shell that adds a "/" after directory names (such as tcsh and - bash), because on symlinks to directories, several system calls - have different semantics according to whether a trailing slash is - present. */ - -bool -strip_trailing_slashes (char *file) -{ - char *base = last_component (file); - char *base_lim; - bool had_slash; - - /* last_component returns "" for file system roots, but we need to turn - `///' into `/'. */ - if (! *base) - base = file; - base_lim = base + base_len (base); - had_slash = (*base_lim != '\0'); - *base_lim = '\0'; - return had_slash; -} diff --git a/lib/strncasecmp.c b/lib/strncasecmp.c deleted file mode 100644 index 473a610..0000000 --- a/lib/strncasecmp.c +++ /dev/null @@ -1,63 +0,0 @@ -/* strncasecmp.c -- case insensitive string comparator - Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -/* Specification. */ -#include <string.h> - -#include <ctype.h> -#include <limits.h> - -#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) - -/* Compare no more than N bytes of strings S1 and S2, ignoring case, - returning less than, equal to or greater than zero if S1 is - lexicographically less than, equal to or greater than S2. - Note: This function cannot work correctly in multibyte locales. */ - -int -strncasecmp (const char *s1, const char *s2, size_t n) -{ - register const unsigned char *p1 = (const unsigned char *) s1; - register const unsigned char *p2 = (const unsigned char *) s2; - unsigned char c1, c2; - - if (p1 == p2 || n == 0) - return 0; - - do - { - c1 = TOLOWER (*p1); - c2 = TOLOWER (*p2); - - if (--n == 0 || c1 == '\0') - break; - - ++p1; - ++p2; - } - while (c1 == c2); - - if (UCHAR_MAX <= INT_MAX) - return c1 - c2; - else - /* On machines where 'char' and 'int' are types of the same size, the - difference of two 'unsigned char' values - including the sign bit - - doesn't fit in an 'int'. */ - return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); -} diff --git a/lib/strndup.c b/lib/strndup.c deleted file mode 100644 index 3a1b0ea..0000000 --- a/lib/strndup.c +++ /dev/null @@ -1,37 +0,0 @@ -/* A replacement function, for systems that lack strndup. - - Copyright (C) 1996, 1997, 1998, 2001, 2002, 2003, 2005, 2006, 2007 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include <string.h> - -#include <stdlib.h> - -char * -strndup (char const *s, size_t n) -{ - size_t len = strnlen (s, n); - char *new = malloc (len + 1); - - if (new == NULL) - return NULL; - - new[len] = '\0'; - return memcpy (new, s, len); -} diff --git a/lib/strndup.h b/lib/strndup.h deleted file mode 100644 index 8eae493..0000000 --- a/lib/strndup.h +++ /dev/null @@ -1,30 +0,0 @@ -/* Duplicate a size-bounded string. - Copyright (C) 2003 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#if HAVE_STRNDUP - -/* Get strndup() declaration. */ -#include <string.h> - -#else - -#include <stddef.h> - -/* Return a newly allocated copy of at most N bytes of STRING. */ -extern char *strndup (const char *string, size_t n); - -#endif diff --git a/lib/strnlen.c b/lib/strnlen.c deleted file mode 100644 index d346d32..0000000 --- a/lib/strnlen.c +++ /dev/null @@ -1,31 +0,0 @@ -/* Find the length of STRING, but scan at most MAXLEN characters. - Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. - Written by Simon Josefsson. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include <string.h> - -/* Find the length of STRING, but scan at most MAXLEN characters. - If no '\0' terminator is found in that many characters, return MAXLEN. */ - -size_t -strnlen (const char *string, size_t maxlen) -{ - const char *end = memchr (string, '\0', maxlen); - return end ? (size_t) (end - string) : maxlen; -} diff --git a/lib/strnlen.h b/lib/strnlen.h deleted file mode 100644 index ba74dba..0000000 --- a/lib/strnlen.h +++ /dev/null @@ -1,32 +0,0 @@ -/* Find the length of STRING, but scan at most MAXLEN characters. - Copyright (C) 2005 Free Software Foundation, Inc. - Written by Simon Josefsson. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef STRNLEN_H -#define STRNLEN_H - -/* Get strnlen declaration, if available. */ -#include <string.h> - -#if defined HAVE_DECL_STRNLEN && !HAVE_DECL_STRNLEN -/* Find the length (number of bytes) of STRING, but scan at most - MAXLEN bytes. If no '\0' terminator is found in that many bytes, - return MAXLEN. */ -extern size_t strnlen(const char *string, size_t maxlen); -#endif - -#endif /* STRNLEN_H */ diff --git a/lib/strnlen1.c b/lib/strnlen1.c deleted file mode 100644 index 422ed9e..0000000 --- a/lib/strnlen1.c +++ /dev/null @@ -1,36 +0,0 @@ -/* Find the length of STRING + 1, but scan at most MAXLEN bytes. - Copyright (C) 2005-2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -/* Specification. */ -#include "strnlen1.h" - -#include <string.h> - -/* Find the length of STRING + 1, but scan at most MAXLEN bytes. - If no '\0' terminator is found in that many characters, return MAXLEN. */ -/* This is the same as strnlen (string, maxlen - 1) + 1. */ -size_t -strnlen1 (const char *string, size_t maxlen) -{ - const char *end = (const char *) memchr (string, '\0', maxlen); - if (end != NULL) - return end - string + 1; - else - return maxlen; -} diff --git a/lib/strnlen1.h b/lib/strnlen1.h deleted file mode 100644 index 7ce7d0c..0000000 --- a/lib/strnlen1.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Find the length of STRING + 1, but scan at most MAXLEN bytes. - Copyright (C) 2005 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _STRNLEN1_H -#define _STRNLEN1_H - -#include <stddef.h> - - -#ifdef __cplusplus -extern "C" { -#endif - - -/* Find the length of STRING + 1, but scan at most MAXLEN bytes. - If no '\0' terminator is found in that many characters, return MAXLEN. */ -/* This is the same as strnlen (string, maxlen - 1) + 1. */ -extern size_t strnlen1 (const char *string, size_t maxlen); - - -#ifdef __cplusplus -} -#endif - - -#endif /* _STRNLEN1_H */ diff --git a/lib/strpbrk.c b/lib/strpbrk.c deleted file mode 100644 index 7843a6d..0000000 --- a/lib/strpbrk.c +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright (C) 1991, 1994, 2000, 2002-2003, 2006 Free Software - Foundation, Inc. - - NOTE: The canonical source of this file is maintained with the GNU C Library. - Bugs can be reported to bug-glibc@prep.ai.mit.edu. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include <stddef.h> -#include <string.h> - -#undef strpbrk - -/* Find the first occurrence in S of any character in ACCEPT. */ -char * -strpbrk (const char *s, const char *accept) -{ - while (*s != '\0') - { - const char *a = accept; - while (*a != '\0') - if (*a++ == *s) - return (char *) s; - ++s; - } - - return NULL; -} diff --git a/lib/strpbrk.h b/lib/strpbrk.h deleted file mode 100644 index acc8d35..0000000 --- a/lib/strpbrk.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Searching in a string. - Copyright (C) 2001-2002 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#if HAVE_STRPBRK - -/* Get strpbrk() declaration. */ -#include <string.h> - -#else - -/* Find the first occurrence in S of any character in ACCEPT. */ -extern char *strpbrk (const char *s, const char *accept); - -#endif diff --git a/lib/strsep.c b/lib/strsep.c deleted file mode 100644 index 9f2fdd2..0000000 --- a/lib/strsep.c +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright (C) 2004, 2007 Free Software Foundation, Inc. - - Written by Yoann Vandoorselaere <yoann@prelude-ids.org>. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -/* Specification. */ -#include <string.h> - -char * -strsep (char **stringp, const char *delim) -{ - char *start = *stringp; - char *ptr; - - if (start == NULL) - return NULL; - - /* Optimize the case of no delimiters. */ - if (delim[0] == '\0') - { - *stringp = NULL; - return start; - } - - /* Optimize the case of one delimiter. */ - if (delim[1] == '\0') - ptr = strchr (start, delim[0]); - else - /* The general case. */ - ptr = strpbrk (start, delim); - if (ptr == NULL) - { - *stringp = NULL; - return start; - } - - *ptr = '\0'; - *stringp = ptr + 1; - - return start; -} diff --git a/lib/strsep.h b/lib/strsep.h deleted file mode 100644 index ca28a2f..0000000 --- a/lib/strsep.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (C) 2004 Free Software Foundation, Inc. - - Written by Yoann Vandoorselaere <yoann@prelude-ids.org>. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef GNULIB_STRSEP_H_ -#define GNULIB_STRSEP_H_ - -#if HAVE_STRSEP - -/* - * Get strsep() declaration. - */ -#include <string.h> - -#else - -/* Searches the next delimiter (char listed in DELIM) starting at *STRINGP. - If one is found, it is overwritten with a NUL, and *STRINGP is advanced - to point to the next char after it. Otherwise, *STRINGP is set to NULL. - If *STRINGP was already NULL, nothing happens. - Returns the old value of *STRINGP. - - This is a variant of strtok() that is multithread-safe and supports - empty fields. - - Caveat: It modifies the original string. - Caveat: These functions cannot be used on constant strings. - Caveat: The identity of the delimiting character is lost. - Caveat: It doesn't work with multibyte strings unless all of the delimiter - characters are ASCII characters < 0x30. - - See also strtok_r(). */ - -extern char *strsep (char **stringp, const char *delim); - -#endif - -#endif /* GNULIB_STRSEP_H_ */ diff --git a/lib/strstr.c b/lib/strstr.c deleted file mode 100644 index e94cef7..0000000 --- a/lib/strstr.c +++ /dev/null @@ -1,128 +0,0 @@ -/* Searching in a string. - Copyright (C) 2005 Free Software Foundation, Inc. - Written by Bruno Haible <bruno@clisp.org>, 2005. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -/* Specification. */ -#include "strstr.h" - -#include <stddef.h> /* for NULL */ - -#if HAVE_MBRTOWC -# include "mbuiter.h" -#endif - -/* Find the first occurrence of NEEDLE in HAYSTACK. */ -char * -strstr (const char *haystack, const char *needle) -{ - /* Be careful not to look at the entire extent of haystack or needle - until needed. This is useful because of these two cases: - - haystack may be very long, and a match of needle found early, - - needle may be very long, and not even a short initial segment of - needle may be found in haystack. */ -#if HAVE_MBRTOWC - if (MB_CUR_MAX > 1) - { - mbui_iterator_t iter_needle; - - mbui_init (iter_needle, needle); - if (mbui_avail (iter_needle)) - { - mbui_iterator_t iter_haystack; - - mbui_init (iter_haystack, haystack); - for (;; mbui_advance (iter_haystack)) - { - if (!mbui_avail (iter_haystack)) - /* No match. */ - return NULL; - - if (mb_equal (mbui_cur (iter_haystack), mbui_cur (iter_needle))) - /* The first character matches. */ - { - mbui_iterator_t rhaystack; - mbui_iterator_t rneedle; - - memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t)); - mbui_advance (rhaystack); - - mbui_init (rneedle, needle); - if (!mbui_avail (rneedle)) - abort (); - mbui_advance (rneedle); - - for (;; mbui_advance (rhaystack), mbui_advance (rneedle)) - { - if (!mbui_avail (rneedle)) - /* Found a match. */ - return (char *) mbui_cur_ptr (iter_haystack); - if (!mbui_avail (rhaystack)) - /* No match. */ - return NULL; - if (!mb_equal (mbui_cur (rhaystack), mbui_cur (rneedle))) - /* Nothing in this round. */ - break; - } - } - } - } - else - return (char *) haystack; - } - else -#endif - { - if (*needle != '\0') - { - /* Speed up the following searches of needle by caching its first - character. */ - char b = *needle++; - - for (;; haystack++) - { - if (*haystack == '\0') - /* No match. */ - return NULL; - if (*haystack == b) - /* The first character matches. */ - { - const char *rhaystack = haystack + 1; - const char *rneedle = needle; - - for (;; rhaystack++, rneedle++) - { - if (*rneedle == '\0') - /* Found a match. */ - return (char *) haystack; - if (*rhaystack == '\0') - /* No match. */ - return NULL; - if (*rhaystack != *rneedle) - /* Nothing in this round. */ - break; - } - } - } - } - else - return (char *) haystack; - } -} diff --git a/lib/strstr.h b/lib/strstr.h deleted file mode 100644 index a28b140..0000000 --- a/lib/strstr.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Searching in a string. - Copyright (C) 2001-2003, 2005 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - - -/* Include string.h: on glibc systems, it contains a macro definition of - strstr() that would collide with our definition if included afterwards. */ -#include <string.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* No known system has a strstr() function that works correctly in - multibyte locales. Therefore we use our version always. */ -#undef strstr -#define strstr rpl_strstr - -/* Find the first occurrence of NEEDLE in HAYSTACK. */ -extern char *strstr (const char *haystack, const char *needle); - -#ifdef __cplusplus -} -#endif diff --git a/lib/uinttostr.c b/lib/uinttostr.c deleted file mode 100644 index 52d288e..0000000 --- a/lib/uinttostr.c +++ /dev/null @@ -1,3 +0,0 @@ -#define inttostr uinttostr -#define inttype unsigned int -#include "inttostr.c" diff --git a/lib/umaxtostr.c b/lib/umaxtostr.c deleted file mode 100644 index 4f49a7f..0000000 --- a/lib/umaxtostr.c +++ /dev/null @@ -1,3 +0,0 @@ -#define inttostr umaxtostr -#define inttype uintmax_t -#include "inttostr.c" diff --git a/lib/verify.h b/lib/verify.h deleted file mode 100644 index d603b17..0000000 --- a/lib/verify.h +++ /dev/null @@ -1,141 +0,0 @@ -/* Compile-time assert-like macros. - - Copyright (C) 2005, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Paul Eggert, Bruno Haible, and Jim Meyering. */ - -#ifndef VERIFY_H -# define VERIFY_H 1 - -/* Each of these macros verifies that its argument R is nonzero. To - be portable, R should be an integer constant expression. Unlike - assert (R), there is no run-time overhead. - - There are two macros, since no single macro can be used in all - contexts in C. verify_true (R) is for scalar contexts, including - integer constant expression contexts. verify (R) is for declaration - contexts, e.g., the top level. - - Symbols ending in "__" are private to this header. - - The code below uses several ideas. - - * The first step is ((R) ? 1 : -1). Given an expression R, of - integral or boolean or floating-point type, this yields an - expression of integral type, whose value is later verified to be - constant and nonnegative. - - * Next this expression W is wrapped in a type - struct verify_type__ { unsigned int verify_error_if_negative_size__: W; }. - If W is negative, this yields a compile-time error. No compiler can - deal with a bit-field of negative size. - - One might think that an array size check would have the same - effect, that is, that the type struct { unsigned int dummy[W]; } - would work as well. However, inside a function, some compilers - (such as C++ compilers and GNU C) allow local parameters and - variables inside array size expressions. With these compilers, - an array size check would not properly diagnose this misuse of - the verify macro: - - void function (int n) { verify (n < 0); } - - * For the verify macro, the struct verify_type__ will need to - somehow be embedded into a declaration. To be portable, this - declaration must declare an object, a constant, a function, or a - typedef name. If the declared entity uses the type directly, - such as in - - struct dummy {...}; - typedef struct {...} dummy; - extern struct {...} *dummy; - extern void dummy (struct {...} *); - extern struct {...} *dummy (void); - - two uses of the verify macro would yield colliding declarations - if the entity names are not disambiguated. A workaround is to - attach the current line number to the entity name: - - #define GL_CONCAT0(x, y) x##y - #define GL_CONCAT(x, y) GL_CONCAT0 (x, y) - extern struct {...} * GL_CONCAT(dummy,__LINE__); - - But this has the problem that two invocations of verify from - within the same macro would collide, since the __LINE__ value - would be the same for both invocations. - - A solution is to use the sizeof operator. It yields a number, - getting rid of the identity of the type. Declarations like - - extern int dummy [sizeof (struct {...})]; - extern void dummy (int [sizeof (struct {...})]); - extern int (*dummy (void)) [sizeof (struct {...})]; - - can be repeated. - - * Should the implementation use a named struct or an unnamed struct? - Which of the following alternatives can be used? - - extern int dummy [sizeof (struct {...})]; - extern int dummy [sizeof (struct verify_type__ {...})]; - extern void dummy (int [sizeof (struct {...})]); - extern void dummy (int [sizeof (struct verify_type__ {...})]); - extern int (*dummy (void)) [sizeof (struct {...})]; - extern int (*dummy (void)) [sizeof (struct verify_type__ {...})]; - - In the second and sixth case, the struct type is exported to the - outer scope; two such declarations therefore collide. GCC warns - about the first, third, and fourth cases. So the only remaining - possibility is the fifth case: - - extern int (*dummy (void)) [sizeof (struct {...})]; - - * This implementation exploits the fact that GCC does not warn about - the last declaration mentioned above. If a future version of GCC - introduces a warning for this, the problem could be worked around - by using code specialized to GCC, e.g.,: - - #if 4 <= __GNUC__ - # define verify(R) \ - extern int (* verify_function__ (void)) \ - [__builtin_constant_p (R) && (R) ? 1 : -1] - #endif - - * In C++, any struct definition inside sizeof is invalid. - Use a template type to work around the problem. */ - - -/* Verify requirement R at compile-time, as an integer constant expression. - Return 1. */ - -# ifdef __cplusplus -template <int w> - struct verify_type__ { unsigned int verify_error_if_negative_size__: w; }; -# define verify_true(R) \ - (!!sizeof (verify_type__<(R) ? 1 : -1>)) -# else -# define verify_true(R) \ - (!!sizeof \ - (struct { unsigned int verify_error_if_negative_size__: (R) ? 1 : -1; })) -# endif - -/* Verify requirement R at compile-time, as a declaration without a - trailing ';'. */ - -# define verify(R) extern int (* verify_function__ (void)) [verify_true (R)] - -#endif diff --git a/lib/wcwidth.h b/lib/wcwidth.h deleted file mode 100644 index 8ed5ff8..0000000 --- a/lib/wcwidth.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Determine the number of screen columns needed for a character. - Copyright (C) 2006, 2007 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _gl_WCWIDTH_H -#define _gl_WCWIDTH_H - -#if HAVE_WCHAR_T - -/* Get wcwidth if available, along with wchar_t. */ -# include <wchar.h> - -/* Get iswprint. */ -# include <wctype.h> - -# ifndef HAVE_DECL_WCWIDTH -"this configure-time declaration test was not run" -# endif -# ifndef wcwidth -# if !HAVE_WCWIDTH - -/* wcwidth doesn't exist, so assume all printable characters have - width 1. */ -static inline int -wcwidth (wchar_t wc) -{ - return wc == 0 ? 0 : iswprint (wc) ? 1 : -1; -} - -# elif !HAVE_DECL_WCWIDTH - -/* wcwidth exists but is not declared. */ -extern -# ifdef __cplusplus -"C" -# endif -int wcwidth (int /* actually wchar_t */); - -# endif -# endif - -#endif /* HAVE_WCHAR_T */ - -#endif /* _gl_WCWIDTH_H */ diff --git a/lib/xalloc-die.c b/lib/xalloc-die.c deleted file mode 100644 index 090f060..0000000 --- a/lib/xalloc-die.c +++ /dev/null @@ -1,42 +0,0 @@ -/* Report a memory allocation failure and exit. - - Copyright (C) 1997, 1998, 1999, 2000, 2002, 2003, 2004, 2006 Free - Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#include "xalloc.h" - -#include <stdlib.h> - -#include "error.h" -#include "exitfail.h" - -#include "gettext.h" -#define _(msgid) gettext (msgid) - -void -xalloc_die (void) -{ - error (exit_failure, 0, "%s", _("memory exhausted")); - - /* The `noreturn' cannot be given to error, since it may return if - its first argument is 0. To help compilers understand the - xalloc_die does not return, call abort. Also, the abort is a - safety feature if exit_failure is 0 (which shouldn't happen). */ - abort (); -} diff --git a/lib/xalloc.h b/lib/xalloc.h deleted file mode 100644 index 0c6d8dc..0000000 --- a/lib/xalloc.h +++ /dev/null @@ -1,271 +0,0 @@ -/* xalloc.h -- malloc with out-of-memory checking - - Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, - 1999, 2000, 2003, 2004, 2006, 2007 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef XALLOC_H_ -# define XALLOC_H_ - -# include <stddef.h> - - -# ifdef __cplusplus -extern "C" { -# endif - - -# ifndef __attribute__ -# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8) || __STRICT_ANSI__ -# define __attribute__(x) -# endif -# endif - -# ifndef ATTRIBUTE_NORETURN -# define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__)) -# endif - -/* This function is always triggered when memory is exhausted. - It must be defined by the application, either explicitly - or by using gnulib's xalloc-die module. This is the - function to call when one wants the program to die because of a - memory allocation failure. */ -extern void xalloc_die (void) ATTRIBUTE_NORETURN; - -void *xmalloc (size_t s); -void *xzalloc (size_t s); -void *xcalloc (size_t n, size_t s); -void *xrealloc (void *p, size_t s); -void *x2realloc (void *p, size_t *pn); -void *xmemdup (void const *p, size_t s); -char *xstrdup (char const *str); - -/* Return 1 if an array of N objects, each of size S, cannot exist due - to size arithmetic overflow. S must be positive and N must be - nonnegative. This is a macro, not an inline function, so that it - works correctly even when SIZE_MAX < N. - - By gnulib convention, SIZE_MAX represents overflow in size - calculations, so the conservative dividend to use here is - SIZE_MAX - 1, since SIZE_MAX might represent an overflowed value. - However, malloc (SIZE_MAX) fails on all known hosts where - sizeof (ptrdiff_t) <= sizeof (size_t), so do not bother to test for - exactly-SIZE_MAX allocations on such hosts; this avoids a test and - branch when S is known to be 1. */ -# define xalloc_oversized(n, s) \ - ((size_t) (sizeof (ptrdiff_t) <= sizeof (size_t) ? -1 : -2) / (s) < (n)) - - -/* In the following macros, T must be an elementary or structure/union or - typedef'ed type, or a pointer to such a type. To apply one of the - following macros to a function pointer or array type, you need to typedef - it first and use the typedef name. */ - -/* Allocate an object of type T dynamically, with error checking. */ -/* extern t *XMALLOC (typename t); */ -# define XMALLOC(t) ((t *) xmalloc (sizeof (t))) - -/* Allocate memory for N elements of type T, with error checking. */ -/* extern t *XNMALLOC (size_t n, typename t); */ -# define XNMALLOC(n, t) \ - ((t *) (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t)))) - -/* Allocate an object of type T dynamically, with error checking, - and zero it. */ -/* extern t *XZALLOC (typename t); */ -# define XZALLOC(t) ((t *) xzalloc (sizeof (t))) - -/* Allocate memory for N elements of type T, with error checking, - and zero it. */ -/* extern t *XCALLOC (size_t n, typename t); */ -# define XCALLOC(n, t) \ - ((t *) (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t)))) - - -# if HAVE_INLINE -# define static_inline static inline -# else - void *xnmalloc (size_t n, size_t s); - void *xnrealloc (void *p, size_t n, size_t s); - void *x2nrealloc (void *p, size_t *pn, size_t s); - char *xcharalloc (size_t n); -# endif - -# ifdef static_inline - -/* Allocate an array of N objects, each with S bytes of memory, - dynamically, with error checking. S must be nonzero. */ - -static_inline void * -xnmalloc (size_t n, size_t s) -{ - if (xalloc_oversized (n, s)) - xalloc_die (); - return xmalloc (n * s); -} - -/* Change the size of an allocated block of memory P to an array of N - objects each of S bytes, with error checking. S must be nonzero. */ - -static_inline void * -xnrealloc (void *p, size_t n, size_t s) -{ - if (xalloc_oversized (n, s)) - xalloc_die (); - return xrealloc (p, n * s); -} - -/* If P is null, allocate a block of at least *PN such objects; - otherwise, reallocate P so that it contains more than *PN objects - each of S bytes. *PN must be nonzero unless P is null, and S must - be nonzero. Set *PN to the new number of objects, and return the - pointer to the new block. *PN is never set to zero, and the - returned pointer is never null. - - Repeated reallocations are guaranteed to make progress, either by - allocating an initial block with a nonzero size, or by allocating a - larger block. - - In the following implementation, nonzero sizes are increased by a - factor of approximately 1.5 so that repeated reallocations have - O(N) overall cost rather than O(N**2) cost, but the - specification for this function does not guarantee that rate. - - Here is an example of use: - - int *p = NULL; - size_t used = 0; - size_t allocated = 0; - - void - append_int (int value) - { - if (used == allocated) - p = x2nrealloc (p, &allocated, sizeof *p); - p[used++] = value; - } - - This causes x2nrealloc to allocate a block of some nonzero size the - first time it is called. - - To have finer-grained control over the initial size, set *PN to a - nonzero value before calling this function with P == NULL. For - example: - - int *p = NULL; - size_t used = 0; - size_t allocated = 0; - size_t allocated1 = 1000; - - void - append_int (int value) - { - if (used == allocated) - { - p = x2nrealloc (p, &allocated1, sizeof *p); - allocated = allocated1; - } - p[used++] = value; - } - - */ - -static_inline void * -x2nrealloc (void *p, size_t *pn, size_t s) -{ - size_t n = *pn; - - if (! p) - { - if (! n) - { - /* The approximate size to use for initial small allocation - requests, when the invoking code specifies an old size of - zero. 64 bytes is the largest "small" request for the - GNU C library malloc. */ - enum { DEFAULT_MXFAST = 64 }; - - n = DEFAULT_MXFAST / s; - n += !n; - } - } - else - { - /* Set N = ceil (1.5 * N) so that progress is made if N == 1. - Check for overflow, so that N * S stays in size_t range. - The check is slightly conservative, but an exact check isn't - worth the trouble. */ - if ((size_t) -1 / 3 * 2 / s <= n) - xalloc_die (); - n += (n + 1) / 2; - } - - *pn = n; - return xrealloc (p, n * s); -} - -/* Return a pointer to a new buffer of N bytes. This is like xmalloc, - except it returns char *. */ - -static_inline char * -xcharalloc (size_t n) -{ - return XNMALLOC (n, char); -} - -# endif - -# ifdef __cplusplus -} - -/* C++ does not allow conversions from void * to other pointer types - without a cast. Use templates to work around the problem when - possible. */ - -template <typename T> inline T * -xrealloc (T *p, size_t s) -{ - return (T *) xrealloc ((void *) p, s); -} - -template <typename T> inline T * -xnrealloc (T *p, size_t n, size_t s) -{ - return (T *) xnrealloc ((void *) p, n, s); -} - -template <typename T> inline T * -x2realloc (T *p, size_t *pn) -{ - return (T *) x2realloc ((void *) p, pn); -} - -template <typename T> inline T * -x2nrealloc (T *p, size_t *pn, size_t s) -{ - return (T *) x2nrealloc ((void *) p, pn, s); -} - -template <typename T> inline T * -xmemdup (T const *p, size_t s) -{ - return (T *) xmemdup ((void const *) p, s); -} - -# endif - - -#endif /* !XALLOC_H_ */ diff --git a/lib/xgetcwd.c b/lib/xgetcwd.c deleted file mode 100644 index db5f2d2..0000000 --- a/lib/xgetcwd.c +++ /dev/null @@ -1,41 +0,0 @@ -/* xgetcwd.c -- return current directory with unlimited length - - Copyright (C) 2001, 2003, 2004, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* Written by Jim Meyering. */ - -#include <config.h> - -#include "xgetcwd.h" - -#include <errno.h> - -#include "getcwd.h" -#include "xalloc.h" - -/* Return the current directory, newly allocated. - Upon an out-of-memory error, call xalloc_die. - Upon any other type of error, return NULL. */ - -char * -xgetcwd (void) -{ - char *cwd = getcwd (NULL, 0); - if (! cwd && errno == ENOMEM) - xalloc_die (); - return cwd; -} diff --git a/lib/xgetcwd.h b/lib/xgetcwd.h deleted file mode 100644 index 70afe35..0000000 --- a/lib/xgetcwd.h +++ /dev/null @@ -1,18 +0,0 @@ -/* prototype for xgetcwd - Copyright (C) 1995, 2001, 2003 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -extern char *xgetcwd (void); diff --git a/lib/xmalloc.c b/lib/xmalloc.c deleted file mode 100644 index 318e0dd..0000000 --- a/lib/xmalloc.c +++ /dev/null @@ -1,123 +0,0 @@ -/* xmalloc.c -- malloc with out of memory checking - - Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, - 1999, 2000, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, - Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -#if ! HAVE_INLINE -# define static_inline -#endif -#include "xalloc.h" -#undef static_inline - -#include <stdlib.h> -#include <string.h> - -#ifndef SIZE_MAX -# define SIZE_MAX ((size_t) -1) -#endif - -/* 1 if calloc is known to be compatible with GNU calloc. This - matters if we are not also using the calloc module, which defines - HAVE_CALLOC and supports the GNU API even on non-GNU platforms. */ -#if defined HAVE_CALLOC || defined __GLIBC__ -enum { HAVE_GNU_CALLOC = 1 }; -#else -enum { HAVE_GNU_CALLOC = 0 }; -#endif - -/* Allocate N bytes of memory dynamically, with error checking. */ - -void * -xmalloc (size_t n) -{ - void *p = malloc (n); - if (!p && n != 0) - xalloc_die (); - return p; -} - -/* Change the size of an allocated block of memory P to N bytes, - with error checking. */ - -void * -xrealloc (void *p, size_t n) -{ - p = realloc (p, n); - if (!p && n != 0) - xalloc_die (); - return p; -} - -/* If P is null, allocate a block of at least *PN bytes; otherwise, - reallocate P so that it contains more than *PN bytes. *PN must be - nonzero unless P is null. Set *PN to the new block's size, and - return the pointer to the new block. *PN is never set to zero, and - the returned pointer is never null. */ - -void * -x2realloc (void *p, size_t *pn) -{ - return x2nrealloc (p, pn, 1); -} - -/* Allocate S bytes of zeroed memory dynamically, with error checking. - There's no need for xnzalloc (N, S), since it would be equivalent - to xcalloc (N, S). */ - -void * -xzalloc (size_t s) -{ - return memset (xmalloc (s), 0, s); -} - -/* Allocate zeroed memory for N elements of S bytes, with error - checking. S must be nonzero. */ - -void * -xcalloc (size_t n, size_t s) -{ - void *p; - /* Test for overflow, since some calloc implementations don't have - proper overflow checks. But omit overflow and size-zero tests if - HAVE_GNU_CALLOC, since GNU calloc catches overflow and never - returns NULL if successful. */ - if ((! HAVE_GNU_CALLOC && xalloc_oversized (n, s)) - || (! (p = calloc (n, s)) && (HAVE_GNU_CALLOC || n != 0))) - xalloc_die (); - return p; -} - -/* Clone an object P of size S, with error checking. There's no need - for xnmemdup (P, N, S), since xmemdup (P, N * S) works without any - need for an arithmetic overflow check. */ - -void * -xmemdup (void const *p, size_t s) -{ - return memcpy (xmalloc (s), p, s); -} - -/* Clone STRING. */ - -char * -xstrdup (char const *string) -{ - return xmemdup (string, strlen (string) + 1); -} diff --git a/lib/xstrndup.c b/lib/xstrndup.c deleted file mode 100644 index 7ccefd7..0000000 --- a/lib/xstrndup.c +++ /dev/null @@ -1,37 +0,0 @@ -/* Duplicate a bounded initial segment of a string, with out-of-memory - checking. - Copyright (C) 2003, 2006, 2007 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -/* Specification. */ -#include "xstrndup.h" - -#include <string.h> -#include "xalloc.h" - -/* Return a newly allocated copy of at most N bytes of STRING. - In other words, return a copy of the initial segment of length N of - STRING. */ -char * -xstrndup (const char *string, size_t n) -{ - char *s = strndup (string, n); - if (! s) - xalloc_die (); - return s; -} diff --git a/lib/xstrndup.h b/lib/xstrndup.h deleted file mode 100644 index 88354cf..0000000 --- a/lib/xstrndup.h +++ /dev/null @@ -1,24 +0,0 @@ -/* Duplicate a bounded initial segment of a string, with out-of-memory - checking. - Copyright (C) 2003 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <stddef.h> - -/* Return a newly allocated copy of at most N bytes of STRING. - In other words, return a copy of the initial segment of length N of - STRING. */ -extern char *xstrndup (const char *string, size_t n); |