summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Makefile.am28
-rw-r--r--lib/Makefile.in325
-rw-r--r--lib/alloc.h40
-rw-r--r--lib/alloca.c492
-rw-r--r--lib/ansi2knr.119
-rw-r--r--lib/ansi2knr.c439
-rw-r--r--lib/ansidecl.h108
-rw-r--r--lib/basename.c32
-rw-r--r--lib/bitops.c115
-rw-r--r--lib/bitops.h30
-rw-r--r--lib/dirname.c39
-rw-r--r--lib/dynvec.c40
-rw-r--r--lib/dynvec.h16
-rw-r--r--lib/error.c132
-rw-r--r--lib/error.h49
-rw-r--r--lib/filenames.c375
-rw-r--r--lib/filenames.h37
-rw-r--r--lib/fnmatch.c200
-rw-r--r--lib/fnmatch.h67
-rw-r--r--lib/getopt.c770
-rw-r--r--lib/getopt.h130
-rw-r--r--lib/getopt1.c181
-rw-r--r--lib/hash.c295
-rw-r--r--lib/hash.h144
-rw-r--r--lib/idarg.h32
-rw-r--r--lib/idfile.c226
-rw-r--r--lib/idfile.h158
-rw-r--r--lib/idwalk.c1189
-rw-r--r--lib/language.map88
-rw-r--r--lib/misc.c72
-rw-r--r--lib/misc.h37
-rw-r--r--lib/obstack.c493
-rw-r--r--lib/obstack.h519
-rw-r--r--lib/pathmax.h53
-rw-r--r--lib/regex.c5488
-rw-r--r--lib/regex.h495
-rw-r--r--lib/rx.c7190
-rw-r--r--lib/rx.h3732
-rw-r--r--lib/scanners.c1201
-rw-r--r--lib/scanners.h67
-rw-r--r--lib/strcasecmp.c76
-rw-r--r--lib/strdup.c38
-rw-r--r--lib/strndup.c37
-rw-r--r--lib/strtok.c73
-rw-r--r--lib/strxtra.h40
-rw-r--r--lib/system.h45
-rw-r--r--lib/token.c49
-rw-r--r--lib/token.h39
-rw-r--r--lib/xgetcwd.c78
-rw-r--r--lib/xmalloc.c98
-rw-r--r--lib/xmalloc.h12
51 files changed, 25728 insertions, 0 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
new file mode 100644
index 0000000..520f05e
--- /dev/null
+++ b/lib/Makefile.am
@@ -0,0 +1,28 @@
+## Process this file with automake to create Makefile.in
+
+AUTOMAKE_OPTIONS = ansi2knr
+
+## Built but never installed.
+noinst_LIBRARIES = idu
+
+## Distributed but not listed in idu_SOURCES; presumably these are the
+## replacement sources behind @REGEXOBJ@, @LIBOBJS@ and @ALLOCA@ below,
+## plus the ansi2knr tool and its man page.
+EXTRA_DIST = alloca.c error.c getopt.c getopt1.c regex.c rx.c \
+ strcasecmp.c strdup.c strndup.c strtok.c \
+ obstack.c basename.c dirname.c fnmatch.c \
+ ansi2knr.1 ansi2knr.c
+
+idu_SOURCES = misc.c scanners.c idfile.c filenames.c bitops.c token.c \
+ idwalk.c hash.c dynvec.c xmalloc.c xgetcwd.c
+
+## language.map is installed into $(idudir); the same path is compiled
+## in through the LANGUAGE_MAP definition in DEFS.
+idudir = $(prefix)/share
+idu_DATA = language.map
+
+noinst_HEADERS = alloc.h bitops.h error.h filenames.h getopt.h idarg.h \
+ idfile.h misc.h regex.h rx.h scanners.h strxtra.h \
+ token.h system.h hash.h obstack.h pathmax.h dynvec.h \
+ xmalloc.h fnmatch.h ansidecl.h
+
+INCLUDES = -I. -I$(srcdir) \
+ -I../lib -I$(top_srcdir)/lib \
+ -I../intl -I$(top_srcdir)/intl \
+ -I.. -I$(top_srcdir)
+
+## DEFS expands $(localedir), which is not among the directory variables
+## of the generated Makefile.in; left undefined, LOCALEDIR would be
+## compiled in as the empty string.
+localedir = $(datadir)/locale
+DEFS = -DLOCALEDIR=\"$(localedir)\" -DLANGUAGE_MAP=\"$(idudir)/language.map\" @DEFS@
+idu_LIBADD = @REGEXOBJ@ @LIBOBJS@ @ALLOCA@
diff --git a/lib/Makefile.in b/lib/Makefile.in
new file mode 100644
index 0000000..ef55b85
--- /dev/null
+++ b/lib/Makefile.in
@@ -0,0 +1,325 @@
+# Makefile.in generated automatically by automake 1.0 from Makefile.am
+
+# Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+
+
+SHELL = /bin/sh
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = ..
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+transform = @program_transform_name@
+
+AUTOMAKE_OPTIONS = ansi2knr
+
+noinst_LIBRARIES = idu
+
+EXTRA_DIST = alloca.c error.c getopt.c getopt1.c regex.c rx.c \
+ strcasecmp.c strdup.c strndup.c strtok.c \
+ obstack.c basename.c dirname.c fnmatch.c \
+ ansi2knr.1 ansi2knr.c
+
+idu_SOURCES = misc.c scanners.c idfile.c filenames.c bitops.c token.c \
+ idwalk.c hash.c dynvec.c xmalloc.c xgetcwd.c
+
+idudir = $(prefix)/share
+idu_DATA = language.map
+
+noinst_HEADERS = alloc.h bitops.h error.h filenames.h getopt.h idarg.h \
+ idfile.h misc.h regex.h rx.h scanners.h strxtra.h \
+ token.h system.h hash.h obstack.h pathmax.h dynvec.h \
+ xmalloc.h fnmatch.h ansidecl.h
+
+INCLUDES = -I. -I$(srcdir) \
+ -I../lib -I$(top_srcdir)/lib \
+ -I../intl -I$(top_srcdir)/intl \
+ -I.. -I$(top_srcdir)
+
+# DEFS expands $(localedir), but the directory variables above stop at
+# oldincludedir and pkg*dir; define it here so LOCALEDIR is not compiled
+# in as the empty string.
+localedir = $(datadir)/locale
+DEFS = -DLOCALEDIR=\"$(localedir)\" -DLANGUAGE_MAP=\"$(idudir)/language.map\" @DEFS@
+idu_LIBADD = @REGEXOBJ@ @LIBOBJS@ @ALLOCA@
+mkinstalldirs = $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = ../config.h
+LIBRARIES = $(noinst_LIBRARIES)
+
+noinst_LIBFILES = libidu.a
+
+CC = @CC@
+LEX = @LEX@
+YACC = @YACC@
+CPPFLAGS = @CPPFLAGS@
+CFLAGS = @CFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+
+COMPILE = $(CC) -c $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS)
+LINK = $(CC) $(LDFLAGS) -o $@
+
+ANSI2KNR = @ANSI2KNR@
+o = .@U@o
+idu_OBJECTS = misc$o scanners$o idfile$o filenames$o bitops$o token$o \
+idwalk$o hash$o dynvec$o xmalloc$o xgetcwd$o
+EXTRA_idu_SOURCES =
+LIBFILES = libidu.a
+AR = ar
+RANLIB = @RANLIB@
+DATA = $(idu_DATA)
+
+HEADERS = $(noinst_HEADERS)
+
+DIST_COMMON = Makefile.am Makefile.in
+
+
+PACKAGE = @PACKAGE@
+VERSION = @VERSION@
+
+DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \
+ $(TEXINFOS) $(INFOS) $(MANS) $(EXTRA_DIST) $(DATA)
+DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \
+ $(TEXINFOS) $(INFO_DEPS) $(MANS) $(EXTRA_DIST) $(DATA)
+
+TAR = tar
+SOURCES = $(idu_SOURCES)
+OBJECTS = $(idu_OBJECTS)
+
+default: all
+
+
+$(srcdir)/Makefile.in: @MAINT@Makefile.am $(top_srcdir)/configure.in
+ cd $(top_srcdir) && automake $(subdir)/Makefile
+
+Makefile: $(top_builddir)/config.status Makefile.in
+ cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= ./config.status
+
+mostlyclean-noinstLIBRARIES:
+
+clean-noinstLIBRARIES:
+ rm -f $(noinst_LIBFILES)
+
+distclean-noinstLIBRARIES:
+
+maintainer-clean-noinstLIBRARIES:
+
+.c.o:
+ $(COMPILE) $<
+
+mostlyclean-compile:
+ rm -f *.o core
+
+clean-compile:
+
+distclean-compile:
+ rm -f *.tab.c
+
+maintainer-clean-compile:
+
+# De-ANSI-fication rules.  A `._c' file is the output of $(ANSI2KNR) run
+# on the corresponding `.c' file; a `._o' file is the object compiled
+# from that converted source.
+
+# .c -> ._c: convert into a temporary file first and rename it, so the
+# target only appears if the conversion succeeded.
+.c._c:
+ $(ANSI2KNR) $< > $*.tmp && mv $*.tmp $@
+
+# ._c -> ._o: the converted file is hard-linked to the name `_STEM.c'
+# before compiling, the resulting `_STEM.o' is renamed to the target, and
+# the link is removed again.
+._c._o:
+ @echo $(COMPILE) $<
+ @rm -f _$*.c
+ @ln $< _$*.c && $(COMPILE) _$*.c && mv _$*.o $@ && rm _$*.c
+
+# .c -> ._o: both steps above in one rule; the intermediate `STEM._c'
+# file is left in place.
+.c._o:
+ $(ANSI2KNR) $< > $*.tmp && mv $*.tmp $*._c
+ @echo $(COMPILE) $*._c
+ @rm -f _$*.c
+ @ln $*._c _$*.c && $(COMPILE) _$*.c && mv _$*.o $@ && rm _$*.c
+
+ansi2knr: ansi2knr.o
+ $(LINK) ansi2knr.o $(LIBS)
+
+$(OBJECTS): $(ANSI2KNR)
+ansi2knr.o: $(CONFIG_HEADER)
+
+mostlyclean-kr:
+ rm -f *._o *._c _*.c _*.o
+
+clean-kr:
+
+distclean-kr:
+ rm -f ansi2knr
+
+maintainer-clean-kr:
+$(idu_OBJECTS): ../config.h
+
+libidu.a: $(idu_OBJECTS) $(idu_LIBADD)
+ rm -f libidu.a
+ $(AR) cru libidu.a $(idu_OBJECTS) $(idu_LIBADD)
+ $(RANLIB) libidu.a
+
+# Install every file named in idu_DATA into $(idudir), creating the
+# directory first.  A copy in $(srcdir) is preferred; otherwise a copy in
+# the current directory is used.  A file found in neither place is
+# skipped without any message.
+install-iduDATA: $(idu_DATA)
+ $(mkinstalldirs) $(idudir)
+ list="$(idu_DATA)"; for p in $$list; do \
+ if test -f $(srcdir)/$$p; then \
+ $(INSTALL_DATA) $(srcdir)/$$p $(idudir)/$$p; \
+ else if test -f $$p; then \
+ $(INSTALL_DATA) $$p $(idudir)/$$p; \
+ fi; fi; \
+ done
+
+# Remove the installed copies of the idu_DATA files; $(idudir) itself is
+# left in place.
+uninstall-iduDATA:
+ list="$(idu_DATA)"; for p in $$list; do \
+ rm -f $(idudir)/$$p; \
+ done
+
+ID: $(HEADERS) $(SOURCES)
+ here=`pwd` && cd $(srcdir) && mkid -f$$here/ID $(SOURCES) $(HEADERS)
+
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES)
+ here=`pwd` && cd $(srcdir) && etags $(ETAGS_ARGS) $(SOURCES) $(HEADERS) -o $$here/TAGS
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ rm -f TAGS ID
+
+maintainer-clean-tags:
+
+subdir = lib
+distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
+distdir: $(DEP_DISTFILES)
+ @for file in `cd $(srcdir) && echo $(DISTFILES)`; do \
+ test -f $(distdir)/$$file \
+ || ln $(srcdir)/$$file $(distdir)/$$file 2> /dev/null \
+ || cp -p $(srcdir)/$$file $(distdir)/$$file; \
+ done
+# Per-object header dependencies.
+#
+# The members of idu_OBJECTS are named `STEM$o' (o = .@U@o), so their
+# dependency lines must be keyed on `$o' as well; a line keyed on a
+# literal `.o' does not apply to the object that is actually built
+# whenever `$o' expands to something other than `.o'.  Objects that are
+# not in idu_OBJECTS keep the names they had.
+alloca.o: alloca.c
+basename.o: basename.c
+bitops$o: bitops.c bitops.h
+dirname.o: dirname.c strxtra.h
+dynvec$o: dynvec.c dynvec.h alloc.h
+error.o: error.c
+filenames$o: filenames.c system.h \
+ strxtra.h filenames.h misc.h \
+ error.h
+fnmatch.o: fnmatch.c
+getopt.o: getopt.c
+getopt1.o: getopt1.c getopt.h
+hash$o: hash.c hash.h alloc.h \
+ error.h
+idfile$o: idfile.c alloc.h idfile.h \
+ hash.h strxtra.h error.h
+idwalk$o: idwalk.c system.h idfile.h \
+ hash.h error.h alloc.h \
+ dynvec.h strxtra.h scanners.h \
+ pathmax.h
+memcpy.o: memcpy.c
+memset.o: memset.c
+misc$o: misc.c system.h strxtra.h \
+ misc.h
+scanners$o: scanners.c system.h error.h \
+ strxtra.h token.h alloc.h \
+ scanners.h
+strcasecmp.o: strcasecmp.c
+strdup.o: strdup.c
+strerror.o: strerror.c
+strndup$o: strndup.c
+strtok.o: strtok.c
+token$o: token.c token.h
+xgetcwd$o: xgetcwd.c pathmax.h
+xmalloc$o: xmalloc.c error.h
+
+info:
+
+dvi:
+
+check: all
+
+installcheck:
+
+install-exec:
+
+install-data: install-iduDATA
+
+install: install-exec install-data all
+ @:
+
+uninstall: uninstall-iduDATA
+
+all: $(LIBFILES) $(DATA) $(HEADERS) Makefile
+
+install-strip:
+ $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
+installdirs:
+ $(mkinstalldirs) $(idudir)
+
+
+mostlyclean-generic:
+ test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
+
+clean-generic:
+ test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ rm -f Makefile $(DISTCLEANFILES)
+ rm -f config.cache config.log $(CONFIG_HEADER) stamp-h
+
+maintainer-clean-generic:
+ test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
+ test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
+mostlyclean: mostlyclean-noinstLIBRARIES mostlyclean-compile \
+ mostlyclean-kr mostlyclean-tags mostlyclean-generic
+
+clean: clean-noinstLIBRARIES clean-compile clean-kr clean-tags \
+ clean-generic mostlyclean
+
+distclean: distclean-noinstLIBRARIES distclean-compile distclean-kr \
+ distclean-tags distclean-generic clean
+ rm -f config.status
+
+maintainer-clean: maintainer-clean-noinstLIBRARIES \
+ maintainer-clean-compile maintainer-clean-kr \
+ maintainer-clean-tags maintainer-clean-generic \
+ distclean
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+# Targets that name actions rather than files.  install-strip is listed
+# too: it only re-invokes $(MAKE) and never creates a file of that name.
+.PHONY: default mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \
+clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \
+mostlyclean-compile distclean-compile clean-compile \
+maintainer-clean-compile mostlyclean-kr distclean-kr clean-kr \
+maintainer-clean-kr uninstall-iduDATA install-iduDATA tags \
+mostlyclean-tags distclean-tags clean-tags maintainer-clean-tags \
+distdir info dvi check installcheck install-exec install-data install \
+install-strip uninstall all installdirs mostlyclean-generic \
+distclean-generic clean-generic maintainer-clean-generic clean \
+mostlyclean distclean maintainer-clean
+
+.SUFFIXES:
+.SUFFIXES: .c .o ._c ._o
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/lib/alloc.h b/lib/alloc.h
new file mode 100644
index 0000000..bdcbf07
--- /dev/null
+++ b/lib/alloc.h
@@ -0,0 +1,40 @@
+/* alloc.h -- convenient interface macros for malloc(3) & friends
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _alloc_h_
+#define _alloc_h_
+
+#if HAVE_STDLIB_H
+#include <stdlib.h>
+#else /* not HAVE_STDLIB_H */
+#if HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+#endif /* not HAVE_STDLIB_H */
+#include <xmalloc.h>
+#include <obstack.h>
+
+/* Typed allocation helpers.  T is an element type, N an element count
+   and O an existing pointer.  Each macro casts its result to `T *'.  */
+
+/* Zero-filled array of N objects of type T.  calloc takes the element
+   count first and the element size second.  Unlike the macros below this
+   calls calloc directly, so the result can be a null pointer.  */
+#define CALLOC(t, n) ((t *) calloc ((n), sizeof (t)))
+/* Uninitialized array of N objects of type T, obtained from xmalloc.  */
+#define MALLOC(t, n) ((t *) xmalloc (sizeof (t) * (n)))
+/* Resize O to hold N objects of type T, using xrealloc.  */
+#define REALLOC(o, t, n) ((t *) xrealloc ((o), sizeof (t) * (n)))
+/* Freshly allocated copy of the N objects of type T starting at O.  */
+#define CLONE(o, t, n) ((t *) memcpy (MALLOC (t, (n)), (o), sizeof (t) * (n)))
+
+/* Allocator hooks consumed by the obstack macros, followed by a typed
+   wrapper that allocates N objects of type T on obstack OBS.  */
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+#define OBSTACK_ALLOC(obs, t, n) ((t *)obstack_alloc ((obs), (n)*sizeof(t)))
+
+#endif /* not _alloc_h_ */
diff --git a/lib/alloca.c b/lib/alloca.c
new file mode 100644
index 0000000..7020f32
--- /dev/null
+++ b/lib/alloca.c
@@ -0,0 +1,492 @@
+/* alloca.c -- allocate automatically reclaimed memory
+ (Mostly) portable public-domain implementation -- D A Gwyn
+
+ This implementation of the PWB library alloca function,
+ which is used to allocate space off the run-time stack so
+ that it is automatically reclaimed upon procedure exit,
+ was inspired by discussions with J. Q. Johnson of Cornell.
+ J.Otto Tennant <jot@cray.com> contributed the Cray support.
+
+ There are some preprocessor constants that can
+ be defined when compiling for your specific system, for
+ improved efficiency; however, the defaults should be okay.
+
+ The general concept of this implementation is to keep
+ track of all alloca-allocated blocks, and reclaim any
+ that are found to be deeper in the stack than the current
+ invocation. This heuristic does not reclaim storage as
+ soon as it becomes invalid, but it will do so eventually.
+
+ As a special case, alloca(0) reclaims storage without
+ allocating any. It is a good idea to use alloca(0) in
+ your main control loop, etc. to force garbage collection. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef emacs
+#include "blockinput.h"
+#endif
+
+/* If compiling with GCC 2, this file's not needed. */
+#if !defined (__GNUC__) || __GNUC__ < 2
+
+/* If someone has defined alloca as a macro,
+ there must be some other way alloca is supposed to work. */
+#ifndef alloca
+
+#ifdef emacs
+#ifdef static
+/* actually, only want this if static is defined as ""
+ -- this is for usg, in which emacs must undefine static
+ in order to make unexec workable
+ */
+#ifndef STACK_DIRECTION
+you
+lose
+-- must know STACK_DIRECTION at compile-time
+#endif /* STACK_DIRECTION undefined */
+#endif /* static */
+#endif /* emacs */
+
+/* If your stack is a linked list of frames, you have to
+ provide an "address metric" ADDRESS_FUNCTION macro. */
+
+#if defined (CRAY) && defined (CRAY_STACKSEG_END)
+long i00afunc ();
+#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg))
+#else
+#define ADDRESS_FUNCTION(arg) &(arg)
+#endif
+
+#if __STDC__
+typedef void *pointer;
+#else
+typedef char *pointer;
+#endif
+
+/* Supply NULL only when no header included above has already defined
+   it; an unconditional definition would clash with a differently
+   spelled one.  */
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* Different portions of Emacs need to call different versions of
+ malloc. The Emacs executable needs alloca to call xmalloc, because
+ ordinary malloc isn't protected from input signals. On the other
+ hand, the utilities in lib-src need alloca to call malloc; some of
+ them are very simple, and don't have an xmalloc routine.
+
+ Non-Emacs programs expect this to call use xmalloc.
+
+ Callers below should use malloc. */
+
+#ifndef emacs
+#define malloc xmalloc
+#endif
+extern pointer malloc ();
+
+/* Define STACK_DIRECTION if you know the direction of stack
+ growth for your system; otherwise it will be automatically
+ deduced at run-time.
+
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
+
+#ifndef STACK_DIRECTION
+#define STACK_DIRECTION 0 /* Direction unknown. */
+#endif
+
+#if STACK_DIRECTION != 0
+
+#define STACK_DIR STACK_DIRECTION /* Known at compile-time. */
+
+#else /* STACK_DIRECTION == 0; need run-time code. */
+
+static int stack_dir; /* 1 or -1 once known. */
+#define STACK_DIR stack_dir
+
+/* Set stack_dir to 1 or -1 by comparing the address of a local variable
+   in two nested activations of this function.  */
+
+static void
+find_stack_direction ()
+{
+  static char *outer_probe = NULL;	/* Address of the outer call's `marker'.  */
+  auto char marker;			/* Lives in the current frame.  */
+
+  if (outer_probe != NULL)
+    {
+      /* Nested call: see on which side of the outer frame we landed.  */
+      if (ADDRESS_FUNCTION (marker) > outer_probe)
+	stack_dir = 1;			/* Stack grew upward.  */
+      else
+	stack_dir = -1;			/* Stack grew downward.  */
+    }
+  else
+    {
+      /* Outermost call: record our position, then nest exactly once.  */
+      outer_probe = ADDRESS_FUNCTION (marker);
+
+      find_stack_direction ();
+    }
+}
+
+#endif /* STACK_DIRECTION == 0 */
+
+/* An "alloca header" is used to:
+ (a) chain together all alloca'ed blocks;
+ (b) keep track of stack depth.
+
+ It is very important that sizeof(header) agree with malloc
+ alignment chunk size. The following default should work okay. */
+
+#ifndef ALIGN_SIZE
+#define ALIGN_SIZE sizeof(double)
+#endif
+
+typedef union hdr
+{
+ char align[ALIGN_SIZE]; /* To force sizeof(header). */
+ struct
+ {
+ union hdr *next; /* For chaining headers. */
+ char *deep; /* For stack depth measure. */
+ } h;
+} header;
+
+static header *last_alloca_header = NULL; /* -> last alloca header. */
+
+/* Return a pointer to at least SIZE bytes of storage,
+   which will be automatically reclaimed upon exit from
+   the procedure that called alloca.  Originally, this space
+   was supposed to be taken from the current stack frame of the
+   caller, but that method cannot be made to work for some
+   implementations of C, for example under Gould's UTX/32.
+
+   Every call first frees each previously returned block that was
+   allocated from deeper in the stack than the current caller.  If SIZE
+   is 0 nothing is allocated and NULL is returned.  Otherwise a block of
+   sizeof (header) + SIZE bytes is obtained from malloc and the address
+   just past its header is returned.  If malloc returns a null pointer
+   the program is aborted instead of writing through it.  */
+
+pointer
+alloca (size)
+     unsigned size;
+{
+  auto char probe;		/* Probes stack depth: */
+  register char *depth = ADDRESS_FUNCTION (probe);
+
+#if STACK_DIRECTION == 0
+  if (STACK_DIR == 0)		/* Unknown growth direction.  */
+    find_stack_direction ();
+#endif
+
+  /* Reclaim garbage, defined as all alloca'd storage that
+     was allocated from deeper in the stack than currently.  */
+
+  {
+    register header *hp;	/* Traverses linked list.  */
+
+#ifdef emacs
+    BLOCK_INPUT;
+#endif
+
+    /* The list is newest-first, so the scan can stop at the first
+       header that is not deeper than the current caller.  */
+    for (hp = last_alloca_header; hp != NULL;)
+      if ((STACK_DIR > 0 && hp->h.deep > depth)
+	  || (STACK_DIR < 0 && hp->h.deep < depth))
+	{
+	  register header *np = hp->h.next;
+
+	  free ((pointer) hp);	/* Collect garbage.  */
+
+	  hp = np;		/* -> next header.  */
+	}
+      else
+	break;			/* Rest are not deeper.  */
+
+    last_alloca_header = hp;	/* -> last valid storage.  */
+
+#ifdef emacs
+    UNBLOCK_INPUT;
+#endif
+  }
+
+  if (size == 0)
+    return NULL;		/* No allocation required.  */
+
+  /* Allocate combined header + user data storage.  */
+
+  {
+    register pointer new = malloc (sizeof (header) + size);
+    /* Address of header.  */
+
+    /* When `emacs' is defined malloc is not remapped to xmalloc above,
+       so a failed allocation has to be caught here.  */
+    if (new == NULL)
+      abort ();
+
+    ((header *) new)->h.next = last_alloca_header;
+    ((header *) new)->h.deep = depth;
+
+    last_alloca_header = (header *) new;
+
+    /* User storage begins just after header.  */
+
+    return (pointer) ((char *) new + sizeof (header));
+  }
+}
+
+#if defined (CRAY) && defined (CRAY_STACKSEG_END)
+
+#ifdef DEBUG_I00AFUNC
+#include <stdio.h>
+#endif
+
+#ifndef CRAY_STACK
+#define CRAY_STACK
+#ifndef CRAY2
+/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */
+struct stack_control_header
+ {
+ long shgrow:32; /* Number of times stack has grown. */
+ long shaseg:32; /* Size of increments to stack. */
+ long shhwm:32; /* High water mark of stack. */
+ long shsize:32; /* Current size of stack (all segments). */
+ };
+
+/* The stack segment linkage control information occurs at
+ the high-address end of a stack segment. (The stack
+ grows from low addresses to high addresses.) The initial
+ part of the stack segment linkage control information is
+ 0200 (octal) words. This provides for register storage
+ for the routine which overflows the stack. */
+
+struct stack_segment_linkage
+ {
+ long ss[0200]; /* 0200 overflow words. */
+ long sssize:32; /* Number of words in this segment. */
+ long ssbase:32; /* Offset to stack base. */
+ long:32;
+ long sspseg:32; /* Offset to linkage control of previous
+ segment of stack. */
+ long:32;
+ long sstcpt:32; /* Pointer to task common address block. */
+ long sscsnm; /* Private control structure number for
+ microtasking. */
+ long ssusr1; /* Reserved for user. */
+ long ssusr2; /* Reserved for user. */
+ long sstpid; /* Process ID for pid based multi-tasking. */
+ long ssgvup; /* Pointer to multitasking thread giveup. */
+ long sscray[7]; /* Reserved for Cray Research. */
+ long ssa0;
+ long ssa1;
+ long ssa2;
+ long ssa3;
+ long ssa4;
+ long ssa5;
+ long ssa6;
+ long ssa7;
+ long sss0;
+ long sss1;
+ long sss2;
+ long sss3;
+ long sss4;
+ long sss5;
+ long sss6;
+ long sss7;
+ };
+
+#else /* CRAY2 */
+/* The following structure defines the vector of words
+ returned by the STKSTAT library routine. */
+struct stk_stat
+ {
+ long now; /* Current total stack size. */
+ long maxc; /* Amount of contiguous space which would
+ be required to satisfy the maximum
+ stack demand to date. */
+ long high_water; /* Stack high-water mark. */
+ long overflows; /* Number of stack overflow ($STKOFEN) calls. */
+ long hits; /* Number of internal buffer hits. */
+ long extends; /* Number of block extensions. */
+ long stko_mallocs; /* Block allocations by $STKOFEN. */
+ long underflows; /* Number of stack underflow calls ($STKRETN). */
+ long stko_free; /* Number of deallocations by $STKRETN. */
+ long stkm_free; /* Number of deallocations by $STKMRET. */
+ long segments; /* Current number of stack segments. */
+ long maxs; /* Maximum number of stack segments so far. */
+ long pad_size; /* Stack pad size. */
+ long current_address; /* Current stack segment address. */
+ long current_size; /* Current stack segment size. This
+ number is actually corrupted by STKSTAT to
+ include the fifteen word trailer area. */
+ long initial_address; /* Address of initial segment. */
+ long initial_size; /* Size of initial segment. */
+ };
+
+/* The following structure describes the data structure which trails
+ any stack segment. I think that the description in 'asdef' is
+ out of date. I only describe the parts that I am sure about. */
+
+struct stk_trailer
+ {
+ long this_address; /* Address of this block. */
+ long this_size; /* Size of this block (does not include
+ this trailer). */
+ long unknown2;
+ long unknown3;
+ long link; /* Address of trailer block of previous
+ segment. */
+ long unknown5;
+ long unknown6;
+ long unknown7;
+ long unknown8;
+ long unknown9;
+ long unknown10;
+ long unknown11;
+ long unknown12;
+ long unknown13;
+ long unknown14;
+ };
+
+#endif /* CRAY2 */
+#endif /* not CRAY_STACK */
+
+#ifdef CRAY2
+/* Determine a "stack measure" for an arbitrary ADDRESS.
+ I doubt that "lint" will like this much.
+
+ The measure is the offset of ADDRESS within the stack segment that
+ contains it, plus the sizes of all segments that precede that one
+ on the trailer chain. */
+
+static long
+i00afunc (long *address)
+{
+ struct stk_stat status;
+ struct stk_trailer *trailer;
+ long *block, size;
+ long result = 0;
+
+ /* We want to iterate through all of the segments. The first
+ step is to get the stack status structure. We could do this
+ more quickly and more directly, perhaps, by referencing the
+ $LM00 common block, but I know that this works. */
+
+ STKSTAT (&status);
+
+ /* Set up the iteration. */
+
+ /* struct stk_trailer is fifteen words long, and current_size is
+ documented above as including that fifteen word trailer area. */
+ trailer = (struct stk_trailer *) (status.current_address
+ + status.current_size
+ - 15);
+
+ /* There must be at least one stack segment. Therefore it is
+ a fatal error if "trailer" is null. */
+
+ if (trailer == 0)
+ abort ();
+
+ /* Discard segments that do not contain our argument address. */
+
+ while (trailer != 0)
+ {
+ block = (long *) trailer->this_address;
+ size = trailer->this_size;
+ if (block == 0 || size == 0)
+ abort ();
+ /* Step to the previous segment before testing, so that on exit
+ from the loop "trailer" already designates the predecessor of
+ the segment described by "block" and "size". */
+ trailer = (struct stk_trailer *) trailer->link;
+ if ((block <= address) && (address < (block + size)))
+ break;
+ }
+
+ /* Set the result to the offset in this segment and add the sizes
+ of all predecessor segments. */
+
+ /* Both operands are "long *", so the difference counts longs. */
+ result = address - block;
+
+ if (trailer == 0)
+ {
+ return result;
+ }
+
+ do
+ {
+ if (trailer->this_size <= 0)
+ abort ();
+ result += trailer->this_size;
+ trailer = (struct stk_trailer *) trailer->link;
+ }
+ while (trailer != 0);
+
+ /* We are done. Note that if you present a bogus address (one
+ not in any segment), you will get a different number back, formed
+ from subtracting the address of the first block. This is probably
+ not what you want. */
+
+ return (result);
+}
+
+#else /* not CRAY2 */
+/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP.
+   Determine the number of the cell within the stack,
+   given the address of the cell.  The purpose of this
+   routine is to linearize, in some sense, stack addresses
+   for alloca.
+
+   ADDRESS is the address to measure.  The value returned is the offset
+   of ADDRESS from the start of the stack segment found for it, plus the
+   sizes of the segments reached by following the previous-segment links
+   from there.  */
+
+static long
+i00afunc (long address)
+{
+  long stkl = 0;
+
+  long size, pseg, this_segment, stack;
+  long result = 0;
+
+  struct stack_segment_linkage *ssptr;
+
+  /* Register B67 contains the address of the end of the
+     current stack segment.  If you (as a subprogram) store
+     your registers on the stack and find that you are past
+     the contents of B67, you have overflowed the segment.
+
+     B67 also points to the stack segment linkage control
+     area, which is what we are really interested in.  */
+
+  stkl = CRAY_STACKSEG_END ();
+  ssptr = (struct stack_segment_linkage *) stkl;
+
+  /* If one subtracts 'size' from the end of the segment,
+     one has the address of the first word of the segment.
+
+     If this is not the first segment, 'pseg' will be
+     nonzero.  */
+
+  pseg = ssptr->sspseg;
+  size = ssptr->sssize;
+
+  this_segment = stkl - size;
+
+  /* It is possible that calling this routine itself caused
+     a stack overflow.  Discard stack segments which do not
+     contain the target address.  */
+
+  while (!(this_segment <= address && address <= stkl))
+    {
+#ifdef DEBUG_I00AFUNC
+      /* The values printed are longs, so the conversion needs the `l'
+	 length modifier.  */
+      fprintf (stderr, "%011lo %011lo %011lo\n", this_segment, address, stkl);
+#endif
+      if (pseg == 0)
+	break;
+      stkl = stkl - pseg;
+      ssptr = (struct stack_segment_linkage *) stkl;
+      size = ssptr->sssize;
+      pseg = ssptr->sspseg;
+      this_segment = stkl - size;
+    }
+
+  result = address - this_segment;
+
+  /* If you subtract pseg from the current end of the stack,
+     you get the address of the previous stack segment's end.
+     This seems a little convoluted to me, but I'll bet you save
+     a cycle somewhere.  */
+
+  while (pseg != 0)
+    {
+#ifdef DEBUG_I00AFUNC
+      fprintf (stderr, "%011lo %011lo\n", pseg, size);
+#endif
+      stkl = stkl - pseg;
+      ssptr = (struct stack_segment_linkage *) stkl;
+      size = ssptr->sssize;
+      pseg = ssptr->sspseg;
+      result += size;
+    }
+  return (result);
+}
+
+#endif /* not CRAY2 */
+#endif /* CRAY */
+
+#endif /* no alloca */
+#endif /* not GCC version 2 */
diff --git a/lib/ansi2knr.1 b/lib/ansi2knr.1
new file mode 100644
index 0000000..434ce8f
--- /dev/null
+++ b/lib/ansi2knr.1
@@ -0,0 +1,19 @@
+.TH ANSI2KNR 1 "31 December 1990"
+.SH NAME
+ansi2knr \- convert ANSI C to Kernighan & Ritchie C
+.SH SYNOPSIS
+.I ansi2knr
+input_file [output_file]
+.SH DESCRIPTION
+If no output_file is supplied, output goes to stdout.
+.br
+There are no error messages.
+.sp
+.I ansi2knr
+recognizes functions by seeing a non-keyword identifier at the left margin, followed by a left parenthesis, with a right parenthesis as the last character on the line. It will recognize a multi-line header if the last character on each line but the last is a left parenthesis or comma. These algorithms ignore whitespace and comments, except that the function name must be the first thing on the line.
+.sp
+The following constructs will confuse it:
+.br
+ - Any other construct that starts at the left margin and follows the above syntax (such as a macro or function call).
+.br
+ - Macros that tinker with the syntax of the function header.
diff --git a/lib/ansi2knr.c b/lib/ansi2knr.c
new file mode 100644
index 0000000..9bcc4ad
--- /dev/null
+++ b/lib/ansi2knr.c
@@ -0,0 +1,439 @@
+/* Copyright (C) 1989, 1991, 1993, 1994 Aladdin Enterprises. All rights reserved. */
+
+/* ansi2knr.c */
+/* Convert ANSI function declarations to K&R syntax */
+
+/*
+ansi2knr is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY. No author or distributor accepts responsibility
+to anyone for the consequences of using it or for whether it serves any
+particular purpose or works at all, unless he says so in writing. Refer
+to the GNU General Public License for full details.
+
+Everyone is granted permission to copy, modify and redistribute
+ansi2knr, but only under the conditions described in the GNU
+General Public License. A copy of this license is supposed to have been
+given to you along with ansi2knr so you can know your rights and
+responsibilities. It should be in a file named COPYLEFT. Among other
+things, the copyright notice and this notice must be preserved on all
+copies.
+*/
+
+/*
+ * Usage:
+ ansi2knr [--varargs] input_file [output_file]
+ * If no output_file is supplied, output goes to stdout.
+ * There are no error messages.
+ *
+ * ansi2knr recognizes function definitions by seeing a non-keyword
+ * identifier at the left margin, followed by a left parenthesis,
+ * with a right parenthesis as the last character on the line.
+ * It will recognize a multi-line header provided that the last character
+ * of the last line of the header is a right parenthesis,
+ * and no intervening line ends with a left brace or a semicolon.
+ * These algorithms ignore whitespace and comments, except that
+ * the function name must be the first thing on the line.
+ * The following constructs will confuse it:
+ * - Any other construct that starts at the left margin and
+ * follows the above syntax (such as a macro or function call).
+ * - Macros that tinker with the syntax of the function header.
+ *
+ * If the --varargs switch is supplied, ansi2knr will attempt to
+ * convert a ... argument to va_alist and va_dcl. If this switch is not
+ * supplied, ansi2knr will simply drop any such arguments.
+ */
+
+/*
+ * The original and principal author of ansi2knr is L. Peter Deutsch
+ * <ghost@aladdin.com>. Other authors are noted in the change history
+ * that follows (in reverse chronological order):
+ lpd 94-10-10 removed CONFIG_BROKETS conditional
+ lpd 94-07-16 added some conditionals to help GNU `configure',
+ suggested by Francois Pinard <pinard@iro.umontreal.ca>;
+ properly erase prototype args in function parameters,
+ contributed by Jim Avera <jima@netcom.com>;
+ correct error in writeblanks (it shouldn't erase EOLs)
+ lpd 89-xx-xx original version
+ */
+
+/* Most of the conditionals here are to make ansi2knr work with */
+/* the GNU configure machinery. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <ctype.h>
+
+#ifdef HAVE_CONFIG_H
+
+/*
+ For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
+ This will define HAVE_CONFIG_H and so, activate the following lines.
+ */
+
+# if STDC_HEADERS || HAVE_STRING_H
+# include <string.h>
+# else
+# include <strings.h>
+# endif
+
+#else /* not HAVE_CONFIG_H */
+
+/*
+ Without AC_CONFIG_HEADER, merely use <string.h> as in the original
+ Ghostscript distribution. This loses on older BSD systems.
+ */
+
+# include <string.h>
+
+#endif /* not HAVE_CONFIG_H */
+
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+#else
+/*
+ malloc and free should be declared in stdlib.h,
+ but if you've got a K&R compiler, they probably aren't.
+ */
+char *malloc();
+void free();
+#endif
+
+/* Scanning macros */
+#define isidchar(ch) (isalnum(ch) || (ch) == '_')
+#define isidfirstchar(ch) (isalpha(ch) || (ch) == '_')
+
+/* Forward references */
+char *skipspace();
+void writeblanks();
+int test1();
+int convert1();
+
+/*
+ * The main program.
+ *
+ * Usage: ansi2knr [--varargs] input_file [output_file]
+ *
+ * Reads input_file one line at a time into a fixed-size buffer.  Each
+ * time a line is read, test1() classifies everything accumulated so far:
+ * a recognized definition or header is rewritten by convert1(); a
+ * "maybe" (-1) keeps the text and appends the next line to it, unless
+ * the buffer is already full; anything else is copied to the output
+ * unchanged.  Exits with status 1 on an unrecognized switch, a file that
+ * cannot be opened, or a failed buffer allocation, and with status 0
+ * after printing the usage message.
+ */
+int
+main(argc, argv)
+    int argc;
+    char *argv[];
+{   FILE *in, *out;
+#define bufsize 5000 /* arbitrary size */
+    char *buf;
+    char *line;  /* where the next fgets() stores its data */
+    int convert_varargs = 0;
+    if ( argc > 1 && argv[1][0] == '-' )
+      { if ( !strcmp(argv[1], "--varargs") )
+          { convert_varargs = 1;
+            argc--;
+            argv++;
+          }
+        else
+          { fprintf(stderr, "Unrecognized switch: %s\n", argv[1]);
+            exit(1);
+          }
+      }
+    switch ( argc )
+      {
+      default:
+        printf("Usage: ansi2knr [--varargs] input_file [output_file]\n");
+        exit(0);
+      case 2:
+        out = stdout;
+        break;
+      case 3:
+        out = fopen(argv[2], "w");
+        if ( out == NULL )
+          { fprintf(stderr, "Cannot open output file %s\n", argv[2]);
+            exit(1);
+          }
+      }
+    in = fopen(argv[1], "r");
+    if ( in == NULL )
+      { fprintf(stderr, "Cannot open input file %s\n", argv[1]);
+        exit(1);
+      }
+    fprintf(out, "#line 1 \"%s\"\n", argv[1]);
+    buf = malloc(bufsize);
+    if ( buf == NULL )
+      { /* fgets() below would otherwise write through a null pointer */
+        fprintf(stderr, "Unable to allocate line buffer!\n");
+        exit(1);
+      }
+    line = buf;
+    while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
+      { switch ( test1(buf) )
+          {
+          case 2: /* a function header */
+            convert1(buf, out, 1, convert_varargs);
+            break;
+          case 1: /* a function */
+            convert1(buf, out, 0, convert_varargs);
+            break;
+          case -1: /* maybe the start of a function */
+            line = buf + strlen(buf);
+            if ( line != buf + (bufsize - 1) ) /* overflow check */
+              continue;  /* room left: append the next line */
+            /* falls through */
+          default: /* not a function */
+            fputs(buf, out);
+            break;
+          }
+        line = buf;
+      }
+    /* Flush text still pending from an unfinished "maybe" at end of file. */
+    if ( line != buf ) fputs(buf, out);
+    free(buf);
+    fclose(out);
+    fclose(in);
+    return 0;
+}
+
+/*
+ * Step over whitespace and C comments starting at p, moving forward
+ * when dir is 1 and backward when dir is -1.  Returns a pointer to the
+ * first character that is neither; if a NUL is met inside a comment,
+ * returns a pointer to that NUL instead.
+ */
+char *
+skipspace(p, dir)
+    register char *p;
+    register int dir; /* 1 for forward, -1 for backward */
+{   while ( 1 )
+      { while ( isspace(*p) )
+          p += dir;
+        /* Not at a comment delimiter: this is the answer. */
+        if ( *p != '/' || p[dir] != '*' )
+          return p;
+        p += 2 * dir;
+        /* Walk to the matching delimiter at the other end. */
+        while ( *p != '*' || p[dir] != '/' )
+          { if ( *p == 0 )
+              return p; /* multi-line comment?? */
+            p += dir;
+          }
+        p += 2 * dir;
+      }
+}
+
+/*
+ * Replace every character in [start, end) with a space, leaving
+ * carriage returns and newlines in place so line structure survives.
+ */
+void
+writeblanks(start, end)
+    char *start;
+    char *end;
+{   char *cur = start;
+    while ( cur < end )
+      { switch ( *cur )
+          {
+          case '\r':
+          case '\n':
+            break;  /* keep end-of-line characters */
+          default:
+            *cur = ' ';
+          }
+        cur++;
+      }
+}
+
+/*
+ * Test whether the string in buf is a function definition.
+ * The string may contain and/or end with a newline.
+ * Return as follows:
+ * 0 - definitely not a function definition;
+ * 1 - definitely a function definition;
+ * 2 - definitely a function prototype (NOT USED);
+ * -1 - may be the beginning of a function definition,
+ * append another line and look again.
+ * The reason we don't attempt to convert function prototypes is that
+ * Ghostscript's declaration-generating macros look too much like
+ * prototypes, and confuse the algorithms.
+ */
+int
+test1(buf)
+    char *buf;
+{   /* Keywords that may legitimately be followed by a left parenthesis. */
+    static char *words[] =
+      { "asm", "auto", "case", "char", "const", "double",
+        "extern", "float", "for", "if", "int", "long",
+        "register", "return", "short", "signed", "sizeof",
+        "static", "switch", "typedef", "unsigned",
+        "void", "volatile", "while", 0
+      };
+    register char *scan = buf;
+    char *tail;     /* last significant character of the text */
+    char *name_end; /* just past the identifier at the left margin */
+    char **key;
+    int verdict;
+    int name_len;
+
+    if ( !isidfirstchar(*scan) )
+      return 0;  /* no name at left margin */
+
+    /* Classify by the last non-blank, non-comment character. */
+    tail = skipspace(buf + strlen(buf) - 1, -1);
+    if ( *tail == '{' )
+      return 0;  /* not a function */
+    if ( *tail == ')' )
+      verdict = 1;
+    else if ( *tail == ';' )
+      verdict = 0 /*2*/;
+    else
+      verdict = -1;
+
+    /* The identifier must be followed by a non-empty parenthesized list. */
+    while ( isidchar(*scan) )
+      scan++;
+    name_end = scan;
+    scan = skipspace(scan, 1);
+    if ( *scan != '(' )
+      return 0;  /* not a function */
+    scan = skipspace(scan + 1, 1);
+    if ( *scan == ')' )
+      return 0;  /* no parameters */
+
+    /* Reject the match when the apparent function name is a keyword. */
+    name_len = name_end - buf;
+    for ( key = words; *key != 0; key++ )
+      { if ( strlen(*key) == name_len && !strncmp(*key, buf, name_len) )
+          return 0;  /* name is a keyword */
+      }
+    return verdict;
+}
+
+/*
+ * Convert a recognized function definition or header to K&R syntax.
+ *
+ * buf holds the text of the definition and is modified in place;
+ * the converted text is written to out.  If header is nonzero only
+ * "name();" plus the end-of-line characters of the remaining text are
+ * written; otherwise the bare parameter names are written inside the
+ * parentheses, followed by the parameter declarations, each terminated
+ * with a semicolon.  convert_varargs selects whether a "..." parameter
+ * becomes va_alist/va_dcl or is blanked out.
+ *
+ * The "breaks" table records two pointers per parameter: the start of
+ * the parameter's text and the start of the name it declares.  One more
+ * pointer, to the character after the closing parenthesis, is stored
+ * at the end.
+ *
+ * Returns 0 on success, or -1 (after copying buf to out unchanged)
+ * when the break table cannot be allocated.
+ */
+int
+convert1(buf, out, header, convert_varargs)
+ char *buf;
+ FILE *out;
+ int header; /* Boolean */
+ int convert_varargs; /* Boolean */
+{ char *endfn;
+ register char *p;
+ char **breaks;
+ unsigned num_breaks = 2; /* for testing */
+ char **btop;
+ char **bp;
+ char **ap;
+ char *vararg = 0;
+ /* Pre-ANSI implementations don't agree on whether strchr */
+ /* is called strchr or index, so we open-code it here. */
+ /* endfn ends up just past the first '(' in buf. */
+ /* NOTE(review): there is no NUL check here; presumably callers */
+ /* only pass text that test1 accepted -- confirm. */
+ for ( endfn = buf; *(endfn++) != '('; ) ;
+top: p = endfn;
+ breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
+ if ( breaks == 0 )
+ { /* Couldn't allocate break table, give up */
+ fprintf(stderr, "Unable to allocate break table!\n");
+ fputs(buf, out);
+ return -1;
+ }
+ /* Stop two slots short of the end of the table. */
+ btop = breaks + num_breaks * 2 - 2;
+ bp = breaks;
+ /* Parse the argument list */
+ do
+ { int level = 0;
+ char *lp = NULL;
+ char *rp;
+ char *end = NULL;
+ if ( bp >= btop )
+ { /* Filled up break table. */
+ /* Allocate a bigger one and start over. */
+ free((char *)breaks);
+ num_breaks <<= 1;
+ goto top;
+ }
+ *bp++ = p;
+ /* Find the end of the argument */
+ /* NOTE(review): this scan does not test for NUL either. */
+ for ( ; end == NULL; p++ )
+ { switch(*p)
+ {
+ case ',':
+ if ( !level ) end = p;
+ break;
+ case '(':
+ if ( !level ) lp = p;
+ level++;
+ break;
+ case ')':
+ if ( --level < 0 ) end = p;
+ else rp = p;
+ break;
+ case '/':
+ p = skipspace(p, 1) - 1;
+ break;
+ default:
+ ;
+ }
+ }
+ /* Erase any embedded prototype parameters. */
+ /* rp is assigned only by a ')' seen at nesting level >= 0. */
+ if ( lp )
+ writeblanks(lp + 1, rp);
+ p--; /* back up over terminator */
+ /* Find the name being declared. */
+ /* This is complicated because of procedure and */
+ /* array modifiers. */
+ for ( ; ; )
+ { p = skipspace(p - 1, -1);
+ switch ( *p )
+ {
+ case ']': /* skip array dimension(s) */
+ case ')': /* skip procedure args OR name */
+ { int level = 1;
+ while ( level )
+ switch ( *--p )
+ {
+ case ']': case ')': level++; break;
+ case '[': case '(': level--; break;
+ case '/': p = skipspace(p, -1) + 1; break;
+ default: ;
+ }
+ }
+ if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
+ { /* We found the name being declared */
+ while ( !isidfirstchar(*p) )
+ p = skipspace(p, 1) + 1;
+ goto found;
+ }
+ break;
+ default: goto found;
+ }
+ }
+ /* p is on the last character of the name, or on the last '.' of "...". */
+found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
+ { if ( convert_varargs )
+ { *bp++ = "va_alist";
+ vararg = p-2;
+ }
+ else
+ { p++;
+ if ( bp == breaks + 1 ) /* sole argument */
+ writeblanks(breaks[0], p);
+ else
+ writeblanks(bp[-1] - 1, p);
+ bp--;
+ }
+ }
+ else
+ { while ( isidchar(*p) ) p--;
+ *bp++ = p+1;
+ }
+ p = end;
+ }
+ while ( *p++ == ',' );
+ /* Final entry: the character after the closing parenthesis. */
+ *bp = p;
+ /* Make a special check for 'void' arglist */
+ if ( bp == breaks+2 )
+ { p = skipspace(breaks[0], 1);
+ if ( !strncmp(p, "void", 4) )
+ { p = skipspace(p+4, 1);
+ if ( p == breaks[2] - 1 )
+ { bp = breaks; /* yup, pretend arglist is empty */
+ writeblanks(breaks[0], p + 1);
+ }
+ }
+ }
+ /* Put out the function name and left parenthesis. */
+ p = buf;
+ while ( p != endfn ) putc(*p, out), p++;
+ /* Put out the declaration. */
+ if ( header )
+ { fputs(");", out);
+ for ( p = breaks[0]; *p; p++ )
+ if ( *p == '\r' || *p == '\n' )
+ putc(*p, out);
+ }
+ else
+ { /* Odd-numbered entries point at the declared names. */
+ for ( ap = breaks+1; ap < bp; ap += 2 )
+ { p = *ap;
+ while ( isidchar(*p) )
+ putc(*p, out), p++;
+ if ( ap < bp - 1 )
+ fputs(", ", out);
+ }
+ fputs(") ", out);
+ /* Put out the argument declarations */
+ /* The character before each later parameter start, and before */
+ /* the final entry, is overwritten with ';'. */
+ for ( ap = breaks+2; ap <= bp; ap += 2 )
+ (*ap)[-1] = ';';
+ if ( vararg != 0 )
+ { *vararg = 0;
+ fputs(breaks[0], out); /* any prior args */
+ fputs("va_dcl", out); /* the final arg */
+ fputs(bp[0], out);
+ }
+ else
+ fputs(breaks[0], out);
+ }
+ free((char *)breaks);
+ return 0;
+}
diff --git a/lib/ansidecl.h b/lib/ansidecl.h
new file mode 100644
index 0000000..c351653
--- /dev/null
+++ b/lib/ansidecl.h
@@ -0,0 +1,108 @@
+/* Copyright (C) 1991 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the GNU C Library; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* ANSI and traditional C compatibility macros
+
+ ANSI C is assumed if __STDC__ is #defined.
+
+ Macro ANSI C definition Traditional C definition
+ ----- ---- - ---------- ----------- - ----------
+ PTR `void *' `char *'
+ LONG_DOUBLE `long double' `double'
+ CONST `const' `'
+ VOLATILE `volatile' `'
+ SIGNED `signed' `'
+ PTRCONST `void *const' `char *'
+
+ DEFUN(name, arglist, args)
+
+ Defines function NAME.
+
+ ARGLIST lists the arguments, separated by commas and enclosed in
+ parentheses. ARGLIST becomes the argument list in traditional C.
+
+ ARGS list the arguments with their types. It becomes a prototype in
+ ANSI C, and the type declarations in traditional C. Arguments should
+ be separated with `AND'. For functions with a variable number of
+ arguments, the last thing listed should be `DOTS'.
+
+ DEFUN_VOID(name)
+
+ Defines a function NAME, which takes no arguments.
+
+ EXFUN(name, prototype)
+
+ Is used in an external function declaration.
+ In ANSI C it is `NAMEPROTOTYPE' (so PROTOTYPE should be enclosed in
+ parentheses). In traditional C it is `NAME()'.
+ For a function that takes no arguments, PROTOTYPE should be `(NOARGS)'.
+
+ For example:
+ extern int EXFUN(printf, (CONST char *format DOTS));
+ int DEFUN(fprintf, (stream, format),
+ FILE *stream AND CONST char *format DOTS) { ... }
+ void DEFUN_VOID(abort) { ... }
+*/
+
+#ifndef _ANSIDECL_H
+
+#define _ANSIDECL_H 1
+
+
+/* Every source file includes this file,
+ so they will all get the switch for lint. */
+/* LINTLIBRARY */
+
+
+#ifdef __STDC__
+
+/* ANSI C: the type macros expand to the ANSI keywords and types. */
+#define PTR void *
+#define PTRCONST void *CONST
+#define LONG_DOUBLE long double
+
+/* AND separates prototype parameters with a comma; DOTS appends ", ...". */
+#define AND ,
+#define NOARGS void
+#define CONST const
+#define VOLATILE volatile
+#define SIGNED signed
+#define DOTS , ...
+
+/* Declarations and definitions carry the full prototype; ARGLIST is unused. */
+#define EXFUN(name, proto) name proto
+#define DEFUN(name, arglist, args) name(args)
+#define DEFUN_VOID(name) name(NOARGS)
+
+#else /* Not ANSI C. */
+
+/* Traditional C: no `void *', `long double' or type qualifiers. */
+#define PTR char *
+#define PTRCONST PTR
+#define LONG_DOUBLE double
+
+/* AND terminates each parameter declaration; the qualifiers and DOTS vanish. */
+#define AND ;
+#define NOARGS
+#define CONST
+#define VOLATILE
+#define SIGNED
+#define DOTS
+
+/* DEFUN emits the bare ARGLIST, then ARGS as old-style declarations. */
+#define EXFUN(name, proto) name()
+#define DEFUN(name, arglist, args) name arglist args;
+#define DEFUN_VOID(name) name()
+
+#endif /* ANSI C. */
+
+
+#endif /* ansidecl.h */
diff --git a/lib/basename.c b/lib/basename.c
new file mode 100644
index 0000000..e474866
--- /dev/null
+++ b/lib/basename.c
@@ -0,0 +1,32 @@
+/* basename.c -- return the last element in a path
+ Copyright (C) 1994 Free Software Foundation, Inc.
+ This file is part of the Linux C Library.
+
+The Linux C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The Linux C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <ansidecl.h>
+#include <string.h>
+
+/* Return NAME with any leading path stripped off. */
+
+CONST char *
+DEFUN(basename, (name), CONST char *name)
+{
+  /* Everything after the last slash is the base name; when NAME has
+     no slash, it is already one.  The result points into NAME.  */
+  CONST char *last_slash = strrchr (name, '/');
+
+  if (last_slash == NULL)
+    return name;
+  return last_slash + 1;
+}
diff --git a/lib/bitops.c b/lib/bitops.c
new file mode 100644
index 0000000..5e96e26
--- /dev/null
+++ b/lib/bitops.c
@@ -0,0 +1,115 @@
+/* bitops.c -- Bit-vector manipulation for mkid
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <config.h>
+#include "bitops.h"
+
+static int str_to_int __P((char *bufp, int size));
+static char *int_to_str __P((int i, int size));
+
+/* Set in BIT_ARRAY the bit named by each SIZE-byte element of VEC,
+   stopping at the first element whose leading byte is 0xff.
+   Return the number of elements processed.  */
+int
+vec_to_bits (char *bit_array, char *vec, int size)
+{
+  int seen = 0;
+
+  while ((*vec & 0xff) != 0xff)
+    {
+      int bit = str_to_int (vec, size);
+
+      BITSET (bit_array, bit);
+      vec += size;
+      seen++;
+    }
+  return seen;
+}
+
+/* Append to VEC one SIZE-byte element (as produced by int_to_str) for
+   every bit set among the first BIT_COUNT bits of BIT_ARRAY, then
+   terminate VEC with a 0xff byte.  Bytes are copied only for SIZE
+   1 through 4.  Return the number of set bits found.  */
+int
+bits_to_vec (char *vec, char *bit_array, int bit_count, int size)
+{
+  int bit;
+  int found = 0;
+
+  for (bit = 0; bit < bit_count; bit++)
+    {
+      if (BITTST (bit_array, bit))
+        {
+          char *bytes = int_to_str (bit, size);
+
+          if (size >= 1 && size <= 4)
+            {
+              int k;
+
+              for (k = 0; k < size; k++)
+                vec[k] = bytes[k];
+              vec += size;
+            }
+          found++;
+        }
+    }
+  *vec = 0xff;
+
+  return found;
+}
+
+/* NEEDSWORK: ENDIAN */
+
+/* Store the low SIZE bytes of I, most significant byte first, at the
+   start of a static buffer and return that buffer.  Nothing is stored
+   unless SIZE is 1 through 4.  Each call overwrites the previous
+   result.  */
+static char *
+int_to_str (int i, int size)
+{
+  static char buf0[4];
+  int pos;
+
+  if (size >= 1 && size <= 4)
+    {
+      /* Fill from the last byte backwards, peeling off the low byte. */
+      for (pos = size - 1; pos >= 0; pos--)
+        {
+          buf0[pos] = (i & 0xff);
+          i >>= 8;
+        }
+    }
+  return buf0;
+}
+
+/* Assemble an integer from the SIZE bytes at BUFP, most significant
+   byte first.  Return 0 unless SIZE is 1 through 4.  */
+static int
+str_to_int (char *bufp, int size)
+{
+  int value = 0;
+  int k;
+
+  if (size < 1 || size > 4)
+    return 0;
+  for (k = 0; k < size; k++)
+    value = (value << 8) | (bufp[k] & 0xff);
+  return value;
+}
diff --git a/lib/bitops.h b/lib/bitops.h
new file mode 100644
index 0000000..187fa91
--- /dev/null
+++ b/lib/bitops.h
@@ -0,0 +1,30 @@
+/* bitops.h -- defs for interface to bitops.c, plus bit-vector macros
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _bitops_h_
+#define _bitops_h_
+
+/* Bit BN lives in byte BN >> 3 of array BA, at position BN & 7 counting
+   from the least significant bit.  BN is evaluated twice by each macro.
+   BITTST yields nonzero when the bit is set; BITSET, BITCLR and BITXOR
+   set, clear and toggle it.  BITAND ANDs the whole byte with the
+   single-bit mask, so it also clears every other bit of that byte.  */
+#define BITTST(ba, bn) ((ba)[(bn) >> 3] & (1 << ((bn) & 0x07)))
+#define BITSET(ba, bn) ((ba)[(bn) >> 3] |= (1 << ((bn) & 0x07)))
+#define BITCLR(ba, bn) ((ba)[(bn) >> 3] &=~(1 << ((bn) & 0x07)))
+#define BITAND(ba, bn) ((ba)[(bn) >> 3] &= (1 << ((bn) & 0x07)))
+#define BITXOR(ba, bn) ((ba)[(bn) >> 3] ^= (1 << ((bn) & 0x07)))
+
+/* Conversions between a bit array and a 0xff-terminated vector of
+   SIZE-byte bit numbers; both return the number of elements handled.  */
+int vec_to_bits __P((char *bit_array, char *vec, int size));
+int bits_to_vec __P((char *vec, char *bit_array, int bit_count, int size));
+
+#endif /* not _bitops_h_ */
diff --git a/lib/dirname.c b/lib/dirname.c
new file mode 100644
index 0000000..09f82aa
--- /dev/null
+++ b/lib/dirname.c
@@ -0,0 +1,39 @@
+/* dirname.c -- return the leading elements in a path
+ Copyright (C) 1996 Free Software Foundation, Inc.
+
+The Linux C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The Linux C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <ansidecl.h>
+#include <string.h>
+#include <config.h>
+#include "strxtra.h"
+
+/* Return the directory part of NAME: everything before the last slash,
+   with the slashes that separate it from the final component removed.
+   When NAME contains no slash the result is the string literal ".";
+   otherwise it is a fresh copy made by strndup.  A NAME whose only
+   directory part is the root yields "/".  */
+
+CONST char *
+DEFUN(dirname, (name), CONST char *name)
+{
+  CONST char *base;
+
+  base = strrchr (name, '/');
+  if (!base)
+    return ".";
+
+  /* Back up over the separating slashes, but never step in front of
+     NAME itself: for "/" or "/foo" the last slash is NAME[0].  */
+  while (base > name && base[-1] == '/')
+    base--;
+
+  /* Nothing precedes the slashes, so the directory is the root.  */
+  if (base == name)
+    return strndup (name, 1);
+
+  return strndup (name, base - name);
+}
diff --git a/lib/dynvec.c b/lib/dynvec.c
new file mode 100644
index 0000000..ff5e6ba
--- /dev/null
+++ b/lib/dynvec.c
@@ -0,0 +1,40 @@
+#include <config.h>
+#include "dynvec.h"
+#include "alloc.h"
+
+/* Allocate an empty vector with room for N elements.  */
+struct dynvec *
+make_dynvec (int n)
+{
+  struct dynvec *vector = MALLOC (struct dynvec, 1);
+
+  vector->dv_fill = 0;
+  vector->dv_capacity = n;
+  vector->dv_vec = MALLOC (void *, n);
+  return vector;
+}
+
+/* Release DV and its element array.  The elements themselves are not
+   freed.  */
+void
+dynvec_free (struct dynvec *dv)
+{
+  void **elements = dv->dv_vec;
+
+  free (elements);
+  free (dv);
+}
+
+/* Shrink DV's element array so that its capacity equals the number of
+   elements stored.  Does nothing when the array is already full.  */
+void
+dynvec_freeze (struct dynvec *dv)
+{
+  if (dv->dv_fill != dv->dv_capacity)
+    {
+      dv->dv_capacity = dv->dv_fill;
+      dv->dv_vec = REALLOC (dv->dv_vec, void *, dv->dv_capacity);
+    }
+}
+
+/* Store ELEMENT after the last element of DV, enlarging the array
+   first when it is full.  */
+void
+dynvec_append (struct dynvec *dv, void *element)
+{
+  if (dv->dv_fill == dv->dv_capacity)
+    {
+      /* Doubling a capacity of zero would leave it at zero, and the
+         store below would land outside the array.  That state arises
+         from make_dynvec (0) or from dynvec_freeze on an empty vector,
+         so grow to one slot in that case.  */
+      dv->dv_capacity = (dv->dv_capacity > 0) ? dv->dv_capacity * 2 : 1;
+      dv->dv_vec = REALLOC (dv->dv_vec, void *, dv->dv_capacity);
+    }
+  dv->dv_vec[dv->dv_fill++] = element;
+}
diff --git a/lib/dynvec.h b/lib/dynvec.h
new file mode 100644
index 0000000..f2beb7e
--- /dev/null
+++ b/lib/dynvec.h
@@ -0,0 +1,16 @@
+#ifndef _dynvec_h_
+#define _dynvec_h_
+
+/* A growable array of untyped pointers.  */
+struct dynvec
+{
+ void **dv_vec; /* the element array */
+ int dv_capacity; /* number of slots allocated in dv_vec */
+ int dv_fill; /* number of slots in use; index of the next append */
+};
+
+struct dynvec *make_dynvec __P((int n));
+void dynvec_free __P((struct dynvec *dv));
+void dynvec_freeze __P((struct dynvec *dv));
+void dynvec_append __P((struct dynvec *dv, void *element));
+
+#endif /* not _dynvec_h_ */
diff --git a/lib/error.c b/lib/error.c
new file mode 100644
index 0000000..797b697
--- /dev/null
+++ b/lib/error.c
@@ -0,0 +1,132 @@
+/* error.c -- error handler for noninteractive utilities
+ Copyright (C) 1990, 91, 92, 93, 94, 95 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+
+#if HAVE_VPRINTF || HAVE_DOPRNT || _LIBC
+# if __STDC__
+# include <stdarg.h>
+# define VA_START(args, lastarg) va_start(args, lastarg)
+# else
+# include <varargs.h>
+# define VA_START(args, lastarg) va_start(args)
+# endif
+#else
+# define va_alist a1, a2, a3, a4, a5, a6, a7, a8
+# define va_dcl char *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8;
+#endif
+
+#if STDC_HEADERS || _LIBC
+# include <stdlib.h>
+# include <string.h>
+#else
+void exit ();
+#endif
+
+/* This variable is incremented each time `error' is called. */
+unsigned int error_message_count;
+
+/* If NULL, error will flush stdout, then print on stderr the program
+ name, a colon and a space. Otherwise, error will call this
+ function without parameters instead. */
+void (*error_print_progname) () = NULL;
+
+#ifdef _LIBC
+#define program_name program_invocation_name
+#endif
+
+/* The calling program should define program_name and set it to the
+ name of the executing program. */
+extern char *program_name;
+
+#if HAVE_STRERROR || _LIBC
+# ifndef strerror /* On some systems, strerror is a macro */
+char *strerror ();
+# endif
+#else
+/* Fallback for systems without strerror: return the sys_errlist entry
+   for ERRNUM, or a fixed "unknown" message when ERRNUM is not a valid
+   positive index into that table.  */
+static char *
+private_strerror (errnum)
+     int errnum;
+{
+#if !HAVE_DECL_SYS_ERRLIST
+  extern char *sys_errlist[];
+#endif
+  extern int sys_nerr;
+
+  /* sys_nerr is the number of entries, so the last valid index is
+     sys_nerr - 1.  */
+  if (errnum > 0 && errnum < sys_nerr)
+    return sys_errlist[errnum];
+  return "Unknown system error";
+}
+#define strerror private_strerror
+#endif
+
+/* Print the program name and error message MESSAGE, which is a printf-style
+ format string with optional args.
+ The prefix comes from calling error_print_progname when it is set;
+ otherwise stdout is flushed and "program_name: " is written to stderr.
+ If ERRNUM is nonzero, print its corresponding system error message.
+ A newline is then written and stderr is flushed.
+ error_message_count is incremented on every call.
+ Exit with status STATUS if it is nonzero. */
+/* VARARGS */
+
+void
+#if defined(VA_START) && __STDC__
+error (int status, int errnum, const char *message, ...)
+#else
+error (status, errnum, message, va_alist)
+ int status;
+ int errnum;
+ char *message;
+ va_dcl
+#endif
+{
+#ifdef VA_START
+ va_list args;
+#endif
+
+ if (error_print_progname)
+ (*error_print_progname) ();
+ else
+ {
+ fflush (stdout);
+ fprintf (stderr, "%s: ", program_name);
+ }
+
+ /* With VA_START the arguments go through vfprintf or _doprnt;
+ without it, va_alist is the fixed list a1..a8 defined earlier in this file. */
+#ifdef VA_START
+ VA_START (args, message);
+# if HAVE_VPRINTF || _LIBC
+ vfprintf (stderr, message, args);
+# else
+ _doprnt (message, args, stderr);
+# endif
+ va_end (args);
+#else
+ fprintf (stderr, message, a1, a2, a3, a4, a5, a6, a7, a8);
+#endif
+
+ ++error_message_count;
+
+ if (errnum)
+ fprintf (stderr, ": %s", strerror (errnum));
+ putc ('\n', stderr);
+ fflush (stderr);
+ if (status)
+ exit (status);
+}
diff --git a/lib/error.h b/lib/error.h
new file mode 100644
index 0000000..d8ed9e5
--- /dev/null
+++ b/lib/error.h
@@ -0,0 +1,49 @@
+/* error.h -- declaration for error-reporting function
+ Copyright (C) 1995 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _error_h_
+#define _error_h_
+
+#include <errno.h>
+#ifndef errno
+extern int errno;
+#endif
+
+/* Make `__attribute__' and the __-protected attribute names harmless
+ on compilers that do not accept them. */
+#ifndef __attribute__
+/* This feature is available in gcc versions 2.5 and later. */
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__
+# define __attribute__(Spec) /* empty */
+# endif
+/* The __-protected variants of `format' and `printf' attributes
+ are accepted by gcc versions 2.6.4 (effectively 2.7) and later. */
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7)
+# define __format__ format
+# define __printf__ printf
+# endif
+#endif
+
+/* Under ANSI C, error is declared with a printf-format attribute:
+ argument 3 is the format string and its arguments start at 4. */
+#if __STDC__
+void error (int, int, const char *, ...) \
+ __attribute__ ((__format__ (__printf__, 3, 4)));
+#else
+void error ();
+#endif
+
+/* This variable is incremented each time `error' is called. */
+extern unsigned int error_message_count;
+
+#endif /* _error_h_ */
diff --git a/lib/filenames.c b/lib/filenames.c
new file mode 100644
index 0000000..fc154a7
--- /dev/null
+++ b/lib/filenames.c
@@ -0,0 +1,375 @@
+/* filenames.c -- file & directory name manipulations
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <config.h>
+#include "system.h"
+#include "strxtra.h"
+#include "filenames.h"
+#include "misc.h"
+#include "error.h"
+
+extern char *xgetcwd __P((void));
+
+/* root_name returns the base name of the file with any leading
+ * directory information or trailing suffix stripped off. Examples:
+ *
+ * /usr/include/stdio.h -> stdio
+ * fred -> fred
+ * barney.c -> barney
+ * bill/bob -> bob
+ * / -> < null string >
+ */
+char const *
+root_name (char const *path)
+{
+  /* Result storage shared by all calls: each call overwrites the
+     previous result, so callers must copy it if they need to keep it.  */
+  static char file_name_buffer[BUFSIZ];
+  char const *root;
+  char const *dot;
+  size_t length;
+
+  /* Step past the last '/' so that only the final component is left.  */
+  root = strrchr (path, '/');
+  if (root == NULL)
+    root = path;
+  else
+    root++;
+
+  /* The root ends just before the last '.', or at the end of the
+     component when it has no '.' at all.  */
+  dot = strrchr (root, '.');
+  if (dot == NULL)
+    length = strlen (root);
+  else
+    length = (size_t) (dot - root);
+
+  /* Truncate over-long components instead of writing past the end of
+     the fixed-size buffer.  */
+  if (length >= sizeof file_name_buffer)
+    length = sizeof file_name_buffer - 1;
+
+  strncpy (file_name_buffer, root, length);
+  file_name_buffer[length] = '\0';
+  return file_name_buffer;
+}
+
+/* suff_name returns the suffix (including the dot) or a null string
+ * if there is no suffix. Examples:
+ *
+ * /usr/include/stdio.h -> .h
+ * fred -> < null string >
+ * barney.c -> .c
+ * bill/bob -> < null string >
+ * / -> < null string >
+ */
+char const *
+suff_name (char const *path)
+{
+  char const *base;
+  char const *dot;
+
+  /* Only the last path component can carry the suffix.  Searching the
+     whole path would wrongly report ".d/bob" for "lib.d/bob".  Starting
+     at the '/' itself (rather than one past it) is harmless because '/'
+     is never a '.', and it lets callers pass a pointer to the last '/'.  */
+  base = strrchr (path, '/');
+  if (base == NULL)
+    base = path;
+
+  dot = strrchr (base, '.');
+  if (dot == NULL)
+    return "";
+  /* The result points into PATH; it is not a copy.  */
+  return dot;
+}
+
+/* Compare PATH1 and PATH2 component-wise: the result is nonzero only
+   when both have the same directory prefix (everything before the last
+   '/') and the same suffix as reported by suff_name.  The comparisons
+   are done with strequ/strnequ from strxtra.h (presumably plain string
+   equality tests -- confirm there), and their result is returned as is.  */
+int
+can_crunch (char const *path1, char const *path2)
+{
+  char const *slash1;
+  char const *slash2;
+
+  slash1 = strrchr (path1, '/');
+  slash2 = strrchr (path2, '/');
+
+  /* Neither name has a directory part: only the suffixes matter.  */
+  if (slash1 == NULL && slash2 == NULL)
+    return strequ (suff_name (path1), suff_name (path2));
+  /* Exactly one name has a directory part, so the prefixes cannot be
+     equal.  Bail out before doing pointer arithmetic on, or handing
+     suff_name, a null pointer.  */
+  if (slash1 == NULL || slash2 == NULL)
+    return 0;
+  /* Directory prefixes must have the same length ...  */
+  if ((slash1 - path1) != (slash2 - path2))
+    return 0;
+  /* ... and the same contents.  */
+  if (!strnequ (path1, path2, slash1 - path1))
+    return 0;
+  return strequ (suff_name (slash1), suff_name (slash2));
+}
+
+/* look_up adds ../s to the beginning of a file name until it finds
+ * the one that really exists. Returns NULL if it gets all the way
+ * to / and never finds it.
+ *
+ * If the file name starts with /, just return it as is.
+ *
+ * This routine is used to locate the ID database file.
+ */
+char const *
+look_up (char const *arg)
+{
+  /* Holds the "../../...<arg>" candidate; shared by all calls.  */
+  static char file_name_buffer[BUFSIZ];
+  char *buf = file_name_buffer;
+  char *id_path = 0;
+  struct stat rootb;
+  struct stat statb;
+  size_t arg_length;
+
+  if (arg == 0)
+    {
+      id_path = getenv ("IDPATH");
+      if (id_path)
+        {
+          /* strtok writes into its argument, so work on a private copy.
+             The copy is intentionally never freed: ARG may point into
+             it and be returned to the caller.  */
+          id_path = strdup (id_path);
+          if (id_path)
+            arg = strtok (id_path, ":");
+          /* FIXME: handle multiple ID file names */
+        }
+    }
+  if (arg == 0)
+    arg = ID_FILE_NAME;
+
+  /* if we got absolute name, just use it. */
+  if (arg[0] == '/')
+    return arg;
+  /* if the name we were given exists, don't bother searching */
+  if (stat (arg, &statb) == 0)
+    return arg;
+  /* search up the tree until we find a directory where this
+   * relative file name is visible.
+   * (or we run out of tree to search by hitting root).
+   */
+
+  if (stat ("/", &rootb) != 0)
+    return NULL;
+  arg_length = strlen (arg);
+  do
+    {
+      /* Give up rather than overrun the buffer when one more "../"
+         plus ARG and its terminating NUL no longer fit.  */
+      if ((size_t) (buf - file_name_buffer) + 3 + arg_length
+          >= sizeof file_name_buffer)
+        return NULL;
+      strcpy (buf, "../");
+      buf += 3;
+      strcpy (buf, arg);
+      if (stat (file_name_buffer, &statb) == 0)
+        return file_name_buffer;
+      /* Not here: strip ARG again and examine the directory itself.  */
+      *buf = '\0';
+      if (stat (file_name_buffer, &statb) != 0)
+        return NULL;
+    }
+  /* A directory is the root only when BOTH its inode and its device
+     match; matching on either one alone would end the search after a
+     single level on the root file system.  */
+  while (!((statb.st_ino == rootb.st_ino)
+           && (statb.st_dev == rootb.st_dev)));
+  return NULL;
+}
+
+/* define special name components */
+
+static char slash[] = "/";
+static char dot[] = ".";
+static char dotdot[] = "..";
+
+/* nextc points to the next character to look at in the string or is
+ * null if the end of string was reached.
+ *
+ * namep points to buffer that holds the components.
+ */
+static char const *nextc = NULL;
+static char *namep;
+
+/* lexname - Return next name component. Uses global variables initialized
+ * by canonize_file_name to figure out what it is scanning.
+ */
+static char const *
+lexname (void)
+{
+  char ch;
+  char const *start;
+
+  /* The scanner has already run off the end of the string.  */
+  if (nextc == NULL)
+    return NULL;
+
+  ch = *nextc++;
+  switch (ch)
+    {
+    case '\0':
+      /* End of input: remember that for any later call.  */
+      nextc = NULL;
+      return NULL;
+
+    case '/':
+      return slash;
+
+    case '.':
+      /* A lone "." or ".." component is reported through the shared
+         special-name strings so callers can compare by address.  */
+      if (nextc[0] == '/' || nextc[0] == '\0')
+        return dot;
+      if (nextc[0] == '.' && (nextc[1] == '/' || nextc[1] == '\0'))
+        {
+          nextc++;
+          return dotdot;
+        }
+      break;
+
+    default:
+      break;
+    }
+
+  /* An ordinary name: copy it into the component buffer up to, but not
+     including, the next '/' (which stays unread for the next call).  */
+  start = namep;
+  *namep++ = ch;
+  for (;;)
+    {
+      ch = *nextc;
+      if (ch == '/')
+        break;
+      *namep++ = ch;
+      if (ch == '\0')
+        {
+          /* The NUL just copied terminates the component as well.  */
+          nextc = NULL;
+          return start;
+        }
+      nextc++;
+    }
+  *namep++ = '\0';
+  return start;
+}
+
+/* canonize_file_name - Put a file name in canonical form. Looks for all the
+ * whacky wonderful things a demented *ni* programmer might put
+ * in a file name and reduces the name to canonical form.
+ */
+void
+canonize_file_name (char *n)
+{
+  /* N is rewritten in place.  The working storage is sized from N:
+     every component returned by lexname consumes at least one character
+     of N, and every ordinary name is stored with one terminating NUL
+     while being separated from the next name by at least one '/'.
+     Hence strlen (N) + 2 pointer slots and strlen (N) + 2 bytes are
+     always enough, whatever the length of N.  */
+  size_t size = strlen (n) + 2;
+  char const **components;
+  char const **cap;
+  char const **cad;
+  char const *cp;
+  char *namebuf;
+  char const *s;
+
+  components = (char const **) malloc (size * sizeof (char const *));
+  namebuf = (char *) malloc (size);
+  if (components == NULL || namebuf == NULL)
+    {
+      /* Out of memory: leave the name as it is.  */
+      if (components)
+        free (components);
+      if (namebuf)
+        free (namebuf);
+      return;
+    }
+  cap = &components[0];
+
+  /* initialize scanner */
+  nextc = n;
+  namep = &namebuf[0];
+
+  /* break the file name into individual components */
+  while ((cp = lexname ()))
+    {
+      *cap++ = cp;
+    }
+
+  /* If name is empty, leave it that way */
+  if (cap == &components[0])
+    {
+      free (components);
+      free (namebuf);
+      return;
+    }
+
+  /* flag end of component list */
+  *cap = NULL;
+
+  /* remove all trailing slashes and dots */
+  while ((--cap != &components[0]) &&
+         ((*cap == &slash[0]) || (*cap == &dot[0])))
+    *cap = NULL;
+
+  /* squeeze out all . / component sequences */
+  cap = &components[0];
+  cad = cap;
+  while (*cap)
+    {
+      if ((*cap == &dot[0]) && (*(cap + 1) == &slash[0]))
+        {
+          cap += 2;
+        }
+      else
+        {
+          *cad++ = *cap++;
+        }
+    }
+  *cad++ = NULL;
+
+  /* find multiple // and use last slash as root, except on apollo which
+   * apparently actually uses // in real file names (don't ask me why).
+   */
+#ifndef apollo
+  s = NULL;
+  cap = &components[0];
+  cad = cap;
+  while (*cap)
+    {
+      if ((s == &slash[0]) && (*cap == &slash[0]))
+        {
+          cad = &components[0];
+        }
+      s = *cap++;
+      *cad++ = s;
+    }
+  *cad = NULL;
+#endif
+
+  /* if this is absolute name get rid of any /.. at beginning */
+  if ((components[0] == &slash[0]) && (components[1] == &dotdot[0]))
+    {
+      cap = &components[1];
+      cad = cap;
+      while (*cap == &dotdot[0])
+        {
+          ++cap;
+          if (*cap == NULL)
+            break;
+          if (*cap == &slash[0])
+            ++cap;
+        }
+      while (*cap)
+        *cad++ = *cap++;
+      *cad = NULL;
+    }
+
+  /* squeeze out any name/.. sequences (but leave leading ../..) */
+  cap = &components[0];
+  cad = cap;
+  while (*cap)
+    {
+      if ((*cap == &dotdot[0]) &&
+          ((cad - 2) >= &components[0]) &&
+          ((*(cad - 2)) != &dotdot[0]))
+        {
+          cad -= 2;
+          ++cap;
+          if (*cap)
+            ++cap;
+        }
+      else
+        {
+          *cad++ = *cap++;
+        }
+    }
+  /* squeezing out a trailing /.. can leave unsightly trailing /s */
+  if ((cad >= &components[2]) && ((*(cad - 1)) == &slash[0]))
+    --cad;
+  *cad = NULL;
+  /* if it was just name/.. it now becomes . */
+  if (components[0] == NULL)
+    {
+      components[0] = &dot[0];
+      components[1] = NULL;
+    }
+
+  /* re-assemble components */
+  cap = &components[0];
+  while ((s = *cap++))
+    {
+      while (*s)
+        *n++ = *s++;
+    }
+  *n++ = '\0';
+
+  free (components);
+  free (namebuf);
+}
+
+/* Open FILE_NAME for reading and hand back the stream.  When the open
+   fails, a diagnostic carrying the current errno is issued through
+   error () with a zero status argument, and NULL is returned.  */
+FILE *
+open_source_FILE (char *file_name)
+{
+  FILE *stream = fopen (file_name, "r");
+
+  if (!stream)
+    error (0, errno, _("can't open `%s'"), file_name);
+  return stream;
+}
+
+/* Close a stream obtained from open_source_FILE.  A null FP (which is
+   what open_source_FILE returns on failure) is ignored, because passing
+   a null pointer to fclose is undefined.  */
+void
+close_source_FILE (FILE *fp)
+{
+  if (fp != NULL)
+    fclose (fp);
+}
diff --git a/lib/filenames.h b/lib/filenames.h
new file mode 100644
index 0000000..eb5969f
--- /dev/null
+++ b/lib/filenames.h
@@ -0,0 +1,37 @@
+/* filenames.h -- defs for interface to filenames.c
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _filenames_h_
+#define _filenames_h_
+
+#define ID_FILE_NAME "ID"
+
+char const *relative_file_name __P((char const *dir, char const *arg));
+char const *span_file_name __P((char const *dir, char const *arg));
+char const *root_name __P((char const *path));
+char const *suff_name __P((char const *path));
+int can_crunch __P((char const *path1, char const *path2));
+char const *look_up __P((char const *arg));
+/* NOTE(review): filenames.c defines no `cannoname'; the declaration is
+   kept in case something else still refers to it -- confirm and drop.  */
+void cannoname __P((char *n));
+/* Rewrite the file name N in place in canonical form.  This is the
+   name filenames.c actually defines.  */
+void canonize_file_name __P((char *n));
+char const *kshgetwd __P((char *pathname));
+char const *unsymlink __P((char *n));
+FILE *open_source_FILE __P((char *file_name));
+void close_source_FILE __P((FILE *fp));
+char const *get_sccs __P((char const *dir, char const *base, char const *sccs_dir));
+char const *co_rcs __P((char const *dir, char const *base, char const *rcs_dir));
+
+#endif /* not _filenames_h_ */
diff --git a/lib/fnmatch.c b/lib/fnmatch.c
new file mode 100644
index 0000000..2fb65b5
--- /dev/null
+++ b/lib/fnmatch.c
@@ -0,0 +1,200 @@
+/* Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <ctype.h>
+
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
+
+#if !defined(__GNU_LIBRARY__) && !defined(STDC_HEADERS)
+extern int errno;
+#endif
+
+/* Match STRING against the filename pattern PATTERN, returning zero if
+ it matches, nonzero if not. */
+int
+fnmatch (pattern, string, flags)
+     const char *pattern;
+     const char *string;
+     int flags;
+{
+  /* P walks the pattern, N walks the string being matched.  */
+  register const char *p = pattern, *n = string;
+  register char c;
+
+/* Note that this evaluates C many times.  */
+#define FOLD(c) ((flags & FNM_CASEFOLD) && isupper (c) ? tolower (c) : (c))
+
+  while ((c = *p++) != '\0')
+    {
+      c = FOLD (c);
+
+      switch (c)
+        {
+        case '?':
+          if (*n == '\0')
+            return FNM_NOMATCH;
+          else if ((flags & FNM_FILE_NAME) && *n == '/')
+            return FNM_NOMATCH;
+          else if ((flags & FNM_PERIOD) && *n == '.' &&
+                   (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/')))
+            return FNM_NOMATCH;
+          break;
+
+        case '\\':
+          if (!(flags & FNM_NOESCAPE))
+            {
+              c = *p++;
+              if (c == '\0')
+                /* Trailing \ loses.  Without this check P would already
+                   point past the end of the pattern.  */
+                return FNM_NOMATCH;
+              c = FOLD (c);
+            }
+          if (FOLD (*n) != c)
+            return FNM_NOMATCH;
+          break;
+
+        case '*':
+          if ((flags & FNM_PERIOD) && *n == '.' &&
+              (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/')))
+            return FNM_NOMATCH;
+
+          /* Collapse a run of `*' and `?'.  Only a `?' consumes a
+             character of the string; an extra `*' must not, otherwise
+             "**b" would fail to match "b".  */
+          for (c = *p++; c == '?' || c == '*'; c = *p++)
+            {
+              if ((flags & FNM_FILE_NAME) && *n == '/')
+                /* A slash does not match a wildcard under FNM_FILE_NAME.  */
+                return FNM_NOMATCH;
+              else if (c == '?')
+                {
+                  /* A ? needs to match one character.  */
+                  if (*n == '\0')
+                    return FNM_NOMATCH;
+                  ++n;
+                }
+            }
+
+          /* The pattern ends with the wildcard run: anything matches.  */
+          if (c == '\0')
+            return 0;
+
+          {
+            /* C1 is the first literal the rest of the pattern needs; it
+               lets us skip string positions that cannot start a match.  */
+            char c1 = (!(flags & FNM_NOESCAPE) && c == '\\') ? *p : c;
+            c1 = FOLD (c1);
+            for (--p; *n != '\0'; ++n)
+              if ((c == '[' || FOLD (*n) == c1) &&
+                  fnmatch (p, n, flags & ~FNM_PERIOD) == 0)
+                return 0;
+            return FNM_NOMATCH;
+          }
+
+        case '[':
+          {
+            /* Nonzero if the sense of the character class is inverted.  */
+            register int not;
+
+            if (*n == '\0')
+              return FNM_NOMATCH;
+
+            if ((flags & FNM_PERIOD) && *n == '.' &&
+                (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/')))
+              return FNM_NOMATCH;
+
+            not = (*p == '!' || *p == '^');
+            if (not)
+              ++p;
+
+            c = *p++;
+            for (;;)
+              {
+                register char cstart = c, cend = c;
+
+                if (!(flags & FNM_NOESCAPE) && c == '\\')
+                  cstart = cend = *p++;
+
+                cstart = cend = FOLD (cstart);
+
+                if (c == '\0')
+                  /* [ (unterminated) loses.  */
+                  return FNM_NOMATCH;
+
+                c = *p++;
+                c = FOLD (c);
+
+                if ((flags & FNM_FILE_NAME) && c == '/')
+                  /* [/] can never match.  */
+                  return FNM_NOMATCH;
+
+                if (c == '-' && *p != ']')
+                  {
+                    cend = *p++;
+                    if (!(flags & FNM_NOESCAPE) && cend == '\\')
+                      cend = *p++;
+                    if (cend == '\0')
+                      return FNM_NOMATCH;
+                    cend = FOLD (cend);
+
+                    c = *p++;
+                  }
+
+                if (FOLD (*n) >= cstart && FOLD (*n) <= cend)
+                  goto matched;
+
+                if (c == ']')
+                  break;
+              }
+            if (!not)
+              return FNM_NOMATCH;
+            break;
+
+          matched:;
+            /* Skip the rest of the [...] that already matched.  */
+            while (c != ']')
+              {
+                if (c == '\0')
+                  /* [... (unterminated) loses.  */
+                  return FNM_NOMATCH;
+
+                c = *p++;
+                if (!(flags & FNM_NOESCAPE) && c == '\\')
+                  /* XXX 1003.2d11 is unclear if this is right.  */
+                  ++p;
+              }
+            if (not)
+              return FNM_NOMATCH;
+          }
+          break;
+
+        default:
+          if (c != FOLD (*n))
+            return FNM_NOMATCH;
+        }
+
+      ++n;
+    }
+
+  if (*n == '\0')
+    return 0;
+
+  if ((flags & FNM_LEADING_DIR) && *n == '/')
+    /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
+    return 0;
+
+  return FNM_NOMATCH;
+}
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */
diff --git a/lib/fnmatch.h b/lib/fnmatch.h
new file mode 100644
index 0000000..d9d73b3
--- /dev/null
+++ b/lib/fnmatch.h
@@ -0,0 +1,67 @@
+/* Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#ifndef _FNMATCH_H
+
+#define _FNMATCH_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined (__cplusplus) || (defined (__STDC__) && __STDC__)
+#undef __P
+#define __P(protos) protos
+#else /* Not C++ or ANSI C. */
+#undef __P
+#define __P(protos) ()
+/* We can get away without defining `const' here only because in this file
+ it is used only inside the prototype for `fnmatch', which is elided in
+ non-ANSI C where `const' is problematical. */
+#endif /* C++ or ANSI C. */
+
+
+/* We #undef these before defining them because some losing systems
+ (HP-UX A.08.07 for example) define these in <unistd.h>. */
+#undef FNM_PATHNAME
+#undef FNM_NOESCAPE
+#undef FNM_PERIOD
+
+/* Bits set in the FLAGS argument to `fnmatch'. */
+#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */
+#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */
+#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */
+
+#if !defined (_POSIX_C_SOURCE) || _POSIX_C_SOURCE < 2 || defined (_GNU_SOURCE)
+#define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */
+#define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */
+#define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */
+#endif
+
+/* Value returned by `fnmatch' if STRING does not match PATTERN. */
+#define FNM_NOMATCH 1
+
+/* Match STRING against the filename pattern PATTERN,
+ returning zero if it matches, FNM_NOMATCH if not. */
+extern int fnmatch __P ((const char *__pattern, const char *__string,
+ int __flags));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* fnmatch.h */
diff --git a/lib/getopt.c b/lib/getopt.c
new file mode 100644
index 0000000..8f3e8cc
--- /dev/null
+++ b/lib/getopt.c
@@ -0,0 +1,770 @@
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
+ before changing it!
+
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+ Ditto for AIX 3.2 and <stdlib.h>. */
+#ifndef _NO_PROTO
+#define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#if !defined (__STDC__) || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
+#include <stdlib.h>
+#endif /* GNU C library. */
+
+#ifndef _
+/* This is for other GNU distributions with internationalized messages.
+ When compiling libc, the _ macro is predefined. */
+#ifdef HAVE_LIBINTL_H
+# include <libintl.h>
+# define _(msgid) gettext (msgid)
+#else
+# define _(msgid) (msgid)
+#endif
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg = NULL;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+/* XXX 1003.2 says this must be 1 before any call. */
+int optind = 0;
+
+/* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we scan,
+ so that eventually all the non-options are at the end. This allows options
+ to be given in any order, even with programs that were not written to
+ expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were written
+ to expect options and other ARGV-elements in any order and that care about
+ the ordering of the two. We describe each non-option ARGV-element
+ as if it were the argument of an option with character code 1.
+ Using `-' as the first character of the list of option characters
+ selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return EOF with `optind' != ARGC. */
+
+static enum
+{
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable. */
+static char *posixly_correct;
+
+#ifdef __GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+ because there are many ways it can cause trouble.
+ On some systems, it contains special magic macros that don't work
+ in GCC. */
+#include <string.h>
+#define my_index strchr
+#else
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+char *getenv ();
+
+/* Return a pointer to the first character of STR equal to CHR, or 0
+   when there is none.  The terminating NUL is never examined, so
+   searching for '\0' yields 0.  */
+static char *
+my_index (str, chr)
+     const char *str;
+     int chr;
+{
+  const char *scan;
+
+  for (scan = str; *scan; scan++)
+    if (*scan == chr)
+      return (char *) scan;
+  return 0;
+}
+
+/* If using GCC, we can safely declare strlen this way.
+ If not using GCC, it is ok not to declare it. */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+ That was relevant to code that was here before. */
+#if !defined (__STDC__) || !__STDC__
+/* gcc with -traditional declares the built-in strlen to return int,
+ and has done so at least since version 2.4.5. -- rms. */
+extern int strlen (const char *);
+#endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
+
+/* Handle permutation of arguments. */
+
+/* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first of them;
+ `last_nonopt' is the index after the last of them. */
+
+static int first_nonopt;
+static int last_nonopt;
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+static void
+exchange (argv)
+     char **argv;
+{
+  /* [lo,mid) is the skipped non-option run, [mid,hi) the options that
+     were processed after it.  */
+  int lo = first_nonopt;
+  int mid = last_nonopt;
+  int hi = optind;
+
+  /* Each pass swaps the shorter run with the far end of the longer one.
+     That settles the shorter run in its final place and leaves a
+     smaller instance of the same problem inside the longer run.  */
+  while (hi > mid && mid > lo)
+    {
+      int lower_len = mid - lo;
+      int upper_len = hi - mid;
+      int lower_is_shorter = upper_len > lower_len;
+      /* Number of elements to swap, and where the run that trades
+         places with argv[lo ...] begins.  */
+      int count = lower_is_shorter ? lower_len : upper_len;
+      int other = lower_is_shorter ? hi - lower_len : mid;
+      register int k;
+
+      for (k = 0; k < count; k++)
+        {
+          char *held = argv[lo + k];
+          argv[lo + k] = argv[other + k];
+          argv[other + k] = held;
+        }
+
+      /* Drop the run that has just been moved from further swapping.  */
+      if (lower_is_shorter)
+        hi -= count;
+      else
+        lo += count;
+    }
+
+  /* Update records for the slots the non-options now occupy.  */
+
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Initialize the internal data when the first call is made. */
+
+static const char *
+_getopt_initialize (optstring)
+     const char *optstring;
+{
+  /* Scanning starts at ARGV-element 1 (element 0 is the program name)
+     with an empty run of skipped non-options and no option element
+     partly scanned.  */
+
+  first_nonopt = last_nonopt = optind = 1;
+  nextchar = NULL;
+
+  posixly_correct = getenv ("POSIXLY_CORRECT");
+
+  /* A leading `-' or `+' in OPTSTRING selects the ordering explicitly
+     and is not itself an option character, so it is stepped over in the
+     returned string.  Otherwise POSIXLY_CORRECT decides.  */
+
+  switch (optstring[0])
+    {
+    case '-':
+      ordering = RETURN_IN_ORDER;
+      return optstring + 1;
+
+    case '+':
+      ordering = REQUIRE_ORDER;
+      return optstring + 1;
+
+    default:
+      if (posixly_correct != NULL)
+        ordering = REQUIRE_ORDER;
+      else
+        ordering = PERMUTE;
+      return optstring;
+    }
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+o If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns `EOF'.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else in the next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPTS of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+ int argc;
+ char *const *argv;
+ const char *optstring;
+ const struct option *longopts;
+ int *longind;
+ int long_only;
+{
+ /* Clear any argument left over from the previous call; it is set again
+ below only if this call finds one. */
+ optarg = NULL;
+
+ if (optind == 0)
+ {
+ optstring = _getopt_initialize (optstring);
+ optind = 1; /* Don't scan ARGV[0], the program name. */
+ }
+
+ /* NEXTCHAR is empty when the previous ARGV-element has been fully
+ consumed, so a new element must be selected before decoding. */
+ if (nextchar == NULL || *nextchar == '\0')
+ {
+ /* Advance to the next ARGV-element. */
+
+ if (ordering == PERMUTE)
+ {
+ /* If we have just processed some options following some non-options,
+ exchange them so that the options come first. */
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (last_nonopt != optind)
+ first_nonopt = optind;
+
+ /* Skip any additional non-options
+ and extend the range of non-options previously skipped. */
+
+ while (optind < argc
+ && (argv[optind][0] != '-' || argv[optind][1] == '\0'))
+ optind++;
+ last_nonopt = optind;
+ }
+
+ /* The special ARGV-element `--' means premature end of options.
+ Skip it like a null option,
+ then exchange with previous non-options as if it were an option,
+ then skip everything else like a non-option. */
+
+ if (optind != argc && !strcmp (argv[optind], "--"))
+ {
+ optind++;
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (first_nonopt == last_nonopt)
+ first_nonopt = optind;
+ last_nonopt = argc;
+
+ optind = argc;
+ }
+
+ /* If we have done all the ARGV-elements, stop the scan
+ and back over any non-options that we skipped and permuted. */
+
+ if (optind == argc)
+ {
+ /* Set the next-arg-index to point at the non-options
+ that we previously skipped, so the caller will digest them. */
+ if (first_nonopt != last_nonopt)
+ optind = first_nonopt;
+ return EOF;
+ }
+
+ /* If we have come to a non-option and did not permute it,
+ either stop the scan or describe it to the caller and pass it by. */
+
+ if ((argv[optind][0] != '-' || argv[optind][1] == '\0'))
+ {
+ if (ordering == REQUIRE_ORDER)
+ return EOF;
+ /* Otherwise hand the non-option to the caller through OPTARG,
+ reported as if it were the argument of an option with code 1. */
+ optarg = argv[optind++];
+ return 1;
+ }
+
+ /* We have found another option-ARGV-element.
+ Skip the initial punctuation. */
+
+ /* The comparison contributes 1 only when long options are enabled and
+ the element starts with `--', so both dashes are skipped then. */
+ nextchar = (argv[optind] + 1
+ + (longopts != NULL && argv[optind][1] == '-'));
+ }
+
+ /* Decode the current option-ARGV-element. */
+
+ /* Check whether the ARGV-element is a long option.
+
+ If long_only and the ARGV-element has the form "-f", where f is
+ a valid short option, don't consider it an abbreviated form of
+ a long option that starts with f. Otherwise there would be no
+ way to give the -f short option.
+
+ On the other hand, if there's a long option "fubar" and
+ the ARGV-element is "-fu", do consider that an abbreviation of
+ the long option, just like "--fu", and not "-f" with arg "u".
+
+ This distinction seems to be the most useful approach. */
+
+ if (longopts != NULL
+ && (argv[optind][1] == '-'
+ || (long_only && (argv[optind][2]
+ || !my_index (optstring, argv[optind][1])))))
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ /* indfound is read only when pfound is non-NULL, and every assignment
+ to pfound below is paired with an assignment to indfound. */
+ int indfound;
+ int option_index;
+
+ /* The option name ends at `=' (an attached argument follows) or at the
+ end of the ARGV-element. */
+ for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
+
+#ifdef lint
+ indfound = 0; /* Avoid spurious compiler warning. */
+#endif
+
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, nextchar, nameend - nextchar))
+ {
+ if (nameend - nextchar == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+
+ if (ambig && !exact)
+ {
+ if (opterr)
+ fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+ argv[0], argv[optind]);
+ /* Discard the rest of this element so the next call starts on the
+ following ARGV-element. */
+ nextchar += strlen (nextchar);
+ optind++;
+ return '?';
+ }
+
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ optind++;
+ /* *nameend is nonzero only when the name was ended by `='. */
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ optarg = nameend + 1;
+ else
+ {
+ if (opterr)
+ if (argv[optind - 1][1] == '-')
+ /* --option */
+ fprintf (stderr,
+ _("%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+ else
+ /* +option or -option */
+ fprintf (stderr,
+ _("%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[optind - 1][0], pfound->name);
+
+ nextchar += strlen (nextchar);
+ return '?';
+ }
+ }
+ /* Only has_arg == 1 takes the following ARGV-element as its argument;
+ any other value leaves OPTARG as NULL when no `=' was given. */
+ else if (pfound->has_arg == 1)
+ {
+ if (optind < argc)
+ optarg = argv[optind++];
+ else
+ {
+ if (opterr)
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[optind - 1]);
+ nextchar += strlen (nextchar);
+ /* A leading `:' in OPTSTRING selects `:' instead of `?' as the
+ missing-argument return value. */
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ nextchar += strlen (nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ /* With a flag pointer the option's value is stored through it and 0
+ is returned; otherwise the value itself is the return code. */
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[optind][1] == '-'
+ || my_index (optstring, *nextchar) == NULL)
+ {
+ if (opterr)
+ {
+ if (argv[optind][1] == '-')
+ /* --option */
+ fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+ argv[0], nextchar);
+ else
+ /* +option or -option */
+ fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+ argv[0], argv[optind][0], nextchar);
+ }
+ nextchar = (char *) "";
+ optind++;
+ return '?';
+ }
+ /* NOTE(review): none of the long-option error returns above assign
+ optopt; only the short-option paths below do. */
+ }
+
+ /* Look at and handle the next short option-character. */
+
+ {
+ char c = *nextchar++;
+ char *temp = my_index (optstring, c);
+
+ /* Increment `optind' when we start to process its last character. */
+ if (*nextchar == '\0')
+ ++optind;
+
+ /* `:' is rejected even if my_index finds it, because in OPTSTRING it
+ is used below as the argument marker after an option character. */
+ if (temp == NULL || c == ':')
+ {
+ if (opterr)
+ {
+ if (posixly_correct)
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, _("%s: illegal option -- %c\n"),
+ argv[0], c);
+ else
+ fprintf (stderr, _("%s: invalid option -- %c\n"),
+ argv[0], c);
+ }
+ optopt = c;
+ return '?';
+ }
+ if (temp[1] == ':')
+ {
+ if (temp[2] == ':')
+ {
+ /* This is an option that accepts an argument optionally. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ optind++;
+ }
+ else
+ optarg = NULL;
+ nextchar = NULL;
+ }
+ else
+ {
+ /* This is an option that requires an argument. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ optind++;
+ }
+ else if (optind == argc)
+ {
+ if (opterr)
+ {
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr,
+ _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+ }
+ optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ optarg = argv[optind++];
+ nextchar = NULL;
+ }
+ }
+ /* C is the option character, or the `:' / `?' substituted above when a
+ required argument was missing. */
+ return c;
+ }
+}
+
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Short-option entry point: scan with no long-option table, no place
+     to report a long-option index, and long_only switched off.  */
+  const struct option *no_longopts = (const struct option *) 0;
+  int *no_longind = (int *) 0;
+
+  return _getopt_internal (argc, argv, optstring,
+                           no_longopts, no_longind, 0);
+}
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+/* Test driver: parse ARGV with the short options "abc:d:0123456789",
+   print one line per option found, then list the remaining non-option
+   ARGV-elements.  Always exits with status 0.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* optind is still 0 before the first call; scanning starts at 1.  */
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == EOF)
+        break;
+
+      switch (c)
+        {
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        /* "d:" is declared in the option string above, so report it like
+           `c' instead of letting it fall into the default branch.  */
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          /* getopt has already printed a diagnostic if opterr is set.  */
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+        printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/lib/getopt.h b/lib/getopt.h
new file mode 100644
index 0000000..85454e9
--- /dev/null
+++ b/lib/getopt.h
@@ -0,0 +1,130 @@
+/* Declarations for getopt.
+ Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized. */
+
+extern int optopt;
+
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+/* One entry of the long-option table passed to getopt_long and
+ getopt_long_only. The table ends with an entry whose name is zero. */
+struct option
+{
+ /* Option name as typed after the leading dashes. Declared without
+ `const' when the compiler is not Standard C. */
+#if defined (__STDC__) && __STDC__
+ const char *name;
+#else
+ char *name;
+#endif
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ int has_arg;
+ /* If not NULL, `val' is stored through this pointer when the option is
+ found and 0 is returned instead of `val'. */
+ int *flag;
+ /* Value returned for the option, or stored through `flag'. */
+ int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+#define no_argument 0
+#define required_argument 1
+#define optional_argument 2
+
+#if defined (__STDC__) && __STDC__
+#ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* __GNU_LIBRARY__ */
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+ const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind);
+
+/* Internal only. Users should not call this directly. */
+extern int _getopt_internal (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind,
+ int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _GETOPT_H */
diff --git a/lib/getopt1.c b/lib/getopt1.c
new file mode 100644
index 0000000..8a299c9
--- /dev/null
+++ b/lib/getopt1.c
@@ -0,0 +1,181 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 1993, 1994
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "getopt.h"
+
+#if !defined (__STDC__) || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#else
+char *getenv ();
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  /* Long options must be introduced by `--': long_only is off.  */
+  int result;
+
+  result = _getopt_internal (argc, argv, options,
+                             long_options, opt_index, 0);
+  return result;
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+ If an option that starts with '-' (not '--') doesn't match a long option,
+ but does match a short option, it is parsed as a short option
+ instead. */
+
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  /* Same scan as getopt_long, with long_only switched on so that a
+     single `-' may also introduce a long option.  */
+  int result;
+
+  result = _getopt_internal (argc, argv, options,
+                             long_options, opt_index, 1);
+  return result;
+}
+
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver for getopt_long: report every option found, then list the
+   remaining non-option ARGV-elements and exit with status 0.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  /* Long options understood by the driver; every entry returns 0.  */
+  static struct option long_options[] =
+  {
+    {"add", 1, 0, 0},
+    {"append", 0, 0, 0},
+    {"delete", 1, 0, 0},
+    {"verbose", 0, 0, 0},
+    {"create", 0, 0, 0},
+    {"file", 1, 0, 0},
+    {0, 0, 0, 0}
+  };
+  int digit_optind = 0;
+  int c;
+
+  for (;;)
+    {
+      int option_index = 0;
+      /* optind is still 0 before the first call; scanning starts at 1.  */
+      int this_option_optind = (optind != 0) ? optind : 1;
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+                       long_options, &option_index);
+      if (c == EOF)
+        break;
+
+      /* Digit options share one handler.  */
+      if (c >= '0' && c <= '9')
+        {
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          continue;
+        }
+
+      switch (c)
+        {
+        case 0:
+          /* A long option; option_index says which one.  */
+          printf ("option %s", long_options[option_index].name);
+          if (optarg)
+            printf (" with arg %s", optarg);
+          printf ("\n");
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      for (; optind < argc; optind++)
+        printf ("%s ", argv[optind]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/lib/hash.c b/lib/hash.c
new file mode 100644
index 0000000..68ba92a
--- /dev/null
+++ b/lib/hash.c
@@ -0,0 +1,295 @@
+/* hash.c -- hash table maintenance
+ Copyright (C) 1986, 1995 Greg McGary
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <config.h>
+#include "hash.h"
+#include "alloc.h"
+#include "error.h"
+
+static void hash_rehash __P((struct hash_table* ht));
+static unsigned long round_up_2 __P((unsigned long rough));
+
+/* Implement double hashing with open addressing. The table size is
+ always a power of two. The secondary (`increment') hash function
+ is forced to return an odd-value, in order to be relatively prime
+ to the table size. This guarantees that the increment can
+ potentially hit every slot in the table during collision
+ resolution. */
+
+void *hash_deleted_item = &hash_deleted_item;
+
+/* Force the table size to be a power of two, possibly rounding up the
+ given size. */
+
+void
+hash_init (struct hash_table* ht, unsigned long size,
+           hash_func_t hash_1, hash_func_t hash_2, hash_cmp_func_t hash_cmp)
+{
+  /* Slot count: the power of two chosen by round_up_2, halved once if it
+     exceeds 128K slots to keep the table from getting out of hand.  (A
+     32K cap for 16-bit systems was the disabled alternative.)  */
+  unsigned long slots = round_up_2 (size);
+
+  if (slots > (128 * 1024))
+    slots /= 2;
+
+  /* Callbacks and statistics do not depend on the allocation.  */
+  ht->ht_hash_1 = hash_1;
+  ht->ht_hash_2 = hash_2;
+  ht->ht_compare = hash_cmp;
+  ht->ht_fill = 0;
+  ht->ht_collisions = 0;
+  ht->ht_lookups = 0;
+  ht->ht_rehashes = 0;
+
+  ht->ht_size = slots;
+  ht->ht_vec = (void**) CALLOC (struct token *, ht->ht_size);
+  if (ht->ht_vec == 0)
+    error (1, 0, _("can't allocate %ld bytes for hash table: memory exhausted"),
+           ht->ht_size * sizeof(struct token *));
+  ht->ht_capacity = ht->ht_size * 15 / 16;  /* 93.75% loading factor */
+}
+
+/* Load an array of items into `ht'. */
+
+void
+hash_load (struct hash_table* ht, void *item_table, unsigned long cardinality, unsigned long size)
+{
+  /* Walk CARDINALITY consecutive records of SIZE bytes each, inserting
+     the address of every record.  */
+  char *cursor = (char *) item_table;
+  unsigned long remaining;
+
+  for (remaining = cardinality; remaining != 0; remaining--)
+    {
+      hash_insert (ht, cursor);
+      cursor += size;
+    }
+}
+
+/* Returns the address of the table slot matching `key'. If `key' is
+ not found, return the address of an empty slot suitable for
+ inserting `key'. The caller is responsible for incrementing
+ ht_fill on insertion. */
+
+/* Probe HT for KEY and return the address of its slot.  If KEY is not
+   present, return the address of a vacant slot suitable for inserting
+   it: the first deleted slot passed during the probe if there was one,
+   otherwise the empty slot that ended the probe.  Callers must test the
+   result with HASH_VACANT, not against zero.  Counts one lookup, plus
+   one collision for every occupied slot that did not match.  */
+
+void **
+hash_find_slot (struct hash_table* ht, void const *key)
+{
+  void **slot;
+  void **deleted_slot = 0;
+  unsigned int hash_2 = 0;
+  unsigned int hash_1 = (*ht->ht_hash_1) (key);
+
+  ht->ht_lookups++;
+  for (;;)
+    {
+      hash_1 %= ht->ht_size;
+      slot = &ht->ht_vec[hash_1];
+
+      /* An empty slot ends the probe: KEY is absent.  Prefer recycling a
+         deleted slot seen on the way so that deletions do not
+         permanently consume table space.  */
+      if (*slot == 0)
+        return (deleted_slot ? deleted_slot : slot);
+      if (*slot == hash_deleted_item)
+        {
+          /* Remember only the first one; keep probing, because KEY may
+             still live further along the chain.  */
+          if (deleted_slot == 0)
+            deleted_slot = slot;
+        }
+      else
+        {
+          /* Pointer identity is a cheap test before the comparison
+             callback.  */
+          if (key == *slot)
+            return slot;
+          if ((*ht->ht_compare) (key, *slot) == 0)
+            return slot;
+          ht->ht_collisions++;
+        }
+      /* Compute the secondary hash lazily, on the first collision only.
+         It is forced odd so it is relatively prime to the power-of-two
+         table size.  */
+      if (!hash_2)
+        hash_2 = (*ht->ht_hash_2) (key) | 1;
+      hash_1 += hash_2;
+    }
+}
+
+/* Return the stored item matching KEY, or 0 if there is none.  */
+
+void *
+hash_find_item (struct hash_table* ht, void const *key)
+{
+  void **slot = hash_find_slot (ht, key);
+
+  if (HASH_VACANT (*slot))
+    return 0;
+  return *slot;
+}
+
+/* Insert ITEM, using ITEM itself as the lookup key.  Returns whatever
+   hash_insert_at returns for the slot that was found.  */
+
+void *
+hash_insert (struct hash_table* ht, void *item)
+{
+  return hash_insert_at (ht, item, hash_find_slot (ht, item));
+}
+
+/* Store ITEM in SLOT, which must be a slot address obtained from
+   hash_find_slot.  Returns the item previously held there, or ITEM
+   itself if the slot was vacant.  The table is rehashed when the fill
+   count reaches the capacity.  */
+
+void *
+hash_insert_at (struct hash_table* ht, void *item, void const *slot)
+{
+  void **cell = (void **) slot;
+  void *previous = *cell;
+  void *result = previous;
+
+  if (HASH_VACANT (previous))
+    {
+      /* A new entry rather than a replacement.  */
+      ht->ht_fill++;
+      result = item;
+    }
+  *cell = item;
+
+  if (ht->ht_fill >= ht->ht_capacity)
+    hash_rehash (ht);
+  return result;
+}
+
+/* Remove the entry matching ITEM.  Returns the removed item, or 0 if no
+   entry matched.  */
+
+void *
+hash_delete (struct hash_table* ht, void const *item)
+{
+  return hash_delete_at (ht, hash_find_slot (ht, item));
+}
+
+/* Remove the entry held in SLOT, if any.  An occupied slot is
+   overwritten with the deleted-item marker rather than zero, so probe
+   sequences passing through it are not cut short.  Returns the removed
+   item, or 0 if the slot was already vacant.  */
+
+void *
+hash_delete_at (struct hash_table* ht, void const *slot)
+{
+  void **cell = (void **) slot;
+  void *item = *cell;
+
+  if (HASH_VACANT (item))
+    return 0;
+
+  *cell = hash_deleted_item;
+  ht->ht_fill--;
+  return item;
+}
+
+/* free() every stored item and reset all slots to empty.  The slot
+   vector itself is kept.  */
+
+void
+hash_free_items (struct hash_table* ht)
+{
+  unsigned long slots = ht->ht_size;
+  unsigned long i;
+
+  for (i = 0; i < slots; i++)
+    {
+      void *item = ht->ht_vec[i];
+
+      if (!HASH_VACANT (item))
+        free (item);
+      ht->ht_vec[i] = 0;
+    }
+  ht->ht_fill = 0;
+}
+
+/* Forget every item without freeing it: all slots become empty and the
+   fill count and statistics counters return to zero.  */
+
+void
+hash_delete_items (struct hash_table* ht)
+{
+  unsigned long slots = ht->ht_size;
+  unsigned long i;
+
+  for (i = 0; i < slots; i++)
+    ht->ht_vec[i] = 0;
+
+  ht->ht_rehashes = 0;
+  ht->ht_lookups = 0;
+  ht->ht_collisions = 0;
+  ht->ht_fill = 0;
+}
+
+/* Release the slot vector of HT.  When FREE_ITEMS is nonzero the stored
+   items are freed first.  */
+
+void
+hash_free (struct hash_table* ht, int free_items)
+{
+  if (free_items != 0)
+    hash_free_items (ht);
+
+  free (ht->ht_vec);
+
+  /* Leave the table with no vector and no capacity.  */
+  ht->ht_capacity = 0;
+  ht->ht_fill = 0;
+  ht->ht_vec = 0;
+}
+
+/* Call MAP once for every stored item, in slot order.  */
+
+void
+hash_map (struct hash_table *ht, hash_map_func_t map)
+{
+  void **last = &ht->ht_vec[ht->ht_size];
+  void **cursor = ht->ht_vec;
+
+  while (cursor < last)
+    {
+      if (!HASH_VACANT (*cursor))
+        (*map) (*cursor);
+      cursor++;
+    }
+}
+
+/* Double the size of the hash table in the event of overflow... */
+
+/* Double the slot vector of HT and re-insert every live item.  Deleted
+   markers are dropped on the way, so the new vector contains only real
+   items and empty slots.  Exits through error() if the new vector
+   cannot be allocated.  */
+
+static void
+hash_rehash (struct hash_table* ht)
+{
+  unsigned long old_ht_size = ht->ht_size;
+  void **old_vec = ht->ht_vec;
+  void **ovp;
+  void **slot;
+
+  ht->ht_size *= 2;
+  ht->ht_rehashes++;
+  /* Same 15/16 loading factor that hash_init uses.  */
+  ht->ht_capacity = ht->ht_size - (ht->ht_size >> 4);
+  ht->ht_vec = (void **) CALLOC (struct token *, ht->ht_size);
+  if (ht->ht_vec == 0)
+    error (1, 0, _("can't allocate %ld bytes for hash table: memory exhausted"),
+           ht->ht_size * sizeof(struct token *));
+
+  for (ovp = old_vec; ovp < &old_vec[old_ht_size]; ovp++)
+    {
+      /* Skip empty slots AND deleted markers: the marker is not a user
+         item, so it must never reach the hash or comparison callbacks,
+         and copying it would only waste a slot in the new vector.  */
+      if (HASH_VACANT (*ovp))
+        continue;
+      slot = hash_find_slot (ht, *ovp);
+      *slot = *ovp;
+    }
+  free (old_vec);
+}
+
+/* Write the load, rehash count and collision rate of HT to OUT_FILE on
+   one line, without a trailing newline.  */
+
+void
+hash_print_stats (struct hash_table *ht, FILE *out_FILE)
+{
+  double load_percent = 100.0 * (double) ht->ht_fill / (double) ht->ht_size;
+  double collision_percent = 0;
+
+  /* Avoid dividing by zero when nothing has been looked up yet.  */
+  if (ht->ht_lookups)
+    collision_percent
+      = 100.0 * (double) ht->ht_collisions / (double) ht->ht_lookups;
+
+  fprintf (out_FILE, _("Load=%ld/%ld=%.0f%%, "), ht->ht_fill, ht->ht_size,
+           load_percent);
+  fprintf (out_FILE, _("Rehash=%d, "), ht->ht_rehashes);
+  fprintf (out_FILE, _("Collisions=%ld/%ld=%.0f%%"), ht->ht_collisions, ht->ht_lookups,
+           collision_percent);
+}
+
+/* Dump all items into a NULL-terminated vector. Use the
+ user-supplied vector, or malloc one. */
+
+void**
+hash_dump (struct hash_table *ht, void **vector_0, qsort_cmp_t compare)
+{
+  void **out;
+  unsigned long i;
+
+  /* One extra element for the terminating null pointer.  */
+  if (vector_0 == 0)
+    vector_0 = MALLOC (void *, ht->ht_fill + 1);
+  out = vector_0;
+
+  /* Copy the live items in slot order.  */
+  for (i = 0; i < ht->ht_size; i++)
+    {
+      void *item = ht->ht_vec[i];
+
+      if (!HASH_VACANT (item))
+        *out++ = item;
+    }
+  *out = 0;
+
+  /* Sorting is optional; the terminator is not part of the sort.  */
+  if (compare)
+    qsort (vector_0, ht->ht_fill, sizeof (void *), compare);
+  return vector_0;
+}
+
+/* Return the smallest power of 2 that is strictly greater than ROUGH.
+   Note that an exact power of two is therefore doubled (8 yields 16)
+   and 0 yields 1.  */
+
+static unsigned long
+round_up_2 (unsigned long rough)
+{
+  /* Same width as the argument and the return type, so large sizes do
+     not overflow a signed int while shifting.  */
+  unsigned long round;
+
+  round = 1;
+  while (rough)
+    {
+      round <<= 1;
+      rough >>= 1;
+    }
+  return round;
+}
diff --git a/lib/hash.h b/lib/hash.h
new file mode 100644
index 0000000..a8723a9
--- /dev/null
+++ b/lib/hash.h
@@ -0,0 +1,144 @@
+/* hash.h -- decls for hash table
+ Copyright (C) 1986, 1995 Greg McGary
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef _hash_h_
+#define _hash_h_
+
+#include <stdio.h>
+
+typedef unsigned long (*hash_func_t) __P((void const *key));
+typedef int (*hash_cmp_func_t) __P((void const *x, void const *y));
+typedef void (*hash_map_func_t) __P((void const *item));
+
+/* State of one hash table: the slot vector, its sizing, usage
+ statistics, and the caller-supplied hash and comparison callbacks. */
+struct hash_table
+{
+ void **ht_vec; /* slot vector; each slot holds an item pointer */
+ unsigned long ht_size; /* total number of slots (power of 2) */
+ unsigned long ht_capacity; /* usable slots, limited by loading-factor */
+ unsigned long ht_fill; /* items in table */
+ unsigned long ht_collisions; /* # of failed calls to comparison function */
+ unsigned long ht_lookups; /* # of queries */
+ unsigned int ht_rehashes; /* # of times we've expanded table */
+ hash_func_t ht_hash_1; /* primary hash function */
+ hash_func_t ht_hash_2; /* secondary hash function */
+ hash_cmp_func_t ht_compare; /* comparison function */
+};
+
+typedef int (*qsort_cmp_t) __P((void const *, void const *));
+
+void hash_init __P((struct hash_table *ht, unsigned long size,
+ hash_func_t hash_1, hash_func_t hash_2, hash_cmp_func_t hash_cmp));
+void hash_load __P((struct hash_table *ht, void *item_table,
+ unsigned long cardinality, unsigned long size));
+void **hash_find_slot __P((struct hash_table *ht, void const *key));
+void *hash_find_item __P((struct hash_table *ht, void const *key));
+void *hash_insert __P((struct hash_table *ht, void *item));
+void *hash_insert_at __P((struct hash_table *ht, void *item, void const *slot));
+void *hash_delete __P((struct hash_table *ht, void const *item));
+void *hash_delete_at __P((struct hash_table *ht, void const *slot));
+void hash_delete_items __P((struct hash_table *ht));
+void hash_free_items __P((struct hash_table *ht));
+void hash_free __P((struct hash_table *ht, int free_items));
+void hash_map __P((struct hash_table *ht, hash_map_func_t map));
+void hash_print_stats __P((struct hash_table *ht, FILE *out_FILE));
+void **hash_dump __P((struct hash_table *ht, void **vector_0, qsort_cmp_t compare));
+
+/* Marker stored in a slot whose item has been deleted. */
+extern void *hash_deleted_item;
+/* True if ITEM is an empty slot value (0) or the deleted-item marker.
+ Evaluates ITEM more than once. */
+#define HASH_VACANT(item) ((item) == 0 || (item) == hash_deleted_item)
+
+
+/* hash and comparison macros for string keys. */
+
+/* Add the primary hash of the NUL-terminated string _key_ to _result_.
+   Each byte is shifted left by the low four bits of the byte that
+   follows it.  Wrapped in do { } while (0) so that the macro expands to
+   exactly one statement and is safe in an unbraced if/else.  */
+#define STRING_HASH_1(_key_, _result_) do { \
+  unsigned char const *kk = (unsigned char const *) (_key_) - 1; \
+  while (*++kk) \
+    (_result_) += (*kk << (kk[1] & 0xf)); \
+} while (0)
+/* Function body helper: return the primary hash of _key_.  */
+#define return_STRING_HASH_1(_key_) do { \
+  unsigned long result = 0; \
+  STRING_HASH_1 ((_key_), result); \
+  return result; \
+} while (0)
+
+/* Add the secondary hash of the NUL-terminated string _key_ to
+   _result_.  Each byte is shifted left by the low three bits of the
+   byte that follows it.  */
+#define STRING_HASH_2(_key_, _result_) do { \
+  unsigned char const *kk; \
+  for (kk = (unsigned char const *) (_key_); *kk != '\0'; kk++) \
+    (_result_) += (*kk << (kk[1] & 0x7)); \
+} while (0)
+/* Function body helper: return the secondary hash of _key_.  */
+#define return_STRING_HASH_2(_key_) do { \
+  unsigned long result = 0; \
+  STRING_HASH_2 ((_key_), result); \
+  return result; \
+} while (0)
+
+/* Compare the NUL-terminated strings _x_ and _y_ as unsigned bytes and
+   store the difference of the first pair of bytes that differ (or of
+   the bytes at the end of _x_) in _result_: zero when equal, negative
+   when _x_ sorts first, positive otherwise.  */
+#define STRING_COMPARE(_x_, _y_, _result_) do { \
+  unsigned char const *xx = (unsigned char const *) (_x_); \
+  unsigned char const *yy = (unsigned char const *) (_y_); \
+  while (*xx != '\0' && *xx == *yy) \
+    { \
+      xx++; \
+      yy++; \
+    } \
+  (_result_) = *xx - *yy; \
+} while (0)
+/* Function body helper: return the comparison of _x_ and _y_.  */
+#define return_STRING_COMPARE(_x_, _y_) do { \
+  int result; \
+  STRING_COMPARE (_x_, _y_, result); \
+  return result; \
+} while (0)
+
+/* hash and comparison macros for integer keys. */
+
+/* Add _key_, converted to unsigned long, to _result_. */
+#define INTEGER_HASH_1(_key_, _result_) do { \
+ (_result_) += ((unsigned long)(_key_)); \
+} while (0)
+/* Function body helper: return the primary hash of _key_. */
+#define return_INTEGER_HASH_1(_key_) do { \
+ unsigned long result = 0; \
+ INTEGER_HASH_1 ((_key_), result); \
+ return result; \
+} while (0)
+
+/* Add the bitwise complement of _key_ (as unsigned long) to _result_. */
+#define INTEGER_HASH_2(_key_, _result_) do { \
+ (_result_) += ~((unsigned long)(_key_)); \
+} while (0)
+/* Function body helper: return the secondary hash of _key_. */
+#define return_INTEGER_HASH_2(_key_) do { \
+ unsigned long result = 0; \
+ INTEGER_HASH_2 ((_key_), result); \
+ return result; \
+} while (0)
+
+/* Store _x_ minus _y_ in _result_: zero when equal, negative when _x_
+   is smaller.  Both operands are parenthesized so that expression
+   arguments such as `b - c' keep their own grouping.  */
+#define INTEGER_COMPARE(_x_, _y_, _result_) do { \
+  (_result_) = (_x_) - (_y_); \
+} while (0)
+/* Function body helper: return the comparison of _x_ and _y_.  */
+#define return_INTEGER_COMPARE(_x_, _y_) do { \
+  int result; \
+  INTEGER_COMPARE ((_x_), (_y_), result); \
+  return result; \
+} while (0)
+
+/* hash and comparison macros for address keys. */
+
+/* Address keys reuse the integer macros. For hashing, the key is
+ converted to unsigned long and its low three bits are shifted away
+ first (presumably because aligned addresses vary little there --
+ confirm). Comparison forwards both operands unchanged. */
+#define ADDRESS_HASH_1(_key_, _result_) INTEGER_HASH_1 (((unsigned long)(_key_)) >> 3, (_result_))
+#define ADDRESS_HASH_2(_key_, _result_) INTEGER_HASH_2 (((unsigned long)(_key_)) >> 3, (_result_))
+#define ADDRESS_COMPARE(_x_, _y_, _result_) INTEGER_COMPARE ((_x_), (_y_), (_result_))
+#define return_ADDRESS_HASH_1(_key_) return_INTEGER_HASH_1 (((unsigned long)(_key_)) >> 3)
+#define return_ADDRESS_HASH_2(_key_) return_INTEGER_HASH_2 (((unsigned long)(_key_)) >> 3)
+#define return_ADDRESS_COMPARE(_x_, _y_) return_INTEGER_COMPARE ((_x_), (_y_))
+
+#endif /* not _hash_h_ */
diff --git a/lib/idarg.h b/lib/idarg.h
new file mode 100644
index 0000000..28c820c
--- /dev/null
+++ b/lib/idarg.h
@@ -0,0 +1,32 @@
+/* idarg.h -- defs for internal form of command-line arguments
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _idarg_h_
+#define _idarg_h_
+
+/* Internal form of one command-line argument. */
+struct idarg
+{
+ struct idarg *ida_next; /* link to another idarg; presumably the next list element -- confirm */
+ char *ida_arg; /* the argument text */
+ int ida_index; /* NOTE(review): meaning not visible here; confirm against users */
+ char ida_flags; /* bitwise OR of the IDA_* values below */
+ /* IDA_RELATIVE and IDA_SCAN_ME share the value 0x01; the comments tie
+ them to different programs (lid and mkid), so presumably they are
+ never combined -- confirm. */
+#define IDA_RELATIVE 0x01 /* file name is now relative (lid) */
+#define IDA_SCAN_ME 0x01 /* file should be scanned (mkid) */
+#define IDA_PREFIX_US 0x02 /* file has names with prefixed underscores */
+};
+
+#endif /* not _idarg_h_ */
diff --git a/lib/idfile.c b/lib/idfile.c
new file mode 100644
index 0000000..19ceaed
--- /dev/null
+++ b/lib/idfile.c
@@ -0,0 +1,226 @@
+/* idfile.c -- read & write mkid database file header
+ Copyright (C) 1986, 1995 Greg McGary
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <obstack.h>
+
+#include <config.h>
+#include "alloc.h"
+#include "idfile.h"
+#include "strxtra.h"
+#include "error.h"
+
+typedef int (*iof_t) __P((FILE *, void *, unsigned int, int));
+static int io_idhead __P((FILE *fp, iof_t iof, struct idhead *idh));
+static int io_size __P((FILE *, void *, unsigned int size, int));
+int fgets0 __P((char *buf0, int size, FILE *in_FILE));
+
+extern char *program_name;
+
+/* Strict front end to maybe_read_id_file: open the ID file named
+   ID_FILE_NAME, load its header into IDH and return the vector of
+   member file links.  If maybe_read_id_file returns a null vector,
+   report "can't open" together with errno via error () using exit
+   status 1.  */
+
+struct file_link **
+read_id_file (char const *id_file_name, struct idhead *idh)
+{
+  struct file_link **members = maybe_read_id_file (id_file_name, idh);
+
+  if (members == 0)
+    {
+      error (1, errno, _("can't open `%s'"), id_file_name);
+      return NULL;
+    }
+  return members;
+}
+
+/* maybe_read_id_file does everything that read_id_file does, but is
+   tolerant of errors opening the ID file, returning NULL in this case
+   (this is called from mkid where an ID might or might not already
+   exist).  All other errors are considered fatal.
+
+   ID_FILE_NAME is the name of the ID file; IDH receives the header
+   fields and the open stream (idh_FILE).  The return value is whatever
+   deserialize_file_links produces for the file-link section.  */
+
+struct file_link **
+maybe_read_id_file (char const *id_file_name, struct idhead *idh)
+{
+  obstack_init (&idh->idh_file_link_obstack);
+  /* The ID file holds raw bytes (high-bit magic numbers, packed
+     integers, NUL-delimited strings), so it must be opened in binary
+     mode; text mode would corrupt it on systems that translate line
+     endings.  */
+  idh->idh_FILE = fopen (id_file_name, "rb");
+  if (idh->idh_FILE == 0)
+    return 0;
+
+  read_idhead (idh);
+  if (idh->idh_magic[0] != IDH_MAGIC_0 || idh->idh_magic[1] != IDH_MAGIC_1)
+    error (1, 0, _("`%s' is not an ID file! (bad magic #)"), id_file_name);
+  if (idh->idh_version != IDH_VERSION)
+    error (1, 0, _("`%s' is version %d, but I only grok version %d"),
+           id_file_name, idh->idh_version, IDH_VERSION);
+
+  /* Position the stream at the file-link section.  A failed seek would
+     make the deserializer read garbage, so treat it as fatal.  */
+  if (fseek (idh->idh_FILE, idh->idh_flinks_offset, SEEK_SET) != 0)
+    error (1, errno, _("can't seek in `%s'"), id_file_name);
+  return deserialize_file_links (idh);
+}
+
+
+/* Read the header fields from IDH's open stream into IDH, using
+   io_read as the transfer function.  Returns io_idhead's result, the
+   sum of the field sizes.  */
+int
+read_idhead (struct idhead *idh)
+{
+  FILE *in_FILE = idh->idh_FILE;
+
+  return io_idhead (in_FILE, io_read, idh);
+}
+
+/* Write the header fields of IDH to IDH's open stream, using io_write
+   as the transfer function.  Returns io_idhead's result, the sum of
+   the field sizes.  */
+int
+write_idhead (struct idhead *idh)
+{
+  FILE *out_FILE = idh->idh_FILE;
+
+  return io_idhead (out_FILE, io_write, idh);
+}
+
+/* Return the number of bytes the header occupies on disk.  This runs
+   the header field list with io_size as the transfer function and no
+   stream or header, so nothing is read or written.  */
+int
+sizeof_idhead ()
+{
+  int header_bytes = io_idhead (0, io_size, 0);
+
+  return header_bytes;
+}
+
+/* Transfer function that performs no I/O and merely reports SIZE.  The
+   stream and address arguments are ignored.  A string field has no
+   fixed size, so IO_TYPE_STR draws a (non-fatal) diagnostic; SIZE is
+   still returned in that case.  */
+static int
+io_size (FILE *ignore_FILE, void *ignore_addr, unsigned int size, int io_type)
+{
+  switch (io_type)
+    {
+    case IO_TYPE_STR:
+      error (0, 0, _("can't determine the io_size of a string!"));
+      break;
+    default:
+      break;
+    }
+  return size;
+}
+
+/* This is like fgets(3s), except that lines are delimited by NULs
+   rather than newlines.  Also, we return the number of characters
+   read rather than the address of buf0.
+
+   BUF0 must have room for SIZE bytes.  At most SIZE - 1 characters are
+   stored, always followed by a terminating NUL, so the buffer is never
+   overrun.  Reading stops at a NUL delimiter (which is consumed but
+   not counted) or at end of file.  If the string does not fit, the
+   unread remainder stays in IN_FILE for the next call, as with fgets.
+   A SIZE of zero or less stores nothing and returns 0.  */
+
+int
+fgets0 (char *buf0, int size, FILE * in_FILE)
+{
+  char *buf = buf0;
+  char *end;
+  int c;
+
+  if (size <= 0)
+    return 0;
+
+  /* Reserve the last byte for the terminating NUL.  */
+  end = &buf0[size - 1];
+
+  /* Test for room *before* reading so that no character is fetched
+     from the stream only to be thrown away.  */
+  while (buf < end && (c = getc (in_FILE)) > 0)
+    *buf++ = c;
+
+  if (buf == end)
+    {
+      /* The buffer filled up.  If the string ends exactly here, swallow
+         its delimiter; otherwise push the character back.  */
+      c = getc (in_FILE);
+      if (c > 0)
+        ungetc (c, in_FILE);
+    }
+  *buf = '\0';
+  return (buf - buf0);
+}
+
+/* Read one field from INPUT_FILE into ADDR and return SIZE.
+
+   IO_TYPE_INT (or any field with SIZE == 1): read SIZE bytes, least
+   significant byte first, and assemble them into an integer.  Sizes 4
+   and 3 are stored through an `unsigned long *', size 2 through an
+   `unsigned short *' and size 1 through an `unsigned char *'; any
+   other size is a programming error and aborts.
+   IO_TYPE_STR: read a NUL-delimited string of at most SIZE bytes with
+   fgets0.
+   IO_TYPE_FIX: read SIZE raw bytes with fread.
+   Any other IO_TYPE draws a non-fatal diagnostic.
+
+   End of file is not detected here; an EOF from getc contributes the
+   byte value 0xff.  */
+
+int
+io_read (FILE *input_FILE, void *addr, unsigned int size, int io_type)
+{
+  if (io_type == IO_TYPE_INT || size == 1)
+    {
+      unsigned long value = 0;
+      unsigned int i;
+
+      if (size < 1 || size > 4)
+        {
+          fprintf (stderr, _("unsupported size in io_read (): %d\n"), size);
+          abort ();
+        }
+      /* Widen each byte to unsigned long *before* shifting.  Shifting
+         the int returned by getc can overflow (left shift by 24, or by
+         16 where int is 16 bits) and then sign-extends when added to
+         an unsigned long, corrupting the upper bits.  */
+      for (i = 0; i < size; i++)
+        value |= ((unsigned long) (getc (input_FILE) & 0xff)) << (8 * i);
+
+      switch (size)
+        {
+        case 4:
+        case 3:
+          *(unsigned long *) addr = value;
+          break;
+        case 2:
+          *(unsigned short *) addr = (unsigned short) value;
+          break;
+        case 1:
+          *(unsigned char *) addr = (unsigned char) value;
+          break;
+        }
+    }
+  else if (io_type == IO_TYPE_STR)
+    fgets0 (addr, size, input_FILE);
+  else if (io_type == IO_TYPE_FIX)
+    fread (addr, size, 1, input_FILE);
+  else
+    error (0, 0, _("unknown I/O type: %d"), io_type);
+  return size;
+}
+
+/* Write one field from ADDR to OUTPUT_FILE and return SIZE.
+
+   IO_TYPE_INT (or any field with SIZE == 1): fetch the integer through
+   an `unsigned long *' (sizes 4 and 3), `unsigned short *' (size 2) or
+   `unsigned char *' (size 1) and emit SIZE bytes, least significant
+   byte first.  Any other size is a programming error and aborts.
+   IO_TYPE_STR: write the string at ADDR followed by a NUL.
+   IO_TYPE_FIX: write SIZE raw bytes with fwrite.
+   Any other IO_TYPE draws a non-fatal diagnostic.  */
+
+int
+io_write (FILE *output_FILE, void *addr, unsigned int size, int io_type)
+{
+  if (io_type == IO_TYPE_INT || size == 1)
+    {
+      unsigned long value;
+      unsigned int shift;
+
+      switch (size)
+        {
+        case 4:
+        case 3:
+          value = *(unsigned long *) addr;
+          break;
+        case 2:
+          value = *(unsigned short *) addr;
+          break;
+        case 1:
+          value = *(unsigned char *) addr;
+          break;
+        default:
+          fprintf (stderr, _("unsupported size in io_write (): %d\n"), size);
+          abort ();
+        }
+      /* Emit the low SIZE bytes in little-endian order.  */
+      for (shift = 0; shift < 8 * size; shift += 8)
+        putc ((int) ((value >> shift) & 0xff), output_FILE);
+    }
+  else if (io_type == IO_TYPE_STR)
+    {
+      fputs (addr, output_FILE);
+      putc ('\0', output_FILE);
+    }
+  else if (io_type == IO_TYPE_FIX)
+    {
+      fwrite (addr, size, 1, output_FILE);
+    }
+  else
+    {
+      error (0, 0, _("unknown I/O type: %d"), io_type);
+    }
+  return size;
+}
+
+/* The sizes of the fields must be hard-coded. They aren't
+ necessarily the sizes of the struct members, because some
+ architectures don't have any way to declare 4-byte integers
+ (e.g., Cray) */
+
+/* Apply the transfer function IOF to every on-disk header field of
+   IDH, in file order, and return the sum of the values IOF returns
+   (each iof in this file returns the size it was given).  The same
+   field list therefore serves reading (io_read), writing (io_write)
+   and measuring (io_size).  When FP is non-null the stream is first
+   rewound to offset 0.  sizeof_idhead calls this with FP and IDH both
+   null; that relies on io_size never touching the addresses passed.  */
+static int
+io_idhead (FILE *fp, iof_t iof, struct idhead *idh)
+{
+ unsigned int size = 0;
+ /* One byte between the magic number and the version; its value is
+    not kept in struct idhead.  */
+ unsigned char pad = 0;
+ if (fp)
+ fseek (fp, 0L, 0);
+ /* The order and sizes below define the header layout in the file.  */
+ size += iof (fp, idh->idh_magic, 2, IO_TYPE_FIX);
+ size += iof (fp, &pad, 1, IO_TYPE_FIX);
+ size += iof (fp, &idh->idh_version, 1, IO_TYPE_FIX);
+ size += iof (fp, &idh->idh_flags, 2, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_file_links, 4, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_files, 4, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_tokens, 4, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_buf_size, 4, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_vec_size, 4, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_tokens_offset, 4, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_flinks_offset, 4, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_end_offset, 4, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_max_link, 2, IO_TYPE_INT);
+ size += iof (fp, &idh->idh_max_path, 2, IO_TYPE_INT);
+ return size;
+}
diff --git a/lib/idfile.h b/lib/idfile.h
new file mode 100644
index 0000000..c2cd0a0
--- /dev/null
+++ b/lib/idfile.h
@@ -0,0 +1,158 @@
+/* idfile.h -- decls for ID file header and constituent file names
+ Copyright (C) 1986, 1995 Greg McGary
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef _idfile_h_
+#define _idfile_h_ 1
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <obstack.h>
+#include "hash.h"
+
+/* The ID file header is the nexus of all ID file information. This
+ is an in-core structure, only some of which is read/written to disk. */
+
+/* In-core ID file header.  The members from idh_magic through
+   idh_max_path are the persistent part; everything after the
+   "run-time variables" comment exists only in memory.  */
+struct idhead
+{
+ /* Identifies the file as an ID file; compared against IDH_MAGIC_0
+    and IDH_MAGIC_1.  */
+ unsigned char idh_magic[2];
+#define IDH_MAGIC_0 ('I'|0x80)
+#define IDH_MAGIC_1 ('D'|0x80)
+ /* File format version; compared against IDH_VERSION.  */
+ unsigned char idh_version;
+#define IDH_VERSION 4
+ /* Bitwise OR of the IDH_* flag values below.  */
+ unsigned short idh_flags;
+#define IDH_COUNTS (1<<0) /* include occurrence counts for each token */
+#define IDH_FOLLOW_SL (1<<1) /* follow symlinks to directories */
+#define IDH_COMMENTS (1<<2) /* include tokens found in comments */
+#define IDH_LOCALS (1<<3) /* include names of formal params & local vars */
+#define IDH_DECL_DEFN_USE (1<<4) /* include decl/defn/use info */
+#define IDH_L_R_VALUE (1<<5) /* include lvalue/rvalue info */
+#define IDH_CALL_ER_EE (1<<6) /* include caller/callee relationship info */
+ unsigned long idh_file_links; /* total # of file links */
+ unsigned long idh_files; /* total # of constituent source files */
+ unsigned long idh_tokens; /* total # of constituent tokens */
+ /* idh_*_size: max buffer-sizes for ID file reading programs */
+ unsigned long idh_buf_size; /* # of bytes in longest entry */
+ unsigned long idh_vec_size; /* # of hits in longest entry */
+ /* idh_*_offset: ID file offsets for start of various sections */
+ long idh_tokens_offset; /* constituent tokens section */
+ long idh_flinks_offset; /* constituent file & directory names section */
+ long idh_end_offset; /* end of tokens section */
+ unsigned short idh_max_link; /* longest file name component */
+ unsigned short idh_max_path; /* largest # of file name components */
+
+ /* The following are run-time variables and are not stored on disk */
+ struct hash_table idh_member_file_table;
+ struct hash_table idh_file_link_table;
+#if HAVE_LINK
+ struct hash_table idh_dev_ino_table; /* for detecting file name aliases */
+#endif
+ /* Obstacks, one per table above (names match the tables).  */
+ struct obstack idh_member_file_obstack;
+ struct obstack idh_file_link_obstack;
+#if HAVE_LINK
+ struct obstack idh_dev_ino_obstack;
+#endif
+ char *idh_file_name;
+ /* Open stream on the ID file.  */
+ FILE *idh_FILE;
+#if 0
+ time_t idh_mod_time;
+ struct arg_file **idh_file_order; /* sequence in ID file */
+ struct arg_file **idh_scan_order; /* sequence in summaries */
+#endif
+};
+
+/* A file_link represents a single component (file or directory) in a
+ file name. It has a name, a parent file_link and some flags. */
+
+/* One component of a file name.  The structure is variable-length:
+   fl_name is declared with one element and the name's characters are
+   stored in space allocated past the end of the struct.  */
+struct file_link
+{
+ /* Normally a pointer to the parent link (fl_parent).  The same
+    storage is accessed as an integer (fl_index) through the other
+    union member.  A root link is its own parent.  */
+ union {
+ struct file_link *u_parent;
+#define fl_parent fl_u.u_parent
+ unsigned long u_index;
+#define fl_index fl_u.u_index
+#define FL_PARENT_INDEX_BYTES 3
+#define IS_ROOT_FILE_LINK(flink) ((flink)->fl_parent == (flink))
+ } fl_u;
+ /* Bitwise OR of the FL_* values below.  */
+ unsigned char fl_flags;
+#define FL_CMD_LINE_ARG (1<<0)
+#define FL_USED (1<<1)
+#define FL_MEMBER (1<<2) /* has a corresponding member_file entry */
+#define FL_SCAN_ME (1<<3)
+#define FL_SYM_LINK (1<<4)
+#define FL_TYPE_MASK (FL_TYPE_DIR|FL_TYPE_FILE)
+# define FL_TYPE_DIR (1<<5)
+# define FL_IS_DIR(_f_) (((_f_) & FL_TYPE_MASK) == FL_TYPE_DIR)
+# define FL_TYPE_FILE (1<<6)
+# define FL_IS_FILE(_f_) (((_f_) & FL_TYPE_MASK) == FL_TYPE_FILE)
+ /* First byte of the NUL-terminated component name.  */
+ char fl_name[1];
+};
+
+/* A member_file represents a source file that is treated by mkid. */
+
+/* Per-source-file record, keyed by the file_link of the file's last
+   name component.  */
+struct member_file
+{
+ struct file_link *mf_link;
+ /* Scanner/language arguments selected for this file.  */
+ struct lang_args const *mf_lang_args;
+time_t mf_modify_time;
+time_t mf_access_time;
+ /* NOTE(review): this is a short while idh_files is an unsigned long;
+    confirm whether more than SHRT_MAX member files can occur.  */
+ short mf_index; /* order in ID file */
+};
+
+#if HAVE_LINK
+
+/* On systems that support multiple names for a single file (via hard
+ and/or soft links), dev_ino records information needed to detect
+ such aliasing. */
+
+/* Associates a (device, inode) pair with the file_link under which
+   that file was recorded.  */
+struct dev_ino
+{
+ dev_t di_dev;
+ ino_t di_ino;
+ struct file_link *di_link;
+};
+
+extern struct hash_table dev_ino_table;
+
+#endif
+
+extern struct idhead idh;
+
+extern struct file_link **read_id_file __P((char const *id_file_name, struct idhead *idhp));
+extern struct file_link **maybe_read_id_file __P((char const *id_file_name, struct idhead *idhp));
+extern int read_idhead __P((struct idhead *idhp));
+extern int write_idhead __P((struct idhead *idhp));
+extern int sizeof_idhead __P((void));
+extern void init_idh_obstacks __P((struct idhead *idhp));
+extern void init_idh_tables __P((struct idhead *idhp));
+
+#define IO_TYPE_INT 0 /* integer */
+#define IO_TYPE_STR 1 /* NUL terminated string */
+#define IO_TYPE_FIX 2 /* fix-sized */
+
+extern int io_write __P((FILE *output_FILE, void *addr, unsigned int size, int io_type));
+extern int io_read __P((FILE *input_FILE, void *addr, unsigned int size, int io_type));
+extern struct file_link *get_current_dir_link __P((void));
+
+extern struct file_link **deserialize_file_links __P((struct idhead *idhp));
+extern void mark_member_file_links __P((struct idhead *idhp));
+extern int member_file_qsort_compare __P((void const *x, void const *y));
+extern void serialize_file_links __P((struct idhead *idhp));
+extern struct file_link *parse_file_name __P((char *file_name,
+ struct file_link *relative_dir_link));
+
+#endif /* not _idfile_h_ */
diff --git a/lib/idwalk.c b/lib/idwalk.c
new file mode 100644
index 0000000..77aacf2
--- /dev/null
+++ b/lib/idwalk.c
@@ -0,0 +1,1189 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#ifndef FNM_FILE_NAME
+#define FNM_FILE_NAME FNM_PATHNAME
+#endif
+
+#define DEBUG(args) /* printf args */
+
+#include <config.h>
+#include "system.h"
+#include "idfile.h"
+#include "error.h"
+#include "alloc.h"
+#include "dynvec.h"
+#include "strxtra.h"
+#include "scanners.h"
+#include "pathmax.h"
+
+int walk_dir __P((struct file_link *dir_link));
+void walk_flink __P((struct file_link *flink, struct dynvec *sub_dirs_vec));
+struct member_file *get_member_file __P((struct file_link *flink));
+struct lang_args *get_lang_args __P((struct file_link const *flink));
+int walk_sub_dirs __P((struct dynvec *sub_dirs_vec));
+int classify_link __P((struct file_link *flink, struct stat *st));
+struct file_link *get_link_from_dirent __P((struct dirent *dirent, struct file_link *parent));
+struct file_link *make_link_from_dirent __P((struct dirent *dirent, struct file_link *parent));
+struct file_link *get_link_from_string __P((char const *name, struct file_link *parent));
+struct file_link *make_link_from_string __P((char const *name, struct file_link *parent));
+static int same_as_dot __P((char const *cwd));
+int chdir_to_link __P((struct file_link* dir_link));
+struct file_link const **fill_link_vector __P((struct file_link const **vec_buf, struct file_link const *flink));
+struct file_link const **fill_link_vector_1 __P((struct file_link const **vec_buf, struct file_link const *flink));
+char const *maybe_relative_path __P((char *buffer, struct file_link const *to_link, struct file_link const *from_link));
+char *fill_dot_dots __P((char *buf, int levels));
+char *absolute_path __P((char *buffer, struct file_link const *flink));
+static char *absolute_path_1 __P((char *buffer, struct file_link const *flink));
+unsigned long member_file_hash_1 __P((void const *key));
+unsigned long member_file_hash_2 __P((void const *key));
+int member_file_hash_compare __P((void const *x, void const *y));
+unsigned long file_link_hash_1 __P((void const *key));
+unsigned long file_link_hash_2 __P((void const *key));
+int file_link_hash_compare __P((void const *x, void const *y));
+int file_link_qsort_compare __P((void const *x, void const *y));
+int links_depth __P((struct file_link const *flink));
+unsigned long dev_ino_hash_1 __P((void const *key));
+unsigned long dev_ino_hash_2 __P((void const *key));
+int dev_ino_hash_compare __P((void const *x, void const *y));
+int symlink_ancestry __P((struct file_link *flink));
+
+/* Interpret `HAVE_LINK' as meaning `UN*X style' directory structure
+   (e.g., a single root called `/', with `/' separating links), and
+   !HAVE_LINK as `DOS|OS/2|Windows style' (e.g., multiple root volumes
+   named `x:', with `\' separating links). */
+
+#if HAVE_LINK
+struct file_link *find_alias_link __P((struct file_link *flink, struct stat *st));
+struct member_file *maybe_get_member_file __P((struct file_link *flink, struct stat *st));
+struct member_file *find_member_file __P((struct file_link const *flink));
+# define IS_ABSOLUTE(_dir_) ((_dir_)[0] == '/')
+# define SLASH_STRING "/"
+# define SLASH_CHAR '/'
+# define DOT_DOT_SLASH "../"
+# define MAYBE_FNM_CASEFOLD 0
+#else
+/* NEEDSWORK: prefer forward-slashes as a user-configurable option. */
+# define IS_ABSOLUTE(_dir_) ((_dir_)[1] == ':')
+# define SLASH_STRING "\\/"
+# define SLASH_CHAR '\\'
+# define DOT_DOT_SLASH "..\\"
+# define MAYBE_FNM_CASEFOLD FNM_CASEFOLD
+#endif
+
+#define IS_DOT(s) ((s)[0] == '.' && (s)[1] == '\0')
+#define IS_DOT_DOT(s) ((s)[0] == '.' && (s)[1] == '.' && (s)[2] == '\0')
+#define IS_DOT_or_DOT_DOT(s) \
+ (((s)[0] == '.') && (((s)[1] == '\0') || ((s)[1] == '.' && (s)[2] == '\0')))
+
+static struct file_link *current_dir_link = 0;
+
+char* xgetcwd __P((void));
+
+/****************************************************************************/
+/* Walk the file-system tree rooted at `dir_link', looking for files
+   that are eligible for scanning.
+
+   The process changes its working directory to DIR_LINK and reads
+   ".".  Every entry other than `.' and `..' is turned into a file_link
+   and handed to walk_flink, which queues sub-directories in a dynvec;
+   those are then walked recursively by walk_sub_dirs once the
+   directory stream has been closed.
+
+   Returns 0 if the directory cannot be entered or read (a diagnostic
+   is printed in the latter case).  Otherwise returns the total
+   reported by walk_sub_dirs; walk_flink returns nothing, so files
+   directly in this directory do not contribute to the count.  */
+
+int
+walk_dir (struct file_link *dir_link)
+{
+  char buf[PATH_MAX];
+  int scannable_files;
+  struct dynvec *sub_dirs_vec;
+  DIR *dirp;
+
+  if (!chdir_to_link (dir_link))
+    return 0;
+  dirp = opendir (".");
+  if (dirp == 0)
+    {
+      /* Capture errno first: the calls that build the message could
+         overwrite it.  xgetcwd hands back heap storage (or null on
+         failure), so guard the `%s' and release it afterwards.  */
+      int saved_errno = errno;
+      char *cwd = xgetcwd ();
+
+      error (0, saved_errno, _("can't read directory `%s' (`.' from `%s')"),
+             absolute_path (buf, dir_link), cwd ? cwd : "?");
+      if (cwd)
+        free (cwd);
+      return 0;
+    }
+  sub_dirs_vec = make_dynvec (32);
+  scannable_files = 0;
+  for (;;)
+    {
+      struct file_link *flink;
+      struct dirent *dirent = readdir (dirp);
+
+      if (dirent == 0)
+        break;
+      if (IS_DOT_or_DOT_DOT (dirent->d_name))
+        continue;
+
+      flink = get_link_from_dirent (dirent, dir_link);
+      walk_flink (flink, sub_dirs_vec);
+    }
+  closedir (dirp);
+
+  scannable_files += walk_sub_dirs (sub_dirs_vec);
+  dynvec_free (sub_dirs_vec);
+  return scannable_files;
+}
+
+/* Recurse into each directory queued in SUB_DIRS_VEC by calling
+   walk_dir on it, in vector order, and return the sum of the values
+   walk_dir reports.  The vector is frozen before it is traversed.  */
+
+int
+walk_sub_dirs (struct dynvec *sub_dirs_vec)
+{
+  struct file_link **cursor;
+  struct file_link **limit;
+  int sum = 0;
+
+  dynvec_freeze (sub_dirs_vec);
+  limit = (struct file_link **) &sub_dirs_vec->dv_vec[sub_dirs_vec->dv_fill];
+  cursor = (struct file_link **) sub_dirs_vec->dv_vec;
+  while (cursor < limit)
+    sum += walk_dir (*cursor++);
+  return sum;
+}
+
+/* Examine one link found while walking.  classify_link supplies the
+   link's current type flags; a zero result means the link is of no
+   interest and nothing happens.  If the link was previously recorded
+   with a different type, a notice is printed.  The type and symlink
+   bits in fl_flags are then refreshed.
+
+   A directory is walked at once when SUB_DIRS_VEC is null; otherwise
+   it is appended to SUB_DIRS_VEC for later, unless it is a symbolic
+   link.  Anything else is offered to the member-file machinery.  */
+
+void
+walk_flink (struct file_link *flink, struct dynvec *sub_dirs_vec)
+{
+  char buf[PATH_MAX];
+  struct stat st;
+  unsigned int was;
+  unsigned int now;
+
+  now = classify_link (flink, &st);
+  if (now == 0)
+    return;
+
+  was = flink->fl_flags;
+  if ((was & FL_TYPE_MASK) != 0
+      && (now & FL_TYPE_MASK) != (was & FL_TYPE_MASK))
+    error (0, 0, _("notice: `%s' was a %s, but is now a %s!"),
+           absolute_path (buf, flink),
+           (FL_IS_FILE (was) ? _("file") : _("directory")),
+           (FL_IS_FILE (now) ? _("file") : _("directory")));
+
+  flink->fl_flags = (was & ~(FL_TYPE_MASK|FL_SYM_LINK)) | now;
+
+  if (!FL_IS_DIR (now))
+    {
+      struct member_file *member;
+#if HAVE_LINK
+      member = maybe_get_member_file (flink, &st);
+#else
+      member = get_member_file (flink);
+#endif
+      if (member == 0)
+        return;
+#if 0
+      member->mf_modify_time = st.st_mtime;
+      member->mf_access_time = st.st_atime;
+      if (member->mf_old_index < 0 || st.st_mtime > idh.idh_mod_time)
+        member->mf_scan_index = 0;
+#endif
+      return;
+    }
+
+  if (sub_dirs_vec == 0)
+    walk_dir (flink);
+  else if (!(now & FL_SYM_LINK)) /* NEEDSWORK: optionally ignore? */
+    dynvec_append (sub_dirs_vec, flink);
+}
+
+/****************************************************************************/
+/* Serialize and write a file_link hierarchy. */
+
+/* Write every FL_USED file_link in IDHP's file-link table to
+   IDHP->idh_FILE.  Each record is: the NUL-terminated name, one byte
+   of flags, and FL_PARENT_INDEX_BYTES bytes giving the parent's
+   position in the output.  On return idh_file_links holds the number
+   of records written and idh_files the fill count of the member-file
+   table.  */
+void
+serialize_file_links (struct idhead *idhp)
+{
+ struct file_link **flinks_0;
+ struct file_link **flinks;
+ struct file_link **end;
+ struct file_link **parents_0;
+ struct file_link **parents;
+ unsigned long parent_index = 0;
+
+ /* Obtain the table contents ordered by file_link_qsort_compare.  The
+    loops below stop at the first unused link, and a link's parent must
+    already have been numbered when the link is written -- presumably
+    the comparison function guarantees both; TODO confirm.  */
+ flinks_0 = (struct file_link **) hash_dump (&idhp->idh_file_link_table,
+ 0, file_link_qsort_compare);
+ end = &flinks_0[idhp->idh_file_link_table.ht_fill];
+ parents = parents_0 = MALLOC (struct file_link *, idhp->idh_file_link_table.ht_fill);
+ for (flinks = flinks_0; flinks < end; flinks++)
+ {
+ struct file_link *flink = *flinks;
+ if (!(flink->fl_flags & FL_USED))
+ break;
+ io_write (idhp->idh_FILE, flink->fl_name, 0, IO_TYPE_STR);
+ io_write (idhp->idh_FILE, &flink->fl_flags, sizeof (flink->fl_flags), IO_TYPE_INT);
+ /* A root link is its own parent, so it records its own index.
+    Any other link reads the index previously stored in its
+    parent's union.  */
+ io_write (idhp->idh_FILE, (IS_ROOT_FILE_LINK (flink)
+ ? &parent_index : &flink->fl_parent->fl_index),
+ FL_PARENT_INDEX_BYTES, IO_TYPE_INT);
+ *parents++ = flink->fl_parent; /* save parent link before clobbering */
+ /* fl_index shares storage with fl_parent, so this overwrites the
+    parent pointer with the link's own output position.  */
+ flink->fl_index = parent_index++;
+ }
+ /* restore parent links */
+ for ((flinks = flinks_0), (parents = parents_0); flinks < end; flinks++)
+ {
+ struct file_link *flink = *flinks;
+ if (!(flink->fl_flags & FL_USED))
+ break;
+ flink->fl_parent = *parents++;
+ }
+ free (parents_0);
+ free (flinks_0);
+ idhp->idh_file_links = parent_index;
+ idhp->idh_files = idhp->idh_member_file_table.ht_fill;
+}
+
+/* Number the member files and flag the links they depend on.  The
+   member-file table is dumped in member_file_qsort_compare order; each
+   member receives its position in that order as mf_index.  Starting at
+   the member's own link and climbing through fl_parent, every link is
+   marked FL_USED until one that is already marked is reached.  */
+
+void
+mark_member_file_links (struct idhead *idhp)
+{
+  struct member_file **sorted
+    = (struct member_file **) hash_dump (&idhp->idh_member_file_table,
+                                         0, member_file_qsort_compare);
+  struct member_file **stop = &sorted[idhp->idh_member_file_table.ht_fill];
+  struct member_file **mp = sorted;
+  int order = 0;
+
+  while (mp < stop)
+    {
+      struct member_file *mf = *mp++;
+      struct file_link *up = mf->mf_link;
+
+      mf->mf_index = order++;
+      while (!(up->fl_flags & FL_USED))
+        {
+          up->fl_flags |= FL_USED;
+          up = up->fl_parent;
+        }
+    }
+  free (sorted);
+}
+
+/* Read and reconstruct a serialized file_link hierarchy.
+
+   Reads IDHP->idh_file_links records from IDHP->idh_FILE, starting at
+   the stream's current position.  Each record is a NUL-terminated
+   name, one byte of flags and a FL_PARENT_INDEX_BYTES-byte index of
+   the parent record.  Links are allocated on idh_file_link_obstack and
+   entered into idh_file_link_table; a record that duplicates an
+   existing table entry is discarded and the existing entry takes over
+   its flags.
+
+   Returns a freshly allocated, null-terminated vector of the links
+   that carry FL_MEMBER.  A truncated or inconsistent file is reported
+   with error () using exit status 1.  */
+
+struct file_link **
+deserialize_file_links (struct idhead *idhp)
+{
+  struct file_link **flinks_0 = MALLOC (struct file_link *, idhp->idh_file_links);
+  struct file_link **flinks = flinks_0;
+  struct file_link **members_0 = MALLOC (struct file_link *, idhp->idh_files + 1);
+  struct file_link **members = members_0;
+  struct file_link **members_end = &members_0[idhp->idh_files];
+  struct file_link *flink;
+  struct file_link **slot;
+  unsigned long i;
+
+  for (i = 0; i < idhp->idh_file_links; i++)
+    {
+      unsigned long parent_index = 0;
+      long room;
+      int c;
+
+      obstack_blank (&idhp->idh_file_link_obstack, offsetof (struct file_link, fl_name));
+      /* Use the unchecked fast grow only while the current chunk is
+         known to have space; after that fall back to the checked
+         form, so an over-long name cannot overrun the chunk.  */
+      room = obstack_room (&idhp->idh_file_link_obstack);
+      do
+        {
+          c = getc (idhp->idh_FILE);
+          /* Without this test a truncated file would make the loop
+             spin forever, since EOF never compares equal to 0.  */
+          if (c == EOF)
+            error (1, 0, _("unexpected end of file while reading file names"));
+          if (room > 0)
+            {
+              obstack_1grow_fast (&idhp->idh_file_link_obstack, c);
+              room--;
+            }
+          else
+            {
+              obstack_1grow (&idhp->idh_file_link_obstack, c);
+            }
+        }
+      while (c);
+      flink = (struct file_link *) obstack_finish (&idhp->idh_file_link_obstack);
+      *flinks = flink;
+      io_read (idhp->idh_FILE, &flink->fl_flags, sizeof (flink->fl_flags), IO_TYPE_INT);
+      io_read (idhp->idh_FILE, &parent_index, FL_PARENT_INDEX_BYTES, IO_TYPE_INT);
+      /* Only entries 0..i of flinks_0 have been filled in so far (a
+         root refers to itself); anything larger would fetch an
+         uninitialized pointer.  */
+      if (parent_index > i)
+        error (1, 0, _("corrupt ID file: bad parent index in file names"));
+      flink->fl_parent = flinks_0[parent_index];
+      slot = (struct file_link **) hash_find_slot (&idhp->idh_file_link_table, flink);
+      if (HASH_VACANT (*slot))
+        hash_insert_at (&idhp->idh_file_link_table, flink, slot);
+      else
+        {
+          /* Copy the flags out before releasing the duplicate; its
+             storage must not be read once it has been freed.  */
+          unsigned char flags = flink->fl_flags;
+
+          obstack_free (&idhp->idh_file_link_obstack, flink);
+          (*slot)->fl_flags = flags;
+          flink = *flinks = *slot;
+        }
+      flinks++;
+      if (flink->fl_flags & FL_MEMBER)
+        {
+          /* members_0 has room for idh_files entries plus the
+             terminator.  */
+          if (members >= members_end)
+            error (1, 0, _("corrupt ID file: too many member files"));
+          *members++ = flink;
+        }
+    }
+  free (flinks_0);
+  *members = 0;
+  return members_0;
+}
+
+
+#if HAVE_LINK
+
+/****************************************************************************/
+/* Obtain the member_file for FLINK, arbitrating against an alias.
+
+   get_member_file decides whether FLINK matches a scan pattern, and
+   find_alias_link (using ST's device and inode) reports a link already
+   recorded for the same file.  When both FLINK and the alias have
+   member entries:
+     - if their scan arguments differ, warn and keep both;
+     - else if the alias is farther from a symbolic-link ancestor than
+       FLINK is, drop FLINK's entry and return 0;
+     - otherwise drop the alias's entry and keep FLINK's.
+   In every other situation the result of get_member_file is returned
+   unchanged (which may be 0).  */
+
+struct member_file *
+maybe_get_member_file (struct file_link *flink, struct stat *st)
+{
+  char buf[PATH_MAX];
+  char alias_buf[PATH_MAX];
+  struct member_file *member = get_member_file (flink);
+  struct file_link *alias_link = find_alias_link (flink, st);
+  struct member_file *alias_member = 0;
+
+  if (alias_link)
+    alias_member = find_member_file (alias_link);
+
+  if (member == 0 || alias_member == 0)
+    return member;
+
+  if (member->mf_lang_args != alias_member->mf_lang_args)
+    {
+      error (0, 0, _("warning: `%s' and `%s' are the same file, but yield different scans!"),
+             absolute_path (buf, flink), absolute_path (alias_buf, alias_link));
+      return member;
+    }
+
+  if (symlink_ancestry (alias_link) > symlink_ancestry (flink))
+    {
+      hash_delete (&idh.idh_member_file_table, member);
+      member->mf_link->fl_flags &= ~FL_MEMBER;
+      return 0;
+    }
+
+  hash_delete (&idh.idh_member_file_table, alias_member);
+  alias_member->mf_link->fl_flags &= ~FL_MEMBER;
+  return member;
+}
+
+/* Look up the (device, inode) pair in ST.  If the pair is already in
+   the dev_ino table, return the link recorded for it.  If not, record
+   FLINK under that pair and return 0.  */
+
+struct file_link *
+find_alias_link (struct file_link *flink, struct stat *st)
+{
+  struct dev_ino *probe
+    = (struct dev_ino *) obstack_alloc (&idh.idh_dev_ino_obstack, sizeof (struct dev_ino));
+  struct dev_ino **slot;
+
+  probe->di_dev = st->st_dev;
+  probe->di_ino = st->st_ino;
+  slot = (struct dev_ino **) hash_find_slot (&idh.idh_dev_ino_table, probe);
+  if (!HASH_VACANT (*slot))
+    {
+      /* Already known: the probe record is not needed.  */
+      obstack_free (&idh.idh_dev_ino_obstack, probe);
+      return (*slot)->di_link;
+    }
+
+  probe->di_link = flink;
+  hash_insert_at (&idh.idh_dev_ino_table, probe, slot);
+  return 0;
+}
+
+/* Count the steps from FLINK up to the nearest link, FLINK included,
+   that has FL_SYM_LINK set: 0 if FLINK itself is one, 1 if its parent
+   is, and so on.  The root link is never examined.  If the root is
+   reached without finding one, return PATH_MAX, which stands for an
+   infinite distance.  */
+
+int
+symlink_ancestry (struct file_link *flink)
+{
+  int distance;
+
+  for (distance = 0; !IS_ROOT_FILE_LINK (flink);
+       distance++, flink = flink->fl_parent)
+    if (flink->fl_flags & FL_SYM_LINK)
+      return distance;
+  return PATH_MAX;
+}
+
+#endif /* HAVE_LINK */
+
+/* Return the member_file for FLINK, creating and registering one if
+   necessary.  Returns 0 when get_lang_args finds no applicable scan
+   arguments for FLINK.  A newly created entry gets mf_index -1 and
+   sets FL_MEMBER on FLINK.  In all non-null cases the entry's
+   mf_lang_args is (re)set to the arguments just determined.  */
+
+struct member_file *
+get_member_file (struct file_link *flink)
+{
+  char buf[PATH_MAX];
+  struct member_file *member;
+  struct member_file **slot;
+  struct lang_args const *args;
+
+  args = get_lang_args (flink);
+  if (args == 0)
+    {
+      DEBUG (("%s <IGNORE>\n", absolute_path (buf, flink)));
+      return 0;
+    }
+  DEBUG (("%s <%s> <%s>\n", absolute_path (buf, flink),
+          args->la_language->lg_name, (args->la_args_string
+                                       ? args->la_args_string : "")));
+
+  member = (struct member_file *) obstack_alloc (&idh.idh_member_file_obstack,
+                                                 sizeof (struct member_file));
+  member->mf_link = flink;
+  slot = (struct member_file **) hash_find_slot (&idh.idh_member_file_table, member);
+  if (HASH_VACANT (*slot))
+    {
+      member->mf_index = -1;
+      hash_insert_at (&idh.idh_member_file_table, member, slot);
+      flink->fl_flags |= FL_MEMBER;
+    }
+  else
+    {
+      obstack_free (&idh.idh_member_file_obstack, member);
+#if 0
+      if (member->mf_lang_args != args)
+        {
+          error (0, 0, _("notice: scan parameters changed for `%s'"),
+                 absolute_path (buf, flink));
+          member->mf_old_index = -1;
+        }
+#endif
+      member = *slot;
+    }
+  member->mf_lang_args = args;
+  /* Return the pointer we hold rather than re-reading *slot: the slot
+     address was obtained before hash_insert_at ran and need not remain
+     valid if insertion reorganizes the table (NOTE(review): confirm
+     against hash.c).  */
+  return member;
+}
+
+/* Look up, without creating, the member_file whose link is FLINK.
+   Returns the table entry, or 0 if there is none.  */
+
+struct member_file *
+find_member_file (struct file_link const *flink)
+{
+  struct member_file probe;
+  struct member_file **slot;
+
+  probe.mf_link = (struct file_link *) flink;
+  slot = (struct member_file **) hash_find_slot (&idh.idh_member_file_table, &probe);
+  return (HASH_VACANT (*slot) ? 0 : *slot);
+}
+
+/* Find the scan arguments that apply to FLINK.  The entries of
+   lang_args_list are tried in order.  A pattern that contains the
+   path separator is matched against FLINK's absolute path (with
+   FNM_FILE_NAME); any other pattern is matched against the final
+   component only.  The first entry that matches decides the outcome:
+   it is returned if it names a language, otherwise 0 is returned.  If
+   nothing matches, lang_args_default is subjected to the same test.  */
+
+struct lang_args *
+get_lang_args (struct file_link const *flink)
+{
+  struct lang_args *la;
+
+  for (la = lang_args_list; la; la = la->la_next)
+    {
+      int mismatch;
+
+      if (strchr (la->la_pattern, SLASH_CHAR) == 0)
+        mismatch = fnmatch (la->la_pattern, flink->fl_name, MAYBE_FNM_CASEFOLD);
+      else
+        {
+          char buf[PATH_MAX];
+
+          absolute_path (buf, flink);
+          mismatch = fnmatch (la->la_pattern, buf, MAYBE_FNM_CASEFOLD | FNM_FILE_NAME);
+        }
+      if (mismatch == 0)
+        return (la->la_language ? la : 0);
+    }
+  return (lang_args_default->la_language ? lang_args_default : 0);
+}
+
+/****************************************************************************/
+/* Translate FILE_NAME into a chain of file_links and return the link
+   for its last component.  An absolute name starts from the root link
+   (or from nothing on systems without HAVE_LINK); a relative name
+   starts from RELATIVE_DIR_LINK if given, else from the cached or
+   freshly computed current directory.  `.' components are skipped and
+   `..' steps to the parent.  Links whose flags are still zero are
+   classified.  FILE_NAME is split in place with strtok and is
+   therefore modified.  */
+
+struct file_link *
+parse_file_name (char *file_name, struct file_link *relative_dir_link)
+{
+  struct file_link *flink;
+  char const *link_name;
+
+  if (!IS_ABSOLUTE (file_name))
+    {
+      if (relative_dir_link)
+        flink = relative_dir_link;
+      else if (current_dir_link)
+        flink = current_dir_link;
+      else
+        flink = get_current_dir_link ();
+    }
+  else
+    {
+#if HAVE_LINK
+      flink = get_link_from_string (SLASH_STRING, 0);
+#else
+      flink = 0;
+#endif
+    }
+
+  for (link_name = strtok (file_name, SLASH_STRING);
+       link_name != 0;
+       link_name = strtok (0, SLASH_STRING))
+    {
+      if (*link_name == '\0' || IS_DOT (link_name))
+        continue;
+      if (IS_DOT_DOT (link_name))
+        {
+          flink = flink->fl_parent;
+          continue;
+        }
+      flink = get_link_from_string (link_name, flink);
+      if (!flink->fl_flags)
+        {
+          struct stat st;
+
+          flink->fl_flags = classify_link (flink, &st);
+        }
+    }
+  return flink;
+}
+
+/* Return an absolute chain of `file_link's representing the current
+   working directory.
+
+   The result is cached in current_dir_link, so the work is done once.
+   The directory name is taken from $PWD when that names the same
+   directory as "." (see same_as_dot), and from xgetcwd otherwise.
+   Failure to obtain a name is fatal.  The name is split into
+   components, a link is obtained for each, and links whose flags are
+   still zero are classified.  */
+
+struct file_link *
+get_current_dir_link (void)
+{
+  struct file_link *dir_link;
+  char *cwd_0;
+  char *cwd;
+
+  if (current_dir_link)
+    return current_dir_link;
+
+  /* cwd_0 always refers to heap storage owned by this function (or is
+     null): a private copy of $PWD, or the result of xgetcwd.  strtok
+     below modifies it, which is why the environment string is copied.  */
+  cwd_0 = getenv ("PWD");
+  if (cwd_0)
+    cwd_0 = strdup (cwd_0);
+  if (!same_as_dot (cwd_0))
+    {
+      /* $PWD is unusable; discard the copy instead of leaking it.  */
+      if (cwd_0)
+        free (cwd_0);
+      cwd_0 = xgetcwd ();
+    }
+  if (cwd_0 == 0)
+    error (1, errno, _("can't get working directory"));
+  cwd = cwd_0;
+#if HAVE_LINK
+  dir_link = get_link_from_string (SLASH_STRING, 0);
+  dir_link->fl_flags = (dir_link->fl_flags & ~FL_TYPE_MASK) | FL_TYPE_DIR;
+#else
+  dir_link = 0;
+#endif
+  for (;;)
+    {
+      struct stat st;
+      char const* link_name = strtok (cwd, SLASH_STRING);
+      if (link_name == 0)
+        break;
+      cwd = 0;
+      dir_link = get_link_from_string (link_name, dir_link);
+      if (!dir_link->fl_flags)
+        dir_link->fl_flags = classify_link (dir_link, &st);
+    }
+  chdir_to_link (dir_link);
+  /* Release the name whichever source it came from.  */
+  free (cwd_0);
+  current_dir_link = dir_link;
+  return dir_link;
+}
+
+/* Return nonzero if CWD is a non-null name beginning with `/' that
+   refers to the same directory as "." (same inode and same device).
+   Return 0 otherwise, including when either name cannot be stat'ed.  */
+
+static int
+same_as_dot (char const *cwd)
+{
+  struct stat cwd_st;
+  struct stat dot_st;
+
+  if (cwd == 0)
+    return 0;
+  if (*cwd != '/')
+    return 0;
+  if (stat (cwd, &cwd_st) < 0)
+    return 0;
+  if (stat (".", &dot_st) < 0)
+    return 0;
+  return ((cwd_st.st_ino == dot_st.st_ino) && (cwd_st.st_dev == dot_st.st_dev));
+}
+
+/* Make the directory represented by DIR_LINK the working directory.
+   Nothing is done if it already is, according to the cached
+   current_dir_link.  The path handed to chdir is computed relative to
+   the cached directory when there is one, and is absolute otherwise.
+   Returns 1 on success (updating the cache) and 0 on failure (after
+   printing a diagnostic).  */
+
+int
+chdir_to_link (struct file_link *dir_link)
+{
+  char to_buf[PATH_MAX];
+  char from_buf[PATH_MAX];
+
+  if (dir_link == current_dir_link)
+    return 1;
+
+  if (current_dir_link == 0)
+    absolute_path (to_buf, dir_link);
+  else
+    maybe_relative_path (to_buf, dir_link, current_dir_link);
+
+  if (chdir (to_buf) >= 0)
+    {
+      current_dir_link = dir_link;
+      return 1;
+    }
+
+  if (IS_ABSOLUTE (to_buf))
+    error (0, errno, _("can't chdir to `%s'"), to_buf);
+  else
+    error (0, errno, _("can't chdir to `%s' from `%s'"), to_buf,
+           absolute_path (from_buf, current_dir_link));
+  return 0;
+}
+
+/****************************************************************************/
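+/* Gather information about the link at `flink'.  If it's a good file
+   or directory, fill in `st' and return its type flags (FL_TYPE_DIR or
+   FL_TYPE_FILE, plus FL_SYM_LINK when it is a symbolic link);
+   otherwise return 0. */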
+
+/* Classify FLINK by stat'ing its name from within its parent
+   directory.  On success *ST describes the file and the return value
+   holds FL_TYPE_DIR or FL_TYPE_FILE, with FL_SYM_LINK added when the
+   name itself is a symbolic link.  Returns 0 if the parent directory
+   cannot be entered, if the stat calls fail (a diagnostic is printed),
+   or if the file is neither a directory nor a regular file.  */
+int
+classify_link (struct file_link *flink, struct stat *st)
+{
+ char buf[PATH_MAX];
+ unsigned int flags = 0;
+
+ /* fl_name is a single component, so it must be looked up relative to
+    the parent directory.  */
+ if (!chdir_to_link (flink->fl_parent))
+ return 0;
+
+ /* Where symbolic links exist: lstat first, and only if the name is a
+    symlink follow it with stat (the brace opened here is closed in the
+    matching #ifdef below).  Elsewhere: a plain stat.  */
+#ifdef S_IFLNK
+ if (lstat (flink->fl_name, st) < 0)
+ {
+ /* NOTE(review): the string returned by xgetcwd is not released here;
+    another caller in this file frees it, so this looks like a leak on
+    the error path -- confirm.  */
+ error (0, errno, _("can't lstat `%s' from `%s'"), flink->fl_name, xgetcwd ());
+ return 0;
+ }
+ if (S_ISLNK (st->st_mode))
+ {
+#endif
+ if (stat (flink->fl_name, st) < 0)
+ {
+ error (0, errno, _("can't stat `%s' from `%s'"), flink->fl_name, xgetcwd ());
+ return 0;
+ }
+#ifdef S_IFLNK
+ flags |= FL_SYM_LINK;
+ }
+#endif
+ if (S_ISDIR (st->st_mode))
+ flags |= FL_TYPE_DIR;
+ else if (S_ISREG (st->st_mode))
+ flags |= FL_TYPE_FILE;
+ else
+ return 0;
+ return flags;
+}
+
+/****************************************************************************/
+/* Retrieve an existing flink; or if none exists, create one. */
+
+struct file_link *
+get_link_from_dirent (struct dirent *dirent, struct file_link *parent)
+{
+  /* A lookup needs a fully formed link as its key, so build a
+     candidate first and decide afterwards whether to keep it. */
+  struct file_link *candidate = make_link_from_dirent (dirent, parent);
+  struct file_link **slot
+    = (struct file_link **) hash_find_slot (&idh.idh_file_link_table, candidate);
+
+  if (HASH_VACANT (*slot))
+    {
+      /* Not seen before: the candidate goes into the table. */
+      hash_insert_at (&idh.idh_file_link_table, candidate, slot);
+    }
+  else
+    {
+      /* Already present: give the candidate back to the obstack. */
+      obstack_free (&idh.idh_file_link_obstack, candidate);
+    }
+  return *slot;
+}
+
+struct file_link *
+get_link_from_string (char const *name, struct file_link *parent)
+{
+  /* A lookup needs a fully formed link as its key, so build a
+     candidate first and decide afterwards whether to keep it. */
+  struct file_link *candidate = make_link_from_string (name, parent);
+  struct file_link **slot
+    = (struct file_link **) hash_find_slot (&idh.idh_file_link_table, candidate);
+
+  if (HASH_VACANT (*slot))
+    {
+      /* Not seen before: the candidate goes into the table. */
+      hash_insert_at (&idh.idh_file_link_table, candidate, slot);
+    }
+  else
+    {
+      /* Already present: give the candidate back to the obstack. */
+      obstack_free (&idh.idh_file_link_obstack, candidate);
+    }
+  return *slot;
+}
+
+/* Allocate a fresh, unclassified link on the file-link obstack, named
+   after DIRENT's d_name.  A null PARENT makes the link its own
+   parent.  The link is not entered into the hash table. */
+
+struct file_link *
+make_link_from_dirent (struct dirent* dirent, struct file_link *parent)
+{
+  /* NOTE(review): the size presumably relies on struct file_link
+     already holding one byte of fl_name for the terminating NUL --
+     confirm against the struct declaration. */
+  size_t size = sizeof (struct file_link) + strlen (dirent->d_name);
+  struct file_link *flink
+    = (struct file_link *) obstack_alloc (&idh.idh_file_link_obstack, size);
+
+  strcpy (flink->fl_name, dirent->d_name);
+  flink->fl_flags = 0;
+  if (parent)
+    flink->fl_parent = parent;
+  else
+    flink->fl_parent = flink;
+
+  return flink;
+}
+
+/* Allocate a fresh, unclassified link on the file-link obstack, named
+   NAME.  A null PARENT makes the link its own parent.  The link is
+   not entered into the hash table. */
+
+struct file_link *
+make_link_from_string (char const* name, struct file_link *parent)
+{
+  /* NOTE(review): the size presumably relies on struct file_link
+     already holding one byte of fl_name for the terminating NUL --
+     confirm against the struct declaration. */
+  size_t size = sizeof (struct file_link) + strlen (name);
+  struct file_link *flink
+    = (struct file_link *) obstack_alloc (&idh.idh_file_link_obstack, size);
+
+  strcpy (flink->fl_name, name);
+  flink->fl_flags = 0;
+  if (parent)
+    flink->fl_parent = parent;
+  else
+    flink->fl_parent = flink;
+
+  return flink;
+}
+
+/****************************************************************************/
+/* Convert a `file_link' chain to a vector of component `file_link's,
+ with the root at [0]. Return a pointer beyond the final component. */
+
+struct file_link const **
+fill_link_vector (struct file_link const **vec_buf, struct file_link const *flink)
+{
+  /* Store the components, then terminate the vector with a null
+     entry; the returned pointer addresses that terminator. */
+  struct file_link const **end = fill_link_vector_1 (vec_buf, flink);
+
+  *end = 0;
+  return end;
+}
+
+/* Store FLINK and all of its ancestors into VEC_BUF, root first, and
+   return a pointer just past the last entry written.  No terminator
+   is stored. */
+
+struct file_link const **
+fill_link_vector_1 (struct file_link const **vec_buf, struct file_link const *flink)
+{
+  struct file_link const *cursor;
+  struct file_link const **end;
+  struct file_link const **slot;
+  int count = 1;
+
+  /* First pass: count the components from FLINK up to its root. */
+  for (cursor = flink; !IS_ROOT_FILE_LINK (cursor); cursor = cursor->fl_parent)
+    count++;
+
+  /* Second pass: fill backwards so that the root lands at [0]. */
+  end = vec_buf + count;
+  slot = end;
+  for (cursor = flink; ; cursor = cursor->fl_parent)
+    {
+      *--slot = cursor;
+      if (IS_ROOT_FILE_LINK (cursor))
+        break;
+    }
+  return end;
+}
+
+/****************************************************************************/
+/* Fill BUF_0 with a path to TO_LINK. If a relative path from
+ FROM_LINK is possible (i.e., no intervening symbolic-links) and
+ shorter, return the relative path; otherwise, return an absolute
+ path. */
+
+/* BUF_0 receives the chosen path and is what every return path hands
+   back.  A null FROM_LINK means "relative to current_dir_link".  The
+   result is absolute whenever FROM_LINK or one of its components
+   below the common ancestor is a symbolic link (".." would then not
+   lead back up the recorded chain), or when climbing would take at
+   least as many levels as the common prefix is deep. */
+
+char const *
+maybe_relative_path (char *buf_0, struct file_link const *to_link, struct file_link const *from_link)
+{
+  struct file_link const *to_link_vec_0[PATH_MAX/2];
+  struct file_link const *from_link_vec_0[PATH_MAX/2];
+  struct file_link const **to_link_vec = to_link_vec_0;
+  struct file_link const **from_link_vec = from_link_vec_0;
+  struct file_link const **from_link_end;
+  struct file_link const **from_links;
+  char *buf;
+  int levels;
+
+  if (from_link == 0)
+    from_link = current_dir_link;
+
+  /* Optimize common cases. */
+  if (to_link == from_link)
+    return strcpy (buf_0, ".");
+  else if (to_link->fl_parent == from_link)
+    return strcpy (buf_0, to_link->fl_name);
+  else if (from_link->fl_flags & FL_SYM_LINK)
+    return absolute_path (buf_0, to_link);
+  else if (to_link == from_link->fl_parent)
+    return strcpy (buf_0, "..");
+  else if (to_link->fl_parent == from_link->fl_parent)
+    {
+      /* Sibling: "../" followed by the name. */
+      strcpy (buf_0, DOT_DOT_SLASH);
+      strcpy (buf_0 + strlen (buf_0), to_link->fl_name);
+      return buf_0;
+    }
+
+  /* General case: expand both chains root-first and skip the
+     components they share. */
+  from_link_end = fill_link_vector (from_link_vec, from_link);
+  fill_link_vector (to_link_vec, to_link);
+  while (*to_link_vec == *from_link_vec)
+    {
+      if (*to_link_vec == 0)
+        /* Both chains ended together: same directory.  Fill BUF_0
+           rather than returning a literal, because callers may read
+           the buffer and ignore the return value. */
+        return strcpy (buf_0, ".");
+      to_link_vec++;
+      from_link_vec++;
+    }
+  /* LEVELS is how many directories must be climbed from FROM_LINK to
+     reach the common ancestor. */
+  levels = from_link_end - from_link_vec;
+  if (levels >= (from_link_vec - from_link_vec_0))
+    return absolute_path (buf_0, to_link);
+  for (from_links = from_link_vec; *from_links; from_links++)
+    if ((*from_links)->fl_flags & FL_SYM_LINK)
+      return absolute_path (buf_0, to_link);
+  buf = fill_dot_dots (buf_0, levels);
+  while (*to_link_vec)
+    {
+      char const *name = (*to_link_vec)->fl_name;
+
+      strcpy (buf, name);
+      buf += strlen (buf);
+      /* Always advance, so that a component whose name begins with a
+         slash cannot stall the loop; such a component supplies its
+         own separator. */
+      to_link_vec++;
+      if (name[0] != SLASH_CHAR && *to_link_vec)
+        *buf++ = SLASH_CHAR;
+    }
+  return buf_0;
+}
+
+/* Fill `buf' with sequences of "../" in order to ascend so many
+ `levels' in a directory tree. */
+
+char *
+fill_dot_dots (char *buf, int levels)
+{
+  /* Each pass writes one NUL-terminated DOT_DOT_SLASH and steps over
+     its three characters, so the next pass overwrites the NUL.  The
+     returned pointer addresses the position after the last one. */
+  for (; levels != 0; levels--)
+    {
+      strcpy (buf, DOT_DOT_SLASH);
+      buf += 3;
+    }
+  return buf;
+}
+
+/****************************************************************************/
+/* Fill `buffer' with the absolute path to `flink'. */
+
+char *
+absolute_path (char *buffer, struct file_link const *flink)
+{
+  /* Clip the trailing slash left by absolute_path_1, unless the path
+     is no longer than a bare root: one character with HAVE_LINK,
+     three without (presumably a drive-letter root -- confirm). */
+#if HAVE_LINK
+  char *const shortest_end = &buffer[1];
+#else
+  char *const shortest_end = &buffer[3];
+#endif
+  char *end = absolute_path_1 (buffer, flink);
+
+  if (end > shortest_end)
+    end--;
+  *end = '\0';
+  return buffer;
+}
+
+/* Write the path of FLINK, root first, into BUFFER, with a slash
+   after every component.  Return a pointer just past the final
+   slash.  The result is not guaranteed to be NUL-terminated at that
+   point. */
+
+static char *
+absolute_path_1 (char *buffer, struct file_link const *flink)
+{
+  char *end = buffer;
+
+  /* Emit all ancestors first; a root starts at the buffer's head. */
+  if (!IS_ROOT_FILE_LINK (flink))
+    end = absolute_path_1 (buffer, flink->fl_parent);
+
+  strcpy (end, flink->fl_name);
+  if (*end != SLASH_CHAR)
+    {
+      /* Ordinary component: keep the whole name and add a slash. */
+      end += strlen (end);
+      *end = SLASH_CHAR;
+    }
+  /* Otherwise the name begins with a slash, and only that slash is
+     kept; in both cases the result ends just after a slash. */
+  return end + 1;
+}
+
+/****************************************************************************/
+/* Hash stuff for `struct member_file'. */
+
+/* Primary hash of a member_file: hash the address of its mf_link. */
+unsigned long
+member_file_hash_1 (void const *key)
+{
+  struct member_file const *member = (struct member_file const *) key;
+
+  return_ADDRESS_HASH_1 (member->mf_link);
+}
+
+/* Secondary hash of a member_file: hash the address of its mf_link. */
+unsigned long
+member_file_hash_2 (void const *key)
+{
+  struct member_file const *member = (struct member_file const *) key;
+
+  return_ADDRESS_HASH_2 (member->mf_link);
+}
+
+/* Two member_files are the same key when their mf_link addresses
+   compare equal. */
+int
+member_file_hash_compare (void const *x, void const *y)
+{
+  struct member_file const *member_x = (struct member_file const *) x;
+  struct member_file const *member_y = (struct member_file const *) y;
+
+  return_ADDRESS_COMPARE (member_x->mf_link, member_y->mf_link);
+}
+
+/* Collating sequence:
+ - language.map index
+ - mf_link: breadth-first, alphabetical */
+
+int
+member_file_qsort_compare (void const *x, void const *y)
+{
+  struct member_file const *left = *(struct member_file const **) x;
+  struct member_file const *right = *(struct member_file const **) y;
+  struct file_link const *left_link;
+  struct file_link const *right_link;
+  int order;
+
+  /* First key: position of the language in language.map. */
+  INTEGER_COMPARE (left->mf_lang_args->la_index, right->mf_lang_args->la_index, order);
+  if (order)
+    return order;
+
+  left_link = left->mf_link;
+  right_link = right->mf_link;
+  if (left_link->fl_parent != right_link->fl_parent)
+    {
+      /* Different directories: shallower links sort first. */
+      order = links_depth (left_link) - links_depth (right_link);
+      if (order)
+        return order;
+      /* Same depth: climb both chains in step until they meet under
+         a common parent. */
+      while (left_link->fl_parent != right_link->fl_parent)
+        {
+          left_link = left_link->fl_parent;
+          right_link = right_link->fl_parent;
+        }
+    }
+  /* Under a common parent the names decide. */
+  return strcmp (left_link->fl_name, right_link->fl_name);
+}
+
+/****************************************************************************/
+/* Hash stuff for `struct file_link'. */
+
+/* Primary hash of a file_link: combine its name with the address of
+   its parent.  A root link hashes with a null parent instead of with
+   its own fl_parent. */
+unsigned long
+file_link_hash_1 (void const *key)
+{
+  struct file_link const *flink = (struct file_link const *) key;
+  struct file_link const *parent = flink->fl_parent;
+  unsigned long result = 0;
+
+  if (IS_ROOT_FILE_LINK (flink))
+    parent = 0;
+  STRING_HASH_1 (flink->fl_name, result);
+  ADDRESS_HASH_1 (parent, result);
+  return result;
+}
+
+/* Secondary hash of a file_link: combine its name with the address
+   of its parent.  A root link hashes with a null parent instead of
+   with its own fl_parent. */
+unsigned long
+file_link_hash_2 (void const *key)
+{
+  struct file_link const *flink = (struct file_link const *) key;
+  struct file_link const *parent = flink->fl_parent;
+  unsigned long result = 0;
+
+  if (IS_ROOT_FILE_LINK (flink))
+    parent = 0;
+  STRING_HASH_2 (flink->fl_name, result);
+  ADDRESS_HASH_2 (parent, result);
+  return result;
+}
+
+/* Order file_links by parent address first, then by name.  Root
+   links compare as if their parent were null. */
+int
+file_link_hash_compare (void const *x, void const *y)
+{
+  struct file_link const *link_x = (struct file_link const *) x;
+  struct file_link const *link_y = (struct file_link const *) y;
+  struct file_link const *x_parent = link_x->fl_parent;
+  struct file_link const *y_parent = link_y->fl_parent;
+  int result;
+
+  if (IS_ROOT_FILE_LINK (link_x))
+    x_parent = 0;
+  if (IS_ROOT_FILE_LINK (link_y))
+    y_parent = 0;
+
+  ADDRESS_COMPARE (x_parent, y_parent, result);
+  if (!result)
+    {
+      STRING_COMPARE (link_x->fl_name, link_y->fl_name, result);
+    }
+  return result;
+}
+
+/* Collation sequence:
+ - Used before unused.
+ - Among used: breadth-first (dirs before files, parent dirs before children)
+ - Among files: collate by mf_index. */
+
+int
+file_link_qsort_compare (void const *x, void const *y)
+{
+  struct file_link const *flx = *(struct file_link const **) x;
+  struct file_link const *fly = *(struct file_link const **) y;
+  unsigned int x_flags = flx->fl_flags;
+  unsigned int y_flags = fly->fl_flags;
+  struct member_file *x_member;
+  struct member_file *y_member;
+  int result;
+
+  /* Used links sort ahead of unused ones. */
+  result = (y_flags & FL_USED) - (x_flags & FL_USED);
+  if (result)
+    return result;
+  /* The FL_USED bits are equal here, so if X is unused, both are;
+     their relative order is of no interest. */
+  if (!(x_flags & FL_USED))
+    return 0;
+
+  /* Directories sort ahead of everything else ... */
+  result = (y_flags & FL_TYPE_DIR) - (x_flags & FL_TYPE_DIR);
+  if (result)
+    return result;
+  /* ... then by the remaining type bits. */
+  result = (y_flags & FL_TYPE_MASK) - (x_flags & FL_TYPE_MASK);
+  if (result)
+    return result;
+
+  /* Same type from here on: non-files order by depth, so parents
+     precede their children. */
+  if (!FL_IS_FILE (x_flags))
+    return links_depth (flx) - links_depth (fly);
+
+  /* Files order by the index of their member_file. */
+  x_member = find_member_file (flx);
+  y_member = find_member_file (fly);
+  return x_member->mf_index - y_member->mf_index;
+}
+
+/* Count directory components between flink and its root. */
+
+int
+links_depth (struct file_link const *flink)
+{
+  int depth;
+
+  /* Step towards the root, counting one level per step; a root link
+     itself has depth 0. */
+  for (depth = 0; !IS_ROOT_FILE_LINK (flink); flink = flink->fl_parent)
+    depth++;
+  return depth;
+}
+
+#if HAVE_LINK
+
+/****************************************************************************/
+/* Hash stuff for `struct dev_ino'. */
+
+/* Primary hash of a dev_ino: fold in the device number, then the
+   inode number. */
+unsigned long
+dev_ino_hash_1 (void const *key)
+{
+  struct dev_ino const *dev_ino = (struct dev_ino const *) key;
+  unsigned long result = 0;
+
+  INTEGER_HASH_1 (dev_ino->di_dev, result);
+  INTEGER_HASH_1 (dev_ino->di_ino, result);
+  return result;
+}
+
+/* Secondary hash of a dev_ino: fold in the device number, then the
+   inode number. */
+unsigned long
+dev_ino_hash_2 (void const *key)
+{
+  struct dev_ino const *dev_ino = (struct dev_ino const *) key;
+  unsigned long result = 0;
+
+  INTEGER_HASH_2 (dev_ino->di_dev, result);
+  INTEGER_HASH_2 (dev_ino->di_ino, result);
+  return result;
+}
+
+/* Order dev_inos by inode number first; the device number breaks
+   ties. */
+int
+dev_ino_hash_compare (void const *x, void const *y)
+{
+  struct dev_ino const *left = (struct dev_ino const *) x;
+  struct dev_ino const *right = (struct dev_ino const *) y;
+  int result;
+
+  INTEGER_COMPARE (left->di_ino, right->di_ino, result);
+  if (!result)
+    {
+      INTEGER_COMPARE (left->di_dev, right->di_dev, result);
+    }
+  return result;
+}
+
+#endif
+
+/*******************************************************************/
+
+/* Initialize the obstacks embedded in IDHP: one for member files, one
+   for file links and, when HAVE_LINK is set, one for dev_ino
+   records. */
+void
+init_idh_obstacks (struct idhead *idhp)
+{
+ obstack_init (&idhp->idh_member_file_obstack);
+ obstack_init (&idhp->idh_file_link_obstack);
+#if HAVE_LINK
+ obstack_init (&idhp->idh_dev_ino_obstack);
+#endif
+}
+
+/* Initialize the hash tables embedded in IDHP, giving each one its
+   pair of hash functions and its comparison function.  The second
+   argument, 16*1024, is presumably the initial table size -- confirm
+   against hash_init.  The dev_ino table exists only when HAVE_LINK is
+   set. */
+void
+init_idh_tables (struct idhead *idhp)
+{
+ hash_init (&idhp->idh_member_file_table, 16*1024,
+ member_file_hash_1, member_file_hash_2, member_file_hash_compare);
+ hash_init (&idhp->idh_file_link_table, 16*1024,
+ file_link_hash_1, file_link_hash_2, file_link_hash_compare);
+#if HAVE_LINK
+ hash_init (&idhp->idh_dev_ino_table, 16*1024,
+ dev_ino_hash_1, dev_ino_hash_2, dev_ino_hash_compare);
+#endif
+}
+
+
+#if TEST_IDWALK
+/*******************************************************************/
+/* Test program. */
+
+char const *program_name;
+struct idhead idh;
+
+void print_member_file __P((void const *item));
+
+/* Print the path of the member_file ITEM on stdout, relative to the
+   current directory link when maybe_relative_path allows it.  The
+   disabled variant would also print the access and modify times. */
+void
+print_member_file (void const *item)
+{
+  struct member_file const *member = (struct member_file const *) item;
+  char buf[PATH_MAX];
+
+#if 1
+  printf ("%s\n", maybe_relative_path (buf, member->mf_link, 0));
+#else
+  printf ("%ld %ld %s\n", member->mf_access_time, member->mf_modify_time,
+          maybe_relative_path (buf, member->mf_link, 0));
+#endif
+}
+
+void reset_walker (struct idhead *idhp);
+void print_hash_stats (FILE *stream, struct idhead *idhp);
+
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+/* Call hash_delete_items on each of IDHP's tables (presumably this
+   empties them -- confirm against hash.c).  The obstacks are not
+   touched here. */
+void
+reset_walker (struct idhead *idhp)
+{
+ hash_delete_items (&idhp->idh_member_file_table);
+ hash_delete_items (&idhp->idh_file_link_table);
+#if HAVE_LINK
+ hash_delete_items (&idhp->idh_dev_ino_table);
+#endif
+}
+
+/* Write one labelled block of hash statistics per table of IDHP to
+   STREAM, then a closing newline. */
+void
+print_hash_stats (FILE *stream, struct idhead *idhp)
+{
+  fprintf (stream, _("Link Table: "));
+  hash_print_stats (&idhp->idh_file_link_table, stream);
+
+  fprintf (stream, _("\nFile Table: "));
+  hash_print_stats (&idhp->idh_member_file_table, stream);
+
+#if HAVE_LINK
+  fprintf (stream, _("\nDupl Table: "));
+  hash_print_stats (&idhp->idh_dev_ino_table, stream);
+#endif
+
+  fputc ('\n', stream);
+}
+
+/* Test driver: walk every file name given on the command line,
+   starting from the current directory, then change back to it. */
+int
+main (int argc, char **argv)
+{
+  struct file_link *cwd_link;
+
+  /* Consume argv[0]; what remains are the names to walk. */
+  program_name = argv[0];
+  argv++;
+  argc--;
+
+  init_idh_obstacks (&idh);
+  init_idh_tables (&idh);
+
+  parse_language_map (0);
+  cwd_link = get_current_dir_link ();
+  for (; argc != 0; argc--)
+    {
+      walk_flink (parse_file_name (*argv, cwd_link), 0);
+      argv++;
+    }
+
+  chdir_to_link (cwd_link);
+
+#if 0
+  idh.idh_file_name = "idwalk.serial";
+  idh.idh_FILE = fopen (idh.idh_file_name, "w+");
+  if (idh.idh_FILE == 0)
+    error (1, errno, _("can't open `%s' for writing"), idh.idh_file_name);
+
+  printf (">>>>>>>>>>>>>>>> Serialize <<<<<<<<<<<<<<<<\n");
+  hash_map (&idh.idh_member_file_table, print_member_file);
+  printf (">>>>>>>>>>>>>>>> Serialize Stats <<<<<<<<<<<<<<<<\n");
+  print_hash_stats (stdout, &idh);
+
+  serialize_file_links (&idh);
+  reset_walker (&idh);
+  deserialize_file_links (&idh);
+
+  printf (">>>>>>>>>>>>>>>> Deserialize <<<<<<<<<<<<<<<<\n");
+  hash_map (&idh.idh_member_file_table, print_member_file);
+  printf (">>>>>>>>>>>>>>>> Deserialize Stats <<<<<<<<<<<<<<<<\n");
+  print_hash_stats (stdout, &idh);
+
+  printf (">>>>>>>>>>>>>>>> End <<<<<<<<<<<<<<<<\n");
+  fclose (idh.idh_FILE);
+#endif
+  return 0;
+}
+
+#endif
+
+/*
+ TODO:
+ - stream I/O
+ */
diff --git a/lib/language.map b/lib/language.map
new file mode 100644
index 0000000..e230cb6
--- /dev/null
+++ b/lib/language.map
@@ -0,0 +1,88 @@
+# Welcome to the mkid language mapper.
+#
+# The format of each line is:
+#
+# <pattern> <language> [options]
+#
+# Filenames are matched top-to-bottom against the patterns, and the
+# first match is chosen. The special language `IGNORE' means that
+# this file should be ignored by mkid. The options are
+# language-specific command-line options to mkid.
+#
+# If a file name doesn't match any pattern, it is assigned the default
+# language. The default language may be specified here with the
+# special pattern `**', or overridden from the mkid command-line with
+# the `--default-lang=LANG' option.
+#
+# The special pattern `***' means to include the named file that
+# immediately follows. If no file is named, then the default system
+# language mapper file (i.e., this file) is included.
+
+# Default language
+** IGNORE # Although this is listed first,
+ # the default language pattern is
+ # logically matched last.
+
+# Backup files
+*~ IGNORE
+*.bak IGNORE
+*.bk[0-9] IGNORE
+
+# SCCS files
+[sp].* IGNORE
+
+# C dependencies created by automake
+*/.deps/* IGNORE
+
+*.h C
+*.h.in C
+*.H C++
+*.hh C++
+*.hpp C++
+*.hxx C++
+
+*.l C
+*.lex C
+*.y C
+*.yacc C
+
+*.c C
+*.C C++
+*.cc C++
+*.cpp C++
+*.cxx C++
+
+ChangeLog* Cdoc
+
+*.[sS] asm --comment=;
+*.asm asm --comment=;
+
+# [nt]roff
+*.[0-9] roff
+*.ms roff
+*.me roff
+*.mm roff
+
+*.tex TeX
+*.ltx TeX
+*.texi texinfo
+*.texinfo texinfo
+
+# portable object (i18n)
+*.po po
+
+*.el elisp
+
+*.am make
+Makefile make
+Makefile.* make
+
+*.doc text
+*.txt text
+
+*.m4 m4
+
+*.pl perl
+
+*.gz FILTER gzip -d <%s
+*.Z FILTER gzip -d <%s
diff --git a/lib/misc.c b/lib/misc.c
new file mode 100644
index 0000000..8b7849a
--- /dev/null
+++ b/lib/misc.c
@@ -0,0 +1,72 @@
+/* misc.c -- miscellaneous common functions
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <config.h>
+#include "system.h"
+#include "strxtra.h"
+#include "misc.h"
+
+/* Return the number of 3-bit groups needed to represent
+   CARDINALITY - 1, with a minimum of 1: 1 for cardinalities 1..8,
+   2 for 9..64, 3 for 65..512, and so on.  The subtraction is
+   unsigned, so a CARDINALITY of 0 wraps around to UINT_MAX. */
+int
+tree8_count_levels (unsigned int cardinality)
+{
+  unsigned int remaining;
+  int depth = 1;
+
+  for (remaining = (cardinality - 1) >> 3; remaining != 0; remaining >>= 3)
+    depth++;
+  return depth;
+}
+
+/* Copy bytes from INPUT_FILE into TOK up to and including the first
+   pair of consecutive terminators, where a terminator is a NUL byte
+   or EOF (any getc result that is not greater than 0).  Every value
+   read is stored, terminators included.  Return the number of bytes
+   stored minus 2.  TOK is not bounds-checked; the caller must supply
+   a large enough buffer. */
+int
+gets_past_00 (char *tok, FILE *input_FILE)
+{
+  int got = 0;
+  int c;
+
+  for (;;)
+    {
+      /* Copy one string, including its terminator. */
+      do
+        {
+          c = getc (input_FILE);
+          *tok++ = c;
+          got++;
+        }
+      while (c > 0);
+
+      /* A second terminator right behind it ends the record; any
+         other byte begins the next string. */
+      c = getc (input_FILE);
+      *tok++ = c;
+      got++;
+      if (c <= 0)
+        break;
+    }
+  return got - 2;
+}
+
+/* Read and discard bytes from INPUT_FILE up to and including the
+   first pair of consecutive terminators, where a terminator is a NUL
+   byte or EOF (any getc result that is not greater than 0).  Return
+   the number of getc calls made, counting both terminators. */
+int
+skip_past_00 (FILE *input_FILE)
+{
+  int skipped = 0;
+  int c;
+
+  do
+    {
+      /* Discard one string, including its terminator. */
+      do
+        {
+          skipped++;
+          c = getc (input_FILE);
+        }
+      while (c > 0);
+
+      /* Look at the byte that follows the terminator. */
+      skipped++;
+      c = getc (input_FILE);
+    }
+  while (c > 0);
+  return skipped;
+}
diff --git a/lib/misc.h b/lib/misc.h
new file mode 100644
index 0000000..4f8d707
--- /dev/null
+++ b/lib/misc.h
@@ -0,0 +1,37 @@
+/* misc.h -- defs for interface to misc.c
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _misc_h_
+#define _misc_h_
+
+#if HAVE_BASENAME
+char *basename ();
+#else
+char *basename __P((char const *path));
+#endif
+
+#if HAVE_DIRNAME
+char *dirname ();
+#else
+char *dirname __P((char const *path));
+#endif
+
+int tree8_count_levels __P((unsigned int cardinality));
+int gets_past_00 __P((char *tok, FILE *input_FILE));
+int skip_past_00 __P((FILE *input_FILE));
+
+#endif /* not _misc_h_ */
diff --git a/lib/obstack.c b/lib/obstack.c
new file mode 100644
index 0000000..f0df0d7
--- /dev/null
+++ b/lib/obstack.c
@@ -0,0 +1,493 @@
+/* obstack.c - subroutines used implicitly by object stack macros
+ Copyright (C) 1988, 89, 90, 91, 92, 93, 94 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "obstack.h"
+
+/* This is just to get __GNU_LIBRARY__ defined. */
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+/* CYGNUS LOCAL. No, don't comment the code out. We will be using
+ ../include/obstack.h, which was changed relatively recently in a
+ way that is not binary compatible. Until we feel confident that
+ nobody is using the old obstack.c code, force the use of this code.
+ This issue will arise anytime a change is made which is not binary
+ compatible.
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+*/
+#if 1
+
+
+#if __STDC__
+#define POINTER void *
+#else
+#define POINTER char *
+#endif
+
+/* Determine default alignment. */
+struct fooalign {char x; double d;};
+#define DEFAULT_ALIGNMENT \
+ ((PTR_INT_TYPE) ((char *)&((struct fooalign *) 0)->d - (char *)0))
+/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
+ But in fact it might be less smart and round addresses to as much as
+ DEFAULT_ROUNDING. So we prepare for it to do that. */
+union fooround {long x; double d;};
+#define DEFAULT_ROUNDING (sizeof (union fooround))
+
+/* When we copy a long block of data, this is the unit to do it with.
+ On some machines, copying successive ints does not work;
+ in such a case, redefine COPYING_UNIT to `long' (if that works)
+ or `char' as a last resort. */
+#ifndef COPYING_UNIT
+#define COPYING_UNIT int
+#endif
+
+/* The non-GNU-C macros copy the obstack into this global variable
+ to avoid multiple evaluation. */
+
+struct obstack *_obstack;
+
+/* Define a macro that either calls functions with the traditional malloc/free
+ calling interface, or calls functions with the mmalloc/mfree interface
+ (that adds an extra first argument), based on the state of use_extra_arg.
+ For free, do not use ?:, since some compilers, like the MIPS compilers,
+ do not allow (expr) ? void : void. */
+
+#define CALL_CHUNKFUN(h, size) \
+ (((h) -> use_extra_arg) \
+ ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
+ : (*(h)->chunkfun) ((size)))
+
+#define CALL_FREEFUN(h, old_chunk) \
+ do { \
+ if ((h) -> use_extra_arg) \
+ (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
+ else \
+ (*(h)->freefun) ((old_chunk)); \
+ } while (0)
+
+
+/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default).
+ Objects start on multiples of ALIGNMENT (0 means use default).
+ CHUNKFUN is the function to use to allocate chunks,
+ and FREEFUN the function to free them.
+
+ Return nonzero if successful, zero if out of memory.
+ To recover from an out of memory error,
+ free up some memory, then call this again. */
+
+int
+_obstack_begin (h, size, alignment, chunkfun, freefun)
+ struct obstack *h;
+ int size;
+ int alignment;
+ POINTER (*chunkfun) ();
+ void (*freefun) ();
+{
+ register struct _obstack_chunk* chunk; /* points to new chunk */
+
+ if (alignment == 0)
+ alignment = DEFAULT_ALIGNMENT;
+ if (size == 0)
+ /* Default size is what GNU malloc can fit in a 4096-byte block. */
+ {
+ /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+ Use the values for range checking, because if range checking is off,
+ the extra bytes won't be missed terribly, but if range checking is on
+ and we used a larger request, a whole extra 4096 bytes would be
+ allocated.
+
+ These numbers are irrelevant to the new GNU malloc. I suspect it is
+ less sensitive to the size of the request. */
+ int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+ + 4 + DEFAULT_ROUNDING - 1)
+ & ~(DEFAULT_ROUNDING - 1));
+ size = 4096 - extra;
+ }
+
+ /* Record the allocator pair and geometry. The mask is ALIGNMENT - 1,
+ which presumably requires ALIGNMENT to be a power of two -- confirm. */
+ h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
+ h->freefun = freefun;
+ h->chunk_size = size;
+ h->alignment_mask = alignment - 1;
+ /* Must be set before CALL_CHUNKFUN, which tests it: 0 selects the
+ plain malloc/free style calling convention (no extra argument). */
+ h->use_extra_arg = 0;
+
+ chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+ if (!chunk)
+ {
+ /* Out of memory: flag it and return zero. */
+ h->alloc_failed = 1;
+ return 0;
+ }
+ h->alloc_failed = 0;
+ /* The obstack starts out empty at the head of the chunk's contents;
+ the limit is CHUNK_SIZE bytes from the start of the chunk itself. */
+ h->next_free = h->object_base = chunk->contents;
+ h->chunk_limit = chunk->limit
+ = (char *) chunk + h->chunk_size;
+ chunk->prev = 0;
+ /* The initial chunk now contains no empty object. */
+ h->maybe_empty_object = 0;
+ return 1;
+}
+
+/* Like _obstack_begin, except that ARG is recorded in H and
+ use_extra_arg is set, so CALL_CHUNKFUN and CALL_FREEFUN pass ARG as
+ an extra first argument to CHUNKFUN and FREEFUN. Return nonzero if
+ successful, zero if out of memory. */
+
+int
+_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
+ struct obstack *h;
+ int size;
+ int alignment;
+ POINTER (*chunkfun) ();
+ void (*freefun) ();
+ POINTER arg;
+{
+ register struct _obstack_chunk* chunk; /* points to new chunk */
+
+ if (alignment == 0)
+ alignment = DEFAULT_ALIGNMENT;
+ if (size == 0)
+ /* Default size is what GNU malloc can fit in a 4096-byte block. */
+ {
+ /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+ Use the values for range checking, because if range checking is off,
+ the extra bytes won't be missed terribly, but if range checking is on
+ and we used a larger request, a whole extra 4096 bytes would be
+ allocated.
+
+ These numbers are irrelevant to the new GNU malloc. I suspect it is
+ less sensitive to the size of the request. */
+ int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+ + 4 + DEFAULT_ROUNDING - 1)
+ & ~(DEFAULT_ROUNDING - 1));
+ size = 4096 - extra;
+ }
+
+ /* Record the allocator pair and geometry. The mask is ALIGNMENT - 1,
+ which presumably requires ALIGNMENT to be a power of two -- confirm. */
+ h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
+ h->freefun = freefun;
+ h->chunk_size = size;
+ h->alignment_mask = alignment - 1;
+ /* Both fields must be set before CALL_CHUNKFUN, which reads them. */
+ h->extra_arg = arg;
+ h->use_extra_arg = 1;
+
+ chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+ if (!chunk)
+ {
+ /* Out of memory: flag it and return zero. */
+ h->alloc_failed = 1;
+ return 0;
+ }
+ h->alloc_failed = 0;
+ /* The obstack starts out empty at the head of the chunk's contents;
+ the limit is CHUNK_SIZE bytes from the start of the chunk itself. */
+ h->next_free = h->object_base = chunk->contents;
+ h->chunk_limit = chunk->limit
+ = (char *) chunk + h->chunk_size;
+ chunk->prev = 0;
+ /* The initial chunk now contains no empty object. */
+ h->maybe_empty_object = 0;
+ return 1;
+}
+
+/* Allocate a new current chunk for the obstack *H
+ on the assumption that LENGTH bytes need to be added
+ to the current object, or a new object of length LENGTH allocated.
+ Copies any partial object from the end of the old chunk
+ to the beginning of the new one. */
+
+void
+_obstack_newchunk (h, length)
+ struct obstack *h;
+ int length;
+{
+ register struct _obstack_chunk* old_chunk = h->chunk;
+ register struct _obstack_chunk* new_chunk;
+ register long new_size;
+ /* Size of the partially built object that has to move along. */
+ register int obj_size = h->next_free - h->object_base;
+ register int i;
+ int already;
+
+ /* Compute size for new chunk. */
+ /* Room for the object plus LENGTH, one eighth of the object's size
+ again, and 100 bytes; never less than the standard chunk size. */
+ new_size = (obj_size + length) + (obj_size >> 3) + 100;
+ if (new_size < h->chunk_size)
+ new_size = h->chunk_size;
+
+ /* Allocate and initialize the new chunk. */
+ new_chunk = CALL_CHUNKFUN (h, new_size);
+ if (!new_chunk)
+ {
+ /* Out of memory: flag it and leave the obstack on its old chunk. */
+ h->alloc_failed = 1;
+ return;
+ }
+ h->alloc_failed = 0;
+ h->chunk = new_chunk;
+ new_chunk->prev = old_chunk;
+ new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
+
+ /* Move the existing object to the new chunk.
+ Word at a time is fast and is safe if the object
+ is sufficiently aligned. */
+ if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT)
+ {
+ for (i = obj_size / sizeof (COPYING_UNIT) - 1;
+ i >= 0; i--)
+ ((COPYING_UNIT *)new_chunk->contents)[i]
+ = ((COPYING_UNIT *)h->object_base)[i];
+ /* We used to copy the odd few remaining bytes as one extra COPYING_UNIT,
+ but that can cross a page boundary on a machine
+ which does not do strict alignment for COPYING_UNITS. */
+ already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT);
+ }
+ else
+ already = 0;
+ /* Copy remaining bytes one by one. */
+ for (i = already; i < obj_size; i++)
+ new_chunk->contents[i] = h->object_base[i];
+
+ /* If the object just copied was the only data in OLD_CHUNK,
+ free that chunk and remove it from the chain.
+ But not if that chunk might contain an empty object. */
+ if (h->object_base == old_chunk->contents && ! h->maybe_empty_object)
+ {
+ /* Unlink before freeing: old_chunk->prev is read first. */
+ new_chunk->prev = old_chunk->prev;
+ CALL_FREEFUN (h, old_chunk);
+ }
+
+ /* The object now lives at the head of the new chunk. */
+ h->object_base = new_chunk->contents;
+ h->next_free = h->object_base + obj_size;
+ /* The new chunk certainly contains no empty object yet. */
+ h->maybe_empty_object = 0;
+}
+
+/* Return nonzero if object OBJ has been allocated from obstack H.
+ This is here for debugging.
+ If you use it in a program, you are probably losing. */
+
+#if __STDC__
+/* Suppress -Wmissing-prototypes warning. We don't want to declare this in
+ obstack.h because it is just for debugging. */
+int _obstack_allocated_p (struct obstack *h, POINTER obj);
+#endif
+
+int
+_obstack_allocated_p (h, obj)
+     struct obstack *h;
+     POINTER obj;
+{
+  register struct _obstack_chunk *chunk;
+
+  /* Walk the chain from the newest chunk to the oldest.  OBJ belongs
+     to a chunk when it lies strictly above the chunk header and at or
+     below the chunk's limit: an object cannot sit exactly at the
+     beginning of a chunk, but an empty one may sit exactly at the
+     end. */
+  for (chunk = h->chunk; chunk != 0; chunk = chunk->prev)
+    if ((POINTER) chunk < obj && obj <= (POINTER) chunk->limit)
+      return 1;
+  return 0;
+}
+
+/* Free objects in obstack H, including OBJ and everything allocated
+ more recently than OBJ. If OBJ is zero, free everything in H. */
+
+#undef obstack_free
+
+/* This function has two names with identical definitions.
+ This is the first one, called from non-ANSI code. */
+
+void
+_obstack_free (h, obj)
+ struct obstack *h;
+ POINTER obj;
+{
+ register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
+ register struct _obstack_chunk* plp; /* point to previous chunk if any */
+
+ lp = h->chunk;
+ /* We use >= because there cannot be an object at the beginning of a chunk.
+ But there can be an empty object at that address
+ at the end of another chunk. */
+ /* Free chunks, newest first, until one that contains OBJ is reached
+ or the chain is exhausted. */
+ while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
+ {
+ /* Fetch the link before the chunk is handed to the free function. */
+ plp = lp->prev;
+ CALL_FREEFUN (h, lp);
+ lp = plp;
+ /* If we switch chunks, we can't tell whether the new current
+ chunk contains an empty object, so assume that it may. */
+ h->maybe_empty_object = 1;
+ }
+ if (lp)
+ {
+ /* OBJ lies in LP: make LP current and cut the obstack back to OBJ. */
+ h->object_base = h->next_free = (char *)(obj);
+ h->chunk_limit = lp->limit;
+ h->chunk = lp;
+ }
+ else if (obj != 0)
+ /* obj is not in any of the chunks! */
+ abort ();
+ /* NOTE(review): when OBJ is zero every chunk has been freed, but
+ h->chunk and the other fields of H are left unchanged here. */
+}
+
+/* This function is used from ANSI code. */
+
+void
+obstack_free (h, obj)
+ struct obstack *h;
+ POINTER obj;
+{
+ register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
+ register struct _obstack_chunk* plp; /* point to previous chunk if any */
+
+ lp = h->chunk;
+ /* We use >= because there cannot be an object at the beginning of a chunk.
+ But there can be an empty object at that address
+ at the end of another chunk. */
+ /* Free chunks, newest first, until one that contains OBJ is reached
+ or the chain is exhausted. */
+ while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
+ {
+ /* Fetch the link before the chunk is handed to the free function. */
+ plp = lp->prev;
+ CALL_FREEFUN (h, lp);
+ lp = plp;
+ /* If we switch chunks, we can't tell whether the new current
+ chunk contains an empty object, so assume that it may. */
+ h->maybe_empty_object = 1;
+ }
+ if (lp)
+ {
+ /* OBJ lies in LP: make LP current and cut the obstack back to OBJ. */
+ h->object_base = h->next_free = (char *)(obj);
+ h->chunk_limit = lp->limit;
+ h->chunk = lp;
+ }
+ else if (obj != 0)
+ /* obj is not in any of the chunks! */
+ abort ();
+ /* NOTE(review): when OBJ is zero every chunk has been freed, but
+ h->chunk and the other fields of H are left unchanged here. */
+}
+
+#if 0
+/* These are now turned off because the applications do not use it
+ and it uses bcopy via obstack_grow, which causes trouble on sysV. */
+
+/* Now define the functional versions of the obstack macros.
+ Define them to simply use the corresponding macros to do the job. */
+
+#if __STDC__
+/* These function definitions do not work with non-ANSI preprocessors;
+ they won't pass through the macro names in parentheses. */
+
+/* The function names appear in parentheses in order to prevent
+ the macro-definitions of the names from being expanded there. */
+
+/* Each function below is a one-line wrapper around the obstack macro
+ of the same name: the parenthesized name in the definition is not
+ macro-expanded, while the call in the body is. The whole group is
+ inside `#if 0' and is therefore not compiled. */
+
+/* Accessors: base address, next free byte, current object size, and
+ room left in the current chunk. */
+
+POINTER (obstack_base) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_base (obstack);
+}
+
+POINTER (obstack_next_free) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_next_free (obstack);
+}
+
+int (obstack_object_size) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_object_size (obstack);
+}
+
+int (obstack_room) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_room (obstack);
+}
+
+/* Growing the current object. */
+
+void (obstack_grow) (obstack, pointer, length)
+ struct obstack *obstack;
+ POINTER pointer;
+ int length;
+{
+ obstack_grow (obstack, pointer, length);
+}
+
+void (obstack_grow0) (obstack, pointer, length)
+ struct obstack *obstack;
+ POINTER pointer;
+ int length;
+{
+ obstack_grow0 (obstack, pointer, length);
+}
+
+void (obstack_1grow) (obstack, character)
+ struct obstack *obstack;
+ int character;
+{
+ obstack_1grow (obstack, character);
+}
+
+void (obstack_blank) (obstack, length)
+ struct obstack *obstack;
+ int length;
+{
+ obstack_blank (obstack, length);
+}
+
+void (obstack_1grow_fast) (obstack, character)
+ struct obstack *obstack;
+ int character;
+{
+ obstack_1grow_fast (obstack, character);
+}
+
+void (obstack_blank_fast) (obstack, length)
+ struct obstack *obstack;
+ int length;
+{
+ obstack_blank_fast (obstack, length);
+}
+
+/* Finishing the current object, and allocating or copying a whole
+ object in one step. */
+
+POINTER (obstack_finish) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_finish (obstack);
+}
+
+POINTER (obstack_alloc) (obstack, length)
+ struct obstack *obstack;
+ int length;
+{
+ return obstack_alloc (obstack, length);
+}
+
+POINTER (obstack_copy) (obstack, pointer, length)
+ struct obstack *obstack;
+ POINTER pointer;
+ int length;
+{
+ return obstack_copy (obstack, pointer, length);
+}
+
+POINTER (obstack_copy0) (obstack, pointer, length)
+ struct obstack *obstack;
+ POINTER pointer;
+ int length;
+{
+ return obstack_copy0 (obstack, pointer, length);
+}
+
+#endif /* __STDC__ */
+
+#endif /* 0 */
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */
diff --git a/lib/obstack.h b/lib/obstack.h
new file mode 100644
index 0000000..40793ca
--- /dev/null
+++ b/lib/obstack.h
@@ -0,0 +1,519 @@
+/* obstack.h - object stack macros
+ Copyright (C) 1988, 89, 90, 91, 92, 93, 94 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Summary:
+
+All the apparent functions defined here are macros. The idea
+is that you would use these pre-tested macros to solve a
+very specific set of problems, and they would run fast.
+Caution: no side-effects in arguments please!! They may be
+evaluated MANY times!!
+
+These macros operate a stack of objects. Each object starts life
+small, and may grow to maturity. (Consider building a word syllable
+by syllable.) An object can move while it is growing. Once it has
+been "finished" it never changes address again. So the "top of the
+stack" is typically an immature growing object, while the rest of the
+stack is of mature, fixed size and fixed address objects.
+
+These routines grab large chunks of memory, using a function you
+supply, called `obstack_chunk_alloc'. On occasion, they free chunks,
+by calling `obstack_chunk_free'. You must define them and declare
+them before using any obstack macros.
+
+Each independent stack is represented by a `struct obstack'.
+Each of the obstack macros expects a pointer to such a structure
+as the first argument.
+
+One motivation for this package is the problem of growing char strings
+in symbol tables. Unless you are a "fascist pig with a read-only mind"
+--Gosper's immortal quote from HAKMEM item 154, out of context--you
+would not like to put any arbitrary upper limit on the length of your
+symbols.
+
+In practice this often means you will build many short symbols and a
+few long symbols. At the time you are reading a symbol you don't know
+how long it is. One traditional method is to read a symbol into a
+buffer, realloc()ating the buffer every time you try to read a symbol
+that is longer than the buffer. This is beaut, but you still will
+want to copy the symbol from the buffer to a more permanent
+symbol-table entry say about half the time.
+
+With obstacks, you can work differently. Use one obstack for all symbol
+names. As you read a symbol, grow the name in the obstack gradually.
+When the name is complete, finalize it. Then, if the symbol exists already,
+free the newly read name.
+
+The way we do this is to take a large chunk, allocating memory from
+low addresses. When you want to build a symbol in the chunk you just
+add chars above the current "high water mark" in the chunk. When you
+have finished adding chars, because you got to the end of the symbol,
+you know how long the chars are, and you can create a new object.
+Mostly the chars will not burst over the highest address of the chunk,
+because you would typically expect a chunk to be (say) 100 times as
+long as an average object.
+
+In case that isn't clear, when we have enough chars to make up
+the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)
+so we just point to it where it lies. No moving of chars is
+needed and this is the second win: potentially long strings need
+never be explicitly shuffled. Once an object is formed, it does not
+change its address during its lifetime.
+
+When the chars burst over a chunk boundary, we allocate a larger
+chunk, and then copy the partly formed object from the end of the old
+chunk to the beginning of the new larger chunk. We then carry on
+accreting characters to the end of the object as we normally would.
+
+A special macro is provided to add a single char at a time to a
+growing object. This allows the use of register variables, which
+break the ordinary 'growth' macro.
+
+Summary:
+ We allocate large chunks.
+ We carve out one object at a time from the current chunk.
+ Once carved, an object never moves.
+ We are free to append data of any size to the currently
+ growing object.
+ Exactly one object is growing in an obstack at any one time.
+ You can run one obstack per control block.
+ You may have as many control blocks as you dare.
+ Because of the way we do it, you can `unwind' an obstack
+ back to a previous state. (You may remove objects much
+ as you would with a stack.)
+*/
+
+
+/* Don't do the contents of this file more than once. */
+
+#ifndef __OBSTACK_H__
+#define __OBSTACK_H__
+
+/* We use subtraction of (char *)0 instead of casting to int
+ because on word-addressable machines a simple cast to int
+ may ignore the byte-within-word field of the pointer. */
+
+#ifndef __PTR_TO_INT
+#define __PTR_TO_INT(P) ((P) - (char *)0) /* pointer -> integer: P minus a null char pointer */
+#endif
+
+#ifndef __INT_TO_PTR
+#define __INT_TO_PTR(P) ((P) + (char *)0) /* integer -> pointer: the reverse of __PTR_TO_INT */
+#endif
+
+/* We need the type of the result of the pointer subtraction in
+ __PTR_TO_INT. In ANSI C it is ptrdiff_t, but in traditional C it is
+ usually long. If we are in ANSI C and don't already have ptrdiff_t, get it. */
+
+#if __STDC__ && ! defined (offsetof)
+#if defined (__GNUC__) && defined (IN_GCC)
+/* On Next machine, the system's stddef.h screws up if included
+ after we have defined just ptrdiff_t, so include all of stddef.h.
+ Otherwise, define just ptrdiff_t, which is all we need. */
+#ifndef __NeXT__
+#define __need_ptrdiff_t
+#endif
+#endif
+
+#include <stddef.h>
+#endif
+
+#include <sys/types.h>
+
+#ifndef HAVE_PTRDIFF_T
+# define ptrdiff_t off_t
+#endif
+
+#if __STDC__
+#define PTR_INT_TYPE ptrdiff_t
+#else
+#define PTR_INT_TYPE long
+#endif
+
+struct _obstack_chunk /* Header that lives at the front of each chunk. */
+{
+ char *limit; /* 1 past end of this chunk */
+ struct _obstack_chunk *prev; /* address of prior chunk or NULL */
+ char contents[4]; /* objects begin here; presumably the data runs on up to `limit', the [4] being a placeholder size -- confirm in obstack.c */
+};
+
+struct obstack /* control block: tracks the current object in the current chunk */
+{
+ long chunk_size; /* preferred size to allocate chunks in */
+ struct _obstack_chunk* chunk; /* address of current struct _obstack_chunk */
+ char *object_base; /* address of object we are building */
+ char *next_free; /* where to add next char to current object */
+ char *chunk_limit; /* address of char after current chunk */
+ PTR_INT_TYPE temp; /* Temporary for some macros (the non-GNU-C versions store lengths and addresses here). */
+ int alignment_mask; /* Mask of alignment for each object. */
+ struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */
+ void (*freefun) (); /* User's function to free a chunk. */
+ char *extra_arg; /* first arg for chunk alloc/dealloc funcs */
+ unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */
+ unsigned maybe_empty_object:1;/* There is a possibility that the current
+ chunk contains a zero-length object. This
+ prevents freeing the chunk if we allocate
+ a bigger chunk to replace it. */
+ unsigned alloc_failed:1; /* chunk alloc func returned 0; the macros test this and then yield 0 or skip their store */
+};
+
+/* Declare the external functions we use; they are in obstack.c. */
+
+#if __STDC__ /* prototyped declarations for ANSI compilers */
+extern void _obstack_newchunk (struct obstack *, int); /* called by the grow/blank macros when the current chunk lacks room */
+extern void _obstack_free (struct obstack *, void *); /* used by the non-ANSI obstack_free macro */
+extern int _obstack_begin (struct obstack *, int, int, /* target of obstack_init, obstack_begin and obstack_specify_allocation */
+ void *(*) (), void (*) ());
+extern int _obstack_begin_1 (struct obstack *, int, int, /* target of obstack_specify_allocation_with_arg */
+ void *(*) (), void (*) (), void *);
+#else /* traditional C: same functions, no parameter lists */
+extern void _obstack_newchunk ();
+extern void _obstack_free ();
+extern int _obstack_begin ();
+extern int _obstack_begin_1 ();
+#endif
+
+#if __STDC__
+
+/* Declare the functions after the structs but before the macros of the
+ same names are defined, so the macros cannot rewrite these prototypes. */
+
+void obstack_init (struct obstack *obstack);
+
+void * obstack_alloc (struct obstack *obstack, int size);
+
+void * obstack_copy (struct obstack *obstack, void *address, int size);
+void * obstack_copy0 (struct obstack *obstack, void *address, int size);
+
+void obstack_free (struct obstack *obstack, void *block);
+
+void obstack_blank (struct obstack *obstack, int size);
+
+void obstack_grow (struct obstack *obstack, void *data, int size);
+void obstack_grow0 (struct obstack *obstack, void *data, int size);
+
+void obstack_1grow (struct obstack *obstack, int data_char);
+void obstack_ptr_grow (struct obstack *obstack, void *data);
+void obstack_int_grow (struct obstack *obstack, int data);
+
+void * obstack_finish (struct obstack *obstack);
+
+int obstack_object_size (struct obstack *obstack);
+
+int obstack_room (struct obstack *obstack);
+void obstack_1grow_fast (struct obstack *obstack, int data_char);
+void obstack_ptr_grow_fast (struct obstack *obstack, void *data);
+void obstack_int_grow_fast (struct obstack *obstack, int data);
+void obstack_blank_fast (struct obstack *obstack, int size);
+
+void * obstack_base (struct obstack *obstack);
+void * obstack_next_free (struct obstack *obstack);
+int obstack_alignment_mask (struct obstack *obstack);
+int obstack_chunk_size (struct obstack *obstack);
+
+#endif /* __STDC__ */
+
+/* Non-ANSI C cannot really support alternative functions for these macros,
+ so we do not declare them. */
+
+/* Pointer to beginning of object being allocated or to be allocated next.
+ Note that this might not be the final address of the object
+ because a new chunk might be needed to hold the final size. Yields 0 once alloc_failed is set. */
+
+#define obstack_base(h) ((h)->alloc_failed ? 0 : (h)->object_base)
+
+/* Size for allocating ordinary chunks. Expands to the chunk_size member itself. */
+
+#define obstack_chunk_size(h) ((h)->chunk_size)
+
+/* Pointer to next byte not yet allocated in current chunk. Yields 0 once alloc_failed is set. */
+
+#define obstack_next_free(h) ((h)->alloc_failed ? 0 : (h)->next_free)
+
+/* Mask specifying low bits that should be clear in address of an object. Expands to the alignment_mask member itself. */
+
+#define obstack_alignment_mask(h) ((h)->alignment_mask)
+
+#define obstack_init(h) /* Set up H using the caller-defined obstack_chunk_alloc/obstack_chunk_free; size and alignment are passed as 0 (presumably "use defaults" -- confirm in obstack.c). */ \
+ _obstack_begin ((h), 0, 0, \
+ (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
+
+#define obstack_begin(h, size) /* Like obstack_init, but passes SIZE as the chunk size argument. */ \
+ _obstack_begin ((h), (size), 0, \
+ (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
+
+#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) /* Caller supplies size, alignment and both chunk functions explicitly. */ \
+ _obstack_begin ((h), (size), (alignment), \
+ (void *(*) ()) (chunkfun), (void (*) ()) (freefun))
+
+#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) /* Same, plus ARG forwarded to _obstack_begin_1. */ \
+ _obstack_begin_1 ((h), (size), (alignment), \
+ (void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg))
+
+#define obstack_chunkfun(h, newchunkfun) /* Replace H's chunk-allocation function; the cast matches the chunkfun member's type. */ \
+ ((h) -> chunkfun = (struct _obstack_chunk *(*)()) (newchunkfun))
+
+#define obstack_freefun(h, newfreefun) /* Replace H's chunk-freeing function. */ \
+ ((h) -> freefun = (void (*)()) (newfreefun))
+
+#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = (achar)) /* Append one char with no room check (presumably the caller checks obstack_room first); ACHAR is parenthesized so any expression is stored whole. */
+
+#define obstack_blank_fast(h,n) ((h)->next_free += (n)) /* Advance next_free by N bytes, likewise with no room check. */
+
+#if defined (__GNUC__) && __STDC__
+#if __GNUC__ < 2
+#define __extension__
+#endif
+
+/* For GNU C, if not -traditional,
+ we can define these macros to compute all args only once
+ without using a global variable.
+ Also, we can avoid using the `temp' slot, to make faster code. */
+
+#define obstack_object_size(OBSTACK) /* Bytes in the object being built (next_free - object_base), or 0 once alloc_failed is set. */ \
+ __extension__ \
+ ({ struct obstack *__o = (OBSTACK); /* OBSTACK is evaluated exactly once */ \
+ __o->alloc_failed ? 0 : \
+ (unsigned) (__o->next_free - __o->object_base); })
+
+#define obstack_room(OBSTACK) /* Bytes left in the current chunk (chunk_limit - next_free); alloc_failed is not consulted here. */ \
+ __extension__ \
+ ({ struct obstack *__o = (OBSTACK); \
+ (unsigned) (__o->chunk_limit - __o->next_free); })
+
+#define obstack_grow(OBSTACK,where,length) /* Append LENGTH bytes copied from WHERE to the growing object; OBSTACK and LENGTH are evaluated once. */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->chunk_limit - __o->next_free < __len) /* compare against the room left, as obstack_blank does; next_free + __len could run past the end of the address space */ \
+ _obstack_newchunk (__o, __len); \
+ if (!__o->alloc_failed) /* nothing is copied if the chunk allocation failed */ \
+ { \
+ bcopy (where, __o->next_free, __len); \
+ __o->next_free += __len; \
+ } \
+ (void) 0; })
+
+#define obstack_grow0(OBSTACK,where,length) /* Like obstack_grow, then append one terminating zero byte. */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->chunk_limit - __o->next_free < __len + 1) /* room test by subtraction, as in obstack_blank; avoids forming an out-of-range pointer */ \
+ _obstack_newchunk (__o, __len + 1); \
+ if (!__o->alloc_failed) /* nothing is copied if the chunk allocation failed */ \
+ { \
+ bcopy (where, __o->next_free, __len); \
+ __o->next_free += __len; \
+ *(__o->next_free)++ = 0; \
+ } \
+ (void) 0; })
+
+#define obstack_1grow(OBSTACK,datum) /* Append the single char DATUM, asking for a new chunk first when the current one is full. */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ if (__o->next_free + 1 > __o->chunk_limit) \
+ _obstack_newchunk (__o, 1); \
+ if (!__o->alloc_failed) /* the store is skipped once alloc_failed is set */ \
+ *(__o->next_free)++ = (datum); \
+ (void) 0; })
+
+/* These assume that the obstack alignment is good enough for pointers or ints,
+ and that the data added so far to the current object
+ shares that much alignment. */
+
+#define obstack_ptr_grow(OBSTACK,datum) /* Append the pointer DATUM (stored as void *) to the growing object. */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ if (__o->next_free + sizeof (void *) > __o->chunk_limit) \
+ _obstack_newchunk (__o, sizeof (void *)); \
+ if (!__o->alloc_failed) /* store, then advance: ++ on a cast expression is not an lvalue operation newer GCC accepts */ \
+ { *(void **) __o->next_free = (void *) (datum); __o->next_free += sizeof (void *); } \
+ (void) 0; })
+
+#define obstack_int_grow(OBSTACK,datum) /* Append the int DATUM to the growing object. */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ if (__o->next_free + sizeof (int) > __o->chunk_limit) \
+ _obstack_newchunk (__o, sizeof (int)); \
+ if (!__o->alloc_failed) /* store, then advance: ++ on a cast expression is not an lvalue operation newer GCC accepts */ \
+ { *(int *) __o->next_free = (int) (datum); __o->next_free += sizeof (int); } \
+ (void) 0; })
+
+#define obstack_ptr_grow_fast(h,aptr) (*((void **) (((h)->next_free += sizeof (void *)) - sizeof (void *))) = (void *) (aptr)) /* no room check; advances next_free and stores into the slot just skipped, so no ++ is applied to a cast */
+#define obstack_int_grow_fast(h,aint) (*((int *) (((h)->next_free += sizeof (int)) - sizeof (int))) = (int) (aint)) /* same scheme for an int */
+
+#define obstack_blank(OBSTACK,length) /* Extend the growing object by LENGTH bytes without writing to them. */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->chunk_limit - __o->next_free < __len) /* not enough room left in this chunk */ \
+ _obstack_newchunk (__o, __len); \
+ if (!__o->alloc_failed) /* next_free is left alone once alloc_failed is set */ \
+ __o->next_free += __len; \
+ (void) 0; })
+
+#define obstack_alloc(OBSTACK,length) /* obstack_blank followed by obstack_finish; the value is obstack_finish's result. */ \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); /* named __h, distinct from the __o and __o1 used by the nested macros */ \
+ obstack_blank (__h, (length)); \
+ obstack_finish (__h); })
+
+#define obstack_copy(OBSTACK,where,length) /* obstack_grow followed by obstack_finish. */ \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); \
+ obstack_grow (__h, (where), (length)); \
+ obstack_finish (__h); })
+
+#define obstack_copy0(OBSTACK,where,length) /* obstack_grow0 followed by obstack_finish. */ \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); \
+ obstack_grow0 (__h, (where), (length)); \
+ obstack_finish (__h); })
+
+/* Finish the growing object: yield its address (0 once alloc_failed is set) and start the next object at the aligned next_free.
+ The local variable is named __o1 to avoid a name conflict when obstack_blank is called. */
+#define obstack_finish(OBSTACK) \
+__extension__ \
+({ struct obstack *__o1 = (OBSTACK); \
+ void *value; \
+ if (__o1->alloc_failed) \
+ value = 0; \
+ else \
+ { \
+ value = (void *) __o1->object_base; \
+ if (__o1->next_free == value) /* nothing was added: the object is zero-length */ \
+ __o1->maybe_empty_object = 1; \
+ __o1->next_free /* round next_free up to the alignment */ \
+ = __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\
+ & ~ (__o1->alignment_mask)); \
+ if (__o1->next_free - (char *)__o1->chunk /* rounding went past the chunk: clamp to chunk_limit */ \
+ > __o1->chunk_limit - (char *)__o1->chunk) \
+ __o1->next_free = __o1->chunk_limit; \
+ __o1->object_base = __o1->next_free; \
+ } \
+ value; })
+
+#define obstack_free(OBSTACK, OBJ) /* Reset the obstack back to OBJ. */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ void *__obj = (OBJ); \
+ if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) /* OBJ lies inside the current chunk: just move the pointers */ \
+ __o->next_free = __o->object_base = __obj; \
+ else (obstack_free) (__o, __obj); }) /* otherwise call the real function; the parentheses stop this macro recursing */
+
+#else /* not __GNUC__ or not __STDC__ */
+
+#define obstack_object_size(h) /* Non-GNU-C version: H is evaluated more than once. Bytes in the growing object, or 0 once alloc_failed is set. */ \
+ (unsigned) ((h)->alloc_failed ? 0 : (h)->next_free - (h)->object_base)
+
+#define obstack_room(h) /* Bytes left in the current chunk. */ \
+ (unsigned) ((h)->chunk_limit - (h)->next_free)
+
+/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
+ so that we can avoid having void expressions
+ in the arms of the conditional expression.
+ Casting the third operand to void was tried before,
+ but some compilers won't accept it. */
+
+#define obstack_grow(h,where,length) /* Append LENGTH bytes from WHERE; LENGTH is parked in (h)->temp so it is evaluated once, H is evaluated repeatedly. */ \
+( (h)->temp = (length), \
+ (((h)->next_free + (h)->temp > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
+ ((h)->alloc_failed ? 0 : /* no copy once alloc_failed is set */ \
+ (bcopy (where, (h)->next_free, (h)->temp), \
+ (h)->next_free += (h)->temp)))
+
+#define obstack_grow0(h,where,length) /* Like obstack_grow, then append one zero byte; room for LENGTH + 1 is requested. */ \
+( (h)->temp = (length), \
+ (((h)->next_free + (h)->temp + 1 > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \
+ ((h)->alloc_failed ? 0 : /* no copy once alloc_failed is set */ \
+ (bcopy (where, (h)->next_free, (h)->temp), \
+ (h)->next_free += (h)->temp, \
+ *((h)->next_free)++ = 0)))
+
+#define obstack_1grow(h,datum) /* Append the single char DATUM, requesting a new chunk first when the current one is full. */ \
+( (((h)->next_free + 1 > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), 1), 0) : 0), \
+ ((h)->alloc_failed ? 0 : /* no store once alloc_failed is set */ \
+ (*((h)->next_free)++ = (datum))))
+
+#define obstack_ptr_grow(h,datum) /* Append the pointer DATUM (stored as char *). */ \
+( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \
+ ((h)->alloc_failed ? 0 : /* advance next_free first, then store into the slot just skipped */ \
+ (*((char **)(((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *)datum))))
+
+#define obstack_int_grow(h,datum) /* Append the int DATUM. */ \
+( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \
+ ((h)->alloc_failed ? 0 : /* advance next_free first, then store into the slot just skipped */ \
+ (*((int *)(((h)->next_free+=sizeof(int))-sizeof(int))) = ((int)datum))))
+
+#define obstack_ptr_grow_fast(h,aptr) (*((char **)(((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *)(aptr))) /* no room check; same advance-then-store form as obstack_ptr_grow, since ++ on a cast is not ISO C */
+#define obstack_int_grow_fast(h,aint) (*((int *)(((h)->next_free+=sizeof(int))-sizeof(int))) = ((int)(aint))) /* no room check; same form as obstack_int_grow */
+
+#define obstack_blank(h,length) /* Extend the growing object by LENGTH bytes without writing to them; LENGTH is parked in (h)->temp. */ \
+( (h)->temp = (length), \
+ (((h)->chunk_limit - (h)->next_free < (h)->temp) \
+ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
+ ((h)->alloc_failed ? 0 : /* next_free is left alone once alloc_failed is set */ \
+ ((h)->next_free += (h)->temp)))
+
+#define obstack_alloc(h,length) /* obstack_blank, then obstack_finish; the value is obstack_finish's result. */ \
+ (obstack_blank ((h), (length)), obstack_finish ((h)))
+
+#define obstack_copy(h,where,length) /* obstack_grow, then obstack_finish. */ \
+ (obstack_grow ((h), (where), (length)), obstack_finish ((h)))
+
+#define obstack_copy0(h,where,length) /* obstack_grow0, then obstack_finish. */ \
+ (obstack_grow0 ((h), (where), (length)), obstack_finish ((h)))
+
+#define obstack_finish(h) /* Finish the growing object: yield its address (0 once alloc_failed is set) and start the next one at the aligned next_free. */ \
+( (h)->alloc_failed ? 0 : \
+ (((h)->next_free == (h)->object_base /* nothing was added: the object is zero-length */ \
+ ? (((h)->maybe_empty_object = 1), 0) \
+ : 0), \
+ (h)->temp = __PTR_TO_INT ((h)->object_base), /* remember the object's address as an integer */ \
+ (h)->next_free /* round next_free up to the alignment */ \
+ = __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \
+ & ~ ((h)->alignment_mask)), \
+ (((h)->next_free - (char *)(h)->chunk /* rounding went past the chunk: clamp to chunk_limit */ \
+ > (h)->chunk_limit - (char *)(h)->chunk) \
+ ? ((h)->next_free = (h)->chunk_limit) : 0), \
+ (h)->object_base = (h)->next_free, \
+ __INT_TO_PTR ((h)->temp))) /* value of the whole expression: the remembered address */
+
+#if __STDC__ /* ANSI: fall back on the function (obstack_free), reached by parenthesizing the name */
+#define obstack_free(h,obj) /* Reset the obstack back to OBJ; (h)->temp holds OBJ's offset from the start of the current chunk. */ \
+( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
+ (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)/* OBJ lies inside the current chunk */\
+ ? (int) ((h)->next_free = (h)->object_base \
+ = (h)->temp + (char *) (h)->chunk) \
+ : (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0)))
+#else /* traditional C: fall back on _obstack_free instead */
+#define obstack_free(h,obj) /* Same logic as the ANSI version above. */ \
+( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
+ (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)/* OBJ lies inside the current chunk */\
+ ? (int) ((h)->next_free = (h)->object_base \
+ = (h)->temp + (char *) (h)->chunk) \
+ : (_obstack_free ((h), (h)->temp + (char *) (h)->chunk), 0)))
+#endif
+
+#endif /* not __GNUC__ or not __STDC__ */
+
+#endif /* not __OBSTACK_H__ */
diff --git a/lib/pathmax.h b/lib/pathmax.h
new file mode 100644
index 0000000..86a9222
--- /dev/null
+++ b/lib/pathmax.h
@@ -0,0 +1,53 @@
+/* Define PATH_MAX somehow. Requires sys/types.h.
+ Copyright (C) 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef _PATHMAX_H /* include guard */
+#define _PATHMAX_H
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+/* Non-POSIX BSD systems might have gcc's limits.h, which doesn't define
+ PATH_MAX but might cause redefinition warnings when sys/param.h is
+ later included (as on MORE/BSD 4.3). */
+#if defined(_POSIX_VERSION) || (defined(HAVE_LIMITS_H) && !defined(__GNUC__))
+#include <limits.h>
+#endif
+
+#ifndef _POSIX_PATH_MAX
+#define _POSIX_PATH_MAX 255 /* last-resort value, used at the bottom of this file */
+#endif
+
+#if !defined(PATH_MAX) && defined(_PC_PATH_MAX) /* in this case PATH_MAX is a run-time expression (two pathconf calls), not a constant */
+#define PATH_MAX (pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 : pathconf ("/", _PC_PATH_MAX))
+#endif
+
+/* Don't include sys/param.h if it already has been. NOTE(review): under the usual autoconf meaning of HAVE_SYS_PARAM_H (header exists) the first test below looks inverted -- confirm against configure. */
+#if !defined(HAVE_SYS_PARAM_H) && !defined(PATH_MAX) && !defined(MAXPATHLEN)
+#include <sys/param.h>
+#endif
+
+#if !defined(PATH_MAX) && defined(MAXPATHLEN) /* fall back on MAXPATHLEN when it is defined */
+#define PATH_MAX MAXPATHLEN
+#endif
+
+#ifndef PATH_MAX /* nothing above supplied a value */
+#define PATH_MAX _POSIX_PATH_MAX
+#endif
+
+#endif /* _PATHMAX_H */
diff --git a/lib/regex.c b/lib/regex.c
new file mode 100644
index 0000000..ecaa8b0
--- /dev/null
+++ b/lib/regex.c
@@ -0,0 +1,5488 @@
+/* Extended regular expression matching and search library,
+ version 0.12.
+ (Implements POSIX draft P10003.2/D11.2, except for
+ internationalization features.)
+
+ Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+ #pragma alloca
+#endif
+
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+#include <sys/types.h>
+
+/* This is for other GNU distributions with internationalized messages. */
+#if HAVE_LIBINTL_H || defined (_LIBC)
+# include <libintl.h>
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+#define gettext_noop(String) String
+#endif
+
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+#ifdef emacs
+
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+
+#else /* not emacs */
+
+/* If we are not linking with Emacs proper,
+ we can't use the relocating allocator
+ even if config.h says that we can. */
+#undef REL_ALLOC
+
+#if defined (STDC_HEADERS) || defined (_LIBC)
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
+ If nothing else has been done, use the method below. */
+#ifdef INHIBIT_STRING_HEADER
+#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
+#if !defined (bzero) && !defined (bcopy)
+#undef INHIBIT_STRING_HEADER
+#endif
+#endif
+#endif
+
+/* This is the normal way of making sure we have a bcopy and a bzero.
+ This is used in most programs--a few other programs avoid this
+ by defining INHIBIT_STRING_HEADER. */
+#ifndef INHIBIT_STRING_HEADER
+#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
+#include <string.h>
+#ifndef bcmp
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n) memcpy ((d), (s), (n))
+#endif
+#ifndef bzero
+#define bzero(s, n) memset ((s), 0, (n))
+#endif
+#else
+#include <strings.h>
+#endif
+#endif
+
+/* Define the syntax stuff for \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+#ifndef Sword
+#define Sword 1
+#endif
+
+#ifdef SWITCH_ENUM_BUG /* when defined, enum values are cast to int before use (presumably for compilers that mishandle switch on an enum) */
+#define SWITCH_ENUM_CAST(x) ((int)(x))
+#else /* otherwise the value is used unchanged */
+#define SWITCH_ENUM_CAST(x) (x)
+#endif
+
+#ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set. */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+/* Fill in re_syntax_table on the first call; every later call does nothing.  */
+static void
+init_syntax_once ()
+{
+  static int table_ready = 0;
+  register int ch;
+
+  if (!table_ready)
+    {
+      /* Start from an all-zero table ...  */
+      bzero (re_syntax_table, sizeof re_syntax_table);
+
+      /* ... then mark letters, digits and underscore as Sword.  */
+      for (ch = 'a'; ch <= 'z'; ch++)
+        re_syntax_table[ch] = Sword;
+      for (ch = 'A'; ch <= 'Z'; ch++)
+        re_syntax_table[ch] = Sword;
+      for (ch = '0'; ch <= '9'; ch++)
+        re_syntax_table[ch] = Sword;
+      re_syntax_table['_'] = Sword;
+
+      table_ready = 1;
+    }
+}
+
+#endif /* not SYNTAX_TABLE */
+
+#define SYNTAX(c) re_syntax_table[c] /* syntax class of character C: a direct, unchecked index into re_syntax_table */
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits. */
+#include "regex.h"
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+
+/* Jim Meyering writes:
+
+ "... Some ctype macros are valid only for character codes that
+ isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+ using /bin/cc or gcc but without giving an ansi option). So, all
+ ctype uses should be through macros like ISPRINT... If
+ STDC_HEADERS is defined, then autoconf has verified that the ctype
+ macros don't need to be guarded with references to isascii. ...
+ Defining isascii to 1 should let any compiler worth its salt
+ eliminate the && through constant folding." */
+
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+#define ISASCII(c) 1 /* constant 1, so the `ISASCII (c) &&' guards below fold away */
+#else
+#define ISASCII(c) isascii(c)
+#endif
+
+#ifdef isblank /* only detected when isblank is a macro */
+#define ISBLANK(c) (ISASCII (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t') /* fallback: space or tab only */
+#endif
+#ifdef isgraph /* only detected when isgraph is a macro */
+#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) /* fallback: printable and not whitespace */
+#endif
+
+#define ISPRINT(c) (ISASCII (c) && isprint (c)) /* each wrapper below guards the ctype call with ISASCII; C is evaluated twice */
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+#ifndef NULL
+#define NULL (void *)0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) /* ANSI: let the signed char conversion do the sign extension */
+#else /* not __STDC__ */
+/* As in Harbison and Steele. Take the low byte, flip bit 7, subtract 128: 0..127 stay as they are, 128..255 become -128..-1. */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE malloc /* heap versions: plain malloc/realloc/free */
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) /* OSIZE is unused here */
+#define REGEX_FREE free
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+#ifndef alloca
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#if 0 /* It is a bad idea to declare alloca. We always cast the result. */
+#ifndef _AIX /* Already did AIX, up at the top. */
+char *alloca ();
+#endif /* not _AIX */
+#endif
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca /* alloca versions */
+
+/* Assumes a `char *destination' variable. Takes a fresh alloca block of NSIZE bytes, copies OSIZE bytes into it and yields the new block; SOURCE is left as it was. */
+#define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ bcopy (source, destination, osize), \
+ destination)
+
+/* No need to do anything to free, after alloca. */
+#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+
+#endif /* not REGEX_MALLOC */
+
+/* Define how to allocate the failure stack. */
+
+#if defined (REL_ALLOC) && defined (REGEX_MALLOC) /* REL_ALLOC is #undef'd earlier in this file when not building for Emacs */
+
+#define REGEX_ALLOCATE_STACK(size) /* all three macros act on a variable named failure_stack_ptr and ignore their pointer argument */ \
+ r_alloc (&failure_stack_ptr, (size))
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ r_re_alloc (&failure_stack_ptr, (nsize))
+#define REGEX_FREE_STACK(ptr) \
+ r_alloc_free (&failure_stack_ptr)
+
+#else /* not using relocating allocator */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE_STACK malloc /* heap versions: plain malloc/realloc/free */
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+#define REGEX_FREE_STACK free
+
+#else /* not REGEX_MALLOC */
+
+#define REGEX_ALLOCATE_STACK alloca /* alloca versions */
+
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ REGEX_REALLOCATE (source, osize, nsize)
+/* No need to explicitly free anything. Unlike REGEX_FREE, which expands to ((void)0), this expands to nothing at all. */
+#define REGEX_FREE_STACK(arg)
+
+#endif /* not REGEX_MALLOC */
+#endif /* not using relocating allocator */
+
+
+/* True if `size1' is nonzero and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. Relies on variables named `size1' and `string1' at the point of use. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc or realloc. No failure check is made here; the result is whatever the allocator returned. */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) /* overwrites ADDR with realloc's result */
+#define RETALLOC_IF(addr, n, t) /* expands to a bare if/else statement, so it cannot be used as an expression */ \
+ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) /* same as TALLOC but through REGEX_ALLOCATE (malloc or alloca) */
+
+#define BYTEWIDTH 8 /* In bits. */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) /* nonzero when the two strings compare equal */
+
+#undef MAX /* drop any earlier definitions before supplying our own */
+#undef MIN
+#define MAX(a, b) ((a) > (b) ? (a) : (b)) /* the selected argument is evaluated twice */
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean; /* truth values in this file are stored in a char */
+#define false 0
+#define true 1
+
+static int re_match_2_internal ();
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Succeed right away--no more backtracking. */
+ succeed,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn,
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by a two-byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if we know we won't have to backtrack
+ to match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+ /* The emacs-only opcodes begin with a leading comma so that
+ `notwordbound' above stays the last enumerator, with no trailing
+ comma, when they are compiled out. */
+#ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern. */
+
+/* Write NUMBER into the two bytes at DESTINATION: the low-order eight
+   bits go into the first byte, the remaining bits into the second.  */
+
+#define STORE_NUMBER(destination, number) \
+  do \
+    { \
+      (destination)[0] = (number) & 0xFF; \
+      (destination)[1] = (number) >> 8; \
+    } \
+  while (0)
+
+/* Like STORE_NUMBER, but afterwards advance DESTINATION past the two
+   bytes just written.  DESTINATION must therefore be an lvalue.  */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+  do \
+    { \
+      STORE_NUMBER (destination, number); \
+      (destination) += 2; \
+    } \
+  while (0)
+
+/* Read the number held in the two bytes at SOURCE into DESTINATION.
+   The first byte supplies the low eight bits; the second byte is
+   sign-extended and supplies the rest.  */
+
+#define EXTRACT_NUMBER(destination, source) \
+  do \
+    { \
+      (destination) = (source)[0] & 0xFF; \
+      (destination) += SIGN_EXTEND_CHAR ((source)[1]) << 8; \
+    } \
+  while (0)
+
+#ifdef DEBUG
+/* Function counterpart of EXTRACT_NUMBER: decode the two bytes at
+   SOURCE (low byte first, second byte sign-extended) into *DEST.  */
+static void
+extract_number (dest, source)
+    int *dest;
+    unsigned char *source;
+{
+  int high = SIGN_EXTEND_CHAR (source[1]);
+  int low = source[0] & 0xFF;
+
+  *dest = low + (high << 8);
+}
+
+/* Unless EXTRACT_MACROS is defined, route EXTRACT_NUMBER through the
+   function above (to debug the macros).  */
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Like EXTRACT_NUMBER, but also step SOURCE past the two bytes that
+   were read.  SOURCE must be an lvalue.  */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+  do \
+    { \
+      EXTRACT_NUMBER (destination, source); \
+      (source) += 2; \
+    } \
+  while (0)
+
+#ifdef DEBUG
+/* Function counterpart of EXTRACT_NUMBER_AND_INCR: decode the number at
+   *SOURCE into *DESTINATION, then advance *SOURCE by two bytes.  */
+static void
+extract_number_and_incr (destination, source)
+    int *destination;
+    unsigned char **source;
+{
+  unsigned char *cursor = *source;
+
+  extract_number (destination, cursor);
+  *source = cursor + 2;
+}
+
+/* Unless EXTRACT_MACROS is defined, route the macro through the
+   function above.  */
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+  extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+#include <assert.h>
+
+/* Run-time switch: the DEBUG_PRINT* macros below print only while this
+   is nonzero.  */
+static int debug = 0;
+
+/* DEBUG_STATEMENT expands to its argument unchanged.  The DEBUG_PRINTn
+   macros call printf with n arguments when `debug' is set.
+   NOTE(review): each of these expands to a bare `if' with no `else',
+   so using one as the body of an `if' that has its own `else' would
+   bind that `else' to the macro's `if'.  */
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+/* The first argument (P) is accepted but not used.  */
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+/* Print FASTMAP in human-readable form on standard output.  Every
+   character whose entry is nonzero is printed; a run of consecutive
+   set entries is abbreviated to `first-last'.  A newline ends the
+   listing.  */
+
+void
+print_fastmap (fastmap)
+    char *fastmap;
+{
+  unsigned idx = 0;
+
+  while (idx < (1 << BYTEWIDTH))
+    {
+      unsigned first;
+
+      if (!fastmap[idx++])
+        continue;
+
+      /* IDX has already moved on, so the set entry is the previous one.  */
+      first = idx - 1;
+      putchar (first);
+
+      /* Swallow the rest of a run of set entries.  */
+      while (idx < (1 << BYTEWIDTH) && fastmap[idx])
+        idx++;
+
+      /* More than one entry in the run: print its upper end as well.  */
+      if (idx - 1 != first)
+        {
+          printf ("-");
+          putchar (idx - 1);
+        }
+    }
+
+  putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+   the START pointer into it and ending just before the pointer END.
+
+   Each command is printed on a line of its own, prefixed by its byte
+   offset from START; jump targets are likewise printed as offsets from
+   START.  A null START prints "(null)" and nothing else.  */
+
+void
+print_partial_compiled_pattern (start, end)
+    unsigned char *start;
+    unsigned char *end;
+{
+  int mcnt, mcnt2;
+  unsigned char *p = start;
+  unsigned char *pend = end;
+
+  if (start == NULL)
+    {
+      printf ("(null)\n");
+      return;
+    }
+
+  /* Loop over pattern commands.  */
+  while (p < pend)
+    {
+      /* A pointer difference has type ptrdiff_t, which need not be
+         int; cast it so the argument really matches `%d'.  The same
+         applies to every offset printed below.  */
+      printf ("%d:\t", (int) (p - start));
+
+      switch ((re_opcode_t) *p++)
+        {
+        case no_op:
+          printf ("/no_op");
+          break;
+
+        case exactn:
+          /* One count byte, then that many literal bytes.  */
+          mcnt = *p++;
+          printf ("/exactn/%d", mcnt);
+          do
+            {
+              putchar ('/');
+              putchar (*p++);
+            }
+          while (--mcnt);
+          break;
+
+        case start_memory:
+          mcnt = *p++;
+          printf ("/start_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case stop_memory:
+          mcnt = *p++;
+          printf ("/stop_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case duplicate:
+          printf ("/duplicate/%d", *p++);
+          break;
+
+        case anychar:
+          printf ("/anychar");
+          break;
+
+        case charset:
+        case charset_not:
+          {
+            register int c, last = -100;
+            register int in_range = 0;
+
+            printf ("/charset [%s",
+                    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
+
+            /* *P is the number of bitmap bytes that follow it.  */
+            assert (p + *p < pend);
+
+            for (c = 0; c < 256; c++)
+              if (c / 8 < *p
+                  && (p[1 + (c/8)] & (1 << (c % 8))))
+                {
+                  /* Are we starting a range?  */
+                  if (last + 1 == c && ! in_range)
+                    {
+                      putchar ('-');
+                      in_range = 1;
+                    }
+                  /* Have we broken a range?  */
+                  else if (last + 1 != c && in_range)
+                    {
+                      putchar (last);
+                      in_range = 0;
+                    }
+
+                  if (! in_range)
+                    putchar (c);
+
+                  last = c;
+                }
+
+            if (in_range)
+              putchar (last);
+
+            putchar (']');
+
+            /* Skip the length byte and the bitmap.  */
+            p += 1 + *p;
+          }
+          break;
+
+        case begline:
+          printf ("/begline");
+          break;
+
+        case endline:
+          printf ("/endline");
+          break;
+
+        case on_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case on_failure_keep_string_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_keep_string_jump to %d",
+                  (int) (p + mcnt - start));
+          break;
+
+        case dummy_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/dummy_failure_jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case push_dummy_failure:
+          printf ("/push_dummy_failure");
+          break;
+
+        case maybe_pop_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/maybe_pop_jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case pop_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/pop_failure_jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case jump_past_alt:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump_past_alt to %d", (int) (p + mcnt - start));
+          break;
+
+        case jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case succeed_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/succeed_n to %d, %d times",
+                  (int) (p + mcnt - start), mcnt2);
+          break;
+
+        case jump_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/jump_n to %d, %d times",
+                  (int) (p + mcnt - start), mcnt2);
+          break;
+
+        case set_number_at:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/set_number_at location %d to %d",
+                  (int) (p + mcnt - start), mcnt2);
+          break;
+
+        case wordbound:
+          printf ("/wordbound");
+          break;
+
+        case notwordbound:
+          printf ("/notwordbound");
+          break;
+
+        case wordbeg:
+          printf ("/wordbeg");
+          break;
+
+        case wordend:
+          printf ("/wordend");
+          /* This `break' was missing: control fell through into the
+             next case and printed a second, bogus opcode name.  */
+          break;
+
+#ifdef emacs
+        case before_dot:
+          printf ("/before_dot");
+          break;
+
+        case at_dot:
+          printf ("/at_dot");
+          break;
+
+        case after_dot:
+          printf ("/after_dot");
+          break;
+
+        case syntaxspec:
+          printf ("/syntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+
+        case notsyntaxspec:
+          printf ("/notsyntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+#endif /* emacs */
+
+        case wordchar:
+          printf ("/wordchar");
+          break;
+
+        case notwordchar:
+          printf ("/notwordchar");
+          break;
+
+        case begbuf:
+          printf ("/begbuf");
+          break;
+
+        case endbuf:
+          printf ("/endbuf");
+          break;
+
+        default:
+          /* Unknown opcode: show its numeric value.  */
+          printf ("?%d", *(p-1));
+          break;
+        }
+
+      putchar ('\n');
+    }
+
+  printf ("%d:\tend of pattern.\n", (int) (p - start));
+}
+
+
+/* Print the whole compiled pattern held in BUFP, then the bookkeeping
+   fields of the pattern buffer.  The fastmap is printed only when it
+   is present and marked accurate.  */
+void
+print_compiled_pattern (bufp)
+    struct re_pattern_buffer *bufp;
+{
+  unsigned char *buffer = bufp->buffer;
+
+  print_partial_compiled_pattern (buffer, buffer + bufp->used);
+
+  /* The size fields, `re_nsub' and `syntax' are cast to unsigned long
+     and printed with `%lu' so the argument matches the conversion
+     whatever integer type regex.h declares them with; passing a wider
+     or unsigned type to `%d' is undefined.  */
+  printf ("%lu bytes used/%lu bytes allocated.\n",
+          (unsigned long) bufp->used, (unsigned long) bufp->allocated);
+
+  if (bufp->fastmap_accurate && bufp->fastmap)
+    {
+      printf ("fastmap: ");
+      print_fastmap (bufp->fastmap);
+    }
+
+  printf ("re_nsub: %lu\t", (unsigned long) bufp->re_nsub);
+  printf ("regs_alloc: %d\t", bufp->regs_allocated);
+  printf ("can_be_null: %d\t", bufp->can_be_null);
+  printf ("newline_anchor: %d\n", bufp->newline_anchor);
+  printf ("no_sub: %d\t", bufp->no_sub);
+  printf ("not_bol: %d\t", bufp->not_bol);
+  printf ("not_eol: %d\t", bufp->not_eol);
+  printf ("syntax: %lu\n", (unsigned long) bufp->syntax);
+  /* Perhaps we should print the translate table?  */
+}
+
+
+/* Print the text from WHERE to the end of the two-part subject string
+   (STRING1 of SIZE1 bytes followed by STRING2 of SIZE2 bytes).  If
+   FIRST_STRING_P says WHERE lies in the first part, the remainder of
+   STRING1 is printed followed by all of STRING2; otherwise only the
+   remainder of STRING2 is printed.  A null WHERE prints "(null)".  */
+void
+print_double_string (where, string1, size1, string2, size2)
+    const char *where;
+    const char *string1;
+    const char *string2;
+    int size1;
+    int size2;
+{
+  unsigned pos;
+
+  if (where == NULL)
+    {
+      printf ("(null)");
+      return;
+    }
+
+  if (FIRST_STRING_P (where))
+    {
+      pos = where - string1;
+      while (pos < size1)
+        {
+          putchar (string1[pos]);
+          pos++;
+        }
+
+      /* Carry on from the very start of the second part.  */
+      where = string2;
+    }
+
+  pos = where - string2;
+  while (pos < size2)
+    {
+      putchar (string2[pos]);
+      pos++;
+    }
+}
+
+#else /* not DEBUG */
+
+/* Without DEBUG, assert and every debugging macro expand to nothing,
+   so their arguments are never evaluated.  */
+#undef assert
+#define assert(e)
+
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* The regexp syntax currently recognized.  `re_set_syntax' stores into
+   it, and it may also be assigned directly: each pattern buffer keeps
+   its own copy of the syntax, so the value can change between regex
+   compilations.  */
+/* Deliberately left without an initializer, because initialized
+   variables in Emacs become read-only after dumping.  */
+reg_syntax_t re_syntax_options;
+
+
+/* Select the precise regexp syntax used for compilation, for
+   compatibility with utilities that historically have different,
+   incompatible syntaxes.
+
+   SYNTAX is a bit mask built from the bits defined in regex.h.  The
+   syntax that was in effect before the call is returned.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+    reg_syntax_t syntax;
+{
+  reg_syntax_t previous;
+
+  previous = re_syntax_options;
+  re_syntax_options = syntax;
+
+  return previous;
+}
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. The table is indexed by error code, so the order of the
+ entries here has to be the same as the order of the codes there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice? Each string is wrapped in gettext_noop. */
+
+static const char *re_error_msgid[] =
+ {
+ gettext_noop ("Success"), /* REG_NOERROR */
+ gettext_noop ("No match"), /* REG_NOMATCH */
+ gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
+ gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
+ gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
+ gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
+ gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
+ gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
+ gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
+ gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
+ gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
+ gettext_noop ("Invalid range end"), /* REG_ERANGE */
+ gettext_noop ("Memory exhausted"), /* REG_ESPACE */
+ gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
+ gettext_noop ("Premature end of regular expression"), /* REG_EEND */
+ gettext_noop ("Regular expression too big"), /* REG_ESIZE */
+ gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
+ };
+
+/* Avoiding alloca during matching, to placate r_alloc. */
+
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
+ searching and matching functions should not call alloca. On some
+ systems, alloca is implemented in terms of malloc, and if we're
+ using the relocating allocator routines, then malloc could cause a
+ relocation, which might (if the strings being searched are in the
+ ralloc heap) shift the data out from underneath the regexp
+ routines.
+
+ Here's another reason to avoid allocation: Emacs
+ processes input from X in a signal handler; processing X input may
+ call malloc; if input arrives while a matching routine is calling
+ malloc, then we're scrod. But Emacs can't just block input while
+ calling matching routines; then we don't notice interrupts when
+ they come in. So, Emacs blocks input around all regexp calls
+ except the matching calls, which it leaves unprotected, in the
+ faith that they will not malloc. */
+
+/* Normally, this is fine: start out allowing the match routines to
+ allocate, and withdraw the permission below if necessary. */
+#define MATCH_MAY_ALLOCATE
+
+/* When using GNU C, we are not REALLY using the C alloca, no matter
+ what config.h may say. So don't take precautions for it. */
+#ifdef __GNUC__
+#undef C_ALLOCA
+#endif
+
+/* The match routines may not allocate if (1) they would do it with malloc
+ and (2) it's not safe for them to use malloc.
+ Note that if REL_ALLOC is defined, matching would not use malloc for the
+ failure stack, but we would still use it for the register vectors;
+ so REL_ALLOC should not affect this. */
+#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
+#undef MATCH_MAY_ALLOCATE
+#endif
+
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+ re_match_2 use a failure stack. These have to be macros because of
+ REGEX_ALLOCATE_STACK. */
+
+
+/* Number of failure points for which to initially allocate space
+ when matching. If this number is exceeded, we allocate more
+ space, so it is not a hard limit. May be overridden by defining
+ INIT_FAILURE_ALLOC before this point. */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always used MAX_FAILURE_SPACE each time we failed.
+ This is a variable only so users of regex can assign to it; we never
+ change it ourselves. The default is ten times smaller when the
+ match routines are not allowed to allocate. */
+#if defined (MATCH_MAY_ALLOCATE)
+int re_max_failures = 20000;
+#else
+int re_max_failures = 2000;
+#endif
+
+/* One slot of the failure stack: holds either a pointer or an
+   integer.  */
+union fail_stack_elt
+{
+ unsigned char *pointer;
+ int integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+/* The failure stack itself: an array of slots plus its bookkeeping.  */
+typedef struct
+{
+ fail_stack_elt_t *stack;
+ unsigned size; /* Compared against `avail' to detect a full stack. */
+ unsigned avail; /* Offset of next open position. */
+} fail_stack_type;
+
+/* Predicates on the failure stack.  The first and third assume a
+   variable named `fail_stack'; the second assumes a pointer named
+   `fail_stack_ptr'.  */
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+
+
+/* Define macros to initialize and free the failure stack.
+ Both assume a variable named `fail_stack'.
+
+ When the match routines may allocate, INIT_FAIL_STACK obtains room
+ for INIT_FAILURE_ALLOC slots and does `return -2' from the enclosing
+ function if the alloc fails. Otherwise it only resets `avail', and
+ RESET_FAIL_STACK expands to nothing. */
+
+#ifdef MATCH_MAY_ALLOCATE
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+#else
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.avail = 0; \
+ } while (0)
+
+#define RESET_FAIL_STACK()
+#endif
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+ Return 1 if succeeds, and 0 if either ran out of memory
+ allocating space for it or it was already too large.
+
+ REGEX_REALLOCATE_STACK requires `destination' be declared.
+ MAX_FAILURE_ITEMS in turn refers to a variable `num_regs'.
+
+ NOTE(review): the result of REGEX_REALLOCATE_STACK is stored straight
+ into `stack' before it is tested, so on failure the previous stack
+ pointer is overwritten with NULL. Whether that loses memory depends
+ on how REGEX_REALLOCATE_STACK is defined -- confirm. */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push pointer POINTER on FAIL_STACK.
+   Return 1 if was able to do so and 0 if ran out of memory allocating
+   space to do so.
+
+   The fullness test is made on the FAIL_STACK argument itself rather
+   than through FAIL_STACK_FULL (), which always inspects the variable
+   named `fail_stack'; the result is the same whenever the argument is
+   that variable, and correct when it is not.  POINTER is parenthesized
+   so that any expression may be passed.  */
+#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
+  ((((FAIL_STACK).avail == (FAIL_STACK).size) \
+    && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
+   ? 0 \
+   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER), \
+      1))
+
+/* Store ITEM, converted to `unsigned char *', in the next free slot
+   of the failure stack and advance `avail'.  Assumes the variable
+   `fail_stack' and performs no capacity check, so it probably should
+   only be called from within `PUSH_FAILURE_POINT'.  */
+#define PUSH_FAILURE_POINTER(item) \
+  (fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item))
+
+/* Same, for an integer-valued ITEM.  Assumes the variable
+   `fail_stack'.  Probably should only be called from within
+   `PUSH_FAILURE_POINT'.  */
+#define PUSH_FAILURE_INT(item) \
+  (fail_stack.stack[fail_stack.avail++].integer = (item))
+
+/* Same, for a whole fail_stack_elt_t ITEM.  Assumes the variable
+   `fail_stack'.  Probably should only be called from within
+   `PUSH_FAILURE_POINT'.  */
+#define PUSH_FAILURE_ELT(item) \
+  (fail_stack.stack[fail_stack.avail++] = (item))
+
+/* The three POP... operations undo the three PUSH... operations: each
+   decrements `avail' and yields the slot that is then on top.
+   All assume that `fail_stack' is nonempty.  */
+#define POP_FAILURE_POINTER() (fail_stack.stack[--fail_stack.avail].pointer)
+#define POP_FAILURE_INT() (fail_stack.stack[--fail_stack.avail].integer)
+#define POP_FAILURE_ELT() (fail_stack.stack[--fail_stack.avail])
+
+/* Used to omit pushing failure point id's when we're not debugging.
+   With DEBUG, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores the
+   popped integer through ITEM_ADDR; without it both expand to nothing,
+   so the stack layout has one slot fewer per failure point.  */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_INT
+#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+ if we ever fail back to it.
+
+ Items are pushed in this order: for each register from
+ lowest_active_reg to highest_active_reg its start, end and info
+ word; then lowest_active_reg, highest_active_reg, PATTERN_PLACE and
+ STRING_PLACE; and, when debugging, the failure id. The stack is
+ doubled as often as needed beforehand to make room.
+
+ Requires variables fail_stack, regstart, regend, reg_info,
+ lowest_active_reg, highest_active_reg and num_regs be declared
+ (plus failure_id and the other debugging counters under DEBUG).
+ DOUBLE_FAIL_STACK requires `destination' be declared, which is why
+ the otherwise unused local of that name exists.
+
+ Does `return FAILURE_CODE' if runs out of memory. */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ int this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ if (1) \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
+ PUSH_FAILURE_INT (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
+ PUSH_FAILURE_INT (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_POINTER (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_POINTER (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+/* Number of stack slots pushed and popped for each register.  */
+#define NUM_REG_ITEMS 3
+
+/* Number of slots pushed besides the per-register ones.  */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* Upper bound on the slots one failure point can take.  Assumes the
+   variable `num_regs'.  */
+#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* Slots one failure point actually takes right now: NUM_REG_ITEMS for
+   each register from `lowest_active_reg' to `highest_active_reg',
+   plus the non-register slots.  */
+#define NUM_FAILURE_ITEMS \
+  (((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS) \
+   + NUM_NONREG_ITEMS)
+
+/* Slots still free on the stack before it has to grow.  Assumes the
+   variable `fail_stack'.  */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the lowest and highest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ Items come off in the reverse of the order they were pushed: the
+ failure id (debugging only), the string position, the pattern
+ position, HIGH_REG, LOW_REG, and then info/end/start for each
+ register from HIGH_REG down to LOW_REG. Finally
+ `set_regs_matched_done' is cleared.
+
+ Because the register loop is guarded by `if (1)', the `else' branch
+ that would clear the registers above HIGH_REG is never executed.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
+ int this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_POINTER (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (unsigned) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ \
+ low_reg = (unsigned) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ \
+ if (1) \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ELT (); \
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ } \
+ else \
+ { \
+ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
+ { \
+ reg_info[this_reg].word.integer = 0; \
+ regend[this_reg] = 0; \
+ regstart[this_reg] = 0; \
+ } \
+ highest_active_reg = high_reg; \
+ } \
+ \
+ set_regs_matched_done = 0; \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
+
+
+
+/* Structure for per-register (a.k.a. per-group) information.
+ Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+
+typedef union
+{
+ /* The whole record viewed as one failure-stack slot. */
+ fail_stack_elt_t word;
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2;
+ unsigned is_active : 1;
+ unsigned matched_something : 1;
+ unsigned ever_matched_something : 1;
+ } bits;
+} register_info_type;
+
+/* Accessors for the individual bit fields of a register_info_type R.
+   Each expands to an lvalue.  */
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R) ((R).bits.is_active)
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+/* Call this when a real character has been matched.  Unless it has
+   already been done since `set_regs_matched_done' was last cleared,
+   it sets both the `matched_something' and `ever_matched_something'
+   flags of every register from `lowest_active_reg' through
+   `highest_active_reg', i.e. of the subexpressions we are currently
+   inside.  */
+#define SET_REGS_MATCHED() \
+  do \
+    { \
+      if (!set_regs_matched_done) \
+        { \
+          unsigned active; \
+          \
+          set_regs_matched_done = 1; \
+          for (active = lowest_active_reg; \
+               active <= highest_active_reg; \
+               active++) \
+            { \
+              EVER_MATCHED_SOMETHING (reg_info[active]) = 1; \
+              MATCHED_SOMETHING (reg_info[active]) = 1; \
+            } \
+        } \
+    } \
+  while (0)
+
+/* Registers are set to a sentinel when they haven't yet matched.
+   The sentinel is the address of a private dummy object, and
+   REG_UNSET (E) tests E against that address.  */
+static char reg_unset_dummy;
+#define REG_UNSET_VALUE (&reg_unset_dummy)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+/* Subroutine declarations and macros for regex_compile.  The
+   declarations are old-style: they give no parameter lists.  */
+
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate').
+
+ Assumes the variables `p', `pend' and `translate'. If the pattern is
+ exhausted (p == pend), does `return REG_EEND' from the enclosing
+ function. */
+#ifndef PATFETCH
+#define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = (unsigned char) translate[c]; \
+ } while (0)
+#endif
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. Like PATFETCH, returns REG_EEND from the enclosing
+ function when the pattern is exhausted. */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned.
+ The translated value is cast back to `char'. Assumes the variable
+ `translate'; may be overridden by defining TRANSLATE beforehand. */
+#ifndef TRANSLATE
+#define TRANSLATE(d) \
+ (translate ? (char) translate[(unsigned char) (d)] : (d))
+#endif
+
+
+/* Macros for outputting the compiled pattern into `buffer'.
+   All of them assume the variables `b' (the next free byte) and
+   `bufp' (the pattern buffer).  */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 32
+
+/* Make sure we have at least N more bytes of space in buffer.
+   Expands to a bare `while' loop around EXTEND_BUFFER, so it inherits
+   that macro's early `return' on failure.  */
+#define GET_BUFFER_SPACE(n) \
+ while (b - bufp->buffer + (n) > bufp->allocated) \
+ EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it. */
+#define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (unsigned char) (c); \
+ } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes. */
+#define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ *b++ = (unsigned char) (c3); \
+ } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies,
+ i.e. TO - LOC - 3. */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump; ARG is passed through as the
+ second argument. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Extend the buffer to twice its current size via realloc and
+ reset the pointers that pointed into the old block to point to the
+ correct places in the new one. The new size is clamped to
+ MAX_BUF_SIZE; if the buffer is already that large we do
+ `return REG_ESIZE', and if realloc fails we do `return REG_ESPACE'.
+
+ Assumes the variables `bufp', `b', `begalt', `fixup_alt_jump',
+ `laststart' and `pending_exact'.
+
+ NOTE(review): when realloc fails, bufp->buffer has already been
+ overwritten with NULL and bufp->allocated has already been doubled,
+ so the old block is no longer reachable through BUFP -- confirm how
+ callers clean up after REG_ESPACE. */
+#define EXTEND_BUFFER() \
+ do { \
+ unsigned char *old_buffer = bufp->buffer; \
+ if (bufp->allocated == MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ bufp->allocated <<= 1; \
+ if (bufp->allocated > MAX_BUF_SIZE) \
+ bufp->allocated = MAX_BUF_SIZE; \
+ bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+ if (bufp->buffer == NULL) \
+ return REG_ESPACE; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != bufp->buffer) \
+ { \
+ b = (b - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
+ if (fixup_alt_jump) \
+ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+ if (laststart) \
+ laststart = (laststart - old_buffer) + bufp->buffer; \
+ if (pending_exact) \
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+ } \
+ } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+typedef int pattern_offset_t;
+
+/* State saved when a group is opened and restored when it is closed.
+ Positions are kept as offsets from `bufp->buffer' rather than as
+ pointers, so they stay valid if the buffer is moved by realloc. */
+typedef struct
+{
+ /* Offset of `begalt' at the time the group was opened. */
+ pattern_offset_t begalt_offset;
+ /* Offset of `fixup_alt_jump' plus one, or 0 if it was null. */
+ pattern_offset_t fixup_alt_jump;
+ /* Offset of the third byte of this group's `start_memory', which is
+ filled in with the inner-group count when the group closes. Only
+ set when the group's number is <= MAX_REGNUM. */
+ pattern_offset_t inner_group_offset;
+ /* Offset of the buffer end (`b') at the time the group was opened;
+ becomes `laststart' when the group closes. */
+ pattern_offset_t laststart_offset;
+ /* Number assigned to this group. */
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+/* Growable array of saved group states, used as a stack. */
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+/* Number of elements the compile stack starts out with. */
+#define INIT_COMPILE_STACK_SIZE 32
+
+/* These refer to a variable named `compile_stack' in the expanding
+ scope. */
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. Note that this is the slot at index
+ `avail', i.e. the one about to be pushed into, or the one just
+ popped once `avail' has been decremented. */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list. `b' must point at the first
+ byte of the bitmap. C is parenthesized at both uses so that the
+ cast applies to the whole argument even when it is a compound
+ expression; note that C is still evaluated twice. */
+#define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern.
+ NUM must be an int lvalue; it is left untouched (normally -1) if no
+ digit is found. On exit `c' holds the last character fetched, which
+ is the first non-digit unless the pattern ended first.
+
+ Accumulation stops once NUM exceeds RE_DUP_MAX: the remaining digits
+ are still consumed, but NUM is not multiplied any further, so an
+ over-long digit string cannot overflow the signed arithmetic. NUM
+ stays above RE_DUP_MAX in that case, so it is still seen as out of
+ range by any check against that limit. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (ISDIGIT (c)) \
+ { \
+ if ((num) <= RE_DUP_MAX) \
+ { \
+ if ((num) < 0) \
+ (num) = 0; \
+ (num) = (num) * 10 + c - '0'; \
+ } \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+/* Length of the longest class name accepted by IS_CHAR_CLASS; buffers
+ holding a class name need one more byte for the terminating null. */
+#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+/* Nonzero if STRING is one of the twelve character class names
+ recognized inside `[: :]'. */
+#define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+
+#ifndef MATCH_MAY_ALLOCATE
+
+/* If we cannot allocate large objects within re_match_2_internal,
+ we make the fail stack and register vectors global.
+ The fail stack, we grow to the maximum size when a regexp
+ is compiled.
+ The register vectors, we adjust in size each time we
+ compile a regexp, according to the number of registers it needs. */
+
+static fail_stack_type fail_stack;
+
+/* Size with which the following vectors are currently allocated.
+ That is so we can make them bigger as needed,
+ but never make them smaller. */
+static int regs_allocated_size;
+
+static const char ** regstart, ** regend;
+static const char ** old_regstart, ** old_regend;
+static const char **best_regstart, **best_regend;
+static register_info_type *reg_info;
+static const char **reg_dummy;
+static register_info_type *reg_info_dummy;
+
+/* Make the register vectors big enough for NUM_REGS registers,
+ but don't make them smaller.
+
+ Declared `void' explicitly: the function never returns a value, and
+ its only caller in this file ignores the result.
+
+ NOTE(review): the results of the RETALLOC_IF calls are not checked
+ here, so an allocation failure goes unreported -- confirm how the
+ matcher copes with that. */
+
+static void
+regex_grow_registers (num_regs)
+ int num_regs;
+{
+ if (num_regs > regs_allocated_size)
+ {
+ RETALLOC_IF (regstart, num_regs, const char *);
+ RETALLOC_IF (regend, num_regs, const char *);
+ RETALLOC_IF (old_regstart, num_regs, const char *);
+ RETALLOC_IF (old_regend, num_regs, const char *);
+ RETALLOC_IF (best_regstart, num_regs, const char *);
+ RETALLOC_IF (best_regend, num_regs, const char *);
+ RETALLOC_IF (reg_info, num_regs, register_info_type);
+ RETALLOC_IF (reg_dummy, num_regs, const char *);
+ RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
+
+ regs_allocated_size = num_regs;
+ }
+}
+
+#endif /* not MATCH_MAY_ALLOCATE */
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+/* Return, freeing storage we allocated. VALUE is returned from the
+ enclosing function after the compile stack has been freed. */
+#define FREE_STACK_RETURN(value) \
+ return (free (compile_stack.stack), value)
+
+/* Compile PATTERN (SIZE bytes, not necessarily null-terminated)
+ according to SYNTAX into BUFP. Returns REG_NOERROR on success or
+ another `reg_errcode_t' value describing the failure.
+
+ Every look-ahead at `*p', `p[0]' or `p[1]' is guarded against `pend',
+ so nothing at or beyond PATTERN + SIZE is ever read. The guards are
+ written so that a pattern followed by a null byte compiles exactly as
+ it did when that byte was (incorrectly) being examined. */
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+ const char *pattern;
+ int size;
+ reg_syntax_t syntax;
+ struct re_pattern_buffer *bufp;
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random temporary spot in PATTERN. */
+ const char *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register unsigned char *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ const char *p = pattern;
+ const char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ unsigned char *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ unsigned char *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ unsigned char *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ unsigned char *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
+ {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ putchar (pattern[debug_count]);
+ putchar ('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+ }
+ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
+
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+
+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (3);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && zero_times_ok
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+ /* We've added more stuff to the buffer. */
+ b += 3;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 3);
+ pending_exact = 0;
+ b += 3;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. At the end of the pattern there
+ is no `]' to look at, so that counts as a range too. */
+ if (had_char_class && c == '-' && (p == pend || *p != ']'))
+ FREE_STACK_RETURN (REG_ERANGE);
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. If the pattern ends right after the hyphen
+ `compile_range' is handed an empty remainder, as it was
+ before the end-of-pattern guard was added. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && (p == pend || *p != ']'))
+ {
+ reg_errcode_t ret
+ = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ /* Only peek at p[0] and p[1] when they lie inside the
+ pattern. */
+ else if (p != pend && p[0] == '-'
+ && (p + 1 == pend || p[1] != ']'))
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '['
+ && p != pend && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+ /* If isn't a word bracketed by `[:' and:`]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them).
+ The loop above can stop with `p' at `pend', so check
+ before looking at `*p'. */
+ if (c == ':' && p != pend && *p == ']')
+ {
+ int ch;
+ boolean is_alnum = STREQ (str, "alnum");
+ boolean is_alpha = STREQ (str, "alpha");
+ boolean is_blank = STREQ (str, "blank");
+ boolean is_cntrl = STREQ (str, "cntrl");
+ boolean is_digit = STREQ (str, "digit");
+ boolean is_graph = STREQ (str, "graph");
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_print = STREQ (str, "print");
+ boolean is_punct = STREQ (str, "punct");
+ boolean is_space = STREQ (str, "space");
+ boolean is_upper = STREQ (str, "upper");
+ boolean is_xdigit = STREQ (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str))
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+ {
+ /* This was split into 3 if's to
+ avoid an arbitrary limit in some compiler. */
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
+ SET_LIST_BIT (ch);
+ }
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_LIST_BIT (c);
+ }
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ bufp->re_nsub++;
+ regnum++;
+
+ if (COMPILE_STACK_FULL)
+ {
+ /* Grow through a temporary so that the old stack is
+ neither lost nor leaked when realloc fails. */
+ compile_stack_elt_t *new_stack
+ = (compile_stack_elt_t *)
+ realloc (compile_stack.stack,
+ (compile_stack.size << 1)
+ * sizeof (compile_stack_elt_t));
+ if (new_stack == NULL) FREE_STACK_RETURN (REG_ESPACE);
+
+ compile_stack.stack = new_stack;
+ compile_stack.size <<= 1;
+ }
+
+ /* These are the values to restore when we hit end of this
+ group. They are all relative offsets, so that if the
+ whole pattern moves because of realloc, they will still
+ be valid. */
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+ COMPILE_STACK_TOP.fixup_alt_jump
+ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ /* We will eventually replace the 0 with the number of
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM)
+ {
+ COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+ BUF_PUSH_3 (start_memory, regnum, 0);
+ }
+
+ compile_stack.avail++;
+
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+
+ handle_close:
+ if (fixup_alt_jump)
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH (push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ }
+
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+ assert (compile_stack.avail != 0);
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+
+ compile_stack.avail--;
+ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+ fixup_alt_jump
+ = COMPILE_STACK_TOP.fixup_alt_jump
+ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+ : 0;
+ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+ /* If this group is numbered beyond MAX_REGNUM, then this
+ close won't actually generate any code, so we'll have
+ to clear pending_exact explicitly. */
+ pending_exact = 0;
+
+ /* We're at the end of the group, so now we know how many
+ groups were inside this one. */
+ if (this_group_regnum <= MAX_REGNUM)
+ {
+ unsigned char *inner_group_loc
+ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+
+ *inner_group_loc = regnum - this_group_regnum;
+ BUF_PUSH_3 (stop_memory, this_group_regnum,
+ regnum - this_group_regnum);
+ }
+ }
+ break;
+
+
+ case '|': /* `\|'. */
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (on_failure_jump, begalt, b + 6);
+ pending_exact = 0;
+ b += 3;
+
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ /* Mark and leave space for a jump after this alternative,
+ to be filled in later either by next alternative or
+ when know we're at the end of a series of alternatives. */
+ fixup_alt_jump = b;
+ GET_BUFFER_SPACE (3);
+ b += 3;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ || (p - 2 == pattern && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_EBRACE);
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
+
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ laststart = b;
+ else
+ goto unfetch_interval;
+ }
+
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ }
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+ GET_BUFFER_SPACE (nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 5 + (upper_bound > 1) * 5,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+ b += 5;
+
+ if (upper_bound > 1)
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart + 5,
+ upper_bound - 1);
+ b += 5;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 5;
+ }
+ }
+ pending_exact = 0;
+ beg_interval = NULL;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ assert (beg_interval);
+ p = beg_interval;
+ beg_interval = NULL;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ BUF_PUSH (at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ laststart = b;
+ BUF_PUSH (wordchar);
+ break;
+
+
+ case 'W':
+ laststart = b;
+ BUF_PUSH (notwordchar);
+ break;
+
+
+ case '<':
+ BUF_PUSH (wordbeg);
+ break;
+
+ case '>':
+ BUF_PUSH (wordend);
+ break;
+
+ case 'b':
+ BUF_PUSH (wordbound);
+ break;
+
+ case 'B':
+ BUF_PUSH (notwordbound);
+ break;
+
+ case '`':
+ BUF_PUSH (begbuf);
+ break;
+
+ case '\'':
+ BUF_PUSH (endbuf);
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ FREE_STACK_RETURN (REG_ESUBREG);
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, c1))
+ goto normal_char;
+
+ laststart = b;
+ BUF_PUSH_2 (duplicate, c1);
+ break;
+
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ /* If no exactn currently being built. */
+ if (!pending_exact
+
+ /* If last exactn not at current position. */
+ || pending_exact + *pending_exact + 1 != b
+
+ /* We have only one byte following the exactn for the count. */
+ || *pending_exact == (1 << BYTEWIDTH) - 1
+
+ /* If followed by a repetition operator. Each look-ahead
+ first checks that the characters it examines are still
+ inside the pattern. */
+ || (p != pend && (*p == '*' || *p == '^'))
+ || ((syntax & RE_BK_PLUS_QM)
+ ? (pend - p > 1 && *p == '\\' && (p[1] == '+' || p[1] == '?'))
+ : (p != pend && (*p == '+' || *p == '?')))
+ || ((syntax & RE_INTERVALS)
+ && ((syntax & RE_NO_BK_BRACES)
+ ? (p != pend && *p == '{')
+ : (pend - p > 1 && p[0] == '\\' && p[1] == '{'))))
+ {
+ /* Start building a new exactn. */
+
+ laststart = b;
+
+ BUF_PUSH_2 (exactn, 0);
+ pending_exact = b - 1;
+ }
+
+ BUF_PUSH (c);
+ (*pending_exact)++;
+ break;
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ /* Through the pattern now. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ if (!COMPILE_STACK_EMPTY)
+ FREE_STACK_RETURN (REG_EPAREN);
+
+ /* If we don't want backtracking, force success
+ the first time we reach the end of the compiled pattern. */
+ if (syntax & RE_NO_POSIX_BACKTRACKING)
+ BUF_PUSH (succeed);
+
+ free (compile_stack.stack);
+
+ /* We have succeeded; set the length of the buffer. */
+ bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+ if (debug)
+ {
+ DEBUG_PRINT1 ("\nCompiled pattern: \n");
+ print_compiled_pattern (bufp);
+ }
+#endif /* DEBUG */
+
+#ifndef MATCH_MAY_ALLOCATE
+ /* Initialize the failure stack to the largest possible stack. This
+ isn't necessary unless we're trying to avoid calling alloca in
+ the search and match routines.
+ NOTE(review): the malloc/realloc results below are stored without
+ being checked -- confirm how the matcher copes with a null
+ `fail_stack.stack'. */
+ {
+ int num_regs = bufp->re_nsub + 1;
+
+ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
+ is strictly greater than re_max_failures, the largest possible stack
+ is 2 * re_max_failures failure points. */
+ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+ {
+ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+
+#ifdef emacs
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) xmalloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+#else /* not emacs */
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) malloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) realloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+#endif /* not emacs */
+ }
+
+ regex_grow_registers (num_regs);
+ }
+#endif /* not MATCH_MAY_ALLOCATE */
+
+ return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'. */
+
+/* Emit a one-operand instruction at LOC: the opcode byte OP goes in
+   LOC[0], and ARG is written by STORE_NUMBER starting at LOC[1].  */
+
+static void
+store_op1 (op, loc, arg)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+{
+  unsigned char *operand = loc + 1;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (operand, arg);
+}
+
+
+/* Emit a two-operand instruction at LOC: the opcode byte OP goes in
+   LOC[0], ARG1 is written by STORE_NUMBER starting at LOC[1], and ARG2
+   starting at LOC[3].  */
+
+static void
+store_op2 (op, loc, arg1, arg2)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+{
+  unsigned char *first_operand = loc + 1;
+  unsigned char *second_operand = loc + 3;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (first_operand, arg1);
+  STORE_NUMBER (second_operand, arg2);
+}
+
+
+/* Make room for a three-byte instruction at LOC by shifting the bytes
+   in [LOC, END) up by three positions, then store OP and its two-byte
+   parameter ARG into the gap with store_op1.  The caller must make sure
+   the buffer has at least three free bytes starting at END.  */
+
+static void
+insert_op1 (op, loc, arg, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+     unsigned char *end;
+{
+  register unsigned char *src = end;
+  register unsigned char *dst = end + 3;
+
+  /* Source and destination overlap and the destination is higher, so
+     copy from the top down.  */
+  for (; src != loc; )
+    {
+      --src;
+      --dst;
+      *dst = *src;
+    }
+
+  store_op1 (op, loc, arg);
+}
+
+
+/* Make room for a five-byte instruction at LOC by shifting the bytes
+   in [LOC, END) up by five positions, then store OP and its two
+   two-byte parameters ARG1 and ARG2 into the gap with store_op2.  The
+   caller must make sure the buffer has at least five free bytes
+   starting at END.  */
+
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+     unsigned char *end;
+{
+  register unsigned char *src = end;
+  register unsigned char *dst = end + 5;
+
+  /* Source and destination overlap and the destination is higher, so
+     copy from the top down.  */
+  for (; src != loc; )
+    {
+      --src;
+      --dst;
+      *dst = *src;
+    }
+
+  store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* Decide whether the `^' that immediately precedes P in PATTERN sits
+   right after an open-group or an alternation operator.  Which
+   spelling counts as the operator depends on SYNTAX: with
+   RE_NO_BK_PARENS (resp. RE_NO_BK_VBAR) a bare `(' (resp. `|')
+   qualifies, and in any case one preceded by a backslash does.  The
+   caller guarantees at least one character precedes the `^'.  */
+
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+     const char *pattern, *p;
+     reg_syntax_t syntax;
+{
+  /* P[-1] is the `^' itself, so the character of interest is P[-2].  */
+  const char *before = p - 2;
+  boolean escaped = before > pattern && before[-1] == '\\';
+
+  switch (*before)
+    {
+    case '(':
+      /* After a subexpression?  */
+      return (syntax & RE_NO_BK_PARENS) != 0 || escaped;
+
+    case '|':
+      /* After an alternative?  */
+      return (syntax & RE_NO_BK_VBAR) != 0 || escaped;
+
+    default:
+      return 0;
+    }
+}
+
+
+/* The dual of at_begline_loc_p.  This one is for $.  P points just
+   after the `$'; return true if what follows is a close-group or an
+   alternation operator under SYNTAX.  With RE_NO_BK_PARENS (resp.
+   RE_NO_BK_VBAR) the operator is a bare `)' (resp. `|'); otherwise it
+   is the backslashed form, which needs two characters before PEND.
+
+   We assume there is at least one character after the $, i.e.,
+   `P < PEND', because *P is read unconditionally.
+
+   SYNTAX is declared reg_syntax_t, like the same parameter of
+   at_begline_loc_p and compile_range, so that this old-style
+   definition receives the syntax bits in the type callers hold them
+   in instead of as a plain int.  */
+
+static boolean
+at_endline_loc_p (p, pend, syntax)
+     const char *p, *pend;
+     reg_syntax_t syntax;
+{
+  const char *next = p;
+  boolean next_backslash = *next == '\\';
+  /* Null when the pattern ends right after *NEXT.  */
+  const char *next_next = p + 1 < pend ? p + 1 : 0;
+
+  return
+    /* Before a subexpression?  */
+    (syntax & RE_NO_BK_PARENS ? *next == ')'
+     : next_backslash && next_next && *next_next == ')')
+    /* Before an alternative?  */
+    || (syntax & RE_NO_BK_VBAR ? *next == '|'
+        : next_backslash && next_next && *next_next == '|');
+}
+
+
+/* Search COMPILE_STACK from its most recently pushed element down to
+   its first one; return true as soon as an element whose `regnum'
+   equals REGNUM is found, false if there is none.  */
+
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+     compile_stack_type compile_stack;
+     regnum_t regnum;
+{
+  int remaining = compile_stack.avail;
+
+  while (remaining > 0)
+    {
+      remaining--;
+      if (compile_stack.stack[remaining].regnum == regnum)
+        return true;
+    }
+
+  return false;
+}
+
+
+/* Finish compiling a range inside a bracket expression.  On entry
+   *P_PTR points at the range's ending character in the uncompiled
+   pattern (which ends at PEND); the starting character is at `P[-2]'
+   and `P[-1]' is the `-'.  For every character from start to end
+   inclusive, set the bit for its translation in the bitmap B, and
+   advance *P_PTR past the ending character.
+
+   Returns REG_ERANGE if the pattern ends before the ending character,
+   or if the range is empty and SYNTAX has RE_NO_EMPTY_RANGES;
+   otherwise REG_NOERROR.
+
+   The names `p', `b' and `translate' are deliberately the same as in
+   `regex_compile' so that the same macros can be used here.  */
+
+static reg_errcode_t
+compile_range (p_ptr, pend, translate, syntax, b)
+     const char **p_ptr, *pend;
+     RE_TRANSLATE_TYPE translate;
+     reg_syntax_t syntax;
+     unsigned char *b;
+{
+  const char *p = *p_ptr;
+  int first_char, last_char;
+  unsigned this_char;
+
+  /* Nothing follows the `-', so there is no ending character.  */
+  if (p == pend)
+    return REG_ERANGE;
+
+  /* The pattern is a (possibly signed) `char *', but the endpoints
+     must be fetched as unsigned chars: with the high bit set a signed
+     fetch would yield a negative endpoint.  The endpoints are taken
+     untranslated; translation happens per character in the loop.
+     (The cast is spelled `const unsigned char *' because the SVR4
+     compiler on the 3B2 had trouble with `unsigned const char *'.)  */
+  first_char = ((const unsigned char *) p)[-2];
+  last_char = ((const unsigned char *) p)[0];
+
+  /* Step the caller's pattern pointer past the ending character.  */
+  *p_ptr += 1;
+
+  /* Start after end: the range is empty.  */
+  if (first_char > last_char)
+    {
+      if (syntax & RE_NO_EMPTY_RANGES)
+        return REG_ERANGE;
+      return REG_NOERROR;
+    }
+
+  /* `this_char' must be wider than an unsigned char: the range is
+     inclusive, so with an end of 0xff (8-bit characters) a narrower
+     counter would wrap and the loop would never terminate.  */
+  this_char = first_char;
+  while (this_char <= last_char)
+    {
+      SET_LIST_BIT (TRANSLATE (this_char));
+      this_char++;
+    }
+
+  return REG_NOERROR;
+}
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
+ characters can start a string that matches the pattern. This fastmap
+ is used by re_search to skip quickly over impossible starting points.
+
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+ area as BUFP->fastmap.
+
+ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+ the pattern buffer.
+
+ The compiled pattern is walked one path at a time. Opcodes that
+ consume a character mark the fastmap and end the current path;
+ opcodes that match the empty string are stepped over; each
+ on_failure_jump target is remembered on the failure stack as another
+ path to walk later.
+
+ Returns 0 if we succeed, -2 if an internal error (here: pushing an
+ alternative path onto the failure stack failed). */
+
+int
+re_compile_fastmap (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ int j, k;
+#ifdef MATCH_MAY_ALLOCATE
+ fail_stack_type fail_stack;
+#endif
+#ifndef REGEX_MALLOC
+ char *destination;
+#endif
+ /* We don't push any register information onto the failure stack. */
+ unsigned num_regs = 0;
+
+ register char *fastmap = bufp->fastmap;
+ unsigned char *pattern = bufp->buffer;
+ unsigned long size = bufp->used;
+ unsigned char *p = pattern;
+ register unsigned char *pend = pattern + size;
+
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+#ifdef REL_ALLOC
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* Assume that each path through the pattern can be null until
+ proven otherwise. We set this false at the bottom of switch
+ statement, to which we get only if a particular path doesn't
+ match the empty string. */
+ boolean path_can_be_null = true;
+
+ /* We aren't doing a `succeed_n' to begin with. */
+ boolean succeed_n_p = false;
+
+ assert (fastmap != NULL && p != NULL);
+
+ INIT_FAIL_STACK ();
+ bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
+ bufp->fastmap_accurate = 1; /* It will be when we're done. */
+ bufp->can_be_null = 0;
+
+ while (1)
+ {
+ if (p == pend || *p == succeed)
+ {
+ /* We have reached the (effective) end of pattern. */
+ if (!FAIL_STACK_EMPTY ())
+ {
+ bufp->can_be_null |= path_can_be_null;
+
+ /* Reset for next path. */
+ path_can_be_null = true;
+
+ /* Resume at the most recently remembered alternative. */
+ p = fail_stack.stack[--fail_stack.avail].pointer;
+
+ continue;
+ }
+ else
+ break;
+ }
+
+ /* We should never be about to go beyond the end of the pattern. */
+ assert (p < pend);
+
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+ {
+
+ /* I guess the idea here is to simply not bother with a fastmap
+ if a backreference is used, since it's too hard to figure out
+ the fastmap for the corresponding group. Setting
+ `can_be_null' stops `re_search_2' from using the fastmap, so
+ that is all we do. */
+ case duplicate:
+ bufp->can_be_null = 1;
+ goto done;
+
+
+ /* Following are the cases which match a character. These end
+ with `break'. */
+
+ case exactn:
+ /* p[0] is the count byte that follows the opcode; p[1] is the
+ first literal character, the only one that can start a match. */
+ fastmap[p[1]] = 1;
+ break;
+
+
+ case charset:
+ /* *p is the number of bitmap bytes that follow; every character
+ whose bit is set can start a match. */
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+ fastmap[j] = 1;
+ break;
+
+
+ case charset_not:
+ /* Chars beyond end of map must be allowed. */
+ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ /* Within the map, a clear bit means the character is accepted. */
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+ fastmap[j] = 1;
+ break;
+
+
+ case wordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case notwordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case anychar:
+ {
+ /* Remember the newline entry so it can be put back below. */
+ int fastmap_newline = fastmap['\n'];
+
+ /* `.' matches anything ... */
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ /* ... except perhaps newline. */
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
+ fastmap['\n'] = fastmap_newline;
+
+ /* Return if we have already set `can_be_null'; if we have,
+ then the fastmap is irrelevant. Something's wrong here. */
+ else if (bufp->can_be_null)
+ goto done;
+
+ /* Otherwise, have to check alternative paths. */
+ break;
+ }
+
+#ifdef emacs
+ case syntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ case notsyntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ /* All cases after this match the empty string. These end with
+ `continue'. */
+
+
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ continue;
+#endif /* emacs */
+
+
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbound:
+ case notwordbound:
+ case wordbeg:
+ case wordend:
+ case push_dummy_failure:
+ continue;
+
+
+ case jump_n:
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case jump_past_alt:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+ if (j > 0)
+ continue;
+
+ /* Jump backward implies we just went through the body of a
+ loop and matched nothing. Opcode jumped to should be
+ `on_failure_jump' or `succeed_n'. Just treat it like an
+ ordinary jump. For a * loop, it has pushed its failure
+ point already; if so, discard that as redundant. */
+ if ((re_opcode_t) *p != on_failure_jump
+ && (re_opcode_t) *p != succeed_n)
+ continue;
+
+ p++;
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+
+ /* If what's on the stack is where we are now, pop it. */
+ if (!FAIL_STACK_EMPTY ()
+ && fail_stack.stack[fail_stack.avail - 1].pointer == p)
+ fail_stack.avail--;
+
+ continue;
+
+
+ case on_failure_jump:
+ case on_failure_keep_string_jump:
+ handle_on_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+
+ /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+ end of the pattern. We don't want to push such a point,
+ since when we restore it above, entering the switch will
+ increment `p' past the end of the pattern. We don't need
+ to push such a point since we obviously won't find any more
+ fastmap entries beyond `pend'. Such a pattern can match
+ the null string, though. */
+ if (p + j < pend)
+ {
+ if (!PUSH_PATTERN_OP (p + j, fail_stack))
+ {
+ /* Could not record the alternative path: give up. */
+ RESET_FAIL_STACK ();
+ return -2;
+ }
+ }
+ else
+ bufp->can_be_null = 1;
+
+ /* Arrived here from `succeed_n' below: step over its count. */
+ if (succeed_n_p)
+ {
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
+ succeed_n_p = false;
+ }
+
+ continue;
+
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p += 2;
+
+ /* Increment p past the n for when k != 0. */
+ EXTRACT_NUMBER_AND_INCR (k, p);
+ if (k == 0)
+ {
+ /* A zero count makes this behave like on_failure_jump: back
+ up over what was just skipped (presumably the two-byte
+ jump offset and the two-byte count) and share that code. */
+ p -= 4;
+ succeed_n_p = true; /* Spaghetti code alert. */
+ goto handle_on_failure_jump;
+ }
+ continue;
+
+
+ case set_number_at:
+ p += 4;
+ continue;
+
+
+ case start_memory:
+ case stop_memory:
+ p += 2;
+ continue;
+
+
+ default:
+ abort (); /* We have listed all the cases. */
+ } /* switch *p++ */
+
+ /* Getting here means we have found the possible starting
+ characters for one path of the pattern -- and that the empty
+ string does not match. We need not follow this path further.
+ Instead, look at the next alternative (remembered on the
+ stack), or quit if no more. The test at the top of the loop
+ does these things. */
+ path_can_be_null = false;
+ p = pend;
+ } /* while p */
+
+ /* Set `can_be_null' for the last path (also the first path, if the
+ pattern is empty). */
+ bufp->can_be_null |= path_can_be_null;
+
+ /* Common exit for every successful return: reset the failure stack. */
+ done:
+ RESET_FAIL_STACK ();
+ return 0;
+} /* re_compile_fastmap */
+
+/* Tell the matcher which register storage to use for BUFP and REGS.
+
+   If NUM_REGS is nonzero, REGS is pointed at the caller's STARTS and
+   ENDS arrays and BUFP->regs_allocated becomes REGS_REALLOCATE.  Both
+   arrays must have been obtained from the malloc library routine and
+   must each be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS is zero, REGS is emptied (null arrays, count 0) and
+   BUFP->regs_allocated becomes REGS_UNALLOCATED, so that subsequent
+   matches allocate their own register data.
+
+   Unless this function is called, the first search or match using
+   BUFP will allocate its own register data, without freeing the old
+   data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t *starts, *ends;
+{
+  if (num_regs == 0)
+    {
+      /* No caller-supplied storage.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->end = (regoff_t *) 0;
+      regs->start = regs->end;
+      return;
+    }
+
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+
+/* Searching routines. */
+
+/* Single-string front end to re_search_2, below.  STRING (of length
+   SIZE) is passed as the second string with an empty first string,
+   and SIZE is passed as the stop index, so the caller cannot say
+   where to stop matching.  Returns whatever re_search_2 returns.  */
+
+int
+re_search (bufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, startpos, range;
+     struct re_registers *regs;
+{
+  int found;
+
+  found = re_search_2 (bufp, NULL, 0, string, size, startpos, range,
+                       regs, size);
+  return found;
+}
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+ virtual concatenation of STRING1 and STRING2, starting first at index
+ STARTPOS, then at STARTPOS + 1, and so on.
+
+ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+ RANGE is how far to scan while trying to match. RANGE = 0 means try
+ only at STARTPOS; in general, the last start tried is STARTPOS +
+ RANGE. A negative RANGE searches backwards, trying STARTPOS - 1,
+ STARTPOS - 2, and so on.
+
+ In REGS, return the indices of the virtual concatenation of STRING1
+ and STRING2 that matched the entire BUFP->buffer and its contained
+ subexpressions.
+
+ Do not consider matching one past the index STOP in the virtual
+ concatenation of STRING1 and STRING2.
+
+ We return either the position in the strings at which the match was
+ found, -1 if no match, or -2 if error (such as failure
+ stack overflow). */
+
+int
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int startpos;
+ int range;
+ struct re_registers *regs;
+ int stop;
+{
+ int val;
+ register char *fastmap = bufp->fastmap;
+ register RE_TRANSLATE_TYPE translate = bufp->translate;
+ int total_size = size1 + size2;
+ /* NOTE(review): this sum is formed before STARTPOS is validated below,
+ so extreme STARTPOS/RANGE values could overflow `int' -- confirm that
+ callers keep them in a sane range. */
+ int endpos = startpos + range;
+
+ /* Check for out-of-range STARTPOS. */
+ if (startpos < 0 || startpos > total_size)
+ return -1;
+
+ /* Fix up RANGE if it might eventually take us outside
+ the virtual concatenation of STRING1 and STRING2.
+ Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
+ if (endpos < 0)
+ range = 0 - startpos;
+ else if (endpos > total_size)
+ range = total_size - startpos;
+
+ /* If the search isn't to be a backwards one, don't waste time in a
+ search for a pattern that must be anchored. */
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+ {
+ if (startpos > 0)
+ return -1;
+ else
+ range = 1;
+ }
+
+#ifdef emacs
+ /* In a forward search for something that starts with \=.
+ don't keep searching past point. */
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
+ {
+ range = PT - startpos;
+ if (range <= 0)
+ return -1;
+ }
+#endif /* emacs */
+
+ /* Update the fastmap now if not correct already. */
+ if (fastmap && !bufp->fastmap_accurate)
+ if (re_compile_fastmap (bufp) == -2)
+ return -2;
+
+ /* Loop through the string, looking for a place to start matching. */
+ for (;;)
+ {
+ /* If a fastmap is supplied, skip quickly over characters that
+ cannot be the start of a match. If the pattern can match the
+ null string, however, we don't need to skip characters; we want
+ the first null string. */
+ if (fastmap && startpos < total_size && !bufp->can_be_null)
+ {
+ if (range > 0) /* Searching forwards. */
+ {
+ register const char *d;
+ register int lim = 0;
+ int irange = range;
+
+ /* If the scan would cross from string1 into string2, LIM is
+ the value RANGE will have when STARTPOS reaches SIZE1, so
+ the loops below stop at the end of string1 instead of
+ walking D past it. */
+ if (startpos < size1 && startpos + range >= size1)
+ lim = range - (size1 - startpos);
+
+ /* Point D at the character at index STARTPOS of the virtual
+ concatenation. */
+ d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+
+ /* Written out as an if-else to avoid testing `translate'
+ inside the loop. */
+ if (translate)
+ while (range > lim
+ && !fastmap[(unsigned char)
+ translate[(unsigned char) *d++]])
+ range--;
+ else
+ while (range > lim && !fastmap[(unsigned char) *d++])
+ range--;
+
+ /* Advance STARTPOS by the number of characters skipped. */
+ startpos += irange - range;
+ }
+ else /* Searching backwards. */
+ {
+ register char c = (size1 == 0 || startpos >= size1
+ ? string2[startpos - size1]
+ : string1[startpos]);
+
+ if (!fastmap[(unsigned char) TRANSLATE (c)])
+ goto advance;
+ }
+ }
+
+ /* If can't match the null string, and that's all we have left, fail. */
+ if (range >= 0 && startpos == total_size && fastmap
+ && !bufp->can_be_null)
+ return -1;
+
+ val = re_match_2_internal (bufp, string1, size1, string2, size2,
+ startpos, regs, stop);
+ /* Force an alloca cleanup after each attempt; only done when the
+ matcher uses alloca (not REGEX_MALLOC) and C_ALLOCA is defined. */
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
+ alloca (0);
+#endif
+#endif
+
+ /* A match at STARTPOS: report where it starts, not its length. */
+ if (val >= 0)
+ return startpos;
+
+ /* Internal error in the matcher: pass it on. */
+ if (val == -2)
+ return -2;
+
+ /* No match here: move one position in the search direction, or give
+ up once RANGE is exhausted. */
+ advance:
+ if (!range)
+ break;
+ else if (range > 0)
+ {
+ range--;
+ startpos++;
+ }
+ else
+ {
+ range++;
+ startpos--;
+ }
+ }
+ return -1;
+} /* re_search_2 */
+
+/* Declarations and macros for re_match_2. */
+
+/* Old-style forward declarations of helpers used by the matcher. */
+static int bcmp_translate ();
+static boolean alt_match_null_string_p (),
+ common_op_match_null_string_p (),
+ group_match_null_string_p ();
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+ and `string2' into an offset from the beginning of that string.
+ Offsets into `string2' are biased by `size1', so the result is an
+ index into the virtual concatenation. Uses `string1', `string2' and
+ `size1' from the scope where it is expanded. */
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) \
+ ? ((regoff_t) ((ptr) - string1)) \
+ : ((regoff_t) ((ptr) - string2 + size1)))
+
+/* Macros for dealing with the split strings in re_match_2. */
+
+/* True while the current data end `dend' is the stop point in string1. */
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+
+/* Call before fetching a character with *d. This switches over to
+ string2 if necessary. Uses `d', `dend', `string2' and `end_match_2'
+ from the scope where it is expanded, and jumps to that scope's `fail'
+ label when the data is exhausted. */
+#define PREFETCH() \
+ while (d == dend) \
+ { \
+ /* End of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* End of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ of `string1' and `string2'. If only one string, it's `string2'. */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+
+
+/* Test if D points to a character which is word-constituent. We have
+ two special cases to check for: if past the end of string1, look at
+ the first character in string2; and if before the beginning of
+ string2, look at the last character in string1. */
+#define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
+ == Sword)
+
+/* Disabled due to a compiler bug -- see comment at case wordbound */
+#if 0
+/* Test if the character before D and the one at D differ with respect
+ to being word-constituent. */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+#endif
+
+/* Free everything we malloc. */
+#ifdef MATCH_MAY_ALLOCATE
+/* Release VAR with REGEX_FREE if it is non-null, then set it to NULL.
+   The body is wrapped in do { ... } while (0) so that the macro
+   expands to exactly one statement: written as two bare statements,
+   the `var = NULL' part would escape any unbraced `if' or loop that
+   controls a FREE_VAR call.  VAR is evaluated more than once, so it
+   must be a plain lvalue without side effects.  */
+#define FREE_VAR(var) \
+  do { \
+    if (var) \
+      REGEX_FREE (var); \
+    var = NULL; \
+  } while (0)
+/* Release the failure stack and every per-match register vector of the
+   enclosing matcher function, leaving the vector pointers NULL.  */
+#define FREE_VARIABLES() \
+  do { \
+    REGEX_FREE_STACK (fail_stack.stack); \
+    FREE_VAR (regstart); \
+    FREE_VAR (regend); \
+    FREE_VAR (old_regstart); \
+    FREE_VAR (old_regend); \
+    FREE_VAR (best_regstart); \
+    FREE_VAR (best_regend); \
+    FREE_VAR (reg_info); \
+    FREE_VAR (reg_dummy); \
+    FREE_VAR (reg_info_dummy); \
+  } while (0)
+#else
+#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
+#endif /* not MATCH_MAY_ALLOCATE */
+
+/* These values must meet several constraints. They must not be valid
+ register values; since we have a limit of 255 registers (because
+ we use only one byte in the pattern for the register number), we can
+ use numbers larger than 255. They must differ by 1, because of
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must
+ be larger than the value for the highest register, so we do not try
+ to actually save any registers when none are active. */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+
+/* Matching routines. */
+
+#ifndef emacs /* Emacs never uses this. */
+/* re_match is like re_match_2 except it takes only a single string:
+   STRING (of length SIZE) is matched starting at POS, with an empty
+   first string and SIZE as the stop index.  Returns whatever
+   re_match_2_internal returns.  */
+
+int
+re_match (bufp, string, size, pos, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, pos;
+     struct re_registers *regs;
+{
+  int result = re_match_2_internal (bufp, NULL, 0, string, size,
+                                    pos, regs, size);
+  /* Force an alloca cleanup, under the same conditions re_search_2
+     uses: only when the matcher allocates with alloca (REGEX_MALLOC
+     not defined) and C_ALLOCA is defined.  Calling alloca
+     unconditionally would reference it even in REGEX_MALLOC builds.  */
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
+  alloca (0);
+#endif
+#endif
+  return result;
+}
+#endif /* not emacs */
+
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+ the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+ and SIZE2, respectively). We start matching at POS, and stop
+ matching at STOP.
+
+ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
+ store offsets for the substring each group matched in REGS. See the
+ documentation for exactly how many groups we fill.
+
+ We return -1 if no match, -2 if an internal error (such as the
+ failure stack overflowing). Otherwise, we return the length of the
+ matched substring. */
+
+int
+re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
+ int result = re_match_2_internal (bufp, string1, size1, string2, size2,
+ pos, regs, stop);
+ alloca (0);
+ return result;
+}
+
+/* This is a separate function so that we can force an alloca cleanup
+ afterwards. */
+static int
+re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
+ /* General temporaries. */
+ int mcnt;
+ unsigned char *p1;
+
+ /* Just past the end of the corresponding string. */
+ const char *end1, *end2;
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ const char *end_match_1, *end_match_2;
+
+ /* Where we are in the data, and the end of the current string. */
+ const char *d, *dend;
+
+ /* Where we are in the pattern, and the end of the pattern. */
+ unsigned char *p = bufp->buffer;
+ register unsigned char *pend = p + bufp->used;
+
+ /* Mark the opcode just after a start_memory, so we can test for an
+ empty subpattern when we get to the stop_memory. */
+ unsigned char *just_past_start_mem = 0;
+
+ /* We use this to map every character in the string. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ restart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next next one is tried. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ fail_stack_type fail_stack;
+#endif
+#ifdef DEBUG
+ static unsigned failure_id = 0;
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+#ifdef REL_ALLOC
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* We fill all the registers internally, independent of what we
+ return, for use in backreferences. The number here includes
+ an element for register zero. */
+ unsigned num_regs = bufp->re_nsub + 1;
+
+ /* The currently active registers. */
+ unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the pattern we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **regstart, **regend;
+#endif
+
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **old_regstart, **old_regend;
+#endif
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ register_info_type *reg_info;
+#endif
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **best_regstart, **best_regend;
+#endif
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const char *match_end = NULL;
+
+ /* This helps SET_REGS_MATCHED avoid doing redundant work. */
+ int set_regs_matched_done = 0;
+
+ /* Used when we pop values we don't care about. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **reg_dummy;
+ register_info_type *reg_info_dummy;
+#endif
+
+#ifdef DEBUG
+ /* Counts the total number of registers pushed. */
+ unsigned num_regs_pushed = 0;
+#endif
+
+ DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+ INIT_FAIL_STACK ();
+
+#ifdef MATCH_MAY_ALLOCATE
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub)
+ {
+ regstart = REGEX_TALLOC (num_regs, const char *);
+ regend = REGEX_TALLOC (num_regs, const char *);
+ old_regstart = REGEX_TALLOC (num_regs, const char *);
+ old_regend = REGEX_TALLOC (num_regs, const char *);
+ best_regstart = REGEX_TALLOC (num_regs, const char *);
+ best_regend = REGEX_TALLOC (num_regs, const char *);
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC (num_regs, const char *);
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+ else
+ {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* MATCH_MAY_ALLOCATE */
+
+ /* The starting position is bogus. */
+ if (pos < 0 || pos > size1 + size2)
+ {
+ FREE_VARIABLES ();
+ return -1;
+ }
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
+ if (size2 == 0 && string1 != NULL)
+ {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+ if (stop <= size1)
+ {
+ end_match_1 = string1 + stop;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ end_match_2 = string2 + stop - size1;
+ }
+
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal `string2'. */
+ if (size1 > 0 && pos <= size1)
+ {
+ d = string1 + pos;
+ dend = end_match_1;
+ }
+ else
+ {
+ d = string2 + pos - size1;
+ dend = end_match_2;
+ }
+
+ DEBUG_PRINT1 ("The compiled pattern is: ");
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+ DEBUG_PRINT1 ("The string to match is: `");
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+ DEBUG_PRINT1 ("'\n");
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if the match is complete, or it drops through if the match
+ fails at this starting point in the input data. */
+ for (;;)
+ {
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+
+ if (p == pend)
+ { /* End of pattern means we might have succeeded. */
+ DEBUG_PRINT1 ("end of pattern ... ");
+
+ /* If we haven't matched the entire string, and we want the
+ longest match, try backtracking. */
+ if (d != end_match_2)
+ {
+ /* 1 if this match ends in the same string (string1 or string2)
+ as the best previous match. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+ /* 1 if this match is the best seen so far. */
+ boolean best_match_p;
+
+ /* AIX compiler got confused when this was combined
+ with the previous declaration. */
+ if (same_str_p)
+ best_match_p = d > match_end;
+ else
+ best_match_p = !MATCHING_IN_FIRST_STRING;
+
+ DEBUG_PRINT1 ("backtracking.\n");
+
+ if (!FAIL_STACK_EMPTY ())
+ { /* More failure points to try. */
+
+ /* If exceeds best match so far, save it. */
+ if (!best_regs_set || best_match_p)
+ {
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+
+ /* If no failure points, don't restore garbage. And if
+ last match is real best match, don't restore second
+ best one. */
+ else if (best_regs_set && !best_match_p)
+ {
+ restore_best_regs:
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ DEBUG_PRINT1 ("Restoring best registers.\n");
+
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ } /* d != end_match_2 */
+
+ succeed_label:
+ DEBUG_PRINT1 ("Accepting match.\n");
+
+ /* If caller wants register contents data back, do it. */
+ if (regs && !bufp->no_sub)
+ {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+ regs->start = TALLOC (regs->num_regs, regoff_t);
+ regs->end = TALLOC (regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ bufp->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1)
+ {
+ regs->num_regs = num_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+ }
+ else
+ {
+ /* These braces fend off a "empty body in an else-statement"
+ warning under GCC when assert expands to nothing. */
+ assert (bufp->regs_allocated == REGS_FIXED);
+ }
+
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0)
+ {
+ regs->start[0] = pos;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING
+ ? ((regoff_t) (d - string1))
+ : ((regoff_t) (d - string2 + size1)));
+ }
+
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
+ for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+ {
+ if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ else
+ {
+ regs->start[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
+ }
+ }
+
+ /* If the regs structure we return has more elements than
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
+ for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ } /* regs && !bufp->no_sub */
+
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1
+ : string2 - size1);
+
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+ FREE_VARIABLES ();
+ return mcnt;
+ }
+
+ /* Otherwise match next pattern command. */
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+ {
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case no_op:
+ DEBUG_PRINT1 ("EXECUTING no_op.\n");
+ break;
+
+ case succeed:
+ DEBUG_PRINT1 ("EXECUTING succeed.\n");
+ goto succeed_label;
+
+ /* Match the next n pattern characters exactly. The following
+ byte in the pattern defines n, and the n bytes after that
+ are the characters to match. */
+ case exactn:
+ mcnt = *p++;
+ DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
+ {
+ do
+ {
+ PREFETCH ();
+ if ((unsigned char) translate[(unsigned char) *d++]
+ != (unsigned char) *p++)
+ goto fail;
+ }
+ while (--mcnt);
+ }
+ else
+ {
+ do
+ {
+ PREFETCH ();
+ if (*d++ != (char) *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED ();
+ break;
+
+
+ /* Match any character except possibly a newline or a null. */
+ case anychar:
+ DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+ PREFETCH ();
+
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+ || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+ goto fail;
+
+ SET_REGS_MATCHED ();
+ DEBUG_PRINT2 (" Matched `%d'.\n", *d);
+ d++;
+ break;
+
+
+ case charset:
+ case charset_not:
+ {
+ register unsigned char c;
+ boolean not = (re_opcode_t) *(p - 1) == charset_not;
+
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+
+ PREFETCH ();
+ c = TRANSLATE (*d); /* The character to match. */
+
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not) goto fail;
+
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+ }
+
+
+ /* The beginning of a group is represented by start_memory.
+ The arguments are the register number in the next byte, and the
+ number of groups inner to this one in the next. The text
+ matched within the group is recorded (in the internal
+ registers data structure) under the register number. */
+ case start_memory:
+ DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
+ = group_match_null_string_p (&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
+ DEBUG_PRINT2 (" old_regstart: %d\n",
+ POINTER_TO_OFFSET (old_regstart[*p]));
+
+ regstart[*p] = d;
+ DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* This is the new highest active register. */
+ highest_active_reg = *p;
+
+ /* If nothing was active before, this is the new lowest active
+ register. */
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *p;
+
+ /* Move past the register number and inner group count. */
+ p += 2;
+ just_past_start_mem = p;
+
+ break;
+
+
+ /* The stop_memory opcode represents the end of a group. Its
+ arguments are the same as start_memory's: the register
+ number, and the number of inner groups. */
+ case stop_memory:
+ DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ : regend[*p];
+ DEBUG_PRINT2 (" old_regend: %d\n",
+ POINTER_TO_OFFSET (old_regend[*p]));
+
+ regend[*p] = d;
+ DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+ /* This register isn't active anymore. */
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* If this was the only register active, nothing is active
+ anymore. */
+ if (lowest_active_reg == highest_active_reg)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ { /* We must scan for the new highest active register, since
+ it isn't necessarily one less than now: consider
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
+ new highest active register is 1. */
+ unsigned char r = *p - 1;
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))
+ r--;
+
+ /* If we end up at register zero, that means that we saved
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
+ registers 1 and 2 as a result of the *, but when we pop
+ back to the second ), we are at the stop_memory 1.
+ Thus, nothing is active. */
+ if (r == 0)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ highest_active_reg = r;
+ }
+
+ /* If just failed to match something this time around with a
+ group that's operated on by a repetition operator, try to
+ force exit from the ``loop'', and restore the register
+ information for this group that we had before trying this
+ last match. */
+ if ((!MATCHED_SOMETHING (reg_info[*p])
+ || just_past_start_mem == p - 1)
+ && (p + 2) < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ p1 = p + 2;
+ mcnt = 0;
+ switch ((re_opcode_t) *p1++)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (is_a_jump_n)
+ p1 += 2;
+ break;
+
+ default:
+ /* do nothing */ ;
+ }
+ p1 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump right before the start_memory
+ corresponding to this stop_memory, exit from the loop
+ by forcing a failure after pushing on the stack the
+ on_failure_jump's jump in the pattern, and d. */
+ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+ {
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p; r < *p + *(p + 1); r++)
+ {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if (old_regend[r] >= regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+ goto fail;
+ }
+ }
+
+ /* Move past the register number and the inner group count. */
+ p += 2;
+ break;
+
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ register const char *d2, *dend2;
+ int regno = *p++; /* Get which register to match against. */
+ DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+ /* Can't back reference a group which we've never matched. */
+ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+ goto fail;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((FIRST_STRING_P (regstart[regno])
+ == FIRST_STRING_P (regend[regno]))
+ ? regend[regno] : end_match_1);
+ for (;;)
+ {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2)
+ {
+ if (dend2 == end_match_2) break;
+ if (dend2 == regend[regno]) break;
+
+ /* End of string1 => advance to string2. */
+ d2 = string2;
+ dend2 = regend[regno];
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2) break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH ();
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? bcmp_translate (d, d2, mcnt, translate)
+ : bcmp (d, d2, mcnt))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+
+ /* Do this because we've matched some characters. */
+ SET_REGS_MATCHED ();
+ }
+ }
+ break;
+
+
+ /* begline matches the empty string at the beginning of the string
+ (unless `not_bol' is set in `bufp'), and, if
+ `newline_anchor' is set, after newlines. */
+ case begline:
+ DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+ if (AT_STRINGS_BEG (d))
+ {
+ if (!bufp->not_bol) break;
+ }
+ else if (d[-1] == '\n' && bufp->newline_anchor)
+ {
+ break;
+ }
+ /* In all other cases, we fail. */
+ goto fail;
+
+
+ /* endline is the dual of begline. */
+ case endline:
+ DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+ if (AT_STRINGS_END (d))
+ {
+ if (!bufp->not_eol) break;
+ }
+
+ /* We have to ``prefetch'' the next character. */
+ else if ((d == end1 ? *string2 : *d) == '\n'
+ && bufp->newline_anchor)
+ {
+ break;
+ }
+ goto fail;
+
+
+ /* Match at the very beginning of the data. */
+ case begbuf:
+ DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+ if (AT_STRINGS_BEG (d))
+ break;
+ goto fail;
+
+
+ /* Match at the very end of the data. */
+ case endbuf:
+ DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+ if (AT_STRINGS_END (d))
+ break;
+ goto fail;
+
+
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It
+ pushes NULL as the value for the string on the stack. Then
+ `pop_failure_point' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
+
+ Because this is used only in specific cases, we don't need to
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ `anychar's code to do something besides goto fail in this
+ case; that seems worse than this. */
+ case on_failure_keep_string_jump:
+ DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+
+ PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+ break;
+
+
+ /* Uses of on_failure_jump:
+
+ Each alternative starts with an on_failure_jump that points
+ to the beginning of the next alternative. Each alternative
+ except the last ends with a jump that in effect jumps past
+ the rest of the alternatives. (They really jump to the
+ ending jump of the following alternative, because tensioning
+ these jumps is a hassle.)
+
+ Repeats start with an on_failure_jump that points past both
+ the repetition text and either the following jump or
+ pop_failure_jump back to this on_failure_jump. */
+ case on_failure_jump:
+ on_failure:
+ DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+
+ /* If this on_failure_jump comes right before a group (i.e.,
+ the original * applied to a group), save the information
+ for that group and all inner ones, so that if we fail back
+ to this point, the group's information will be correct.
+ For example, in \(a*\)*\1, we need the preceding group,
+ and in \(zz\(a*\)b*\)\2, we need the inner group. */
+
+ /* We can't use `p' to check ahead because we push
+ a failure point to `p + mcnt' after we do this. */
+ p1 = p;
+
+ /* We need to skip no_op's before we look for the
+ start_memory in case this on_failure_jump is happening as
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+ against aba. */
+ while (p1 < pend && (re_opcode_t) *p1 == no_op)
+ p1++;
+
+ if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+ {
+ /* We have a new highest active register now. This will
+ get reset at the start_memory we are about to get to,
+ but we will have saved all the registers relevant to
+ this repetition op, as described above. */
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *(p1 + 1);
+ }
+
+ DEBUG_PRINT1 (":\n");
+ PUSH_FAILURE_POINT (p + mcnt, d, -2);
+ break;
+
+
+ /* A smart repeat ends with `maybe_pop_jump'.
+ We change it to either `pop_failure_jump' or `jump'. */
+ case maybe_pop_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+ {
+ register unsigned char *p2 = p;
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands.
+ If what follows this loop is a ...+ construct,
+ look at what begins its body, since we will have to
+ match at least one of that. */
+ while (1)
+ {
+ if (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3;
+ else if (p2 + 6 < pend
+ && (re_opcode_t) *p2 == dummy_failure_jump)
+ p2 += 6;
+ else
+ break;
+ }
+
+ p1 = p + mcnt;
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_pop_jump' of this case. Examine what
+ follows. */
+
+ /* If we're at the end of the pattern, we can change. */
+ if (p2 == pend)
+ {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
+
+ else if ((re_opcode_t) *p2 == exactn
+ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+ {
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset
+ || (re_opcode_t) p1[3] == charset_not)
+ {
+ int not = (re_opcode_t) p1[3] == charset_not;
+
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ /* `not' is equal to 1 if c would match, which means
+ that we can't change to pop_failure_jump. */
+ if (!not)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ else if ((re_opcode_t) *p2 == charset)
+ {
+#ifdef DEBUG
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+#endif
+
+ if ((re_opcode_t) p1[3] == exactn
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
+ && (p2[1 + p1[4] / BYTEWIDTH]
+ & (1 << (p1[4] % BYTEWIDTH)))))
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset_not)
+ {
+ int idx;
+ /* We win if the charset_not inside the loop
+ lists every character listed in the charset after. */
+ for (idx = 0; idx < (int) p2[1]; idx++)
+ if (! (p2[2 + idx] == 0
+ || (idx < (int) p1[4]
+ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+ break;
+
+ if (idx == p2[1])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ else if ((re_opcode_t) p1[3] == charset)
+ {
+ int idx;
+ /* We win if the charset inside the loop
+ has no overlap with the one after the loop. */
+ for (idx = 0;
+ idx < (int) p2[1] && idx < (int) p1[4];
+ idx++)
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)
+ break;
+
+ if (idx == p2[1] || idx == p1[4])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ }
+ p -= 2; /* Point at relative address again. */
+ if ((re_opcode_t) p[-1] != pop_failure_jump)
+ {
+ p[-1] = (unsigned char) jump;
+ DEBUG_PRINT1 (" Match => jump.\n");
+ goto unconditional_jump;
+ }
+ /* Note fall through. */
+
+
+ /* The end of a simple repeat has a pop_failure_jump back to
+ its matching on_failure_jump, where the latter will push a
+ failure point. The pop_failure_jump takes off failure
+ points put on by this pop_failure_jump's matching
+ on_failure_jump; we got through the pattern to here from the
+ matching on_failure_jump, so didn't fail. */
+ case pop_failure_jump:
+ {
+ /* We need to pass separate storage for the lowest and
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `pop_failure_point'. */
+ unsigned dummy_low_reg, dummy_high_reg;
+ unsigned char *pdummy;
+ const char *sdummy;
+
+ DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+ POP_FAILURE_POINT (sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
+ }
+ /* Note fall through. */
+
+
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ unconditional_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+ p += mcnt; /* Do the jump. */
+ DEBUG_PRINT2 ("(to 0x%x).\n", p);
+ break;
+
+
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
+
+
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at pop_failure_jump. We will end up at
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for pop_failure_jump to pop. */
+ case dummy_failure_jump:
+ DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+ /* It doesn't matter what we push for the string here. What
+ the code at `fail' tests is the value for the pattern. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ break;
+
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+ assert (mcnt >= 0);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt > 0)
+ {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
+ }
+ else if (mcnt == 0)
+ {
+ DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
+ p[2] = (unsigned char) no_op;
+ p[3] = (unsigned char) no_op;
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER (p + 2, mcnt);
+ goto unconditional_jump;
+ }
+ /* If don't have to jump any more, skip over the rest of command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+#if 0
+ /* The DEC Alpha C compiler 3.x generates incorrect code for the
+ test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
+ AT_WORD_BOUNDARY, so this code is disabled. Expanding the
+ macro and introducing temporary variables works around the bug. */
+
+ case wordbound:
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ break;
+ goto fail;
+
+ case notwordbound:
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ goto fail;
+ break;
+#else
+ case wordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+ break;
+
+ prevchar = WORDCHAR_P (d - 1);
+ thischar = WORDCHAR_P (d);
+ if (prevchar != thischar)
+ break;
+ goto fail;
+ }
+
+ case notwordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+ goto fail;
+
+ prevchar = WORDCHAR_P (d - 1);
+ thischar = WORDCHAR_P (d);
+ if (prevchar != thischar)
+ goto fail;
+ break;
+ }
+#endif
+
+ case wordbeg:
+ DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
+ break;
+ goto fail;
+
+ case wordend:
+ DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+ && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
+ break;
+ goto fail;
+
+#ifdef emacs
+ case before_dot:
+ DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+ goto fail;
+ break;
+
+ case syntaxspec:
+ DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchsyntax;
+
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+ mcnt = (int) Sword;
+ matchsyntax:
+ PREFETCH ();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+ case notsyntaxspec:
+ DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchnotsyntax;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+ mcnt = (int) Sword;
+ matchnotsyntax:
+ PREFETCH ();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+#else /* not emacs */
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+ PREFETCH ();
+ if (!WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+ PREFETCH ();
+ if (WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+#endif /* not emacs */
+
+ default:
+ abort ();
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+
+ /* We goto here if a matching operation fails. */
+ fail:
+ if (!FAIL_STACK_EMPTY ())
+ { /* A restart point is known. Restore to that state. */
+ DEBUG_PRINT1 ("\nFAIL:\n");
+ POP_FAILURE_POINT (d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
+
+ /* If this failure point is a dummy, try the next one. */
+ if (!p)
+ goto fail;
+
+ /* If we failed to the end of the pattern, don't examine *p. */
+ assert (p <= pend);
+ if (p < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((re_opcode_t) *p)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case maybe_pop_jump:
+ case pop_failure_jump:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+ || (!is_a_jump_n
+ && (re_opcode_t) *p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ }
+ else
+ break; /* Matching at this starting point really fails. */
+ } /* for (;;) */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_VARIABLES ();
+
+ return -1; /* Failure to match. */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2. */
+
+
+/* Decide whether a whole group can match the empty string.
+
+ On entry *P points at the register number that follows a start_memory
+ opcode; END is one past the last pattern byte that may be examined.
+ REG_INFO is not read here; it is only handed on to
+ alt_match_null_string_p and common_op_match_null_string_p.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise (including when END is
+ reached before a stop_memory is seen).
+
+ If we find the matching stop_memory, sets *P to point to one past its
+ register number. On every other path *P is not written by this
+ function, so callers should not rely on its value after a false return.
+
+ We don't handle duplicates properly (yet). */
+
+static boolean
+group_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ /* Point to after the args to the start_memory. */
+ unsigned char *p1 = *p + 2;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ /* Step over the opcode, then read its two-byte offset into
+ `mcnt'; p1 ends up just past the whole on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ /* If the next operation is not a jump backwards in the
+ pattern. */
+
+ if (mcnt >= 0)
+ {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. */
+
+ while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+ {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ /* The range handed to the helper stops at the
+ jump_past_alt opcode, i.e. it covers only the
+ alternative's own body. */
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+ reg_info))
+ return false;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((re_opcode_t) *p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+ {
+ /* Get to the beginning of the n-th alternative. */
+ p1 -= 3;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER (mcnt, p1 - 2);
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ return false;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt >= 0 */
+ /* When the offset was negative nothing more is examined: only
+ the opcode and its two-byte number have been consumed. */
+ break;
+
+
+ case stop_memory:
+ /* p1[1] is this stop_memory's register number; it has to be
+ the register number *P pointed at on entry. */
+ assert (p1[1] == **p);
+ *p = p1 + 2;
+ return true;
+
+
+ default:
+ /* Every other opcode is delegated; p1 is passed by address so
+ the helper can move it past the opcode it examined. */
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ /* Reached END without meeting the matching stop_memory. */
+ return false;
+} /* group_match_null_string_p */
+
+
+/* Report whether the single alternative occupying [P, END) can match
+   the empty string.  Unlike group_match_null_string_p, alternation is
+   not handled here, although the alternative may contain groups.  */
+
+static boolean
+alt_match_null_string_p (p, end, reg_info)
+     unsigned char *p, *end;
+     register_info_type *reg_info;
+{
+  unsigned char *cursor;
+  int offset;
+
+  /* Walk the opcodes, stepping over every one that can match nothing
+     and giving up at the first one that cannot.  */
+  for (cursor = p; cursor < end; )
+    {
+      if ((re_opcode_t) *cursor == on_failure_jump)
+        {
+          /* A loop: step over the opcode, read its jump offset, and
+             follow that offset.  */
+          cursor++;
+          EXTRACT_NUMBER_AND_INCR (offset, cursor);
+          cursor += offset;
+          continue;
+        }
+
+      /* Every other opcode is judged by the shared helper, which also
+         moves CURSOR forward when it answers true.  */
+      if (!common_op_match_null_string_p (&cursor, end, reg_info))
+        return false;
+    }
+  /* Reached END without meeting an op that cannot match nothing.  */
+  return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p: returns true if the single op at *P can
+ match the empty string, and false otherwise.
+ On a true return, sets *P to one after the op and its arguments, if any. */
+
+static boolean
+common_op_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ boolean ret;
+ int reg_no;
+ unsigned char *p1 = *p;
+
+ switch ((re_opcode_t) *p1++) /* P1 now points just past the opcode byte. */
+ {
+ case no_op: /* The ops in this group are all treated as able to match nothing. */
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break;
+
+ case start_memory:
+ reg_no = *p1; /* The byte after the opcode is used as the register index. */
+ assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+ ret = group_match_null_string_p (&p1, end, reg_info);
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return false;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump. */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return false; /* A backward jump is reported as unable to match nothing. */
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p1 += 2;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ /* Zero times: back up 4 bytes, re-read the number found there, and skip ahead by it. */
+ if (mcnt == 0)
+ {
+ p1 -= 4; /* Presumably back to just after the opcode (if numbers are 2 bytes) -- TODO confirm. */
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ }
+ else
+ return false;
+ break;
+
+ case duplicate: /* NOTE(review): P1 is left on the register byte indexed below; confirm this is intended. */
+ if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ return false;
+ break;
+
+ case set_number_at:
+ p1 += 4;
+ /* Fall through: this op also ends in `return false'. */
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return false;
+ }
+
+ *p = p1;
+ return true;
+} /* common_op_match_null_string_p */
+
+
+/* Compare LEN bytes of S1 and S2 after mapping each byte through
+   TRANSLATE.  Return zero if every pair agrees; nonzero otherwise.  */
+
+static int
+bcmp_translate (s1, s2, len, translate)
+     unsigned char *s1, *s2;
+     register int len;
+     RE_TRANSLATE_TYPE translate;
+{
+  register unsigned char *lhs, *rhs;
+
+  /* Stop at the first byte pair whose translations differ.  */
+  for (lhs = s1, rhs = s2; len; len--)
+    if (translate[*lhs++] != translate[*rhs++])
+      return 1;
+  return 0;
+}
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler.  It
+   compiles PATTERN, which is LENGTH bytes long, into BUFP using the
+   syntax in the global `re_syntax_options'.  It returns NULL if the
+   pattern was valid, otherwise an error string.
+
+   The `allocated' (and perhaps `buffer') and `translate' fields of
+   BUFP are assumed to be set on entry.  regex_compile does the
+   actual compilation.  */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+     const char *pattern;
+     int length;
+     struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t status;
+
+  /* Anchors match at newlines.  */
+  bufp->newline_anchor = 1;
+
+  /* GNU callers ask for register information by passing a non-null
+     REGS argument to re_match and friends, not through no_sub, so
+     no_sub is always cleared here.  */
+  bufp->no_sub = 0;
+
+  /* GNU code is written to assume at least RE_NREGS registers will be
+     set (and at least one extra will be -1).  */
+  bufp->regs_allocated = REGS_UNALLOCATED;
+
+  status = regex_compile (pattern, length, re_syntax_options, bufp);
+
+  /* Success yields NULL; any other status selects its message.  */
+  return (status == REG_NOERROR
+          ? NULL : gettext (re_error_msgid[(int) status]));
+}
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#ifdef _REGEX_RE_COMP
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+re_comp (s)
+     const char *s;
+{
+  struct re_pattern_buffer *buf = &re_comp_buf;
+  reg_errcode_t status;
+
+  /* A null S compiles nothing; that is an error only when no pattern
+     buffer has been set up by an earlier call.  */
+  if (s == NULL)
+    return buf->buffer ? 0 : gettext ("No previous regular expression");
+
+  if (buf->buffer == NULL)
+    {
+      /* First use: give the buffer an initial 200 bytes of pattern
+         space and a fastmap of 1 << BYTEWIDTH bytes.  */
+      unsigned char *space = (unsigned char *) malloc (200);
+
+      if (space == NULL)
+        return gettext (re_error_msgid[(int) REG_ESPACE]);
+      buf->buffer = space;
+      buf->allocated = 200;
+
+      buf->fastmap = (char *) malloc (1 << BYTEWIDTH);
+      if (buf->fastmap == NULL)
+        return gettext (re_error_msgid[(int) REG_ESPACE]);
+    }
+
+  /* `re_exec' always passes NULL for the `regs' argument, so the
+     fields that affect register reporting need no initialization.  */
+
+  /* Anchors match at newlines.  */
+  buf->newline_anchor = 1;
+
+  status = regex_compile (s, strlen (s), re_syntax_options, buf);
+
+  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+  return status ? (char *) gettext (re_error_msgid[(int) status]) : NULL;
+}
+
+
+int
+re_exec (s)
+     const char *s;
+{
+  int size = strlen (s);
+  /* Search the whole of S; a nonnegative result counts as a match.  */
+  return re_search (&re_comp_buf, s, size, 0, size, (struct re_registers *) 0) >= 0;
+}
+#endif /* _REGEX_RE_COMP */
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' and `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+     regex_t *preg;
+     const char *pattern;
+     int cflags;
+{
+  reg_errcode_t status;
+  unsigned syn;
+
+  /* Choose between the two POSIX syntaxes.  */
+  if (cflags & REG_EXTENDED)
+    syn = RE_SYNTAX_POSIX_EXTENDED;
+  else
+    syn = RE_SYNTAX_POSIX_BASIC;
+
+  /* Start with an empty buffer; regex_compile allocates the space
+     for the compiled pattern.  */
+  preg->used = 0;
+  preg->allocated = 0;
+  preg->buffer = 0;
+
+  /* No fastmap is used when searching.  With REG_NEWLINE a fastmap
+     would need every character that can follow a newline; without
+     one we simply try every character.  */
+  preg->fastmap = 0;
+
+  /* REG_ICASE: build a table mapping uppercase to lowercase.  */
+  preg->translate = NULL;
+  if (cflags & REG_ICASE)
+    {
+      unsigned ch;
+
+      preg->translate
+        = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
+                                      * sizeof (*(RE_TRANSLATE_TYPE)0));
+      if (preg->translate == NULL)
+        return (int) REG_ESPACE;
+
+      for (ch = 0; ch < CHAR_SET_SIZE; ch++)
+        if (ISUPPER (ch))
+          preg->translate[ch] = tolower (ch);
+        else
+          preg->translate[ch] = ch;
+    }
+
+  /* REG_NEWLINE: neither . nor [^...] matches newline, and the
+     matching behavior changes through newline_anchor.  */
+  preg->newline_anchor = 0;
+  if (cflags & REG_NEWLINE)
+    {
+      syn = (syn & ~RE_DOT_NEWLINE) | RE_HAT_LISTS_NOT_NEWLINE;
+      preg->newline_anchor = 1;
+    }
+
+  preg->no_sub = (cflags & REG_NOSUB) != 0;
+
+  /* POSIX patterns end at the first null byte, so strlen is right.  */
+  status = regex_compile (pattern, strlen (pattern), syn, preg);
+
+  /* POSIX folds unmatched open- and close-group into REG_EPAREN.  */
+  return (int) (status == REG_ERPAREN ? REG_EPAREN : status);
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match; REG_NOMATCH if not or if out of memory.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+     const regex_t *preg;
+     const char *string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+{
+  int ret;
+  struct re_registers regs;
+  regex_t private_preg;
+  int len = strlen (string);
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+  private_preg = *preg;
+
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+  /* The user has told us exactly how many registers to return
+     information about, via `nmatch'.  We have to pass that on to the
+     matching routines.  */
+  private_preg.regs_allocated = REGS_FIXED;
+
+  if (want_reg_info)
+    {
+      /* One block holds both arrays, so no partial allocation can leak.  */
+      regs.num_regs = nmatch;
+      regs.start = TALLOC (nmatch * 2, regoff_t);
+      if (regs.start == NULL)
+        return (int) REG_NOMATCH;
+      regs.end = regs.start + nmatch;
+    }
+
+  /* Perform the searching operation.  */
+  ret = re_search (&private_preg, string, len,
+                   /* start: */ 0, /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
+
+  /* Copy the register information to the POSIX structure.  */
+  if (want_reg_info)
+    {
+      if (ret >= 0)
+        {
+          unsigned r;
+
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
+
+      /* Free the temporary register block (`regs.end' lies inside it).  */
+      free (regs.start);
+    }
+
+  /* We want zero return to mean success, unlike `re_search'.  */
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Fetch the message for ERRCODE, an error code returned by regcomp or
+   regexec, and copy as much of it as fits into ERRBUF.  PREG is unused.  */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+     int errcode;
+     const regex_t *preg;
+     char *errbuf;
+     size_t errbuf_size;
+{
+  const char *text;
+  size_t needed;
+
+  /* Only error codes produced by the rest of this code should reach
+     us.  Anything else means the program has a bug, so dump core
+     and let it be fixed.  */
+  if (errcode < 0
+      || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
+    abort ();
+
+  text = gettext (re_error_msgid[errcode]);
+  needed = strlen (text) + 1;   /* Count the terminating null too.  */
+
+  if (errbuf_size == 0)
+    /* Nothing is stored; the size needed is still returned.  */
+    return needed;
+
+  if (needed <= errbuf_size)
+    strcpy (errbuf, text);
+  else
+    {
+      /* Truncate, but always leave the result null-terminated.  */
+      strncpy (errbuf, text, errbuf_size - 1);
+      errbuf[errbuf_size - 1] = 0;
+    }
+
+  return needed;
+}
+
+
+/* Release the buffer, fastmap and translate table used by PREG, and
+   reset the fields that describe them.  */
+
+void
+regfree (preg)
+     regex_t *preg;
+{
+  if (preg->translate)
+    free (preg->translate);
+  if (preg->fastmap)
+    free (preg->fastmap);
+  if (preg->buffer)
+    free (preg->buffer);
+
+  preg->translate = NULL;
+  preg->fastmap = NULL;
+  preg->buffer = NULL;
+
+  preg->fastmap_accurate = 0;
+  preg->allocated = 0;
+  preg->used = 0;
+}
+
+#endif /* not emacs */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/lib/regex.h b/lib/regex.h
new file mode 100644
index 0000000..195d84f
--- /dev/null
+++ b/lib/regex.h
@@ -0,0 +1,495 @@
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.12.
+
+ Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS)
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+#include <stddef.h>
+#endif
+
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then . and [^...] do not match newline, and
+ anchors match at newline characters in the string.
+ If not set, then anchors do not match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or failure in regexec.
+ If not set, then regexec also reports where the substrings matched. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msgid' table in regex.c. */
+typedef enum
+{
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); regcomp reports it as REG_EPAREN. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+#ifndef RE_TRANSLATE_TYPE
+#define RE_TRANSLATE_TYPE char *
+#endif
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ RE_TRANSLATE_TYPE translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs; /* Number of elements in `start' and `end'. */
+ regoff_t *start; /* start[i]: offset where register i's match begins. */
+ regoff_t *end; /* end[i]: offset where register i's match ends. */
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if __STDC__
+
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, int length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+#ifdef _REGEX_RE_COMP
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+#endif
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+ _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags));
+extern size_t regerror
+ _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/lib/rx.c b/lib/rx.c
new file mode 100644
index 0000000..453aeed
--- /dev/null
+++ b/lib/rx.c
@@ -0,0 +1,7190 @@
+/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the librx library.
+
+Librx is free software; you can redistribute it and/or modify it under
+the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+Librx is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this software; see the file COPYING.LIB. If not,
+write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
+02139, USA. */
+
+/* NOTE!!! AIX is so losing it requires this to be the first thing in the
+ * file.
+ * Do not put ANYTHING before it!
+ */
+#if !defined (__GNUC__) && defined (_AIX)
+ #pragma alloca
+#endif
+
+/* To make linux happy? */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+const char *rx_version_string = "GNU Rx version 0.07.2";
+
+ /* ``Too hard!''
+ * -- anon.
+ */
+
+
+#include <stdio.h>
+#include <ctype.h>
+#ifndef isgraph
+#define isgraph(c) (isprint (c) && !isspace (c))
+#endif
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+
+#include <sys/types.h>
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+/* `__inline__' is a GNU C keyword, so turn it into nothing for every
+ * other compiler.  GNU C identifies itself with __GNUC__ (the macro
+ * the alloca selection below already tests); the never-defined
+ * __GCC__ used here before erased `__inline__' under GNU C as well.
+ */
+#ifndef __GNUC__
+#undef __inline__
+#define __inline__
+#endif
+
+/* Emacs already defines alloca, sometimes. */
+#ifndef alloca
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top. */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+/* Memory management and stuff for emacs. */
+
+#define CHARBITS 8
+#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
+
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ * use `alloca' instead of `malloc' for the backtracking stack.
+ *
+ * Emacs will die miserably if we don't do this.
+ */
+
+#ifdef REGEX_MALLOC
+#define REGEX_ALLOCATE malloc
+#else /* not REGEX_MALLOC */
+#define REGEX_ALLOCATE alloca
+#endif /* not REGEX_MALLOC */
+
+
+#ifdef RX_WANT_RX_DEFS
+#define RX_DECL extern
+#define RX_DEF_QUAL
+#else
+#define RX_WANT_RX_DEFS
+#define RX_DECL static
+#define RX_DEF_QUAL static
+#endif
+#include "rx.h"
+#undef RX_DECL
+#define RX_DECL RX_DEF_QUAL
+
+
+#ifndef emacs
+
+#ifndef SYNTAX
+
+RX_DECL char re_syntax_table[CHAR_SET_SIZE];
+
+/* Populate re_syntax_table on first use: the ASCII letters, the
+ * decimal digits and `_' are marked Sword, every other entry is zero.
+ * Later calls return without touching the table.
+ */
+#ifdef __STDC__
+static void
+init_syntax_once (void)
+#else
+static void
+init_syntax_once ()
+#endif
+{
+  static int initialized = 0;
+  /* Inclusive [first, last] character ranges that count as "word". */
+  static const char word_ranges[4][2] =
+    {
+      { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' }, { '_', '_' }
+    };
+  int r;
+
+  if (initialized)
+    return;
+
+  bzero (re_syntax_table, sizeof re_syntax_table);
+
+  for (r = 0; r < 4; ++r)
+    {
+      int ch;
+      for (ch = word_ranges[r][0]; ch <= word_ranges[r][1]; ++ch)
+        re_syntax_table[ch] = Sword;
+    }
+
+  initialized = 1;
+}
+#endif /* not SYNTAX */
+#endif /* not emacs */
+
+/* Compile with `-DRX_DEBUG' and use the following flags.
+ *
+ * Debugging flags:
+ * rx_debug_compile - print information as a regexp is compiled
+ * rx_debug_trace - print information as a regexp is executed
+ */
+
+#ifdef RX_DEBUG
+
+int rx_debug_compile = 0;
+int rx_debug_trace = 0;
+static struct re_pattern_buffer * dbug_rxb = 0;
+
+#ifdef __STDC__
+typedef void (*side_effect_printer) (struct rx *, void *, FILE *);
+#else
+typedef void (*side_effect_printer) ();
+#endif
+
+#ifdef __STDC__
+static void print_cset (struct rx *rx, rx_Bitset cset, FILE * fp);
+#else
+static void print_cset ();
+#endif
+
+/* Debugging aid: dump the expression tree rooted at NODE to FP.
+ * Each node is printed on its own line, indented by DEPTH columns;
+ * children are printed three columns deeper than their parent.
+ * Side-effect nodes are rendered through SEPRINT.  A null NODE prints
+ * nothing.
+ */
+#ifdef __STDC__
+static void
+print_rexp (struct rx *rx,
+            struct rexp_node *node, int depth,
+            side_effect_printer seprint, FILE * fp)
+#else
+static void
+print_rexp (rx, node, depth, seprint, fp)
+     struct rx *rx;
+     struct rexp_node *node;
+     int depth;
+     side_effect_printer seprint;
+     FILE * fp;
+#endif
+{
+  if (!node)
+    return;
+
+  switch (node->type)
+    {
+    case r_cset:
+      fprintf (fp, "%*s", depth, "");
+      print_cset (rx, node->params.cset, fp);
+      fputc ('\n', fp);
+      return;
+
+    case r_side_effect:
+      fprintf (fp, "%*sSide effect: ", depth, "");
+      seprint (rx, node->params.side_effect, fp);
+      fputc ('\n', fp);
+      return;
+
+    case r_opt:
+      fprintf (fp, "%*s%s\n", depth, "", "opt");
+      print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
+      return;
+
+    case r_star:
+      fprintf (fp, "%*s%s\n", depth, "", "star");
+      print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
+      return;
+
+    case r_2phase_star:
+      /* Note the order: the right operand is shown before the left. */
+      fprintf (fp, "%*s2phase star\n", depth, "");
+      print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
+      print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
+      return;
+
+    case r_alternate:
+      fprintf (fp, "%*s%s\n", depth, "", "alt");
+      print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
+      print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
+      return;
+
+    case r_concat:
+      fprintf (fp, "%*s%s\n", depth, "", "concat");
+      print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
+      print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
+      return;
+    }
+}
+
+/* Debugging aid: walk the list of NFA states starting at N (following
+ * the `next' links) and describe each one on FP: its id and role, its
+ * outgoing edges, and its possible futures (destination state ids
+ * plus the side effects attached to them).  Side effects are rendered
+ * through SEPRINT.
+ */
+#ifdef __STDC__
+static void
+print_nfa (struct rx * rx,
+           struct rx_nfa_state * n,
+           side_effect_printer seprint, FILE * fp)
+#else
+static void
+print_nfa (rx, n, seprint, fp)
+     struct rx * rx;
+     struct rx_nfa_state * n;
+     side_effect_printer seprint;
+     FILE * fp;
+#endif
+{
+  for (; n; n = n->next)
+    {
+      struct rx_nfa_edge *e;
+      struct rx_possible_future *ec;
+      const char *role = "";
+
+      /* "final" takes precedence over "start". */
+      if (n->is_final)
+        role = "final";
+      else if (n->is_start)
+        role = "start";
+      fprintf (fp, "node %d %s\n", n->id, role);
+
+      for (e = n->edges; e; e = e->next)
+        {
+          fprintf (fp, " edge to %d, ", e->dest->id);
+          if (e->type == ne_epsilon)
+            fprintf (fp, "epsilon\n");
+          else if (e->type == ne_side_effect)
+            {
+              fprintf (fp, "side effect ");
+              seprint (rx, e->params.side_effect, fp);
+              fputc ('\n', fp);
+            }
+          else if (e->type == ne_cset)
+            {
+              fprintf (fp, "cset ");
+              print_cset (rx, e->params.cset, fp);
+              fputc ('\n', fp);
+            }
+        }
+
+      for (ec = n->futures; ec; ec = ec->next)
+        {
+          struct rx_nfa_state_set * s;
+          struct rx_se_list * l;
+
+          fprintf (fp, " eclosure to {");
+          for (s = ec->destset; s; s = s->cdr)
+            fprintf (fp, "%d ", s->car->id);
+          fprintf (fp, "} (");
+          for (l = ec->effects; l; l = l->cdr)
+            {
+              seprint (rx, l->car, fp);
+              fputc (' ', fp);
+            }
+          fprintf (fp, ")\n");
+        }
+    }
+}
+
+/* Printable names used by the debugging printers.
+ *
+ * efnames: re_seprint indexes this table with the negation of a
+ * negative side-effect code, so entry 0 ("bogon") is the slot for a
+ * code of zero magnitude.
+ */
+static char * efnames [] =
+{
+ "bogon",
+ "re_se_try",
+ "re_se_pushback",
+ "re_se_push0",
+ "re_se_pushpos",
+ "re_se_chkpos",
+ "re_se_poppos",
+ "re_se_at_dot",
+ "re_se_syntax",
+ "re_se_not_syntax",
+ "re_se_begbuf",
+ "re_se_hat",
+ "re_se_wordbeg",
+ "re_se_wordbound",
+ "re_se_notwordbound",
+ "re_se_wordend",
+ "re_se_endbuf",
+ "re_se_dollar",
+ "re_se_fail",
+};
+
+/* efnames2: re_seprint indexes this table with the `se' field of a
+ * struct re_se_params entry (the non-negative, parameterized side
+ * effects).
+ */
+static char * efnames2[] =
+{
+ "re_se_win",
+ "re_se_lparen",
+ "re_se_rparen",
+ "re_se_backref",
+ "re_se_iter",
+ "re_se_end_iter",
+ "re_se_tv"
+};
+
+/* inx_names: names of the rx instruction codes.
+ * NOTE(review): presumably in the same order as the instruction enum
+ * declared in rx.h -- confirm there.
+ */
+static char * inx_names[] =
+{
+ "rx_backtrack_point",
+ "rx_do_side_effects",
+ "rx_cache_miss",
+ "rx_next_char",
+ "rx_backtrack",
+ "rx_error_inx",
+ "rx_num_instructions"
+};
+
+
+/* Side-effect printer for regexp side effects.  EFFECT is an integer
+ * code carried in a pointer: a negative code is printed by name from
+ * efnames; a non-negative code is an index into dbug_rxb->se_params
+ * and is printed as NAME(op1,op2) when dbug_rxb is set, or as a bare
+ * number otherwise.  RX is unused.
+ */
+#ifdef __STDC__
+static void
+re_seprint (struct rx * rx, void * effect, FILE * fp)
+#else
+static void
+re_seprint (rx, effect, fp)
+     struct rx * rx;
+     void * effect;
+     FILE * fp;
+#endif
+{
+  int code = (int)effect;
+
+  if (code < 0)
+    {
+      fputs (efnames[-code], fp);
+      return;
+    }
+
+  if (!dbug_rxb)
+    {
+      fprintf (fp, "[complex op # %d]", code);
+      return;
+    }
+
+  {
+    struct re_se_params * p = &dbug_rxb->se_params[code];
+    fprintf (fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2);
+  }
+}
+
+
+/* These are so the regex.c regression tests will compile.
+ * Both functions are intentionally empty stubs: they accept their
+ * argument and do nothing.
+ */
+void
+print_compiled_pattern (rxb)
+ struct re_pattern_buffer * rxb;
+{
+}
+
+void
+print_fastmap (fm)
+ char * fm;
+{
+}
+
+#endif /* RX_DEBUG */
+
+
+
+/* This page: Bitsets. Completely uninteresting. */
+
+/* Return nonzero iff the bitsets A and B, each holding SIZE members,
+ * have identical contents.  Neither set is written to, so A and B may
+ * be read-only and may be the same object.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL int
+rx_bitset_is_equal (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int x;
+
+  /* A bounded scan replaces the former trick of planting ~a[0] in
+   * b[0] as a loop sentinel.  When A and B aliased, that write also
+   * changed a[0], the sentinel never differed, and the scan ran off
+   * the front of the arrays.
+   */
+  for (x = rx_bitset_numb_subsets (size) - 1; x >= 0; --x)
+    if (a[x] != b[x])
+      return 0;
+
+  return 1;
+}
+
+/* Return nonzero iff every member of bitset A is also a member of
+ * bitset B.  Both sets hold SIZE members.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL int
+rx_bitset_is_subset (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  /* Start from the subset count itself: the post-decrement in the
+   * loop test moves X to the last valid index before the first
+   * comparison.  Starting from count - 1 skipped the highest subset
+   * word entirely.
+   */
+  int x = rx_bitset_numb_subsets (size);
+
+  while (x-- && (a[x] & b[x]) == a[x])
+    ;
+
+  /* X reaches -1 only when every word passed the test. */
+  return x == -1;
+}
+
+
+/* Return nonzero iff the bitset SET, holding SIZE members, has no
+ * members at all.  SET is not modified.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_bitset_empty (int size, rx_Bitset set)
+#else
+RX_DECL int
+rx_bitset_empty (size, set)
+     int size;
+     rx_Bitset set;
+#endif
+{
+  int x;
+
+  /* Every subset word must be zero.  The previous version scanned the
+   * upper words but then ignored where the scan stopped, so only
+   * word 0 decided the result.
+   */
+  for (x = rx_bitset_numb_subsets (size) - 1; x >= 0; --x)
+    if (set[x])
+      return 0;
+
+  return 1;
+}
+
+/* Clear bitset B: zero the rx_sizeof_bitset (SIZE) bytes starting
+ * at B.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_null (int size, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_null (size, b)
+ int size;
+ rx_Bitset b;
+#endif
+{
+ bzero (b, rx_sizeof_bitset(size));
+}
+
+
+/* Make B the universal set: store an all-ones word into each of the
+ * rx_bitset_numb_subsets (SIZE) subset words of B.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_universe (int size, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_universe (size, b)
+     int size;
+     rx_Bitset b;
+#endif
+{
+  int remaining;
+
+  for (remaining = rx_bitset_numb_subsets (size); remaining != 0; --remaining)
+    *b++ = ~(RX_subset)0;
+}
+
+
+/* Complement B in place: invert every bit of each of its
+ * rx_bitset_numb_subsets (SIZE) subset words.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_complement (int size, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_complement (size, b)
+     int size;
+     rx_Bitset b;
+#endif
+{
+  int remaining;
+
+  for (remaining = rx_bitset_numb_subsets (size); remaining != 0;
+       --remaining, ++b)
+    *b = ~*b;
+}
+
+
+/* Copy bitset B into bitset A, word by word from the highest subset
+ * index down to 0.  Both sets hold SIZE members.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_assign (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int x = rx_bitset_numb_subsets (size);
+
+  while (--x >= 0)
+    a[x] = b[x];
+}
+
+
+/* A := A | B.  Add every member of B to A; B is only read.  Both sets
+ * hold SIZE members.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_union (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_union (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int x = rx_bitset_numb_subsets (size);
+
+  while (--x >= 0)
+    a[x] |= b[x];
+}
+
+
+/* A := A & B.  Keep in A only the members that are also in B; B is
+ * only read.  Both sets hold SIZE members.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_intersection (int size,
+                        rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_intersection (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int x = rx_bitset_numb_subsets (size);
+
+  while (--x >= 0)
+    a[x] &= b[x];
+}
+
+
+/* A := A & ~B.  Remove from A every member of B; B is only read.
+ * Both sets hold SIZE members.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_difference (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int x = rx_bitset_numb_subsets (size);
+
+  while (--x >= 0)
+    a[x] &= ~ b[x];
+}
+
+
+/* A := ~A & B.  Replace A with the members of B that were not in A;
+ * B is only read.  Both sets hold SIZE members.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_revdifference (int size,
+                         rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_revdifference (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int x = rx_bitset_numb_subsets (size);
+
+  while (--x >= 0)
+    a[x] = ~a[x] & b[x];
+}
+
+/* A := A ^ B.  Replace A with the symmetric difference of A and B;
+ * B is only read.  Both sets hold SIZE members.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_xor (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int x = rx_bitset_numb_subsets (size);
+
+  while (--x >= 0)
+    a[x] ^= b[x];
+}
+
+
+/* Compute a hash of bitset B (SIZE members).  The hash starts from
+ * the address of this function and is XORed with
+ * rx_bitset_subset_val (b, x) for each x from the highest subset
+ * index down to 0.
+ * NOTE(review): whether rx_bitset_subset_val expects a subset index
+ * or a member index is decided in rx.h -- confirm it matches the
+ * subset indices passed here.
+ */
+#ifdef __STDC__
+RX_DECL unsigned long
+rx_bitset_hash (int size, rx_Bitset b)
+#else
+RX_DECL unsigned long
+rx_bitset_hash (size, b)
+     int size;
+     rx_Bitset b;
+#endif
+{
+  unsigned long hash = (unsigned long)rx_bitset_hash;
+  int x = rx_bitset_numb_subsets (size);
+
+  while (--x >= 0)
+    hash ^= rx_bitset_subset_val(b, x);
+
+  return hash;
+}
+
+
+/* Single-bit masks: entry I has only bit I set (the value 1 << I),
+ * for I from 0 through 31.
+ * NOTE(review): the table supplies 32 initializers; this presumes
+ * RX_subset_bits (declared in rx.h) is 32 -- confirm.
+ */
+RX_DECL RX_subset rx_subset_singletons [RX_subset_bits] =
+{
+ 0x1,
+ 0x2,
+ 0x4,
+ 0x8,
+ 0x10,
+ 0x20,
+ 0x40,
+ 0x80,
+ 0x100,
+ 0x200,
+ 0x400,
+ 0x800,
+ 0x1000,
+ 0x2000,
+ 0x4000,
+ 0x8000,
+ 0x10000,
+ 0x20000,
+ 0x40000,
+ 0x80000,
+ 0x100000,
+ 0x200000,
+ 0x400000,
+ 0x800000,
+ 0x1000000,
+ 0x2000000,
+ 0x4000000,
+ 0x8000000,
+ 0x10000000,
+ 0x20000000,
+ 0x40000000,
+ 0x80000000
+};
+
+#ifdef RX_DEBUG
+
+/* Debugging aid: write the members of character set CSET to FP
+ * between square brackets.  Printable members are written as
+ * themselves, all others as a backslash-escaped octal code followed
+ * by a space.  Members 0 .. rx->local_cset_size - 1 are examined.
+ */
+#ifdef __STDC__
+static void
+print_cset (struct rx *rx, rx_Bitset cset, FILE * fp)
+#else
+static void
+print_cset (rx, cset, fp)
+     struct rx *rx;
+     rx_Bitset cset;
+     FILE * fp;
+#endif
+{
+  int ch = 0;
+
+  fputc ('[', fp);
+  while (ch < rx->local_cset_size)
+    {
+      if (RX_bitset_member (cset, ch))
+        {
+          if (!isprint(ch))
+            fprintf (fp, "\\0%o ", ch);
+          else
+            fputc (ch, fp);
+        }
+      ++ch;
+    }
+  fputc (']', fp);
+}
+
+#endif /* RX_DEBUG */
+
+
+
+/* Bit masks applied to a hash value before it is reduced modulo 13 to
+ * pick a bucket.  The hash table routines below index this array by
+ * nesting depth (0 for the top-level table, up to 3), so each level
+ * of nested tables selects buckets from a different subset of the
+ * hash bits.
+ */
+static unsigned long rx_hash_masks[4] =
+{
+ 0x12488421,
+ 0x96699669,
+ 0xbe7dd7eb,
+ 0xffffffff
+};
+
+
+/* Hash tables */
+/* Look VALUE up in the (possibly nested) hash table TABLE.
+ *
+ * HASH is VALUE's hash code; at each nesting level it is masked with
+ * rx_hash_masks[level] and reduced modulo 13 to choose a bucket.
+ * Child tables are followed until a bucket with no child is reached,
+ * and that bucket's chain is searched with RULES->eq.
+ *
+ * Returns the matching item, or 0 when there is none.  Nothing is
+ * modified.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_hash_item *
+rx_hash_find (struct rx_hash * table,
+              unsigned long hash,
+              void * value,
+              struct rx_hash_rules * rules)
+#else
+RX_DECL struct rx_hash_item *
+rx_hash_find (table, hash, value, rules)
+     struct rx_hash * table;
+     unsigned long hash;
+     void * value;
+     struct rx_hash_rules * rules;
+#endif
+{
+  rx_hash_eq same = rules->eq;
+  struct rx_hash_item * it;
+  int level = 0;
+  int bucket = (hash & rx_hash_masks[level]) % 13;
+
+  /* Descend through the child tables. */
+  while (table->children [bucket])
+    {
+      table = table->children [bucket];
+      ++level;
+      bucket = (hash & rx_hash_masks[level]) % 13;
+    }
+
+  /* Search the chain of the bucket we ended up in. */
+  for (it = table->buckets[bucket]; it; it = it->next_same_hash)
+    if (same (it->data, value))
+      return it;
+
+  return 0;
+}
+
+
+/* Find VALUE in the (possibly nested) hash table TABLE, adding it if
+ * it is not already present.
+ *
+ * HASH is VALUE's hash code.  The target bucket is located exactly as
+ * in rx_hash_find.  If RULES->eq finds an equal item there, that item
+ * is returned unchanged.  Otherwise a new item is obtained from
+ * RULES->hash_item_alloc and pushed on the front of the bucket.
+ *
+ * Before adding, a bucket that already holds 4 or more items, at a
+ * nesting depth below 3, is split: a child table is obtained from
+ * RULES->hash_alloc, the bucket's items are rehashed into it using
+ * the next mask, and the new item goes into the child.  If the child
+ * table cannot be allocated the item is simply added to the crowded
+ * bucket.
+ *
+ * Returns the existing or new item, or 0 if the item allocation
+ * fails.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_hash_item *
+rx_hash_store (struct rx_hash * table,
+ unsigned long hash,
+ void * value,
+ struct rx_hash_rules * rules)
+#else
+RX_DECL struct rx_hash_item *
+rx_hash_store (table, hash, value, rules)
+ struct rx_hash * table;
+ unsigned long hash;
+ void * value;
+ struct rx_hash_rules * rules;
+#endif
+{
+ rx_hash_eq eq = rules->eq;
+ int maskc = 0;
+ long mask = rx_hash_masks[0];
+ int bucket = (hash & mask) % 13;
+ int depth = 0;
+
+ /* Descend to the innermost table; maskc and depth advance together. */
+ while (table->children [bucket])
+ {
+ table = table->children [bucket];
+ ++maskc;
+ mask = rx_hash_masks[maskc];
+ bucket = (hash & mask) % 13;
+ ++depth;
+ }
+
+ /* An equal item already stored wins. */
+ {
+ struct rx_hash_item * it = table->buckets[bucket];
+ while (it)
+ if (eq (it->data, value))
+ return it;
+ else
+ it = it->next_same_hash;
+ }
+
+ {
+ if ( (depth < 3)
+ && (table->bucket_size [bucket] >= 4))
+ {
+ struct rx_hash * newtab = ((struct rx_hash *)
+ rules->hash_alloc (rules));
+ /* No memory for a child table: fall back to the crowded bucket. */
+ if (!newtab)
+ goto add_to_bucket;
+ bzero (newtab, sizeof (*newtab));
+ newtab->parent = table;
+ {
+ struct rx_hash_item * them = table->buckets[bucket];
+ unsigned long newmask = rx_hash_masks[maskc + 1];
+ /* Move every item of the old bucket into the child table. */
+ while (them)
+ {
+ struct rx_hash_item * save = them->next_same_hash;
+ int new_buck = (them->hash & newmask) % 13;
+ them->next_same_hash = newtab->buckets[new_buck];
+ newtab->buckets[new_buck] = them;
+ them->table = newtab;
+ them = save;
+ ++newtab->bucket_size[new_buck];
+ ++newtab->refs;
+ }
+ /* The parent gives up the moved items and gains one reference
+ for the child table itself. */
+ table->refs = (table->refs - table->bucket_size[bucket] + 1);
+ table->bucket_size[bucket] = 0;
+ table->buckets[bucket] = 0;
+ table->children[bucket] = newtab;
+ table = newtab;
+ bucket = (hash & newmask) % 13;
+ }
+ }
+ }
+ add_to_bucket:
+ {
+ struct rx_hash_item * it = ((struct rx_hash_item *)
+ rules->hash_item_alloc (rules, value));
+ if (!it)
+ return 0;
+ it->hash = hash;
+ it->table = table;
+ /* DATA and BINDING are to be set in hash_item_alloc */
+ it->next_same_hash = table->buckets [bucket];
+ table->buckets[bucket] = it;
+ ++table->bucket_size [bucket];
+ ++table->refs;
+ return it;
+ }
+}
+
+
+/* Remove the item IT from the hash table that holds it and release it
+ * with RULES->free_hash_item.  A null IT is ignored.
+ *
+ * The nesting depth of IT's table is recovered by counting parent
+ * links (at most 3), which selects the mask used to recompute IT's
+ * bucket.  IT must be on that bucket's chain; the unlink loop does
+ * not check for the end of the chain.
+ *
+ * After the removal, every nested table whose reference count has
+ * dropped to zero is detached from its parent and released with
+ * RULES->free_hash; the top-level table (depth 0) is never freed
+ * here.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules)
+#else
+RX_DECL void
+rx_hash_free (it, rules)
+ struct rx_hash_item * it;
+ struct rx_hash_rules * rules;
+#endif
+{
+ if (it)
+ {
+ struct rx_hash * table = it->table;
+ unsigned long hash = it->hash;
+ int depth = (table->parent
+ ? (table->parent->parent
+ ? (table->parent->parent->parent
+ ? 3
+ : 2)
+ : 1)
+ : 0);
+ int bucket = (hash & rx_hash_masks [depth]) % 13;
+ struct rx_hash_item ** pos = &table->buckets [bucket];
+
+ /* Find the link that points at IT and splice IT out. */
+ while (*pos != it)
+ pos = &(*pos)->next_same_hash;
+ *pos = it->next_same_hash;
+ rules->free_hash_item (it, rules);
+ --table->bucket_size[bucket];
+ --table->refs;
+ /* Prune nested tables that have become empty, innermost first. */
+ while (!table->refs && depth)
+ {
+ struct rx_hash * save = table;
+ table = table->parent;
+ --depth;
+ bucket = (hash & rx_hash_masks [depth]) % 13;
+ --table->refs;
+ table->children[bucket] = 0;
+ rules->free_hash (save, rules);
+ }
+ }
+}
+
+/* Release everything stored in the hash table TAB.
+ *
+ * For each of the 13 slots: a child table is emptied recursively and
+ * then released with RULES->free_hash; otherwise every item on the
+ * slot's chain is passed to FREEFN and then released with
+ * RULES->free_hash_item.  TAB itself is not freed, and its slots are
+ * left holding their old (now stale) pointers.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
+                    struct rx_hash_rules * rules)
+#else
+RX_DECL void
+rx_free_hash_table (tab, freefn, rules)
+     struct rx_hash * tab;
+     rx_hash_freefn freefn;
+     struct rx_hash_rules * rules;
+#endif
+{
+  int slot;
+
+  for (slot = 0; slot < 13; ++slot)
+    {
+      struct rx_hash_item * item;
+
+      if (tab->children[slot])
+        {
+          rx_free_hash_table (tab->children[slot], freefn, rules);
+          rules->free_hash (tab->children[slot], rules);
+          continue;
+        }
+
+      item = tab->buckets[slot];
+      while (item)
+        {
+          /* Fetch the successor before the item is destroyed. */
+          struct rx_hash_item * next = item->next_same_hash;
+          freefn (item);
+          rules->free_hash_item (item, rules);
+          item = next;
+        }
+    }
+}
+
+
+
+/* Utilities for manipulating bitset representations of character sets. */
+
+/* Allocate, with malloc, an empty character set large enough for
+ * rx->local_cset_size members.  Returns 0 if the allocation fails.
+ */
+#ifdef __STDC__
+RX_DECL rx_Bitset
+rx_cset (struct rx *rx)
+#else
+RX_DECL rx_Bitset
+rx_cset (rx)
+     struct rx *rx;
+#endif
+{
+  rx_Bitset fresh;
+
+  fresh = (rx_Bitset) malloc (rx_sizeof_bitset (rx->local_cset_size));
+  if (!fresh)
+    return fresh;
+
+  rx_bitset_null (rx->local_cset_size, fresh);
+  return fresh;
+}
+
+
+/* Return a newly allocated copy of character set A, built by taking
+ * the union of A into a fresh empty set from rx_cset.  Returns 0 if
+ * the allocation fails.
+ */
+#ifdef __STDC__
+RX_DECL rx_Bitset
+rx_copy_cset (struct rx *rx, rx_Bitset a)
+#else
+RX_DECL rx_Bitset
+rx_copy_cset (rx, a)
+     struct rx *rx;
+     rx_Bitset a;
+#endif
+{
+  rx_Bitset dup = rx_cset (rx);
+
+  if (!dup)
+    return dup;
+
+  rx_bitset_union (rx->local_cset_size, dup, a);
+  return dup;
+}
+
+
+/* Release the character set C with free ().  A null C is ignored.
+ * RX is unused.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_cset (struct rx * rx, rx_Bitset c)
+#else
+RX_DECL void
+rx_free_cset (rx, c)
+ struct rx * rx;
+ rx_Bitset c;
+#endif
+{
+ if (c)
+ free ((char *)c);
+}
+
+
+/* Hash table memory allocation policy for the regexp compiler */
+
+/* Allocate one struct rx_hash with malloc and return it, or 0 if
+ * malloc fails.  The memory is not initialized here.  RULES is
+ * unused.
+ */
+#ifdef __STDC__
+static struct rx_hash *
+compiler_hash_alloc (struct rx_hash_rules * rules)
+#else
+static struct rx_hash *
+compiler_hash_alloc (rules)
+ struct rx_hash_rules * rules;
+#endif
+{
+ return (struct rx_hash *)malloc (sizeof (struct rx_hash));
+}
+
+
+/* Allocate one hash item with malloc.  On success its `data' field is
+ * set to VALUE and its `binding' field to 0; the other fields are
+ * left for the caller to fill in.  Returns 0 if malloc fails.  RULES
+ * is unused.
+ */
+#ifdef __STDC__
+static struct rx_hash_item *
+compiler_hash_item_alloc (struct rx_hash_rules * rules, void * value)
+#else
+static struct rx_hash_item *
+compiler_hash_item_alloc (rules, value)
+     struct rx_hash_rules * rules;
+     void * value;
+#endif
+{
+  struct rx_hash_item * item
+    = (struct rx_hash_item *)malloc (sizeof (*item));
+
+  if (!item)
+    return item;
+
+  item->data = value;
+  item->binding = 0;
+  return item;
+}
+
+
+/* Release the hash table structure TAB with free ().  RULES is
+ * unused.
+ */
+#ifdef __STDC__
+static void
+compiler_free_hash (struct rx_hash * tab,
+ struct rx_hash_rules * rules)
+#else
+static void
+compiler_free_hash (tab, rules)
+ struct rx_hash * tab;
+ struct rx_hash_rules * rules;
+#endif
+{
+ free ((char *)tab);
+}
+
+
+/* Release the hash item ITEM with free ().  RULES is unused. */
+#ifdef __STDC__
+static void
+compiler_free_hash_item (struct rx_hash_item * item,
+ struct rx_hash_rules * rules)
+#else
+static void
+compiler_free_hash_item (item, rules)
+ struct rx_hash_item * item;
+ struct rx_hash_rules * rules;
+#endif
+{
+ free ((char *)item);
+}
+
+
+/* This page: REXP_NODE (expression tree) structures. */
+
+/* Allocate a zero-filled expression tree node of kind TYPE.
+ * Returns the new node, or 0 if malloc fails.  RX is unused.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rexp_node (struct rx *rx,
+           enum rexp_node_type type)
+#else
+RX_DECL struct rexp_node *
+rexp_node (rx, type)
+     struct rx *rx;
+     enum rexp_node_type type;
+#endif
+{
+  struct rexp_node *n;
+
+  n = (struct rexp_node *)malloc (sizeof (*n));
+  /* Check before clearing: bzero on a failed allocation would write
+   * through a null pointer.
+   */
+  if (!n)
+    return 0;
+
+  bzero (n, sizeof (*n));
+  n->type = type;
+  return n;
+}
+
+
+/* rx_free_rexp assumes that the bitset passed to rx_mk_r_cset
+ * can be freed using rx_free_cset.
+ */
+/* Build an r_cset node whose character set is B.  The node stores the
+ * pointer B itself, not a copy.  Returns 0 if the node cannot be
+ * allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_cset (struct rx * rx,
+              rx_Bitset b)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_cset (rx, b)
+     struct rx * rx;
+     rx_Bitset b;
+#endif
+{
+  struct rexp_node * node = rexp_node (rx, r_cset);
+
+  if (!node)
+    return node;
+
+  node->params.cset = b;
+  return node;
+}
+
+
+/* Build an r_concat node with A as its left operand and B as its
+ * right operand.  The operands are stored by pointer.  Returns 0 if
+ * the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_concat (struct rx * rx,
+                struct rexp_node * a,
+                struct rexp_node * b)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_concat (rx, a, b)
+     struct rx * rx;
+     struct rexp_node * a;
+     struct rexp_node * b;
+#endif
+{
+  struct rexp_node * node = rexp_node (rx, r_concat);
+
+  if (!node)
+    return node;
+
+  node->params.pair.left = a;
+  node->params.pair.right = b;
+  return node;
+}
+
+
+/* Build an r_alternate node with A as its left alternative and B as
+ * its right alternative.  The operands are stored by pointer.
+ * Returns 0 if the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_alternate (struct rx * rx,
+                   struct rexp_node * a,
+                   struct rexp_node * b)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_alternate (rx, a, b)
+     struct rx * rx;
+     struct rexp_node * a;
+     struct rexp_node * b;
+#endif
+{
+  struct rexp_node * node = rexp_node (rx, r_alternate);
+
+  if (!node)
+    return node;
+
+  node->params.pair.left = a;
+  node->params.pair.right = b;
+  return node;
+}
+
+
+/* Build an r_opt node with A as its (left) operand; the right operand
+ * is set to 0.  Returns 0 if the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_opt (struct rx * rx,
+             struct rexp_node * a)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_opt (rx, a)
+     struct rx * rx;
+     struct rexp_node * a;
+#endif
+{
+  struct rexp_node * node = rexp_node (rx, r_opt);
+
+  if (!node)
+    return node;
+
+  node->params.pair.left = a;
+  node->params.pair.right = 0;
+  return node;
+}
+
+
+/* Build an r_star node with A as its (left) operand; the right
+ * operand is set to 0.  Returns 0 if the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_star (struct rx * rx,
+              struct rexp_node * a)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_star (rx, a)
+     struct rx * rx;
+     struct rexp_node * a;
+#endif
+{
+  struct rexp_node * node = rexp_node (rx, r_star);
+
+  if (!node)
+    return node;
+
+  node->params.pair.left = a;
+  node->params.pair.right = 0;
+  return node;
+}
+
+
+/* Build an r_2phase_star node with A as its left operand and B as its
+ * right operand.  The operands are stored by pointer.  Returns 0 if
+ * the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_2phase_star (struct rx * rx,
+                     struct rexp_node * a,
+                     struct rexp_node * b)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_2phase_star (rx, a, b)
+     struct rx * rx;
+     struct rexp_node * a;
+     struct rexp_node * b;
+#endif
+{
+  struct rexp_node * node = rexp_node (rx, r_2phase_star);
+
+  if (!node)
+    return node;
+
+  node->params.pair.left = a;
+  node->params.pair.right = b;
+  return node;
+}
+
+
+/* Build an r_side_effect node carrying the side effect A.  The
+ * node's params.pair.right field is also set to 0.  Returns 0 if the
+ * node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_side_effect (struct rx * rx,
+                     rx_side_effect a)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_side_effect (rx, a)
+     struct rx * rx;
+     rx_side_effect a;
+#endif
+{
+  struct rexp_node * node = rexp_node (rx, r_side_effect);
+
+  if (!node)
+    return node;
+
+  /* Keep this store order: side_effect first, then pair.right. */
+  node->params.side_effect = a;
+  node->params.pair.right = 0;
+  return node;
+}
+
+
+/* Build an r_data node that carries the opaque pointer A in its left
+ * operand slot; the right operand is set to 0.  Returns 0 if the node
+ * cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_data (struct rx * rx,
+              void * a)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_data (rx, a)
+     struct rx * rx;
+     void * a;
+#endif
+{
+  struct rexp_node * node = rexp_node (rx, r_data);
+
+  if (!node)
+    return node;
+
+  node->params.pair.left = a;
+  node->params.pair.right = 0;
+  return node;
+}
+
+
+/* Release the expression tree rooted at NODE.  A null NODE is
+ * ignored.
+ *
+ * An r_cset node gives up its character set through rx_free_cset;
+ * the operator nodes release both operands recursively; side-effect
+ * and data nodes own nothing else.  The node itself is then released
+ * with free ().
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_rexp (struct rx * rx, struct rexp_node * node)
+#else
+RX_DECL void
+rx_free_rexp (rx, node)
+     struct rx * rx;
+     struct rexp_node * node;
+#endif
+{
+  if (!node)
+    return;
+
+  switch (node->type)
+    {
+    case r_cset:
+      if (node->params.cset)
+        rx_free_cset (rx, node->params.cset);
+      break;
+
+    case r_side_effect:
+    case r_data:        /* r_data shouldn't occur. */
+      break;
+
+    case r_concat:
+    case r_alternate:
+    case r_2phase_star:
+    case r_opt:
+    case r_star:
+      rx_free_rexp (rx, node->params.pair.left);
+      rx_free_rexp (rx, node->params.pair.right);
+      break;
+    }
+
+  free ((char *)node);
+}
+
+
+/* Return a deep copy of the expression tree rooted at NODE.
+ *
+ * Character sets are duplicated with rx_copy_cset, side effects are
+ * copied by value, and operator nodes copy both operands
+ * recursively.  An r_data node is copied as a bare node with no
+ * payload.
+ *
+ * Returns 0 when NODE is null or when any allocation fails; in the
+ * failure case the partially built copy is released first.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_copy_rexp (struct rx *rx,
+              struct rexp_node *node)
+#else
+RX_DECL struct rexp_node *
+rx_copy_rexp (rx, node)
+     struct rx *rx;
+     struct rexp_node *node;
+#endif
+{
+  struct rexp_node *copy;
+
+  if (!node)
+    return 0;
+
+  copy = rexp_node (rx, node->type);
+  if (!copy)
+    return 0;
+
+  switch (node->type)
+    {
+    case r_cset:
+      copy->params.cset = rx_copy_cset (rx, node->params.cset);
+      if (copy->params.cset)
+        break;
+      rx_free_rexp (rx, copy);
+      return 0;
+
+    case r_side_effect:
+      copy->params.side_effect = node->params.side_effect;
+      break;
+
+    case r_concat:
+    case r_alternate:
+    case r_opt:
+    case r_2phase_star:
+    case r_star:
+      copy->params.pair.left =
+        rx_copy_rexp (rx, node->params.pair.left);
+      copy->params.pair.right =
+        rx_copy_rexp (rx, node->params.pair.right);
+      /* A null copy of a non-null operand means a nested failure. */
+      if ((!node->params.pair.left || copy->params.pair.left)
+          && (!node->params.pair.right || copy->params.pair.right))
+        break;
+      rx_free_rexp (rx, copy);
+      return 0;
+
+    case r_data:
+      /* shouldn't happen */
+      break;
+    }
+
+  return copy;
+}
+
+
+
+/* This page: functions to build and destroy graphs that describe nfa's */
+
+/* Constructs a new nfa node. */
+/* Allocate a zero-filled NFA state and push it on the front of
+ * RX's list of states (rx->nfa_states).  Returns the new state, or 0
+ * if malloc fails, in which case the list is left unchanged.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_nfa_state *
+rx_nfa_state (struct rx *rx)
+#else
+RX_DECL struct rx_nfa_state *
+rx_nfa_state (rx)
+     struct rx *rx;
+#endif
+{
+  struct rx_nfa_state * state;
+
+  state = (struct rx_nfa_state *)malloc (sizeof (*state));
+  if (state)
+    {
+      bzero (state, sizeof (*state));
+      state->next = rx->nfa_states;
+      rx->nfa_states = state;
+    }
+  return state;
+}
+
+
+/* Release the storage of the single NFA state N with free ().  N is
+ * not unlinked from any list here, and its edges and futures are not
+ * touched.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_nfa_state (struct rx_nfa_state * n)
+#else
+RX_DECL void
+rx_free_nfa_state (n)
+ struct rx_nfa_state * n;
+#endif
+{
+ free ((char *)n);
+}
+
+
+/* This looks up an nfa node, given a numeric id. Numeric id's are
+ * assigned after the nfa has been built.
+ */
+/* Search RX's list of NFA states for the first one whose id is ID.
+ * Returns that state, or 0 if no state has that id.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_nfa_state *
+rx_id_to_nfa_state (struct rx * rx,
+                    int id)
+#else
+RX_DECL struct rx_nfa_state *
+rx_id_to_nfa_state (rx, id)
+     struct rx * rx;
+     int id;
+#endif
+{
+  struct rx_nfa_state * state = rx->nfa_states;
+
+  while (state && state->id != id)
+    state = state->next;
+
+  return state;
+}
+
+
+/* This adds an edge between two nodes, but doesn't initialize the
+ * edge label.
+ */
+
+/* Allocate an edge of kind TYPE leading to DEST and push it on the
+ * front of START's edge list.  The edge's `params' are left
+ * uninitialized for the caller to fill in.  Returns the new edge, or
+ * 0 if malloc fails, in which case START is left unchanged.  RX is
+ * unused.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_nfa_edge *
+rx_nfa_edge (struct rx *rx,
+             enum rx_nfa_etype type,
+             struct rx_nfa_state *start,
+             struct rx_nfa_state *dest)
+#else
+RX_DECL struct rx_nfa_edge *
+rx_nfa_edge (rx, type, start, dest)
+     struct rx *rx;
+     enum rx_nfa_etype type;
+     struct rx_nfa_state *start;
+     struct rx_nfa_state *dest;
+#endif
+{
+  struct rx_nfa_edge *edge = (struct rx_nfa_edge *)malloc (sizeof (*edge));
+
+  if (edge)
+    {
+      edge->type = type;
+      edge->dest = dest;
+      edge->next = start->edges;
+      start->edges = edge;
+    }
+  return edge;
+}
+
+
+/* Release the storage of the single NFA edge E with free ().  E is
+ * not unlinked from its state's edge list here, and any character set
+ * it refers to is not released.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_nfa_edge (struct rx_nfa_edge * e)
+#else
+RX_DECL void
+rx_free_nfa_edge (e)
+ struct rx_nfa_edge * e;
+#endif
+{
+ free ((char *)e);
+}
+
+
+/* This constructs a POSSIBLE_FUTURE, which is a kind of epsilon-closure
+ * of an NFA. These are added to an nfa automatically by eclose_nfa.
+ */
+
+/* Allocate a possible-future record labeled with the side effect
+ * list EFFECTS.  Its destination set and `next' link start out as 0.
+ * Returns 0 if malloc fails.  RX is unused.
+ */
+#ifdef __STDC__
+static struct rx_possible_future *
+rx_possible_future (struct rx * rx,
+                    struct rx_se_list * effects)
+#else
+static struct rx_possible_future *
+rx_possible_future (rx, effects)
+     struct rx * rx;
+     struct rx_se_list * effects;
+#endif
+{
+  struct rx_possible_future *future
+    = (struct rx_possible_future *) malloc (sizeof (*future));
+
+  if (future)
+    {
+      future->destset = 0;
+      future->next = 0;
+      future->effects = effects;
+    }
+  return future;
+}
+
+
+/* Release the storage of the single possible-future record PF with
+ * free ().  The destination set and effect list it points to are not
+ * released here.
+ */
+#ifdef __STDC__
+static void
+rx_free_possible_future (struct rx_possible_future * pf)
+#else
+static void
+rx_free_possible_future (pf)
+ struct rx_possible_future * pf;
+#endif
+{
+ free ((char *)pf);
+}
+
+
+/* Destroy every NFA state on RX's list, leaving rx->nfa_states null.
+ *
+ * For each state, every edge is released (a cset edge first gives up
+ * its character set through rx_free_cset), then every possible-future
+ * record, then the state itself.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_nfa (struct rx *rx)
+#else
+RX_DECL void
+rx_free_nfa (rx)
+     struct rx *rx;
+#endif
+{
+  struct rx_nfa_state * state;
+
+  while ((state = rx->nfa_states) != 0)
+    {
+      struct rx_nfa_edge * edge;
+      struct rx_possible_future * pf;
+
+      /* Pop and release the edges one at a time. */
+      while ((edge = state->edges) != 0)
+        {
+          if (edge->type == ne_cset)
+            rx_free_cset (rx, edge->params.cset);
+          state->edges = edge->next;
+          rx_free_nfa_edge (edge);
+        }
+
+      /* Release the partial epsilon closures of this state. */
+      pf = state->futures;
+      while (pf)
+        {
+          struct rx_possible_future * doomed = pf;
+          pf = pf->next;
+          rx_free_possible_future (doomed);
+        }
+
+      rx->nfa_states = state->next;
+      rx_free_nfa_state (state);
+    }
+}
+
+
+
+/* This page: translating a pattern expression into an nfa and doing the
+ * static part of the nfa->super-nfa translation.
+ */
+
+/* This is the thompson regexp->nfa algorithm.
+ * It is modified to allow for `side-effect epsilons.' Those are
+ * edges that are taken whenever a similar epsilon edge would be,
+ * but which imply that some side effect occurs when the edge
+ * is taken.
+ *
+ * Side effects are used to model parts of the pattern language
+ * that are not regular (in the formal sense).
+ */
+
+/* Translate the expression tree REXP into NFA states and edges owned
+ * by RX (Thompson's construction, extended with side-effect edges).
+ *
+ * *START and *END are in/out parameters: a non-null value names a
+ * state supplied by the caller, a null value asks for a fresh state,
+ * which is stored back through the pointer.  For a null REXP, *END is
+ * simply set to *START.
+ *
+ * Returns 1 on success and 0 on failure (allocation failure, or an
+ * r_data node in the tree).  On failure the states and edges built so
+ * far stay linked into RX, so that rx_free_nfa can release them;
+ * nothing that is still reachable from RX is freed here.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_build_nfa (struct rx *rx,
+              struct rexp_node *rexp,
+              struct rx_nfa_state **start,
+              struct rx_nfa_state **end)
+#else
+RX_DECL int
+rx_build_nfa (rx, rexp, start, end)
+     struct rx *rx;
+     struct rexp_node *rexp;
+     struct rx_nfa_state **start;
+     struct rx_nfa_state **end;
+#endif
+{
+  struct rx_nfa_edge *edge;
+
+  /* Start & end nodes may have been allocated by the caller. */
+  *start = *start ? *start : rx_nfa_state (rx);
+
+  if (!*start)
+    return 0;
+
+  if (!rexp)
+    {
+      *end = *start;
+      return 1;
+    }
+
+  *end = *end ? *end : rx_nfa_state (rx);
+
+  /* Do not free *start here: it is either the caller's state or is
+   * already linked into rx->nfa_states, and freeing it would leave a
+   * dangling pointer in that list.
+   */
+  if (!*end)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_data:
+      return 0;
+
+    case r_cset:
+      edge = rx_nfa_edge (rx, ne_cset, *start, *end);
+      if (!edge)
+        return 0;
+      edge->params.cset = rx_copy_cset (rx, rexp->params.cset);
+      if (!edge->params.cset)
+        {
+          /* rx_nfa_edge pushed EDGE on the front of (*start)->edges;
+           * take it off again before releasing it.
+           */
+          (*start)->edges = edge->next;
+          rx_free_nfa_edge (edge);
+          return 0;
+        }
+      return 1;
+
+    case r_opt:
+      return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
+              && rx_nfa_edge (rx, ne_epsilon, *start, *end));
+
+    case r_star:
+      {
+        struct rx_nfa_state * star_start = 0;
+        struct rx_nfa_state * star_end = 0;
+        return (rx_build_nfa (rx, rexp->params.pair.left,
+                              &star_start, &star_end)
+                && star_start
+                && star_end
+                && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
+                && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
+                && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
+
+                && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
+      }
+
+    case r_2phase_star:
+      {
+        struct rx_nfa_state * star_start = 0;
+        struct rx_nfa_state * star_end = 0;
+        struct rx_nfa_state * loop_exp_start = 0;
+        struct rx_nfa_state * loop_exp_end = 0;
+
+        return (rx_build_nfa (rx, rexp->params.pair.left,
+                              &star_start, &star_end)
+                && rx_build_nfa (rx, rexp->params.pair.right,
+                                 &loop_exp_start, &loop_exp_end)
+                && star_start
+                && star_end
+                && loop_exp_end
+                && loop_exp_start
+                && rx_nfa_edge (rx, ne_epsilon, star_start, *end)
+                && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
+                && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
+
+                && rx_nfa_edge (rx, ne_epsilon, star_end, loop_exp_start)
+                && rx_nfa_edge (rx, ne_epsilon, loop_exp_end, star_start));
+      }
+
+
+    case r_concat:
+      {
+        /* The end of the left operand doubles as the start of the
+         * right operand.
+         */
+        struct rx_nfa_state *shared = 0;
+        return
+          (rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
+           && rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
+      }
+
+    case r_alternate:
+      {
+        struct rx_nfa_state *ls = 0;
+        struct rx_nfa_state *le = 0;
+        struct rx_nfa_state *rs = 0;
+        struct rx_nfa_state *re = 0;
+        return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
+                && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
+                && rx_nfa_edge (rx, ne_epsilon, *start, ls)
+                && rx_nfa_edge (rx, ne_epsilon, *start, rs)
+                && rx_nfa_edge (rx, ne_epsilon, le, *end)
+                && rx_nfa_edge (rx, ne_epsilon, re, *end));
+      }
+
+    case r_side_effect:
+      edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
+      if (!edge)
+        return 0;
+      edge->params.side_effect = rexp->params.side_effect;
+      return 1;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+/* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon
+ * transitions. Only these nodes can occur in super-states.
+ * All nodes are given an integer id.
+ * The id is non-negative if the node has non-epsilon out-transitions, negative
+ * otherwise (this is because we want the non-negative ids to be used as
+ * array indexes in a few places).
+ */
+
+#ifdef __STDC__
+RX_DECL void
+rx_name_nfa_states (struct rx *rx)
+#else
+RX_DECL void
+rx_name_nfa_states (rx)
+     struct rx *rx;
+#endif
+{
+  struct rx_nfa_state *node;
+
+  rx->nodec = 0;
+  rx->epsnodec = -1;
+
+  for (node = rx->nfa_states; node; node = node->next)
+    {
+      struct rx_nfa_edge *probe;
+
+      /* A start node always needs its epsilon closure computed. */
+      if (node->is_start)
+        node->eclosure_needed = 1;
+
+      /* Find the first character-set edge leaving this node, if any. */
+      for (probe = node->edges; probe; probe = probe->next)
+        if (probe->type == ne_cset)
+          break;
+
+      if (!probe)
+        {
+          /* Only epsilon/side-effect edges: hand out a negative id. */
+          node->id = rx->epsnodec--;
+        }
+      else
+        {
+          struct rx_nfa_edge *out;
+
+          /* At least one cset edge: hand out the next non-negative id and
+           * flag the destination of every outgoing edge as needing a closure.
+           */
+          node->id = rx->nodec++;
+          for (out = node->edges; out; out = out->next)
+            out->dest->eclosure_needed = 1;
+        }
+    }
+
+  /* epsnodec counted downward from -1; flip its sign once numbering is done. */
+  rx->epsnodec = -rx->epsnodec;
+}
+
+
+/* This page: data structures for the static part of the nfa->supernfa
+ * translation.
+ *
+ * There are side effect lists -- lists of side effects occurring
+ * along an uninterrupted, acyclic path of side-effect epsilon edges.
+ * Such paths are collapsed to single edges in the course of computing
+ * epsilon closures. Such single edges are labeled with a list of all
+ * the side effects entailed in crossing them. Like lists of side
+ * effects are made == by the constructors below.
+ *
+ * There are also nfa state sets. These are used to hold a list of all
+ * states reachable from a starting state for a given type of transition
+ * and side effect list. These are also hash-consed.
+ */
+
+/* The next several functions compare, construct, etc. lists of side
+ * effects. See ECLOSE_NFA (below) for details.
+ */
+
+/* Ordering of rx_se_list
+ * (-1, 0, 1 return value convention).
+ */
+
+#ifdef __STDC__
+static int
+se_list_cmp (void * va, void * vb)
+#else
+static int
+se_list_cmp (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  struct rx_se_list * lhs = (struct rx_se_list *)va;
+  struct rx_se_list * rhs = (struct rx_se_list *)vb;
+
+  /* Walk both lists in lock step.  Identical tails (including two empty
+   * lists) compare equal; an exhausted list sorts before a longer one.
+   * Note the element ordering: a smaller car yields 1, a larger car -1.
+   */
+  for (;;)
+    {
+      if (lhs == rhs)
+        return 0;
+      if (!lhs)
+        return -1;
+      if (!rhs)
+        return 1;
+      if ((long)lhs->car < (long)rhs->car)
+        return 1;
+      if ((long)lhs->car > (long)rhs->car)
+        return -1;
+      lhs = lhs->cdr;
+      rhs = rhs->cdr;
+    }
+}
+
+
+#ifdef __STDC__
+static int
+se_list_equal (void * va, void * vb)
+#else
+static int
+se_list_equal (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  /* Equality predicate built on the ordering: non-zero iff the two
+   * side effect lists compare equal.
+   */
+  return se_list_cmp (va, vb) == 0;
+}
+
+/* Hash rules handed to rx_hash_store/rx_hash_find for the side effect list
+ * memo: se_list_equal decides item equality, and the compiler_* routines
+ * (defined elsewhere in this file) allocate and free tables and items.
+ */
+static struct rx_hash_rules se_list_hash_rules =
+{
+ se_list_equal,
+ compiler_hash_alloc,
+ compiler_free_hash,
+ compiler_hash_item_alloc,
+ compiler_free_hash_item
+};
+
+
+#ifdef __STDC__
+static struct rx_se_list *
+side_effect_cons (struct rx * rx,
+                  void * se, struct rx_se_list * list)
+#else
+static struct rx_se_list *
+side_effect_cons (rx, se, list)
+     struct rx * rx;
+     void * se;
+     struct rx_se_list * list;
+#endif
+{
+  /* Allocate a plain (not hash-consed) cell holding SE in front of LIST.
+   * Returns 0 when malloc fails.  RX is not used.
+   */
+  struct rx_se_list * cell
+    = (struct rx_se_list *) malloc (sizeof (struct rx_se_list));
+
+  if (cell)
+    {
+      cell->car = se;
+      cell->cdr = list;
+    }
+  return cell;
+}
+
+
+/* Return the unique (hash-consed) side effect list cell whose car is CAR
+ * and whose cdr is CDR, creating it in MEMO if it does not exist yet.
+ * Returns 0 if the hash table or the new cell cannot be allocated.
+ */
+#ifdef __STDC__
+static struct rx_se_list *
+hash_cons_se_prog (struct rx * rx,
+                   struct rx_hash * memo,
+                   void * car, struct rx_se_list * cdr)
+#else
+static struct rx_se_list *
+hash_cons_se_prog (rx, memo, car, cdr)
+     struct rx * rx;
+     struct rx_hash * memo;
+     void * car;
+     struct rx_se_list * cdr;
+#endif
+{
+  long hash = (long)car ^ (long)cdr;
+  struct rx_se_list template;
+  struct rx_hash_item * it;
+
+  template.car = car;
+  template.cdr = cdr;
+
+  it = rx_hash_store (memo, hash, (void *)&template, &se_list_hash_rules);
+  if (!it)
+    return 0;
+
+  /* If the item still points at our stack template, the cell is new and
+   * must be replaced by a heap copy before this function returns.
+   */
+  if (it->data == (void *)&template)
+    {
+      struct rx_se_list * consed
+        = (struct rx_se_list *) malloc (sizeof (*consed));
+
+      /* Overwrite the data pointer even when malloc failed, so the table
+       * never keeps a dangling pointer to the stack template (the same
+       * convention nfa_set_cons follows).
+       */
+      it->data = (void *)consed;
+      if (!consed)
+        return 0;
+      *consed = template;
+    }
+  return (struct rx_se_list *)it->data;
+}
+
+
+#ifdef __STDC__
+static struct rx_se_list *
+hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
+#else
+static struct rx_se_list *
+hash_se_prog (rx, memo, prog)
+     struct rx * rx;
+     struct rx_hash * memo;
+     struct rx_se_list * prog;
+#endif
+{
+  /* Rebuild PROG out of hash-consed cells.  Each element is pushed on the
+   * front of the result, so the result lists the elements of PROG in
+   * reverse order.  Returns 0 for an empty PROG and also when a cell
+   * cannot be consed.
+   */
+  struct rx_se_list * reversed = 0;
+  struct rx_se_list * cursor;
+
+  for (cursor = prog; cursor; cursor = cursor->cdr)
+    {
+      reversed = hash_cons_se_prog (rx, memo, cursor->car, reversed);
+      if (!reversed)
+        return 0;
+    }
+  return reversed;
+}
+
+#ifdef __STDC__
+static int
+nfa_set_cmp (void * va, void * vb)
+#else
+static int
+nfa_set_cmp (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  struct rx_nfa_state_set * lhs = (struct rx_nfa_state_set *)va;
+  struct rx_nfa_state_set * rhs = (struct rx_nfa_state_set *)vb;
+
+  /* Ordering of nfa state sets (-1, 0, 1 convention), element by element
+   * on state ids.  Identical tails compare equal; an exhausted set sorts
+   * before a longer one; a smaller id yields 1 and a larger id -1.
+   */
+  for (;;)
+    {
+      if (lhs == rhs)
+        return 0;
+      if (!lhs)
+        return -1;
+      if (!rhs)
+        return 1;
+      if (lhs->car->id < rhs->car->id)
+        return 1;
+      if (lhs->car->id > rhs->car->id)
+        return -1;
+      lhs = lhs->cdr;
+      rhs = rhs->cdr;
+    }
+}
+
+#ifdef __STDC__
+static int
+nfa_set_equal (void * va, void * vb)
+#else
+static int
+nfa_set_equal (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  /* Non-zero iff the two nfa state sets compare equal under nfa_set_cmp. */
+  return nfa_set_cmp (va, vb) == 0;
+}
+
+/* Hash rules handed to rx_hash_store/rx_hash_find for the nfa state set
+ * memo: nfa_set_equal decides item equality, and the compiler_* routines
+ * (defined elsewhere in this file) allocate and free tables and items.
+ */
+static struct rx_hash_rules nfa_set_hash_rules =
+{
+ nfa_set_equal,
+ compiler_hash_alloc,
+ compiler_free_hash,
+ compiler_hash_item_alloc,
+ compiler_free_hash_item
+};
+
+
+#ifdef __STDC__
+static struct rx_nfa_state_set *
+nfa_set_cons (struct rx * rx,
+              struct rx_hash * memo, struct rx_nfa_state * state,
+              struct rx_nfa_state_set * set)
+#else
+static struct rx_nfa_state_set *
+nfa_set_cons (rx, memo, state, set)
+     struct rx * rx;
+     struct rx_hash * memo;
+     struct rx_nfa_state * state;
+     struct rx_nfa_state_set * set;
+#endif
+{
+  /* Hash-consing constructor: returns the unique cell in MEMO whose car
+   * is STATE and whose cdr is SET, creating it on first use.  Returns 0
+   * if the table or the cell cannot be allocated.
+   */
+  struct rx_nfa_state_set probe;
+  struct rx_hash_item * slot;
+  struct rx_nfa_state_set * fresh;
+
+  probe.car = state;
+  probe.cdr = set;
+  slot = rx_hash_store (memo,
+                        (((long)state) >> 8) ^ (long)set,
+                        &probe, &nfa_set_hash_rules);
+  if (!slot)
+    return 0;
+
+  /* An existing cell was found: hand it back unchanged. */
+  if (slot->data != &probe)
+    return (struct rx_nfa_state_set *)slot->data;
+
+  /* The slot still refers to the stack probe; swap in a heap copy.  The
+   * data pointer is overwritten even if the allocation failed.
+   */
+  fresh = (struct rx_nfa_state_set *) malloc (sizeof (*fresh));
+  slot->data = (void *) fresh;
+  if (fresh)
+    *fresh = probe;
+  return fresh;
+}
+
+
+/* Return the hash-consed set equal to SET with STATE added.  Sets are kept
+ * in increasing order of state id, and a state already present is not
+ * added again.  Returns 0 if a needed cell cannot be allocated.
+ */
+#ifdef __STDC__
+static struct rx_nfa_state_set *
+nfa_set_enjoin (struct rx * rx,
+                struct rx_hash * memo, struct rx_nfa_state * state,
+                struct rx_nfa_state_set * set)
+#else
+static struct rx_nfa_state_set *
+nfa_set_enjoin (rx, memo, state, set)
+     struct rx * rx;
+     struct rx_hash * memo;
+     struct rx_nfa_state * state;
+     struct rx_nfa_state_set * set;
+#endif
+{
+  /* STATE belongs in front of everything that remains. */
+  if (!set || state->id < set->car->id)
+    return nfa_set_cons (rx, memo, state, set);
+
+  /* Already a member. */
+  if (state->id == set->car->id)
+    return set;
+
+  {
+    struct rx_nfa_state_set * newcdr
+      = nfa_set_enjoin (rx, memo, state, set->cdr);
+
+    /* A successful insertion always yields a non-empty list, so a null
+     * result can only mean that allocation failed further down.  Report
+     * it instead of rebuilding a truncated set (or returning SET as if
+     * STATE had been added).
+     */
+    if (!newcdr)
+      return 0;
+    if (newcdr != set->cdr)
+      set = nfa_set_cons (rx, memo, set->car, newcdr);
+    return set;
+  }
+}
+
+
+
+/* This page: computing epsilon closures. The closures aren't total.
+ * Each node's closures are partitioned according to the side effects entailed
+ * along the epsilon edges. Return true on success.
+ */
+
+/* State threaded through the recursive calls of eclose_node. */
+struct eclose_frame
+{
+ /* Side effects crossed so far on the current path, most recent first;
+ * eclose_node pushes a cell before following a side-effect edge and pops
+ * it afterwards.
+ */
+ struct rx_se_list *prog_backwards;
+};
+
+
+/* Add NODE, and everything reachable from it along epsilon and side-effect
+ * edges, to the partial closures ("futures") of OUTNODE.  FRAME carries the
+ * list of side effects crossed so far.  Returns 1 on success and 0 if an
+ * allocation fails.
+ */
+#ifdef __STDC__
+static int
+eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
+             struct rx_nfa_state *node, struct eclose_frame *frame)
+#else
+static int
+eclose_node (rx, outnode, node, frame)
+     struct rx *rx;
+     struct rx_nfa_state *outnode;
+     struct rx_nfa_state *node;
+     struct eclose_frame *frame;
+#endif
+{
+  struct rx_nfa_edge *e = node->edges;
+
+  /* For each node, we follow all epsilon paths to build the closure.
+   * The closure omits nodes that have only epsilon edges.
+   * The closure is split into partial closures -- all the states in
+   * a partial closure are reached by crossing the same list
+   * of side effects (though not necessarily the same path).
+   */
+
+  /* The mark is set while NODE is on the current path; it breaks cycles. */
+  if (node->mark)
+    return 1;
+  node->mark = 1;
+
+  if (node->id >= 0 || node->is_final)
+    {
+      struct rx_possible_future **ec;
+      struct rx_se_list * prog_in_order
+        = ((struct rx_se_list *)hash_se_prog (rx,
+                                              &rx->se_list_memo,
+                                              frame->prog_backwards));
+      int cmp = 0;
+
+      /* hash_se_prog returns 0 both for an empty list and on allocation
+       * failure.  With a non-empty input, 0 can only mean failure.
+       */
+      if (frame->prog_backwards && !prog_in_order)
+        return 0;
+
+      /* Futures are kept sorted by side effect list; find the slot. */
+      ec = &outnode->futures;
+      while (*ec)
+        {
+          cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
+          if (cmp <= 0)
+            break;
+          ec = &(*ec)->next;
+        }
+      if (!*ec || (cmp < 0))
+        {
+          /* No future with this effect list yet: splice in a new one.
+           * The allocation is checked before the list is touched, so a
+           * failure neither dereferences a null pointer nor drops the
+           * rest of the list.
+           */
+          struct rx_possible_future * fresh
+            = rx_possible_future (rx, prog_in_order);
+          if (!fresh)
+            return 0;
+          fresh->next = *ec;
+          *ec = fresh;
+        }
+      if (node->id >= 0)
+        {
+          (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
+                                           node, (*ec)->destset);
+          if (!(*ec)->destset)
+            return 0;
+        }
+    }
+
+  while (e)
+    {
+      switch (e->type)
+        {
+        case ne_epsilon:
+          if (!eclose_node (rx, outnode, e->dest, frame))
+            return 0;
+          break;
+        case ne_side_effect:
+          {
+            struct rx_se_list * pushed
+              = side_effect_cons (rx,
+                                  e->params.side_effect,
+                                  frame->prog_backwards);
+            int ok;
+
+            /* On failure the frame keeps its previous, still valid list. */
+            if (!pushed)
+              return 0;
+            frame->prog_backwards = pushed;
+
+            ok = eclose_node (rx, outnode, e->dest, frame);
+
+            /* Pop the temporary cell whether or not the recursion
+             * succeeded, so it is not leaked on the error path.
+             */
+            frame->prog_backwards = pushed->cdr;
+            free ((char *)pushed);
+            if (!ok)
+              return 0;
+            break;
+          }
+        default:
+          break;
+        }
+      e = e->next;
+    }
+  node->mark = 0;
+  return 1;
+}
+
+
+#ifdef __STDC__
+RX_DECL int
+rx_eclose_nfa (struct rx *rx)
+#else
+RX_DECL int
+rx_eclose_nfa (rx)
+     struct rx *rx;
+#endif
+{
+  /* Counter used to stamp each closed NFA with a distinct rx_id. */
+  static int rx_id = 0;
+  struct eclose_frame frame;
+  struct rx_nfa_state *node;
+
+  frame.prog_backwards = 0;
+  rx->rx_id = rx_id++;
+
+  /* Start from empty memo tables for side effect lists and state sets. */
+  bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
+  bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
+
+  /* Compute the closure of every node flagged as needing one; stop and
+   * report failure as soon as one closure cannot be built.
+   */
+  for (node = rx->nfa_states; node; node = node->next)
+    {
+      node->futures = 0;
+      if (!node->eclosure_needed)
+        continue;
+      if (!eclose_node (rx, node, node, &frame))
+        return 0;
+    }
+  return 1;
+}
+
+
+/* This deletes epsilon edges from an NFA. After running eclose_node,
+ * we have no more need for these edges. They are removed to simplify
+ * further operations on the NFA.
+ */
+
+#ifdef __STDC__
+RX_DECL void
+rx_delete_epsilon_transitions (struct rx *rx)
+#else
+RX_DECL void
+rx_delete_epsilon_transitions (rx)
+     struct rx *rx;
+#endif
+{
+  struct rx_nfa_state *node;
+
+  for (node = rx->nfa_states; node; node = node->next)
+    {
+      /* LINK always addresses the pointer that refers to the edge under
+       * inspection, so an edge can be unlinked without a "previous" pointer.
+       */
+      struct rx_nfa_edge **link = &node->edges;
+
+      while (*link)
+        {
+          struct rx_nfa_edge *cur = *link;
+
+          if (cur->type == ne_epsilon || cur->type == ne_side_effect)
+            {
+              *link = cur->next;
+              rx_free_nfa_edge (cur);
+            }
+          else
+            link = &cur->next;
+        }
+    }
+}
+
+
+/* This page: storing the nfa in a contiguous region of memory for
+ * subsequent conversion to a super-nfa.
+ */
+
+/* This is for qsort on an array of nfa_states. The order
+ * is based on state ids and goes
+ * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0)
+ * This way, positive ids double as array indices.
+ */
+
+#ifdef __STDC__
+static int
+nfacmp (void * va, void * vb)
+#else
+static int
+nfacmp (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  /* VA and VB address array elements, each of which is a state pointer. */
+  struct rx_nfa_state *lhs = *(struct rx_nfa_state **)va;
+  struct rx_nfa_state *rhs = *(struct rx_nfa_state **)vb;
+  int lhs_negative;
+  int rhs_negative;
+
+  if (lhs == rhs)
+    return 0;
+
+  lhs_negative = (lhs->id < 0);
+  rhs_negative = (rhs->id < 0);
+
+  /* States with non-negative ids sort ahead of states with negative ids. */
+  if (lhs_negative != rhs_negative)
+    return lhs_negative ? 1 : -1;
+
+  /* Same sign: increasing id order. */
+  return (lhs->id < rhs->id) ? -1 : 1;
+}
+
+#ifdef __STDC__
+static int
+count_hash_nodes (struct rx_hash * st)
+#else
+static int
+count_hash_nodes (st)
+     struct rx_hash * st;
+#endif
+{
+  /* Total the items stored in the hash table rooted at ST.  Each of the
+   * 13 slots either has a child table, which is counted recursively, or
+   * contributes its own bucket size.
+   */
+  int total = 0;
+  int slot;
+
+  for (slot = 0; slot < 13; ++slot)
+    {
+      if (st->children[slot])
+        total += count_hash_nodes (st->children[slot]);
+      else
+        total += st->bucket_size[slot];
+    }
+  return total;
+}
+
+
+#ifdef __STDC__
+static void
+se_memo_freer (struct rx_hash_item * node)
+#else
+static void
+se_memo_freer (node)
+     struct rx_hash_item * node;
+#endif
+{
+  /* Per-item callback for rx_free_hash_table on the side effect list
+   * memo: releases the cell the item refers to.
+   */
+  char * payload = (char *)node->data;
+  free (payload);
+}
+
+
+#ifdef __STDC__
+static void
+nfa_set_freer (struct rx_hash_item * node)
+#else
+static void
+nfa_set_freer (node)
+     struct rx_hash_item * node;
+#endif
+{
+  /* Per-item callback for rx_free_hash_table on the nfa state set memo:
+   * releases the cell the item refers to.
+   */
+  char * payload = (char *)node->data;
+  free (payload);
+}
+
+
+/* This copies an entire NFA into a single malloced block of memory.
+ * Mostly this is for compatibility with regex.c, though it is convenient
+ * to have the nfa nodes in an array.
+ *
+ * *MEM and *SIZE describe the caller's buffer; the buffer is grown with
+ * remalloc (and *SIZE updated) when it is too small. On success the old
+ * NFA and both memo tables are freed, rx->nfa_states points at *MEM, and
+ * 1 is returned. Returns 0 if memory cannot be obtained.
+ */
+
+#ifdef __STDC__
+RX_DECL int
+rx_compactify_nfa (struct rx *rx,
+ void **mem, unsigned long *size)
+#else
+RX_DECL int
+rx_compactify_nfa (rx, mem, size)
+ struct rx *rx;
+ void **mem;
+ unsigned long *size;
+#endif
+{
+ int total_nodec;
+ struct rx_nfa_state *n;
+ int edgec = 0;
+ int eclosec = 0;
+ /* Number of hash-consed cells held in each memo table. */
+ int se_list_consc = count_hash_nodes (&rx->se_list_memo);
+ int nfa_setc = count_hash_nodes (&rx->set_list_memo);
+ unsigned long total_size;
+
+ /* This takes place in two stages. First, the total size of the
+ * nfa is computed, then structures are copied.
+ */
+ n = rx->nfa_states;
+ total_nodec = 0;
+ /* Count the states, their edges and their futures. */
+ while (n)
+ {
+ struct rx_nfa_edge *e = n->edges;
+ struct rx_possible_future *ec = n->futures;
+ ++total_nodec;
+ while (e)
+ {
+ ++edgec;
+ e = e->next;
+ }
+ while (ec)
+ {
+ ++eclosec;
+ ec = ec->next;
+ }
+ n = n->next;
+ }
+
+ /* One bitset is budgeted per edge.
+ * NOTE(review): rx->reserved bytes are included in the size, but where
+ * they sit relative to the regions laid out below is not visible in
+ * this function -- confirm against the callers.
+ */
+ total_size = (total_nodec * sizeof (struct rx_nfa_state)
+ + edgec * rx_sizeof_bitset (rx->local_cset_size)
+ + edgec * sizeof (struct rx_nfa_edge)
+ + nfa_setc * sizeof (struct rx_nfa_state_set)
+ + eclosec * sizeof (struct rx_possible_future)
+ + se_list_consc * sizeof (struct rx_se_list)
+ + rx->reserved);
+
+ if (total_size > *size)
+ {
+ *mem = remalloc (*mem, total_size);
+ if (*mem)
+ *size = total_size;
+ else
+ return 0;
+ }
+ /* Now we've allocated the memory; this copies the NFA. */
+ {
+ /* The scratch array is static, so it is reused by later calls. */
+ static struct rx_nfa_state **scratch = 0;
+ static int scratch_alloc = 0;
+ /* The block is carved into consecutive regions, in this order:
+ * states, edges, side effect list cells, futures, state set cells,
+ * bitsets. Each new_* pointer is the allocation cursor of a region.
+ */
+ struct rx_nfa_state *state_base = (struct rx_nfa_state *) * mem;
+ struct rx_nfa_state *new_state = state_base;
+ struct rx_nfa_edge *new_edge =
+ (struct rx_nfa_edge *)
+ ((char *) state_base + total_nodec * sizeof (struct rx_nfa_state));
+ struct rx_se_list * new_se_list =
+ (struct rx_se_list *)
+ ((char *)new_edge + edgec * sizeof (struct rx_nfa_edge));
+ struct rx_possible_future *new_close =
+ ((struct rx_possible_future *)
+ ((char *) new_se_list
+ + se_list_consc * sizeof (struct rx_se_list)));
+ struct rx_nfa_state_set * new_nfa_set =
+ ((struct rx_nfa_state_set *)
+ ((char *)new_close + eclosec * sizeof (struct rx_possible_future)));
+ char *new_bitset =
+ ((char *) new_nfa_set + nfa_setc * sizeof (struct rx_nfa_state_set));
+ int x;
+ struct rx_nfa_state *n;
+
+ if (scratch_alloc < total_nodec)
+ {
+ scratch = ((struct rx_nfa_state **)
+ remalloc (scratch, total_nodec * sizeof (*scratch)));
+ if (scratch)
+ scratch_alloc = total_nodec;
+ else
+ {
+ scratch_alloc = 0;
+ return 0;
+ }
+ }
+
+ /* Collect the state pointers and sort them with nfacmp, so that a state
+ * with a non-negative id ends up at the array index equal to its id.
+ */
+ for (x = 0, n = rx->nfa_states; n; n = n->next)
+ scratch[x++] = n;
+
+ qsort (scratch, total_nodec,
+ sizeof (struct rx_nfa_state *), (int (*)())nfacmp);
+
+ for (x = 0; x < total_nodec; ++x)
+ {
+ struct rx_possible_future *eclose = scratch[x]->futures;
+ struct rx_nfa_edge *edge = scratch[x]->edges;
+ struct rx_nfa_state *cn = new_state++;
+ cn->futures = 0;
+ cn->edges = 0;
+ /* The copied states are chained in array order. */
+ cn->next = (x == total_nodec - 1) ? 0 : (cn + 1);
+ cn->id = scratch[x]->id;
+ cn->is_final = scratch[x]->is_final;
+ cn->is_start = scratch[x]->is_start;
+ cn->mark = 0;
+ while (edge)
+ {
+ /* Array index of the destination: a negative id is counted back
+ * from the end of the array.
+ */
+ int indx = (edge->dest->id < 0
+ ? (total_nodec + edge->dest->id)
+ : edge->dest->id);
+ struct rx_nfa_edge *e = new_edge++;
+ rx_Bitset cset = (rx_Bitset) new_bitset;
+ new_bitset += rx_sizeof_bitset (rx->local_cset_size);
+ /* Copy the edge's character set into its own slot of the block. */
+ rx_bitset_null (rx->local_cset_size, cset);
+ rx_bitset_union (rx->local_cset_size, cset, edge->params.cset);
+ /* Edges are pushed on the front, so their order is reversed. */
+ e->next = cn->edges;
+ cn->edges = e;
+ e->type = edge->type;
+ e->dest = state_base + indx;
+ e->params.cset = cset;
+ edge = edge->next;
+ }
+ while (eclose)
+ {
+ struct rx_possible_future *ec = new_close++;
+ struct rx_hash_item * sp;
+ struct rx_se_list ** sepos;
+ struct rx_se_list * sesrc;
+ struct rx_nfa_state_set * destlst;
+ struct rx_nfa_state_set ** destpos;
+ ec->next = cn->futures;
+ cn->futures = ec;
+ /* Copy the side effect list cell by cell. The hash item's binding
+ * records where a cell was already copied; on meeting such a cell the
+ * existing copy is reused as the tail and the walk stops.
+ */
+ for (sepos = &ec->effects, sesrc = eclose->effects;
+ sesrc;
+ sesrc = sesrc->cdr, sepos = &(*sepos)->cdr)
+ {
+ sp = rx_hash_find (&rx->se_list_memo,
+ (long)sesrc->car ^ (long)sesrc->cdr,
+ sesrc, &se_list_hash_rules);
+ if (sp->binding)
+ {
+ sesrc = (struct rx_se_list *)sp->binding;
+ break;
+ }
+ *new_se_list = *sesrc;
+ sp->binding = (void *)new_se_list;
+ *sepos = new_se_list;
+ ++new_se_list;
+ }
+ /* Terminate the copy: 0 at the end of the list, or the shared tail. */
+ *sepos = sesrc;
+ /* Same scheme for the destination state set. */
+ for (destpos = &ec->destset, destlst = eclose->destset;
+ destlst;
+ destpos = &(*destpos)->cdr, destlst = destlst->cdr)
+ {
+ sp = rx_hash_find (&rx->set_list_memo,
+ ((((long)destlst->car) >> 8)
+ ^ (long)destlst->cdr),
+ destlst, &nfa_set_hash_rules);
+ if (sp->binding)
+ {
+ destlst = (struct rx_nfa_state_set *)sp->binding;
+ break;
+ }
+ *new_nfa_set = *destlst;
+ /* The id is used directly as an array index here; eclose_node only
+ * adds states with non-negative ids to a destset.
+ */
+ new_nfa_set->car = state_base + destlst->car->id;
+ sp->binding = (void *)new_nfa_set;
+ *destpos = new_nfa_set;
+ ++new_nfa_set;
+ }
+ *destpos = destlst;
+ eclose = eclose->next;
+ }
+ }
+ }
+ /* The memo tables and their cells are no longer needed. */
+ rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
+ bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
+ rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
+ bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
+
+ /* Release the original NFA and switch RX over to the compact copy. */
+ rx_free_nfa (rx);
+ rx->nfa_states = (struct rx_nfa_state *)*mem;
+ return 1;
+}
+
+
+/* The functions in the next several pages define the lazy-NFA-conversion used
+ * by matchers. The input to this construction is an NFA such as
+ * is built by compactify_nfa (rx.c). The output is the superNFA.
+ */
+
+/* Match engines can use arbitrary values for opcodes. So, the parse tree
+ * is built using instruction names (enum rx_opcode), but the superstate
+ * nfa is populated with mystery opcodes (void *).
+ *
+ * For convenience, here is an identity table: each entry is the opcode
+ * name itself, cast to void *.
+ *
+ * The labels in re_search_2 would make good values for instructions.
+ */
+
+void * rx_id_instruction_table[rx_num_instructions] =
+{
+ (void *) rx_backtrack_point,
+ (void *) rx_do_side_effects,
+ (void *) rx_cache_miss,
+ (void *) rx_next_char,
+ (void *) rx_backtrack,
+ (void *) rx_error_inx
+};
+
+
+
+/* Memory mgt. for superstate graphs. */
+
+#ifdef __STDC__
+static char *
+rx_cache_malloc (struct rx_cache * cache, int bytes)
+#else
+static char *
+rx_cache_malloc (cache, bytes)
+     struct rx_cache * cache;
+     int bytes;
+#endif
+{
+  /* Carve BYTES out of the cache's current memory block, moving on to the
+   * next block (or asking morecore for one) until the current block has
+   * enough room.  Returns 0 when no further block can be obtained.
+   */
+  char * grant;
+
+  for (;;)
+    {
+      if (cache->bytes_left >= bytes)
+        break;
+
+      /* Step to the following block, if there is a current one. */
+      if (cache->memory_pos)
+        cache->memory_pos = cache->memory_pos->next;
+
+      /* Out of blocks: let the cache's morecore hook try to add one. */
+      if (!cache->memory_pos)
+        {
+          cache->morecore (cache);
+          if (!cache->memory_pos)
+            return 0;
+        }
+
+      /* Usable space starts right after the block header. */
+      cache->bytes_left = cache->memory_pos->bytes;
+      cache->memory_addr = ((char *)cache->memory_pos
+                            + sizeof (struct rx_blocklist));
+    }
+
+  grant = cache->memory_addr;
+  cache->bytes_left -= bytes;
+  cache->memory_addr = grant + bytes;
+  return grant;
+}
+
+#ifdef __STDC__
+static void
+rx_cache_free (struct rx_cache * cache,
+               struct rx_freelist ** freelist, char * mem)
+#else
+static void
+rx_cache_free (cache, freelist, mem)
+     struct rx_cache * cache;
+     struct rx_freelist ** freelist;
+     char * mem;
+#endif
+{
+  /* Push MEM on the front of *FREELIST; the memory itself is reused as
+   * the list cell.  CACHE is not used.
+   */
+  struct rx_freelist * cell = (struct rx_freelist *)mem;
+
+  cell->next = *freelist;
+  *freelist = cell;
+}
+
+
+/* Every node of the partially instantiated superstate graph carries a
+ * transition table with one entry per character.  This stores *ANSWER
+ * in the entry of every character that is a member of TRCSET.
+ */
+#ifdef __STDC__
+static void
+install_transition (struct rx_superstate *super,
+                    struct rx_inx *answer, rx_Bitset trcset)
+#else
+static void
+install_transition (super, answer, trcset)
+     struct rx_superstate *super;
+     struct rx_inx *answer;
+     rx_Bitset trcset;
+#endif
+{
+  struct rx_inx * table = super->transitions;
+  int chr = 0;
+
+  /* The set is consumed one subset word at a time; each word accounts
+   * for 32 consecutive characters, 256 characters in all.
+   */
+  while (chr < 256)
+    {
+      RX_subset word = *trcset++;
+
+      if (!word)
+        {
+          /* No members among these 32 characters: skip them wholesale. */
+          chr += 32;
+          continue;
+        }
+
+      {
+        int stop = chr + 32;
+        RX_subset bit = 1;
+
+        for (; chr < stop; ++chr, bit <<= 1)
+          if (word & bit)
+            table [chr] = *answer;
+      }
+    }
+}
+
+
+#ifdef __STDC__
+static int
+qlen (struct rx_superstate * q)
+#else
+static int
+qlen (q)
+     struct rx_superstate * q;
+#endif
+{
+  /* Number of superstates on the circular next_recyclable ring that Q
+   * belongs to; 0 for a null Q.
+   */
+  struct rx_superstate * cursor;
+  int members;
+
+  if (!q)
+    return 0;
+
+  members = 1;
+  cursor = q->next_recyclable;
+  while (cursor != q)
+    {
+      ++members;
+      cursor = cursor->next_recyclable;
+    }
+  return members;
+}
+
+#ifdef __STDC__
+static void
+check_cache (struct rx_cache * cache)
+#else
+static void
+check_cache (cache)
+     struct rx_cache * cache;
+#endif
+{
+  /* Debugging consistency check: the counters kept in CACHE must agree
+   * with the actual lengths of the semifree and lru rings.  On a mismatch
+   * the function calls itself with a null cache, which dereferences a
+   * null pointer on entry.
+   */
+  struct rx_cache * null_cache = 0;
+  int semifree_count = cache->semifree_superstates;
+  int live_count = cache->superstates - semifree_count;
+
+  if (qlen (cache->semifree_superstate) != semifree_count)
+    check_cache (null_cache);
+  if (qlen (cache->lru_superstate) != live_count)
+    check_cache (null_cache);
+}
+
+/* When a superstate is old and neglected, it can enter a
+ * semi-free state. A semi-free state is slated to die.
+ * Incoming transitions to a semi-free state are re-written
+ * to cause an (interpreted) fault when they are taken.
+ * The fault handler revives the semi-free state, patches
+ * incoming transitions back to normal, and continues.
+ *
+ * The idea is basically to free in two stages, aborting
+ * between the two if the state turns out to be useful again.
+ * When a free is aborted, the rescued superstate is placed
+ * in the most-favored slot to maximize the time until it
+ * is next semi-freed.
+ *
+ * This function moves one unlocked superstate from the lru ring to the
+ * semifree ring. It does nothing if every superstate is already semifree
+ * or if the scan runs out of candidates.
+ */
+
+#ifdef __STDC__
+static void
+semifree_superstate (struct rx_cache * cache)
+#else
+static void
+semifree_superstate (cache)
+ struct rx_cache * cache;
+#endif
+{
+ /* Superstates known to be unusable as victims: the semifree ones plus
+ * every locked one skipped below.
+ */
+ int disqualified = cache->semifree_superstates;
+ if (disqualified == cache->superstates)
+ return;
+ /* NOTE(review): assumes lru_superstate is non-null whenever some
+ * superstate is not semifree -- confirm.
+ */
+ while (cache->lru_superstate->locks)
+ {
+ cache->lru_superstate = cache->lru_superstate->next_recyclable;
+ ++disqualified;
+ if (disqualified == cache->superstates)
+ return;
+ }
+ {
+ struct rx_superstate * it = cache->lru_superstate;
+ /* Unlink the victim from the lru ring; the ring becomes empty if the
+ * victim was its only member.
+ */
+ it->next_recyclable->prev_recyclable = it->prev_recyclable;
+ it->prev_recyclable->next_recyclable = it->next_recyclable;
+ cache->lru_superstate = (it == it->next_recyclable
+ ? 0
+ : it->next_recyclable);
+ /* Link it into the semifree ring, just before the ring's head. */
+ if (!cache->semifree_superstate)
+ {
+ cache->semifree_superstate = it;
+ it->next_recyclable = it;
+ it->prev_recyclable = it;
+ }
+ else
+ {
+ it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
+ it->next_recyclable = cache->semifree_superstate;
+ it->prev_recyclable->next_recyclable = it;
+ it->next_recyclable->prev_recyclable = it;
+ }
+ {
+ struct rx_distinct_future *df;
+ it->is_semifree = 1;
+ ++cache->semifree_superstates;
+ df = it->transition_refs;
+ if (df)
+ {
+ /* Temporarily break the circular same-dest list so the loop below
+ * terminates; the link is restored after the loop.
+ */
+ df->prev_same_dest->next_same_dest = 0;
+ for (df = it->transition_refs; df; df = df->next_same_dest)
+ {
+ df->future_frame.inx = cache->instruction_table[rx_cache_miss];
+ df->future_frame.data = 0;
+ df->future_frame.data_2 = (void *) df;
+ /* If there are any NEXT-CHAR instruction frames that
+ * refer to this state, we convert them to CACHE-MISS frames.
+ */
+ if (!df->effects
+ && (df->edge->options->next_same_super_edge[0]
+ == df->edge->options))
+ install_transition (df->present, &df->future_frame,
+ df->edge->cset);
+ }
+ df = it->transition_refs;
+ df->prev_same_dest->next_same_dest = df;
+ }
+ }
+ }
+}
+
+
+/* Revive the semifree superstate SUPER: point its incoming transitions
+ * back at its transition table, move it from the semifree ring to the
+ * lru ring, and clear its semifree status.
+ */
+#ifdef __STDC__
+static void
+refresh_semifree_superstate (struct rx_cache * cache,
+ struct rx_superstate * super)
+#else
+static void
+refresh_semifree_superstate (cache, super)
+ struct rx_cache * cache;
+ struct rx_superstate * super;
+#endif
+{
+ struct rx_distinct_future *df;
+
+ if (super->transition_refs)
+ {
+ /* Temporarily break the circular same-dest list so the loop below
+ * terminates; the link is restored after the loop.
+ */
+ super->transition_refs->prev_same_dest->next_same_dest = 0;
+ for (df = super->transition_refs; df; df = df->next_same_dest)
+ {
+ df->future_frame.inx = cache->instruction_table[rx_next_char];
+ df->future_frame.data = (void *) super->transitions;
+ /* CACHE-MISS instruction frames that refer to this state,
+ * must be converted to NEXT-CHAR frames.
+ */
+ if (!df->effects
+ && (df->edge->options->next_same_super_edge[0]
+ == df->edge->options))
+ install_transition (df->present, &df->future_frame,
+ df->edge->cset);
+ }
+ super->transition_refs->prev_same_dest->next_same_dest
+ = super->transition_refs;
+ }
+ /* If SUPER heads the semifree ring, pick a new head (none if SUPER was
+ * the only member), then unlink SUPER from that ring.
+ */
+ if (cache->semifree_superstate == super)
+ cache->semifree_superstate = (super->prev_recyclable == super
+ ? 0
+ : super->prev_recyclable);
+ super->next_recyclable->prev_recyclable = super->prev_recyclable;
+ super->prev_recyclable->next_recyclable = super->next_recyclable;
+
+ /* Insert SUPER into the lru ring just before the ring's head, or make it
+ * the sole member of an empty ring.
+ */
+ if (!cache->lru_superstate)
+ (cache->lru_superstate
+ = super->next_recyclable
+ = super->prev_recyclable
+ = super);
+ else
+ {
+ super->next_recyclable = cache->lru_superstate;
+ super->prev_recyclable = cache->lru_superstate->prev_recyclable;
+ super->next_recyclable->prev_recyclable = super;
+ super->prev_recyclable->next_recyclable = super;
+ }
+ super->is_semifree = 0;
+ --cache->semifree_superstates;
+}
+
+/* Note that SUPERSTATE has just been used: revive it if it is semifree,
+ * otherwise move it to the position just before the head of the lru ring.
+ */
+#ifdef __STDC__
+static void
+rx_refresh_this_superstate (struct rx_cache * cache, struct rx_superstate * superstate)
+#else
+static void
+rx_refresh_this_superstate (cache, superstate)
+ struct rx_cache * cache;
+ struct rx_superstate * superstate;
+#endif
+{
+ if (superstate->is_semifree)
+ refresh_semifree_superstate (cache, superstate);
+ /* It is the ring's head: advancing the head leaves it just before the
+ * new head, so no relinking is needed.
+ */
+ else if (cache->lru_superstate == superstate)
+ cache->lru_superstate = superstate->next_recyclable;
+ /* Already just before the head: nothing to do. Otherwise unlink it and
+ * splice it back in before the head.
+ */
+ else if (superstate != cache->lru_superstate->prev_recyclable)
+ {
+ superstate->next_recyclable->prev_recyclable
+ = superstate->prev_recyclable;
+ superstate->prev_recyclable->next_recyclable
+ = superstate->next_recyclable;
+ superstate->next_recyclable = cache->lru_superstate;
+ superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
+ superstate->next_recyclable->prev_recyclable = superstate;
+ superstate->prev_recyclable->next_recyclable = superstate;
+ }
+}
+
+#ifdef __STDC__
+static void
+release_superset_low (struct rx_cache * cache,
+                      struct rx_superset *set)
+#else
+static void
+release_superset_low (cache, set)
+     struct rx_cache * cache;
+     struct rx_superset *set;
+#endif
+{
+  /* Drop one reference to SET.  When the last reference goes away, the
+   * tail is released in turn, SET is removed from the cache's superset
+   * table, and its memory is put on the superset freelist.
+   */
+  struct rx_hash_item * entry;
+
+  if (--set->refs)
+    return;
+
+  if (set->cdr)
+    release_superset_low (cache, set->cdr);
+
+  set->starts_for = 0;
+
+  entry = rx_hash_find (&cache->superset_table,
+                        (unsigned long)set->car ^ set->id ^ (unsigned long)set->cdr,
+                        (void *)set,
+                        &cache->superset_hash_rules);
+  rx_hash_free (entry, &cache->superset_hash_rules);
+
+  rx_cache_free (cache, &cache->free_supersets, (char *)set);
+}
+
+#ifdef __STDC__
+RX_DECL void
+rx_release_superset (struct rx *rx,
+                     struct rx_superset *set)
+#else
+RX_DECL void
+rx_release_superset (rx, set)
+     struct rx *rx;
+     struct rx_superset *set;
+#endif
+{
+  /* Public entry point: drop one reference to SET in RX's cache. */
+  struct rx_cache * owner = rx->cache;
+
+  release_superset_low (owner, set);
+}
+
+/* This tries to add a new superstate to the superstate freelist.
+ * It might, as a result, free some edge pieces or hash tables.
+ * If nothing can be freed because too many locks are being held, fail.
+ *
+ * Returns 1 if a superstate was freed, 0 otherwise.
+ */
+
+#ifdef __STDC__
+static int
+rx_really_free_superstate (struct rx_cache * cache)
+#else
+static int
+rx_really_free_superstate (cache)
+ struct rx_cache * cache;
+#endif
+{
+ int locked_superstates = 0;
+ struct rx_superstate * it;
+
+ if (!cache->superstates)
+ return 0;
+
+ {
+ /* This is a total guess. The idea is that we should expect as
+ * many misses as we've recently experienced. I.e., cache->misses
+ * should be the same as cache->semifree_superstates.
+ */
+ /* Halve both counters until their sum fits under superstates_allowed. */
+ while ((cache->hits + cache->misses) > cache->superstates_allowed)
+ {
+ cache->hits >>= 1;
+ cache->misses >>= 1;
+ }
+ /* Semifree two more superstates when the semifree share of all
+ * superstates is below the miss share of all lookups.
+ */
+ if ( ((cache->hits + cache->misses) * cache->semifree_superstates)
+ < (cache->superstates * cache->misses))
+ {
+ semifree_superstate (cache);
+ semifree_superstate (cache);
+ }
+ }
+
+ /* A locked superstate cannot be freed: revive locked ones found at the
+ * head of the semifree ring, giving up once every superstate has been
+ * counted as locked.
+ */
+ while (cache->semifree_superstate && cache->semifree_superstate->locks)
+ {
+ refresh_semifree_superstate (cache, cache->semifree_superstate);
+ ++locked_superstates;
+ if (locked_superstates == cache->superstates)
+ return 0;
+ }
+
+ /* Pick the victim: the head of the semifree ring if there is one,
+ * otherwise the first unlocked superstate on the lru ring. Either way
+ * it is unlinked from its ring.
+ */
+ if (cache->semifree_superstate)
+ {
+ it = cache->semifree_superstate;
+ it->next_recyclable->prev_recyclable = it->prev_recyclable;
+ it->prev_recyclable->next_recyclable = it->next_recyclable;
+ cache->semifree_superstate = ((it == it->next_recyclable)
+ ? 0
+ : it->next_recyclable);
+ --cache->semifree_superstates;
+ }
+ else
+ {
+ while (cache->lru_superstate->locks)
+ {
+ cache->lru_superstate = cache->lru_superstate->next_recyclable;
+ ++locked_superstates;
+ if (locked_superstates == cache->superstates)
+ return 0;
+ }
+ it = cache->lru_superstate;
+ it->next_recyclable->prev_recyclable = it->prev_recyclable;
+ it->prev_recyclable->next_recyclable = it->next_recyclable;
+ cache->lru_superstate = ((it == it->next_recyclable)
+ ? 0
+ : it->next_recyclable);
+ }
+
+ /* Turn every transition that leads to the victim into a cache miss and
+ * forget its destination. The circular same-dest list is broken for the
+ * walk and restored afterwards.
+ */
+ if (it->transition_refs)
+ {
+ struct rx_distinct_future *df;
+ for (df = it->transition_refs,
+ df->prev_same_dest->next_same_dest = 0;
+ df;
+ df = df->next_same_dest)
+ {
+ df->future_frame.inx = cache->instruction_table[rx_cache_miss];
+ df->future_frame.data = 0;
+ df->future_frame.data_2 = (void *) df;
+ df->future = 0;
+ }
+ it->transition_refs->prev_same_dest->next_same_dest =
+ it->transition_refs;
+ }
+ /* Free the victim's outgoing edges together with their distinct futures,
+ * detaching each future from the same-dest list of its destination.
+ */
+ {
+ struct rx_super_edge *tc = it->edges;
+ while (tc)
+ {
+ struct rx_distinct_future * df;
+ struct rx_super_edge *tct = tc->next;
+ df = tc->options;
+ /* Break the circular list of options so the walk below terminates. */
+ df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
+ while (df)
+ {
+ struct rx_distinct_future *dft = df;
+ df = df->next_same_super_edge[0];
+
+
+ /* If the destination's reference list starts at this future, advance
+ * its head, or clear it when this was the only reference.
+ */
+ if (dft->future && dft->future->transition_refs == dft)
+ {
+ dft->future->transition_refs = dft->next_same_dest;
+ if (dft->future->transition_refs == dft)
+ dft->future->transition_refs = 0;
+ }
+ dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
+ dft->prev_same_dest->next_same_dest = dft->next_same_dest;
+ rx_cache_free (cache, &cache->free_discernable_futures,
+ (char *)dft);
+ }
+ rx_cache_free (cache, &cache->free_transition_classes, (char *)tc);
+ tc = tct;
+ }
+ }
+
+ /* Detach the victim from its superset, drop the reference it held, and
+ * put its memory on the superstate freelist.
+ */
+ if (it->contents->superstate == it)
+ it->contents->superstate = 0;
+ release_superset_low (cache, it->contents);
+ rx_cache_free (cache, &cache->free_superstates, (char *)it);
+ --cache->superstates;
+ return 1;
+}
+
+#ifdef __STDC__
+static char *
+rx_cache_get (struct rx_cache * cache,
+              struct rx_freelist ** freelist)
+#else
+static char *
+rx_cache_get (cache, freelist)
+     struct rx_cache * cache;
+     struct rx_freelist ** freelist;
+#endif
+{
+  /* Pop one cell from *FREELIST.  While the list is empty, superstates
+   * are freed one at a time in the hope of refilling it.  Returns 0 if
+   * the list is still empty when nothing more can be freed.
+   */
+  struct rx_freelist * head;
+
+  for (;;)
+    {
+      if (*freelist)
+        break;
+      if (!rx_really_free_superstate (cache))
+        break;
+    }
+
+  head = *freelist;
+  if (head)
+    *freelist = head->next;
+  return (char *)head;
+}
+
+#ifdef __STDC__
+static char *
+rx_cache_malloc_or_get (struct rx_cache * cache,
+                        struct rx_freelist ** freelist, int bytes)
+#else
+static char *
+rx_cache_malloc_or_get (cache, freelist, bytes)
+     struct rx_cache * cache;
+     struct rx_freelist ** freelist;
+     int bytes;
+#endif
+{
+  /* With an empty freelist, fresh cache memory is tried first.  In every
+   * other case (non-empty list, or no fresh memory available) the cell
+   * comes from rx_cache_get.
+   */
+  char * fresh = 0;
+
+  if (!*freelist)
+    fresh = rx_cache_malloc (cache, bytes);
+
+  return fresh ? fresh : rx_cache_get (cache, freelist);
+}
+
+/* Obtain memory for one superstate (the structure plus one rx_inx per
+ * character of the cache's local character set).  Returns 0 when no
+ * memory can be obtained; cache->superstates counts only superstates that
+ * were actually handed out.
+ */
+#ifdef __STDC__
+static char *
+rx_cache_get_superstate (struct rx_cache * cache)
+#else
+static char *
+rx_cache_get_superstate (cache)
+     struct rx_cache * cache;
+#endif
+{
+  char * answer;
+  int bytes = (   sizeof (struct rx_superstate)
+               +  cache->local_cset_size * sizeof (struct rx_inx));
+
+  /* Below the allowance and nothing to recycle: try fresh memory first. */
+  if (!cache->free_superstates
+      && (cache->superstates < cache->superstates_allowed))
+    {
+      answer = rx_cache_malloc (cache, bytes);
+      if (answer)
+        {
+          ++cache->superstates;
+          return answer;
+        }
+    }
+
+  /* Recycle one, freeing existing superstates if necessary. */
+  answer = rx_cache_get (cache, &cache->free_superstates);
+  if (!answer)
+    {
+      /* Nothing could be recycled: as a last resort exceed the allowance,
+       * raising it to match if that succeeds.
+       */
+      answer = rx_cache_malloc (cache, bytes);
+      if (answer)
+        ++cache->superstates_allowed;
+    }
+
+  /* Count the superstate only if one was really obtained.  Bumping the
+   * counter on failure would leave it permanently out of step with the
+   * lru/semifree rings that the recycling code measures it against.
+   */
+  if (answer)
+    ++cache->superstates;
+  return answer;
+}
+
+
+
+#ifdef __STDC__
+static int
+supersetcmp (void * va, void * vb)
+#else
+static int
+supersetcmp (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  /* Despite the name this is an equality test: it returns 1 when the two
+   * supersets are the same object, or are both non-null with identical
+   * car and cdr pointers, and 0 otherwise.
+   */
+  struct rx_superset * lhs = (struct rx_superset *)va;
+  struct rx_superset * rhs = (struct rx_superset *)vb;
+
+  if (lhs == rhs)
+    return 1;
+  if (!lhs || !rhs)
+    return 0;
+  return (lhs->car == rhs->car) && (lhs->cdr == rhs->cdr);
+}
+
+#ifdef __STDC__
+static struct rx_hash_item *
+superset_allocator (struct rx_hash_rules * rules, void * val)
+#else
+static struct rx_hash_item *
+superset_allocator (rules, val)
+     struct rx_hash_rules * rules;
+     void * val;
+#endif
+{
+  /* RULES is the superset_hash_rules member of a struct rx_cache; step
+   * back by that member's offset to recover the enclosing cache.
+   */
+  unsigned long rules_offset
+    = (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules);
+  struct rx_cache * cache
+    = (struct rx_cache *) ((char *)rules - rules_offset);
+  struct rx_superset * template = (struct rx_superset *)val;
+  struct rx_superset * newset;
+
+  newset = ((struct rx_superset *)
+            rx_cache_malloc_or_get (cache,
+                                    &cache->free_supersets,
+                                    sizeof (*template)));
+  if (!newset)
+    return 0;
+
+  /* Build a new superset from the template; it starts with no references
+   * and no superstate.
+   */
+  newset->refs = 0;
+  newset->car = template->car;
+  newset->id = template->car->id;
+  newset->cdr = template->cdr;
+  newset->superstate = 0;
+
+  /* NOTE(review): no variable named `rx' is declared in this function;
+   * this presumably compiles only because rx_protect_superset is a macro
+   * that ignores its first argument -- confirm against rx.h.
+   */
+  rx_protect_superset (rx, template->cdr);
+
+  /* The hash item is embedded in the superset and refers back to it. */
+  newset->hash_item.data = (void *)newset;
+  newset->hash_item.binding = 0;
+  return &newset->hash_item;
+}
+
+/* Hash-table callback that supplies storage for one struct rx_hash.
+ * RULES is the superset_hash_rules member of a struct rx_cache; the
+ * storage is taken from that cache's free_hash list or from fresh
+ * cache memory.  Returns 0 when no storage is available.
+ */
+#ifdef __STDC__
+static struct rx_hash *
+super_hash_allocator (struct rx_hash_rules * rules)
+#else
+static struct rx_hash *
+super_hash_allocator (rules)
+     struct rx_hash_rules * rules;
+#endif
+{
+  /* Byte offset of the rules member inside its cache. */
+  unsigned long rules_offset
+    = (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules);
+  struct rx_cache * owner
+    = (struct rx_cache *)((char *)rules - rules_offset);
+  char * storage
+    = rx_cache_malloc_or_get (owner, &owner->free_hash,
+                              sizeof (struct rx_hash));
+
+  return (struct rx_hash *)storage;
+}
+
+
+/* Hash-table callback that returns a struct rx_hash to its cache.
+ * RULES is the superset_hash_rules member of a struct rx_cache; the
+ * owning cache is recovered by subtracting that member's byte offset,
+ * and HASH is pushed onto the cache's free_hash list.
+ */
+#ifdef __STDC__
+static void
+super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
+#else
+static void
+super_hash_liberator (hash, rules)
+     struct rx_hash * hash;
+     struct rx_hash_rules * rules;
+#endif
+{
+  /* The subtraction must be done on a char pointer so that the offset
+   * is counted in bytes.  Casting to struct rx_cache * first would
+   * scale the offset by sizeof (struct rx_cache), which is only
+   * harmless while the offset happens to be zero.
+   */
+  struct rx_cache * cache
+    = ((struct rx_cache *)
+       ((char *)rules
+        - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
+  rx_cache_free (cache, &cache->free_hash, (char *)hash);
+}
+
+/* Hash-table callback invoked to dispose of a superset hash item.
+ * Deliberately does nothing: IT and RULES are ignored.  The hash item
+ * is embedded in its rx_superset (see superset_allocator), so there is
+ * no separate storage to give back here.
+ */
+#ifdef __STDC__
+static void
+superset_hash_item_liberator (struct rx_hash_item * it,
+ struct rx_hash_rules * rules)
+#else
+static void
+superset_hash_item_liberator (it, rules) /* Well, it does ya know. */
+ struct rx_hash_item * it;
+ struct rx_hash_rules * rules;
+#endif
+{
+}
+
+/* Upper limit consulted by rx_morecore: the default cache stops growing
+ * once rx_default_cache_got reaches this value, and rx_morecore also
+ * copies it into cache->superstates_allowed.
+ */
+int rx_cache_bound = 128;
+/* Running total advanced by 16 on each rx_morecore growth step. */
+static int rx_default_cache_got = 0;
+
+/* Estimate how many bytes of cache memory SUPERS superstates need when
+ * the character set has CSET_SIZE members.  The per-superstate cost is
+ * the sum of:
+ *   1.03 * (one bitset of CSET_SIZE bits + one rx_super_edge)
+ *   1.80 * one rx_possible_future
+ *   one rx_superstate plus CSET_SIZE rx_inx transition slots
+ * The result is truncated to int.
+ */
+#ifdef __STDC__
+static int
+bytes_for_cache_size (int supers, int cset_size)
+#else
+static int
+bytes_for_cache_size (supers, cset_size)
+ int supers;
+ int cset_size;
+#endif
+{
+ /* What the hell is this? !!!*/
+ /* NOTE(review): the 1.03 and 1.80 weights are unexplained in the
+ * source; presumably empirical tuning factors -- confirm before
+ * changing them.
+ */
+ return (int)
+ ((float)supers *
+ ( (1.03 * (float) ( rx_sizeof_bitset (cset_size)
+ + sizeof (struct rx_super_edge)))
+ + (1.80 * (float) sizeof (struct rx_possible_future))
+ + (float) ( sizeof (struct rx_superstate)
+ + cset_size * sizeof (struct rx_inx))));
+}
+
+/* Grow CACHE by one block big enough for roughly 16 superstates.
+ *
+ * Nothing happens once rx_default_cache_got has reached rx_cache_bound.
+ * Otherwise the superstate quota of CACHE is set to rx_cache_bound, a
+ * new block is malloc'ed, appended to the end of cache->memory, and
+ * made the current allocation region (memory_pos / memory_addr /
+ * bytes_left).
+ *
+ * The growth budget is charged only after malloc succeeds, so a
+ * transient out-of-memory condition does not permanently use up the
+ * cache's right to grow.  On malloc failure the block list is left
+ * exactly as it was.
+ */
+#ifdef __STDC__
+static void
+rx_morecore (struct rx_cache * cache)
+#else
+static void
+rx_morecore (cache)
+     struct rx_cache * cache;
+#endif
+{
+  struct rx_blocklist ** pos;
+  struct rx_blocklist * block;
+  int size;
+
+  if (rx_default_cache_got >= rx_cache_bound)
+    return;
+
+  cache->superstates_allowed = rx_cache_bound;
+
+  size = bytes_for_cache_size (16, cache->local_cset_size);
+  block = ((struct rx_blocklist *)
+           malloc (size + sizeof (struct rx_blocklist)));
+  if (!block)
+    return;
+
+  /* Only now has the cache really obtained 16 more superstates' worth. */
+  rx_default_cache_got += 16;
+
+  block->next = 0;
+  block->bytes = size;
+
+  /* Append the block to the end of the cache's block list. */
+  pos = &cache->memory;
+  while (*pos)
+    pos = &(*pos)->next;
+  *pos = block;
+
+  /* Usable memory starts just past the block header. */
+  cache->memory_pos = block;
+  cache->memory_addr = (char *)block + sizeof (*block);
+  cache->bytes_left = size;
+}
+
+/* The statically allocated default cache.
+ * The leading brace group supplies the five hash-rule callbacks defined
+ * above (compare, hash allocate/free, item allocate/free), and
+ * rx_morecore is installed as the routine that grows the cache.
+ * NOTE(review): every initializer here is positional, so its meaning
+ * depends on the field order of struct rx_cache, which is not visible
+ * in this file section -- check rx.h before adding, removing or
+ * reordering entries.  The literal 128 presumably mirrors the initial
+ * value of rx_cache_bound and 256 the character-set size; confirm.
+ */
+static struct rx_cache default_cache =
+{
+ {
+ supersetcmp,
+ super_hash_allocator,
+ super_hash_liberator,
+ superset_allocator,
+ superset_hash_item_liberator,
+ },
+ 0,
+ 0,
+ 0,
+ 0,
+ rx_morecore,
+
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+
+ 0,
+ 0,
+
+ 0,
+
+ 0,
+ 0,
+ 0,
+ 0,
+ 128,
+
+ 256,
+ rx_id_instruction_table,
+
+ {
+ 0,
+ 0,
+ {0},
+ {0},
+ {0}
+ }
+};
+
+/* This adds an element to a superstate set. These sets are lists, such
+ * that lists with == elements are ==. The empty set is returned by
+ * rx_superset_cons (rx, 0, 0) and is NOT equivalent to the null pointer
+ * (struct rx_superset *)0.
+ */
+
+/* Return the unique superset cell whose head is CAR and whose tail is
+ * CDR, creating it in the cache's superset table if necessary.
+ * When both CAR and CDR are 0 the cache's shared empty superset is
+ * returned instead; it is created on first use, zero-filled, and given
+ * a reference count of 1000.
+ * Returns 0 if storage could not be obtained.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_superset *
+rx_superset_cons (struct rx * rx,
+                  struct rx_nfa_state *car, struct rx_superset *cdr)
+#else
+RX_DECL struct rx_superset *
+rx_superset_cons (rx, car, cdr)
+     struct rx * rx;
+     struct rx_nfa_state *car;
+     struct rx_superset *cdr;
+#endif
+{
+  struct rx_cache * cache = rx->cache;
+
+  if (car || cdr)
+    {
+      /* Ordinary cell: look it up (or have it created) by content. */
+      struct rx_superset probe;
+      struct rx_hash_item * found;
+
+      probe.car = car;
+      probe.cdr = cdr;
+      probe.id = car->id;
+      found = rx_hash_store (&cache->superset_table,
+                             (unsigned long)car ^ car->id ^ (unsigned long)cdr,
+                             (void *)&probe,
+                             &cache->superset_hash_rules);
+      if (!found)
+        return 0;
+      return (struct rx_superset *)found->data;
+    }
+
+  /* The empty set: one shared cell per cache, made on demand. */
+  if (cache->empty_superset)
+    return cache->empty_superset;
+
+  cache->empty_superset
+    = ((struct rx_superset *)
+       rx_cache_malloc_or_get (cache, &cache->free_supersets,
+                               sizeof (struct rx_superset)));
+  if (!cache->empty_superset)
+    return 0;
+
+  bzero (cache->empty_superset, sizeof (struct rx_superset));
+  cache->empty_superset->refs = 1000;
+  return cache->empty_superset;
+}
+
+/* This computes a union of two NFA state sets. The sets do not have the
+ * same representation though. One is a RX_SUPERSET structure (part
+ * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
+ */
+
+/* Return the superset that is the union of SET and the NFA state set
+ * ECL.  Both inputs are walked recursively; at each step the state with
+ * the greater address is placed in front, and a state present in both
+ * is kept once.
+ *
+ * Returns 0 on allocation failure.  A failed recursive call is never
+ * passed on to rx_superset_cons, and if the final cons fails the
+ * already built tail is protected and released again so that a tail
+ * nobody else references is returned to the cache.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_superset *
+rx_superstate_eclosure_union
+  (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
+#else
+RX_DECL struct rx_superset *
+rx_superstate_eclosure_union (rx, set, ecl)
+     struct rx * rx;
+     struct rx_superset *set;
+     struct rx_nfa_state_set *ecl;
+#endif
+{
+  struct rx_superset * tail;
+  struct rx_superset * answer;
+  struct rx_nfa_state * first;
+
+  if (!ecl)
+    return set;
+
+  /* The head of ECL is already in SET: nothing to add for it. */
+  if (set->car && (set->car == ecl->car))
+    return rx_superstate_eclosure_union (rx, set, ecl->cdr);
+
+  if (set->car && (set->car > ecl->car))
+    {
+      /* SET's head goes first; merge its tail with all of ECL. */
+      tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
+      first = set->car;
+    }
+  else
+    {
+      /* SET is empty, or ECL's head goes first. */
+      tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
+      first = ecl->car;
+    }
+
+  /* A 0 tail means a nested allocation failed; consing onto it would
+   * make the allocator dereference a null cdr.
+   */
+  if (!tail)
+    return 0;
+
+  answer = rx_superset_cons (rx, first, tail);
+  if (!answer)
+    {
+      rx_protect_superset (rx, tail);
+      rx_release_superset (rx, tail);
+      return 0;
+    }
+  return answer;
+}
+
+
+
+
+/*
+ * This makes sure that a list of rx_distinct_futures contains
+ * a future for each possible set of side effects in the eclosure
+ * of a given state. This is some of the work of filling in a
+ * superstate transition.
+ */
+
+/* Merge the possible futures of NFA state STATE into the list DF.
+ *
+ * DF is either 0 or the head of a circular doubly linked list of
+ * rx_distinct_future nodes, linked through next_same_super_edge[0]
+ * (forward) and next_same_super_edge[1] (backward).  For every future
+ * of STATE whose effects pointer is not already present in the list, a
+ * new node is allocated, initialised with SUPERSTATE as its `present'
+ * and no destination yet, and spliced in at the position chosen by
+ * rx->se_list_cmp.
+ *
+ * Returns the (possibly new) head of the list, or 0 if a node could not
+ * be allocated.  Nodes that were linked in before the failure remain
+ * linked into the caller's list.
+ */
+#ifdef __STDC__
+static struct rx_distinct_future *
+include_futures (struct rx *rx,
+ struct rx_distinct_future *df, struct rx_nfa_state
+ *state, struct rx_superstate *superstate)
+#else
+static struct rx_distinct_future *
+include_futures (rx, df, state, superstate)
+ struct rx *rx;
+ struct rx_distinct_future *df;
+ struct rx_nfa_state *state;
+ struct rx_superstate *superstate;
+#endif
+{
+ struct rx_possible_future *future;
+ struct rx_cache * cache = rx->cache;
+ for (future = state->futures; future; future = future->next)
+ {
+ struct rx_distinct_future *dfp;
+ struct rx_distinct_future *insert_before = 0;
+ /* Temporarily cut the ring after its last node so that the scan
+ * below ends at a 0 link.
+ */
+ if (df)
+ df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
+ /* Look for a node with the same effects.  If a node that sorts
+ * after FUTURE is met first, remember it as the insertion point
+ * and stop with dfp == 0.
+ */
+ for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
+ if (dfp->effects == future->effects)
+ break;
+ else
+ {
+ int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
+ if (order > 0)
+ {
+ insert_before = dfp;
+ dfp = 0;
+ break;
+ }
+ }
+ /* Close the ring again. */
+ if (df)
+ df->next_same_super_edge[1]->next_same_super_edge[0] = df;
+ if (!dfp)
+ {
+ /* No existing node covers these effects: make one. */
+ dfp
+ = ((struct rx_distinct_future *)
+ rx_cache_malloc_or_get (cache, &cache->free_discernable_futures,
+ sizeof (struct rx_distinct_future)));
+ if (!dfp)
+ return 0;
+ if (!df)
+ {
+ /* First node: a ring of one. */
+ df = insert_before = dfp;
+ df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
+ }
+ else if (!insert_before)
+ /* Nothing sorts after it: inserting before the head of a ring
+ * appends at the end.
+ */
+ insert_before = df;
+ else if (insert_before == df)
+ /* Inserting before the old head makes the new node the head. */
+ df = dfp;
+
+ /* Splice dfp in just before insert_before. */
+ dfp->next_same_super_edge[0] = insert_before;
+ dfp->next_same_super_edge[1]
+ = insert_before->next_same_super_edge[1];
+ dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
+ dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
+ /* Not yet sharing a destination with any other edge. */
+ dfp->next_same_dest = dfp->prev_same_dest = dfp;
+ dfp->future = 0;
+ dfp->present = superstate;
+ /* Until the destination is solved, following this edge is a
+ * cache miss that carries the edge itself in data_2.
+ */
+ dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
+ dfp->future_frame.data = 0;
+ dfp->future_frame.data_2 = (void *) dfp;
+ dfp->side_effects_frame.inx
+ = rx->instruction_table[rx_do_side_effects];
+ dfp->side_effects_frame.data = 0;
+ dfp->side_effects_frame.data_2 = (void *) dfp;
+ dfp->effects = future->effects;
+ }
+ }
+ return df;
+}
+
+
+
+/* This constructs a new superstate from its state set. The only
+ * complexity here is memory management.
+ */
+/* Return the superstate whose contents are SET, building it on a cache
+ * miss.
+ *
+ * If SET already has a superstate belonging to this RX (same rx_id), it
+ * is refreshed and returned and cache->hits is advanced.  If SET has a
+ * superstate with a different rx_id, that stale superstate is moved
+ * from the LRU ring to the semifree ring (unless it is already
+ * semifree), detached from SET, and the call proceeds as a miss.
+ *
+ * On a miss a superstate is obtained from the cache, linked into the
+ * LRU ring, bound to SET (which is protected), and every transition
+ * slot is set to the cache-miss instruction.
+ *
+ * Returns 0 if no superstate storage could be obtained.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_superstate *
+rx_superstate (struct rx *rx,
+ struct rx_superset *set)
+#else
+RX_DECL struct rx_superstate *
+rx_superstate (rx, set)
+ struct rx *rx;
+ struct rx_superset *set;
+#endif
+{
+ struct rx_cache * cache = rx->cache;
+ struct rx_superstate * superstate = 0;
+
+ /* Does the superstate already exist in the cache? */
+ if (set->superstate)
+ {
+ if (set->superstate->rx_id != rx->rx_id)
+ {
+ /* Aha. It is in the cache, but belongs to a superstate
+ * that refers to an NFA that no longer exists.
+ * (We know it no longer exists because it was evidently
+ * stored in the same region of memory as the current nfa
+ * yet it has a different id.)
+ */
+ superstate = set->superstate;
+ if (!superstate->is_semifree)
+ {
+ /* If it heads the LRU ring, advance the head (or empty the
+ * ring when it is the only member).
+ */
+ if (cache->lru_superstate == superstate)
+ {
+ cache->lru_superstate = superstate->next_recyclable;
+ if (cache->lru_superstate == superstate)
+ cache->lru_superstate = 0;
+ }
+ {
+ /* Unlink from the LRU ring... */
+ superstate->next_recyclable->prev_recyclable
+ = superstate->prev_recyclable;
+ superstate->prev_recyclable->next_recyclable
+ = superstate->next_recyclable;
+ /* ...and make it the head of the semifree ring. */
+ if (!cache->semifree_superstate)
+ {
+ (cache->semifree_superstate
+ = superstate->next_recyclable
+ = superstate->prev_recyclable
+ = superstate);
+ }
+ else
+ {
+ superstate->next_recyclable = cache->semifree_superstate;
+ superstate->prev_recyclable
+ = cache->semifree_superstate->prev_recyclable;
+ superstate->next_recyclable->prev_recyclable
+ = superstate;
+ superstate->prev_recyclable->next_recyclable
+ = superstate;
+ cache->semifree_superstate = superstate;
+ }
+ /* NOTE(review): the superstate is now on the semifree ring
+ * but its is_semifree flag is not set here -- confirm this
+ * is intended.
+ */
+ ++cache->semifree_superstates;
+ }
+ }
+ set->superstate = 0;
+ goto handle_cache_miss;
+ }
+ ++cache->hits;
+ superstate = set->superstate;
+
+ rx_refresh_this_superstate (cache, superstate);
+ return superstate;
+ }
+
+ handle_cache_miss:
+
+ /* This point reached only for cache misses. */
+ ++cache->misses;
+#if RX_DEBUG
+ /* NOTE(review): this trace prints the pointer `set' with %d. */
+ if (rx_debug_trace > 1)
+ {
+ struct rx_superset * setp = set;
+ fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
+ while (setp)
+ {
+ fprintf (stderr, "%d ", setp->id);
+ setp = setp->cdr;
+ }
+ fprintf (stderr, "(%d)\n", set);
+ }
+#endif
+ superstate = (struct rx_superstate *)rx_cache_get_superstate (cache);
+ if (!superstate)
+ return 0;
+
+ /* Link the new superstate into the LRU ring, just before its head. */
+ if (!cache->lru_superstate)
+ (cache->lru_superstate
+ = superstate->next_recyclable
+ = superstate->prev_recyclable
+ = superstate);
+ else
+ {
+ superstate->next_recyclable = cache->lru_superstate;
+ superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
+ ( superstate->prev_recyclable->next_recyclable
+ = superstate->next_recyclable->prev_recyclable
+ = superstate);
+ }
+ superstate->rx_id = rx->rx_id;
+ superstate->transition_refs = 0;
+ superstate->locks = 0;
+ superstate->is_semifree = 0;
+ /* Bind the set and the superstate to each other; the superstate
+ * holds a reference on its contents.
+ */
+ set->superstate = superstate;
+ superstate->contents = set;
+ rx_protect_superset (rx, set);
+ superstate->edges = 0;
+ {
+ int x;
+ /* None of the transitions from this superstate are known yet. */
+ for (x = 0; x < rx->local_cset_size; ++x) /* &&&&& 3.8 % */
+ {
+ struct rx_inx * ifr = &superstate->transitions[x];
+ ifr->inx = rx->instruction_table [rx_cache_miss];
+ ifr->data = ifr->data_2 = 0;
+ }
+ }
+ return superstate;
+}
+
+
+/* This computes the destination set of one edge of the superstate NFA.
+ * Note that a RX_DISTINCT_FUTURE is a superstate edge.
+ * Returns 0 on an allocation failure.
+ */
+
+/* Compute the destination superstate of the superstate edge DF and
+ * record it in DF and in every edge on DF's same-destination ring.
+ *
+ * The destination set is the union, over every NFA state in the source
+ * superstate and every NFA edge of that state whose character set
+ * contains the edge's character set, of the destination sets of those
+ * possible futures that have the same side effects as DF.
+ *
+ * If the destination set is empty, DF's future frame is turned into a
+ * backtrack instruction and 1 is returned.  Otherwise the superstate
+ * for the set is looked up or built, the edges are pointed at it, and
+ * they are spliced into the destination's transition_refs ring.
+ *
+ * Returns 1 on success and 0 on allocation failure.  A failure to
+ * obtain the empty set is reported rather than dereferenced, and the
+ * reference held on the partial solution is dropped when a union
+ * fails.
+ */
+#ifdef __STDC__
+static int
+solve_destination (struct rx *rx, struct rx_distinct_future *df)
+#else
+static int
+solve_destination (rx, df)
+     struct rx *rx;
+     struct rx_distinct_future *df;
+#endif
+{
+  struct rx_super_edge *tc = df->edge;
+  struct rx_superset *nfa_state;
+  struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
+  struct rx_superset *solution = nil_set;
+  struct rx_superstate *dest;
+
+  /* rx_superset_cons returns 0 when the empty set cannot be allocated. */
+  if (!nil_set)
+    return 0;
+
+  rx_protect_superset (rx, solution);
+  /* Iterate over all NFA states in the state set of this superstate. */
+  for (nfa_state = df->present->contents;
+       nfa_state->car;
+       nfa_state = nfa_state->cdr)
+    {
+      struct rx_nfa_edge *e;
+      /* Iterate over all edges of each NFA state. */
+      for (e = nfa_state->car->edges; e; e = e->next)
+        /* If we find an edge that is labeled with
+         * the characters we are solving for.....
+         */
+        if (rx_bitset_is_subset (rx->local_cset_size,
+                                 tc->cset, e->params.cset))
+          {
+            struct rx_nfa_state *n = e->dest;
+            struct rx_possible_future *pf;
+            /* ....search the partial epsilon closures of the destination
+             * of that edge for a path that involves the same set of
+             * side effects we are solving for.
+             * If we find such a RX_POSSIBLE_FUTURE, we add members to the
+             * stateset we are computing.
+             */
+            for (pf = n->futures; pf; pf = pf->next)
+              if (pf->effects == df->effects)
+                {
+                  struct rx_superset * old_sol;
+                  old_sol = solution;
+                  solution = rx_superstate_eclosure_union (rx, solution,
+                                                           pf->destset);
+                  if (!solution)
+                    {
+                      /* Drop the reference we still hold on the
+                       * partial result before giving up.
+                       */
+                      rx_release_superset (rx, old_sol);
+                      return 0;
+                    }
+                  rx_protect_superset (rx, solution);
+                  rx_release_superset (rx, old_sol);
+                }
+          }
+    }
+  /* It is possible that the RX_DISTINCT_FUTURE we are working on has
+   * the empty set of NFA states as its definition.  In that case, this
+   * is a failure point.
+   */
+  if (solution == nil_set)
+    {
+      df->future_frame.inx = (void *) rx_backtrack;
+      df->future_frame.data = 0;
+      df->future_frame.data_2 = 0;
+      return 1;
+    }
+  dest = rx_superstate (rx, solution);
+  rx_release_superset (rx, solution);
+  if (!dest)
+    return 0;
+
+  {
+    /* Point every edge on DF's same-destination ring at DEST.  The ring
+     * is cut open for the walk and closed again afterwards.
+     */
+    struct rx_distinct_future *dft;
+    dft = df;
+    df->prev_same_dest->next_same_dest = 0;
+    while (dft)
+      {
+        dft->future = dest;
+        dft->future_frame.inx = rx->instruction_table[rx_next_char];
+        dft->future_frame.data = (void *) dest->transitions;
+        dft = dft->next_same_dest;
+      }
+    df->prev_same_dest->next_same_dest = df;
+  }
+  if (!dest->transition_refs)
+    dest->transition_refs = df;
+  else
+    {
+      /* Splice DF's ring into the ring of edges already leading to DEST. */
+      struct rx_distinct_future *dft = dest->transition_refs->next_same_dest;
+      dest->transition_refs->next_same_dest = df->next_same_dest;
+      df->next_same_dest->prev_same_dest = dest->transition_refs;
+      df->next_same_dest = dft;
+      dft->prev_same_dest = df;
+    }
+  return 1;
+}
+
+
+/* This takes a superstate and a character, and computes some edges
+ * from the superstate NFA. In particular, this computes all edges
+ * that lead from SUPERSTATE given CHR. This function also
+ * computes the set of characters that share this edge set.
+ * This returns 0 on allocation error.
+ * The character set and list of edges are returned through
+ * the parameters CSETOUT and DFOUT.
+ */
+
+/* Build the list of superstate edges leaving SUPERSTATE on CHR.
+ * On return *DFOUT is the head of the edge list (0 if no NFA edge
+ * applies) and CSETOUT holds the characters that take exactly the same
+ * NFA edges as CHR.  Returns 1 on success; on allocation failure the
+ * edges collected so far are freed and 0 is returned.
+ */
+#ifdef __STDC__
+static int
+compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
+ rx_Bitset csetout, struct rx_superstate *superstate,
+ unsigned char chr)
+#else
+static int
+compute_super_edge (rx, dfout, csetout, superstate, chr)
+ struct rx *rx;
+ struct rx_distinct_future **dfout;
+ rx_Bitset csetout;
+ struct rx_superstate *superstate;
+ unsigned char chr;
+#endif
+{
+ struct rx_superset *stateset = superstate->contents;
+
+ /* To compute the set of characters that share edges with CHR,
+ * we start with the full character set, and subtract.
+ */
+ rx_bitset_universe (rx->local_cset_size, csetout);
+ *dfout = 0;
+
+ /* Iterate over the NFA states in the superstate state-set. */
+ while (stateset->car)
+ {
+ struct rx_nfa_edge *e;
+ for (e = stateset->car->edges; e; e = e->next)
+ if (RX_bitset_member (e->params.cset, chr))
+ {
+ /* If we find an NFA edge that applies, we make sure there
+ * are corresponding edges in the superstate NFA.
+ */
+ {
+ struct rx_distinct_future * saved;
+ /* Keep the old head: include_futures returns 0 on failure. */
+ saved = *dfout;
+ *dfout = include_futures (rx, *dfout, e->dest, superstate);
+ if (!*dfout)
+ {
+ /* Allocation failed: free every edge reachable from the
+ * saved head.  The ring is cut open so the walk ends.
+ */
+ struct rx_distinct_future * df;
+ df = saved;
+ if (df)
+ df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
+ while (df)
+ {
+ struct rx_distinct_future *dft;
+ dft = df;
+ df = df->next_same_super_edge[0];
+
+ /* If the destination superstate uses this edge as the
+ * entry to its transition_refs ring, move that entry on
+ * (or clear it when this is the only edge).
+ */
+ if (dft->future && dft->future->transition_refs == dft)
+ {
+ dft->future->transition_refs = dft->next_same_dest;
+ if (dft->future->transition_refs == dft)
+ dft->future->transition_refs = 0;
+ }
+ /* Unlink from the same-destination ring and free. */
+ dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
+ dft->prev_same_dest->next_same_dest = dft->next_same_dest;
+ rx_cache_free (rx->cache,
+ &rx->cache->free_discernable_futures,
+ (char *)dft);
+ }
+ return 0;
+ }
+ }
+ /* We also trim the character set a bit. */
+ rx_bitset_intersection (rx->local_cset_size,
+ csetout, e->params.cset);
+ }
+ else
+ /* An edge that doesn't apply at least tells us some characters
+ * that don't share the same edge set as CHR.
+ */
+ rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
+ stateset = stateset->cdr;
+ }
+ return 1;
+}
+
+
+/* This is a constructor for RX_SUPER_EDGE structures. These are
+ * wrappers for lists of superstate NFA edges that share character sets labels.
+ * If a transition class contains more than one rx_distinct_future (superstate
+ * edge), then it represents a non-determinism in the superstate NFA.
+ */
+
+/* Allocate a super edge for SUPER labelled with a copy of CSET and
+ * owning the edge list DF.  The bitset copy is stored in the same
+ * allocation, immediately after the struct.  The new edge is pushed on
+ * the front of SUPER's edge list and every member of DF is pointed back
+ * at it.  Returns 0 if storage could not be obtained.
+ */
+#ifdef __STDC__
+static struct rx_super_edge *
+rx_super_edge (struct rx *rx,
+               struct rx_superstate *super, rx_Bitset cset,
+               struct rx_distinct_future *df)
+#else
+static struct rx_super_edge *
+rx_super_edge (rx, super, cset, df)
+     struct rx *rx;
+     struct rx_superstate *super;
+     rx_Bitset cset;
+     struct rx_distinct_future *df;
+#endif
+{
+  struct rx_distinct_future * member;
+  struct rx_super_edge * edge
+    = ((struct rx_super_edge *)
+       rx_cache_malloc_or_get
+         (rx->cache, &rx->cache->free_transition_classes,
+          sizeof (struct rx_super_edge) + rx_sizeof_bitset (rx->local_cset_size)));
+
+  if (!edge)
+    return 0;
+
+  /* Push onto the superstate's list of edges. */
+  edge->next = super->edges;
+  super->edges = edge;
+
+  edge->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
+  edge->rx_backtrack_frame.data = 0;
+  edge->rx_backtrack_frame.data_2 = (void *) edge;
+  edge->options = df;
+
+  /* The label lives in the bytes that follow the struct. */
+  edge->cset = (rx_Bitset) ((char *) edge + sizeof (*edge));
+  rx_bitset_assign (rx->local_cset_size, edge->cset, cset);
+
+  if (!df)
+    return edge;
+
+  /* Open the ring, tag each member with its edge, close the ring. */
+  df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
+  for (member = df; member; member = member->next_same_super_edge[0])
+    member->edge = edge;
+  df->next_same_super_edge[1]->next_same_super_edge[0] = df;
+
+  return edge;
+}
+
+
+/* There are three kinds of cache miss. The first occurs when a
+ * transition is taken that has never been computed during the
+ * lifetime of the source superstate. That cache miss is handled by
+ * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
+ * occurs when the destination superstate of a transition doesn't
+ * exist. SOLVE_DESTINATION is used to construct the destination superstate.
+ * Finally, the third kind of cache miss occurs when the destination
+ * superstate of a transition is in a `semi-free state'. That case is
+ * handled by REFRESH_SEMIFREE_SUPERSTATE.
+ *
+ * The function of RX_HANDLE_CACHE_MISS (defined below, after the helper
+ * INSTALL_PARTIAL_TRANSITION) is to figure out which of these
+ * cases applies.
+ */
+
+/* Copy *ANSWER into the transition table of SUPER for each character
+ * selected by SET.  Bit N of SET (N counted from the least significant
+ * bit, 0 through 31) selects transition slot OFFSET + N.
+ */
+#ifdef __STDC__
+static void
+install_partial_transition  (struct rx_superstate *super,
+                             struct rx_inx *answer,
+                             RX_subset set, int offset)
+#else
+static void
+install_partial_transition  (super, answer, set, offset)
+     struct rx_superstate *super;
+     struct rx_inx *answer;
+     RX_subset set;
+     int offset;
+#endif
+{
+  struct rx_inx * table = super->transitions;
+  RX_subset bit;
+  int n;
+
+  for (n = 0, bit = 1; n < 32; ++n, bit <<= 1)
+    {
+      if (set & bit)
+        table[offset + n] = *answer;
+    }
+}
+
+
+/* Resolve a cache miss taken from superstate SUPER on character CHR.
+ * DATA is the rx_distinct_future stored in the missing frame, or 0 when
+ * the miss came from a transition slot that has no edge attached.
+ *
+ * Returns the instruction frame the matcher should continue with, or 0
+ * on allocation failure (or when the character set's bitset is too
+ * large for the local buffer).  Where the answer applies to a whole
+ * group of characters it is also installed in SUPER's transition table.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_inx *
+rx_handle_cache_miss
+ (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
+#else
+RX_DECL struct rx_inx *
+rx_handle_cache_miss (rx, super, chr, data)
+ struct rx *rx;
+ struct rx_superstate *super;
+ unsigned char chr;
+ void *data;
+#endif
+{
+ /* Index of the bitset word that holds CHR's bit. */
+ int offset = chr / RX_subset_bits;
+ struct rx_distinct_future *df = data;
+
+ if (!df) /* must be the shared_cache_miss_frame */
+ {
+ /* Perhaps this is just a transition waiting to be filled. */
+ struct rx_super_edge *tc;
+ RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
+
+ for (tc = super->edges; tc; tc = tc->next)
+ if (tc->cset[offset] & mask)
+ {
+ struct rx_inx * answer;
+ df = tc->options;
+ /* More than one option on the edge means a backtrack point;
+ * a single option goes through its side effects frame if it
+ * has effects, and straight to its future frame otherwise.
+ */
+ answer = ((tc->options->next_same_super_edge[0] != tc->options)
+ ? &tc->rx_backtrack_frame
+ : (df->effects
+ ? &df->side_effects_frame
+ : &df->future_frame));
+ /* NOTE(review): `offset * 32' assumes RX_subset_bits is 32,
+ * matching the 32 hard-coded in install_partial_transition --
+ * confirm.
+ */
+ install_partial_transition (super, answer,
+ tc->cset [offset], offset * 32);
+ return answer;
+ }
+ /* Otherwise, it's a flushed or newly encountered edge. */
+ {
+ char cset_space[1024]; /* this limit is far from unreasonable */
+ rx_Bitset trcset;
+ struct rx_inx *answer;
+
+ if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
+ return 0; /* If the arbitrary limit is hit, always fail */
+ /* cleanly. */
+ trcset = (rx_Bitset)cset_space;
+ /* SUPER stays locked while edges are being built; every exit
+ * below unlocks it again.
+ */
+ rx_lock_superstate (rx, super);
+ if (!compute_super_edge (rx, &df, trcset, super, chr))
+ {
+ rx_unlock_superstate (rx, super);
+ return 0;
+ }
+ if (!df) /* We just computed the fail transition. */
+ {
+ static struct rx_inx
+ shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
+ answer = &shared_fail_frame;
+ }
+ else
+ {
+ tc = rx_super_edge (rx, super, trcset, df);
+ if (!tc)
+ {
+ rx_unlock_superstate (rx, super);
+ return 0;
+ }
+ /* Same frame selection as for an existing edge above. */
+ answer = ((tc->options->next_same_super_edge[0] != tc->options)
+ ? &tc->rx_backtrack_frame
+ : (df->effects
+ ? &df->side_effects_frame
+ : &df->future_frame));
+ }
+ install_partial_transition (super, answer,
+ trcset[offset], offset * 32);
+ rx_unlock_superstate (rx, super);
+ return answer;
+ }
+ }
+ else if (df->future) /* A cache miss on an edge with a future? Must be
+ * a semi-free destination. */
+ {
+ if (df->future->is_semifree)
+ refresh_semifree_superstate (rx->cache, df->future);
+ return &df->future_frame;
+ }
+ else
+ /* no future superstate on an existing edge */
+ {
+ rx_lock_superstate (rx, super);
+ if (!solve_destination (rx, df))
+ {
+ rx_unlock_superstate (rx, super);
+ return 0;
+ }
+ /* The future frame can be installed directly only when the edge
+ * has no side effects and is the sole option of its super edge.
+ */
+ if (!df->effects
+ && (df->edge->options->next_same_super_edge[0] == df->edge->options))
+ install_partial_transition (super, &df->future_frame,
+ df->edge->cset[offset], offset * 32);
+ rx_unlock_superstate (rx, super);
+ return &df->future_frame;
+ }
+}
+
+
+
+
+/* The rest of the code provides a regex.c compatible interface. */
+
+
+/* Human-readable messages, one per error code; the comment on each
+ * entry names the code that entry is for.  The first entry has no
+ * message.
+ * NOTE(review): the order presumably must match the reg_errcode_t
+ * enumeration, which is declared elsewhere -- confirm before
+ * reordering.
+ */
+__const__ char *re_error_msg[] =
+{
+ 0, /* REG_NOUT -- presumably REG_NOERROR, the success code; confirm */
+ "No match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "Invalid collation character", /* REG_ECOLLATE */
+ "Invalid character class name", /* REG_ECTYPE */
+ "Trailing backslash", /* REG_EESCAPE */
+ "Invalid back reference", /* REG_ESUBREG */
+ "Unmatched [ or [^", /* REG_EBRACK */
+ "Unmatched ( or \\(", /* REG_EPAREN */
+ "Unmatched \\{", /* REG_EBRACE */
+ "Invalid content of \\{\\}", /* REG_BADBR */
+ "Invalid range end", /* REG_ERANGE */
+ "Memory exhausted", /* REG_ESPACE */
+ "Invalid preceding regular expression", /* REG_BADRPT */
+ "Premature end of regular expression", /* REG_EEND */
+ "Regular expression too big", /* REG_ESIZE */
+ "Unmatched ) or \\)", /* REG_ERPAREN */
+};
+
+
+
+/*
+ * Macros used while compiling patterns.
+ *
+ * By convention, PEND points just past the end of the uncompiled pattern,
+ * P points to the read position in the pattern. `translate' is the name
+ * of the translation table (`TRANSLATE' is the name of a macro that looks
+ * things up in `translate').
+ */
+
+
+/*
+ * Fetch the next character in the uncompiled pattern---translating it
+ * if necessary. *Also cast from a signed character in the constant
+ * string passed to us by the user to an unsigned char that we can use
+ * as an array index (in, e.g., `translate').
+ */
+/* PATFETCH expects `p', `pend' and `translate' to be in scope at the
+ * point of use.  At the end of the pattern it executes
+ * `return REG_EEND' in the enclosing function, so it may only be used
+ * inside functions that return an error code.
+ */
+#define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ c = translate[c]; \
+ } while (0)
+
+/*
+ * Fetch the next character in the uncompiled pattern, with no
+ * translation.
+ * Like PATFETCH, this returns REG_EEND from the enclosing function when
+ * the pattern is exhausted, and needs `p' and `pend' in scope.
+ */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. */
+#define PATUNFETCH p--
+
+
+/* Look D up in the `translate' table in scope at the point of use.  D
+ * is first converted to unsigned char so that it is a valid index.
+ */
+#define TRANSLATE(d) translate[(unsigned char) (d)]
+
+/* Type of the group number recorded in each compile-stack element. */
+typedef unsigned regnum_t;
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
+ */
+typedef int pattern_offset_t;
+
+/* One entry of the compile stack. */
+typedef struct
+{
+ struct rexp_node ** top_expression; /* was begalt */
+ struct rexp_node ** last_expression; /* was laststart */
+ pattern_offset_t inner_group_offset;
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+/* The compile stack: an array of SIZE elements of which the first
+ * AVAIL are in use.
+ */
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+/* NOTE(review): presumably the number of elements the compile stack
+ * starts out with; its use is not visible in this section -- confirm.
+ */
+#define INIT_COMPILE_STACK_SIZE 32
+
+/* These macros operate on a variable named `compile_stack' that must be
+ * in scope at the point of use.
+ */
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list.
+ * The list is the byte array named `b' in scope at the point of use.
+ * C is parenthesised at both uses so that the cast to unsigned char
+ * applies to the whole argument even when C is a compound expression.
+ */
+#define SET_LIST_BIT(c)                           \
+  (b[((unsigned char) (c)) / CHARBITS]            \
+   |= 1 << (((unsigned char) (c)) % CHARBITS))
+
+/* Get the next unsigned number in the uncompiled pattern. */
+/* Reads decimal digits through PATFETCH into NUM, using the variables
+ * `p', `pend' and `c' of the enclosing function.  A negative NUM is
+ * reset to 0 before the first digit is added; if no digit is read NUM
+ * is left unchanged.  On exit `c' holds the last character fetched, so
+ * the first non-digit has already been consumed.  Because it uses
+ * PATFETCH, it can return REG_EEND from the enclosing function.
+ */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (isdigit (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+/* Length of the longest character class name accepted below. */
+#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+/* True if STRING is exactly one of the twelve recognised character
+ * class names.  STRING is evaluated several times.
+ */
+#define IS_CHAR_CLASS(string) \
+ (!strcmp (string, "alpha") || !strcmp (string, "upper") \
+ || !strcmp (string, "lower") || !strcmp (string, "digit") \
+ || !strcmp (string, "alnum") || !strcmp (string, "xdigit") \
+ || !strcmp (string, "space") || !strcmp (string, "print") \
+ || !strcmp (string, "punct") || !strcmp (string, "graph") \
+ || !strcmp (string, "cntrl") || !strcmp (string, "blank"))
+
+
+/* These predicates are used in regex_compile. */
+
+/* P points to just after a ^ in PATTERN. Return true if that ^ comes
+ * after an alternative or a begin-subexpression. We assume there is at
+ * least one character before the ^.
+ */
+
+/* Decide whether the `^' just before P starts a subexpression or an
+ * alternative.  The character in front of the `^' must be `(' or `|',
+ * and it must be an operator under SYNTAX: either the syntax treats
+ * the bare character as the operator, or the character is preceded by
+ * a backslash.
+ */
+#ifdef __STDC__
+static boolean
+at_begline_loc_p (__const__ char *pattern, __const__ char * p, reg_syntax_t syntax)
+#else
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+     __const__ char *pattern;
+     __const__ char * p;
+     reg_syntax_t syntax;
+#endif
+{
+  /* P is one past the `^', so the character before the `^' is P[-2]. */
+  __const__ char *before = p - 2;
+  boolean escaped = ((before > pattern) && (before[-1] == '\\'));
+
+  /* After a subexpression? */
+  if (*before == '(')
+    return ((syntax & RE_NO_BK_PARENS) || escaped);
+
+  /* After an alternative? */
+  if (*before == '|')
+    return ((syntax & RE_NO_BK_VBAR) || escaped);
+
+  return 0;
+}
+
+/* The dual of at_begline_loc_p. This one is for $. We assume there is
+ * at least one character after the $, i.e., `P < PEND'.
+ */
+
+/* Decide whether the `$' just before P ends a subexpression or an
+ * alternative.  What follows must be the close-group or the
+ * alternation operator as spelled under SYNTAX: the bare character
+ * when the syntax uses unescaped operators, otherwise a backslash
+ * followed by the character.
+ */
+#ifdef __STDC__
+static boolean
+at_endline_loc_p (__const__ char *p, __const__ char *pend, int syntax)
+#else
+static boolean
+at_endline_loc_p (p, pend, syntax)
+     __const__ char *p;
+     __const__ char *pend;
+     int syntax;
+#endif
+{
+  boolean backslash_here = (*p == '\\');
+  /* The character after *P, or 0 when *P is the last one. */
+  __const__ char *after = (p + 1 < pend) ? (p + 1) : 0;
+  int before_group;
+  int before_alt;
+
+  /* Before a subexpression? */
+  if (syntax & RE_NO_BK_PARENS)
+    before_group = (*p == ')');
+  else
+    before_group = (backslash_here && after && (*after == ')'));
+
+  /* Before an alternative? */
+  if (syntax & RE_NO_BK_VBAR)
+    before_alt = (*p == '|');
+  else
+    before_alt = (backslash_here && after && (*after == '|'));
+
+  return (before_group || before_alt);
+}
+
+
+/* The identity translation table: entry I holds the value I for every
+ * index 0 through 255, so translating through it changes nothing.
+ */
+unsigned char rx_id_translation[256] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
+ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+ 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
+
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+ 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
+ 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
+ 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
+ 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
+ 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
+
+ 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
+ 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
+ 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
+ 250, 251, 252, 253, 254, 255
+};
+
+/* The compiler keeps an inverted translation table.
+ * This looks up/inititalize elements.
+ * VALID is an array of booleans that validate CACHE.
+ */
+
+/* Return the bitset of all characters that TRANSLATE maps to the same
+ * value as C.  CACHE is an array of bitsets, one per character, and
+ * VALID[C] records whether the bitset for C has been computed; it is
+ * filled in on first request and reused afterwards.
+ */
+#ifdef __STDC__
+static rx_Bitset
+inverse_translation (struct re_pattern_buffer * rxb,
+                     char * valid, rx_Bitset cache,
+                     unsigned char * translate, int c)
+#else
+static rx_Bitset
+inverse_translation (rxb, valid, cache, translate, c)
+     struct re_pattern_buffer * rxb;
+     char * valid;
+     rx_Bitset cache;
+     unsigned char * translate;
+     int c;
+#endif
+{
+  /* The slot for C inside the cache. */
+  rx_Bitset cs
+    = cache + c * rx_bitset_numb_subsets (rxb->rx.local_cset_size);
+  int target;
+  int ch;
+
+  if (valid[c])
+    return cs;
+
+  /* First request for C: collect every character with C's image. */
+  target = TRANSLATE(c);
+  rx_bitset_null (rxb->rx.local_cset_size, cs);
+  for (ch = 0; ch < 256; ++ch)
+    {
+      if (TRANSLATE(ch) == target)
+        RX_bitset_enjoin (cs, ch);
+    }
+  valid[c] = 1;
+  return cs;
+}
+
+
+
+
+/* More subroutine declarations and macros for regex_compile. */
+
+/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
+ false if it's not. */
+
+/* Search the in-use part of COMPILE_STACK, from the most recently
+ * pushed element down to the first, for an element whose group number
+ * is REGNUM.  Returns true if one is found and false otherwise.
+ */
+#ifdef __STDC__
+static boolean
+group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
+#else
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+    compile_stack_type compile_stack;
+    regnum_t regnum;
+#endif
+{
+  int idx = compile_stack.avail - 1;
+
+  while (idx >= 0)
+    {
+      if (compile_stack.stack[idx].regnum == regnum)
+        return true;
+      idx--;
+    }
+
+  return false;
+}
+
+
+/*
+ * Read the ending character of a range (in a bracket expression) from the
+ * uncompiled pattern *P_PTR (which ends at PEND). We assume the
+ * starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
+ * Then we set the translation of all bits between the starting and
+ * ending characters (inclusive) in the compiled pattern B.
+ *
+ * Return an error code.
+ *
+ * We use these short variable names so we can use the same macros as
+ * `regex_compile' itself.
+ */
+
+/* Add the range whose end is at *P_PTR to the character set CS.
+ * The start of the range is the translated character two positions
+ * before *P_PTR.  For each character from start to end inclusive, the
+ * set of characters that translate to it is added to CS.  *P_PTR is
+ * advanced past the range end.
+ *
+ * Returns REG_ERANGE if the pattern ends before the range end, or if
+ * the range is empty and SYNTAX forbids empty ranges; REG_EEND may be
+ * returned by PATFETCH; REG_NOERROR otherwise.
+ *
+ * The names `p', `pend' and `translate' are required by the PATFETCH
+ * and TRANSLATE macros and must not be renamed.
+ */
+#ifdef __STDC__
+static reg_errcode_t
+compile_range (struct re_pattern_buffer * rxb, rx_Bitset cs,
+               __const__ char ** p_ptr, __const__ char * pend,
+               unsigned char * translate, reg_syntax_t syntax,
+               rx_Bitset inv_tr,  char * valid_inv_tr)
+#else
+static reg_errcode_t
+compile_range (rxb, cs, p_ptr, pend, translate, syntax, inv_tr, valid_inv_tr)
+     struct re_pattern_buffer * rxb;
+     rx_Bitset cs;
+     __const__ char ** p_ptr;
+     __const__ char * pend;
+     unsigned char * translate;
+     reg_syntax_t syntax;
+     rx_Bitset inv_tr;
+     char * valid_inv_tr;
+#endif
+{
+  __const__ char *p = *p_ptr;
+  unsigned char range_start = TRANSLATE(p[-2]);
+  unsigned char range_end;
+  unsigned this_char;
+
+  if (p == pend)
+    return REG_ERANGE;
+
+  PATFETCH (range_end);
+
+  /* Tell the caller the range end has been consumed. */
+  (*p_ptr)++;
+
+  if (range_start > range_end)
+    {
+      if (syntax & RE_NO_EMPTY_RANGES)
+        return REG_ERANGE;
+      return REG_NOERROR;
+    }
+
+  this_char = range_start;
+  while (this_char <= range_end)
+    {
+      rx_Bitset preimage
+        = inverse_translation (rxb, valid_inv_tr, inv_tr, translate, this_char);
+      rx_bitset_union (rxb->rx.local_cset_size, cs, preimage);
+      this_char++;
+    }
+
+  return REG_NOERROR;
+}
+
+
+/* Walk the expression tree REXP looking for backreference side effects.
+ * For each side-effect node whose PARAMS entry is a `re_se_backref', the
+ * element of OUT indexed by that entry's `op1' field is set to 1.  Other
+ * elements of OUT are left untouched.
+ *
+ * This is used to help optimize patterns for searching.  The information is
+ * useful because, if the caller doesn't want register values, backreferenced
+ * registers are the only registers for which we need rx_backtrack.
+ */
+
+#ifdef __STDC__
+static void
+find_backrefs (char * out, struct rexp_node * rexp,
+               struct re_se_params * params)
+#else
+static void
+find_backrefs (out, rexp, params)
+     char * out;
+     struct rexp_node * rexp;
+     struct re_se_params * params;
+#endif
+{
+  long se;
+
+  if (!rexp)
+    return;
+
+  switch (rexp->type)
+    {
+    case r_alternate:
+    case r_concat:
+    case r_opt:
+    case r_star:
+    case r_2phase_star:
+      /* Interior nodes: look at both children (a null child is a no-op).  */
+      find_backrefs (out, rexp->params.pair.left, params);
+      find_backrefs (out, rexp->params.pair.right, params);
+      break;
+
+    case r_side_effect:
+      /* Only non-negative side effects are indices into PARAMS.  */
+      se = (long) rexp->params.side_effect;
+      if (se >= 0 && params[se].se == re_se_backref)
+        out[params[se].op1] = 1;
+      break;
+
+    case r_cset:
+    case r_data:
+      /* Leaves that cannot hold a backreference.  */
+      break;
+    }
+}
+
+
+
+/* Union into RXB->fastset the character sets found by walking REXP, and
+ * report whether REXP can match the empty string.
+ *
+ * The walk stops descending a concatenation as soon as its left part is
+ * found unable to match the empty string, and it never looks at the right
+ * (second-phase) child of a 2-phase star.
+ *
+ * Returns 0 unless the pattern can match the empty string, in which
+ * case it returns 1.
+ */
+
+#ifdef __STDC__
+static int
+compute_fastset (struct re_pattern_buffer * rxb, struct rexp_node * rexp)
+#else
+static int
+compute_fastset (rxb, rexp)
+     struct re_pattern_buffer * rxb;
+     struct rexp_node * rexp;
+#endif
+{
+  int left_empty;
+  int right_empty;
+
+  if (rexp == 0)
+    return 1;
+
+  switch (rexp->type)
+    {
+    case r_data:
+    case r_side_effect:
+      return 1;
+
+    case r_cset:
+      rx_bitset_union (rxb->rx.local_cset_size,
+                       rxb->fastset, rexp->params.cset);
+      return 0;
+
+    case r_concat:
+      /* The right part matters only if the left part may be empty.  */
+      if (!compute_fastset (rxb, rexp->params.pair.left))
+        return 0;
+      return compute_fastset (rxb, rexp->params.pair.right) != 0;
+
+    case r_alternate:
+      /* Both branches must always be visited so that both contribute
+         to the fastset.  */
+      left_empty = compute_fastset (rxb, rexp->params.pair.left);
+      right_empty = compute_fastset (rxb, rexp->params.pair.right);
+      return (left_empty || right_empty);
+
+    case r_2phase_star:
+    case r_opt:
+    case r_star:
+      /* Zero repetitions are allowed, so the result is always 1; only
+         the left child is walked.  */
+      compute_fastset (rxb, rexp->params.pair.left);
+      return 1;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+/* Decide whether every match of REXP must begin with the side effect SE.
+ *
+ * Returns
+ *   1 -- yes, definitely anchored by the given side effect.
+ *   2 -- maybe anchored, maybe the empty string.
+ *   0 -- definitely not anchored.
+ * There is simply no other possibility.
+ */
+
+#ifdef __STDC__
+static int
+is_anchored (struct rexp_node * rexp, rx_side_effect se)
+#else
+static int
+is_anchored (rexp, se)
+     struct rexp_node * rexp;
+     rx_side_effect se;
+#endif
+{
+  int left;
+  int right;
+
+  if (rexp == 0)
+    return 2;
+
+  switch (rexp->type)
+    {
+    case r_side_effect:
+      if (rexp->params.side_effect == se)
+        return 1;
+      return 2;
+
+    case r_opt:
+    case r_star:
+      /* The body may be skipped, so the best possible answer is "maybe".  */
+      if (is_anchored (rexp->params.pair.left, se))
+        return 2;
+      return 0;
+
+    case r_concat:
+    case r_2phase_star:
+      /* The right part decides only when the left part is a "maybe".  */
+      left = is_anchored (rexp->params.pair.left, se);
+      if (left != 2)
+        return left;
+      return is_anchored (rexp->params.pair.right, se);
+
+    case r_alternate:
+      /* One unanchored branch makes the whole alternation unanchored;
+         otherwise agreeing branches keep their answer and disagreeing
+         ones degrade to "maybe".  */
+      left = is_anchored (rexp->params.pair.left, se);
+      if (left == 0)
+        return 0;
+      right = is_anchored (rexp->params.pair.right, se);
+      if (right == 0)
+        return 0;
+      return (left == right) ? left : 2;
+
+    case r_cset:
+    case r_data:
+      return 0;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+/* Destructively prune from REXP the register assignments (left- and
+ * right-paren side effects) that aren't required by backreferencing, and
+ * return the resulting tree (0 if nothing is left).
+ * This can speed up explore_future, especially if it eliminates
+ * non-determinism in the superstate NFA.
+ *
+ * NEEDED is an array of characters, presumably filled in by FIND_BACKREFS.
+ * The non-zero elements of the array indicate which register assignments
+ * can NOT be removed from the expression.
+ *
+ * Nodes that are dropped are released with rx_free_rexp.  A concatenation
+ * that loses a child is replaced by its surviving child; an optional or
+ * starred node whose body disappears is dropped entirely.  Alternations
+ * and 2-phase stars are kept even when a child becomes null.
+ */
+
+#ifdef __STDC__
+static struct rexp_node *
+remove_unecessary_side_effects (struct rx * rx, char * needed,
+                                struct rexp_node * rexp,
+                                struct re_se_params * params)
+#else
+static struct rexp_node *
+remove_unecessary_side_effects (rx, needed, rexp, params)
+     struct rx * rx;
+     char * needed;
+     struct rexp_node * rexp;
+     struct re_se_params * params;
+#endif
+{
+  struct rexp_node * new_left;
+  struct rexp_node * new_right;
+
+  if (rexp == 0)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_cset:
+    case r_data:
+      return rexp;
+
+    case r_alternate:
+    case r_concat:
+    case r_2phase_star:
+      new_left = remove_unecessary_side_effects (rx, needed,
+                                                 rexp->params.pair.left,
+                                                 params);
+      new_right = remove_unecessary_side_effects (rx, needed,
+                                                  rexp->params.pair.right,
+                                                  params);
+      if (rexp->type == r_concat && !(new_left && new_right))
+        {
+          /* Detach the children first so that freeing this node does
+             not also free the child being returned.  */
+          rexp->params.pair.left = 0;
+          rexp->params.pair.right = 0;
+          rx_free_rexp (rx, rexp);
+          return new_left ? new_left : new_right;
+        }
+      rexp->params.pair.left = new_left;
+      rexp->params.pair.right = new_right;
+      return rexp;
+
+    case r_opt:
+    case r_star:
+      new_left = remove_unecessary_side_effects (rx, needed,
+                                                 rexp->params.pair.left,
+                                                 params);
+      rexp->params.pair.left = new_left;
+      if (new_left)
+        return rexp;
+      rx_free_rexp (rx, rexp);
+      return 0;
+
+    case r_side_effect:
+      {
+        int se = (long) rexp->params.side_effect;
+        int removable = 0;
+
+        /* Only non-negative side effects index PARAMS.  A paren for a
+           register number above 0 that no backreference needs can go.  */
+        if (se >= 0)
+          {
+            enum re_side_effects kind = (enum re_side_effects) params[se].se;
+
+            if ((kind == re_se_lparen || kind == re_se_rparen)
+                && (params[se].op1 > 0)
+                && !needed[params[se].op1])
+              removable = 1;
+          }
+
+        if (!removable)
+          return rexp;
+
+        rx_free_rexp (rx, rexp);
+        return 0;
+      }
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+
+/* Report whether applying a repetition operator to NODE would be
+ * pointless.
+ *
+ * Returns 1 if NODE is null, or is built only from the side effects
+ * listed in the inner switch below (try, at_dot, the buffer/line/word
+ * boundary tests, fail and win).  Returns 0 as soon as the tree contains
+ * a character set, a data node, or any other side effect (parens,
+ * intervals, syntax tests, backreferences, ...).
+ *
+ * A negative side-effect value is taken to be the side-effect code itself;
+ * a non-negative one is an index into PARAMS.
+ */
+#ifdef __STDC__
+static int
+pointless_if_repeated (struct rexp_node * node, struct re_se_params * params)
+#else
+static int
+pointless_if_repeated (node, params)
+     struct rexp_node * node;
+     struct re_se_params * params;
+#endif
+{
+  if (node == 0)
+    return 1;
+
+  switch (node->type)
+    {
+    case r_alternate:
+    case r_concat:
+    case r_2phase_star:
+      /* Pointless only if both halves are.  */
+      if (!pointless_if_repeated (node->params.pair.left, params))
+        return 0;
+      return pointless_if_repeated (node->params.pair.right, params) != 0;
+
+    case r_opt:
+    case r_star:
+      return pointless_if_repeated (node->params.pair.left, params);
+
+    case r_side_effect:
+      {
+        enum re_side_effects kind
+          = (((long) node->params.side_effect < 0)
+             ? (enum re_side_effects) node->params.side_effect
+             : (enum re_side_effects) params[(long) node->params.side_effect].se);
+
+        switch (kind)
+          {
+          case re_se_try:
+          case re_se_at_dot:
+          case re_se_begbuf:
+          case re_se_hat:
+          case re_se_wordbeg:
+          case re_se_wordbound:
+          case re_se_notwordbound:
+          case re_se_wordend:
+          case re_se_endbuf:
+          case re_se_dollar:
+          case re_se_fail:
+          case re_se_win:
+            return 1;
+
+          default:
+            /* re_se_lparen, re_se_rparen, re_se_iter, re_se_end_iter,
+               re_se_syntax, re_se_not_syntax, re_se_backref and any
+               code not named above.  */
+            return 0;
+          }
+      }
+
+    default:
+      /* r_cset, r_data and anything unrecognized.  */
+      return 0;
+    }
+}
+
+
+
+/* Report whether REXP contains a left- or right-paren side effect, for a
+ * register number greater than 0, in a position that is "in danger".
+ *
+ * IN_DANGER is the state inherited from the caller.  It is forced to 1
+ * for everything beneath a star or a 2-phase star, forced to 0 for
+ * everything beneath an optional node, and passed through unchanged by
+ * concatenations and alternations.
+ *
+ * Returns 1 if such a side effect is found and 0 otherwise.  RXB is only
+ * handed on to the recursive calls.
+ */
+#ifdef __STDC__
+static int
+registers_on_stack (struct re_pattern_buffer * rxb,
+                    struct rexp_node * rexp, int in_danger,
+                    struct re_se_params * params)
+#else
+static int
+registers_on_stack (rxb, rexp, in_danger, params)
+     struct re_pattern_buffer * rxb;
+     struct rexp_node * rexp;
+     int in_danger;
+     struct re_se_params * params;
+#endif
+{
+  if (rexp == 0)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_cset:
+    case r_data:
+      return 0;
+
+    case r_alternate:
+    case r_concat:
+      if (registers_on_stack (rxb, rexp->params.pair.left,
+                              in_danger, params))
+        return 1;
+      return registers_on_stack (rxb, rexp->params.pair.right,
+                                 in_danger, params) != 0;
+
+    case r_opt:
+      return registers_on_stack (rxb, rexp->params.pair.left, 0, params);
+
+    case r_star:
+      return registers_on_stack (rxb, rexp->params.pair.left, 1, params);
+
+    case r_2phase_star:
+      if (registers_on_stack (rxb, rexp->params.pair.left, 1, params))
+        return 1;
+      return registers_on_stack (rxb, rexp->params.pair.right,
+                                 1, params) != 0;
+
+    case r_side_effect:
+      {
+        int se = (long) rexp->params.side_effect;
+
+        /* Nothing to report outside a danger zone, and negative side
+           effects have no PARAMS entry to inspect.  */
+        if (!in_danger || se < 0)
+          return 0;
+        if (!(params[se].op1 > 0))
+          return 0;
+        if (((enum re_side_effects) params[se].se == re_se_lparen)
+            || ((enum re_side_effects) params[se].se == re_se_rparen))
+          return 1;
+        return 0;
+      }
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+
+/* Table of the IDEM flags of the complex side effects.
+ *
+ * The entries are generated by re-including "rx.h" with RX_WANT_SE_DEFS
+ * defined: each RX_DEF_CPLX_SE line expands to its IDEM argument followed
+ * by a comma, while each RX_DEF_SE line expands to nothing.
+ *
+ * has_non_idempotent_epsilon_path indexes this table with the `se' field
+ * of a re_se_params entry.
+ * NOTE(review): that presumably relies on the complex side-effect codes
+ * being numbered 0, 1, 2, ... in the order rx.h lists them -- confirm
+ * against rx.h.
+ */
+static char idempotent_complex_se[] =
+{
+#define RX_WANT_SE_DEFS 1
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#define RX_DEF_SE(IDEM, NAME, VALUE)
+#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) IDEM,
+#include "rx.h"
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#undef RX_WANT_SE_DEFS
+ 23 /* Closes the list after the last generated "IDEM," entry.
+       NOTE(review): the value looks arbitrary -- confirm it is never read. */
+};
+
+/* Table of the IDEM flags of the simple side effects.
+ *
+ * The entries are generated by re-including "rx.h" with RX_WANT_SE_DEFS
+ * defined: each RX_DEF_SE line expands to its IDEM argument followed by a
+ * comma, while each RX_DEF_CPLX_SE line expands to nothing.
+ *
+ * has_non_idempotent_epsilon_path indexes this table with the NEGATED
+ * side-effect value, so element 0 is a placeholder in front of the
+ * generated entries.
+ * NOTE(review): that presumably relies on the simple side-effect codes
+ * being numbered -1, -2, ... in the order rx.h lists them -- confirm
+ * against rx.h.
+ */
+static char idempotent_se[] =
+{
+ 13, /* Placeholder occupying index 0. */
+#define RX_WANT_SE_DEFS 1
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#define RX_DEF_SE(IDEM, NAME, VALUE) IDEM,
+#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE)
+#include "rx.h"
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#undef RX_WANT_SE_DEFS
+ 42 /* Closes the list after the last generated "IDEM," entry.
+       NOTE(review): the value looks arbitrary -- confirm it is never read. */
+};
+
+
+
+
+/* Report whether the expression tree REXP contains any side-effect node
+ * at all.  Returns 1 if it does and 0 if it does not (a null tree has
+ * none).  For optional and starred nodes only the left child is searched.
+ * RX is only handed on to the recursive calls.
+ */
+#ifdef __STDC__
+static int
+has_any_se (struct rx * rx,
+            struct rexp_node * rexp)
+#else
+static int
+has_any_se (rx, rexp)
+     struct rx * rx;
+     struct rexp_node * rexp;
+#endif
+{
+  if (rexp == 0)
+    return 0;
+
+  if (rexp->type == r_side_effect)
+    return 1;
+
+  switch (rexp->type)
+    {
+    case r_opt:
+    case r_star:
+      return has_any_se (rx, rexp->params.pair.left);
+
+    case r_2phase_star:
+    case r_concat:
+    case r_alternate:
+      if (has_any_se (rx, rexp->params.pair.left))
+        return 1;
+      return has_any_se (rx, rexp->params.pair.right) != 0;
+
+    default:
+      /* r_cset and r_data are leaves without side effects; any other
+         type should never happen.  */
+      break;
+    }
+
+  return 0;
+}
+
+
+
+/* Report whether REXP has a path made up of non-idempotent side effects.
+ * This must be called AFTER `convert_hard_loops' for a given REXP.
+ *
+ * The answer is computed structurally:
+ *   - a null tree, a character set, a data node and a star yield 0;
+ *   - a side effect yields 1 exactly when its entry in the idempotence
+ *     tables is 0;
+ *   - an alternation yields 1 if either branch does;
+ *   - a concatenation or 2-phase star yields 1 only if both children do;
+ *   - an optional node yields whatever its body yields.
+ *
+ * A negative side-effect value is a simple side-effect code and is looked
+ * up (negated) in `idempotent_se'.  A non-negative value is an index into
+ * PARAMS, whose `se' field is looked up in `idempotent_complex_se'.
+ * RX is only handed on to the recursive calls.
+ */
+#ifdef __STDC__
+static int
+has_non_idempotent_epsilon_path (struct rx * rx,
+                                 struct rexp_node * rexp,
+                                 struct re_se_params * params)
+#else
+static int
+has_non_idempotent_epsilon_path (rx, rexp, params)
+     struct rx * rx;
+     struct rexp_node * rexp;
+     struct re_se_params * params;
+#endif
+{
+  if (!rexp)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_cset:
+    case r_data:
+    case r_star:
+      return 0;
+
+    case r_side_effect:
+      {
+        long se = (long) rexp->params.side_effect;
+
+        /* Index 0 is a valid PARAMS slot (the first complex side effect
+           created gets it), so the test must be `>= 0', as it is in the
+           other tree walkers of this file.  With `> 0', slot 0 was looked
+           up in the placeholder element idempotent_se[0] and therefore
+           always treated as idempotent.  */
+        int idempotent = (se >= 0
+                          ? idempotent_complex_se [ params [se].se ]
+                          : idempotent_se [-se]);
+        return !idempotent;
+      }
+
+    case r_alternate:
+      return
+        (   has_non_idempotent_epsilon_path (rx,
+                                             rexp->params.pair.left, params)
+         || has_non_idempotent_epsilon_path (rx,
+                                             rexp->params.pair.right, params));
+
+    case r_2phase_star:
+    case r_concat:
+      return
+        (   has_non_idempotent_epsilon_path (rx,
+                                             rexp->params.pair.left, params)
+         && has_non_idempotent_epsilon_path (rx,
+                                             rexp->params.pair.right, params));
+
+    case r_opt:
+      return has_non_idempotent_epsilon_path (rx,
+                                              rexp->params.pair.left, params);
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+
+/* This computes roughly what its name suggests: whether REXP begins with
+ * a complex side effect (one whose value is non-negative).  It can (and
+ * does) go wrong in the direction of returning spurious 0 without causing
+ * disasters.
+ *
+ * An alternation qualifies only if both branches do.  For a
+ * concatenation the answer is simply whether its left part contains any
+ * side effect at all.  Optional, starred and 2-phase-star nodes always
+ * yield 0.
+ */
+#ifdef __STDC__
+static int
+begins_with_complex_se (struct rx * rx, struct rexp_node * rexp)
+#else
+static int
+begins_with_complex_se (rx, rexp)
+     struct rx * rx;
+     struct rexp_node * rexp;
+#endif
+{
+  if (rexp == 0)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_side_effect:
+      if ((long) rexp->params.side_effect >= 0)
+        return 1;
+      return 0;
+
+    case r_alternate:
+      if (!begins_with_complex_se (rx, rexp->params.pair.left))
+        return 0;
+      return begins_with_complex_se (rx, rexp->params.pair.right) != 0;
+
+    case r_concat:
+      return has_any_se (rx, rexp->params.pair.left);
+
+    default:
+      /* r_cset, r_data, r_opt, r_star, r_2phase_star; any other type
+         should never happen.  */
+      break;
+    }
+
+  return 0;
+}
+
+
+/* This destructively removes some of the re_se_tv side effects from
+ * a rexp tree.  In particular, during parsing re_se_tv was inserted on the
+ * right half of every | to guarantee that posix path preference could be
+ * honored.  This function removes some which it can be determined aren't
+ * needed.
+ *
+ * For each alternation, the (concat re_se_tv <subexp>) wrapper on the
+ * right is replaced by <subexp> itself when UNPOSIX is non-zero, or when
+ * <subexp> begins with a complex side effect, or when neither branch of
+ * the alternation contains any side effect.  The discarded wrapper is
+ * released with rx_free_rexp.
+ */
+
+#ifdef __STDC__
+static void
+speed_up_alt (struct rx * rx,
+              struct rexp_node * rexp,
+              int unposix)
+#else
+static void
+speed_up_alt (rx, rexp, unposix)
+     struct rx * rx;
+     struct rexp_node * rexp;
+     int unposix;
+#endif
+{
+  if (rexp == 0)
+    return;
+
+  switch (rexp->type)
+    {
+    case r_cset:
+    case r_data:
+    case r_side_effect:
+      break;
+
+    case r_opt:
+    case r_star:
+      speed_up_alt (rx, rexp->params.pair.left, unposix);
+      break;
+
+    case r_2phase_star:
+    case r_concat:
+      speed_up_alt (rx, rexp->params.pair.left, unposix);
+      speed_up_alt (rx, rexp->params.pair.right, unposix);
+      break;
+
+    case r_alternate:
+      {
+        struct rexp_node * wrapper;
+        struct rexp_node * branch;
+        int droppable;
+
+        speed_up_alt (rx, rexp->params.pair.left, unposix);
+
+        /* the right child is guaranteed to be (concat re_se_tv <subexp>) */
+        wrapper = rexp->params.pair.right;
+        branch = wrapper->params.pair.right;
+        speed_up_alt (rx, branch, unposix);
+
+        if (unposix)
+          droppable = 1;
+        else if (begins_with_complex_se (rx, branch))
+          droppable = 1;
+        else
+          droppable = (!has_any_se (rx, branch)
+                       && !has_any_se (rx, rexp->params.pair.left));
+
+        if (droppable)
+          {
+            /* Splice <subexp> in place of the wrapper, detaching it first
+               so that freeing the wrapper leaves <subexp> alive.  */
+            rexp->params.pair.right = branch;
+            wrapper->params.pair.right = 0;
+            rx_free_rexp (rx, wrapper);
+          }
+      }
+      break;
+    }
+}
+
+
+
+
+
+/* `rx_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of the error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is set to zero;
+ `re_nsub' is set to the number of groups in PATTERN;
+ `not_bol' and `not_eol' are set to zero.
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+
+
+#ifdef __STDC__
+RX_DECL reg_errcode_t
+rx_compile (__const__ char *pattern, int size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer * rxb)
+#else
+RX_DECL reg_errcode_t
+rx_compile (pattern, size, syntax, rxb)
+ __const__ char *pattern;
+ int size;
+ reg_syntax_t syntax;
+ struct re_pattern_buffer * rxb;
+#endif
+{
+ RX_subset
+ inverse_translate [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
+ char
+ validate_inv_tr [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
+
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random temporary spot in PATTERN. */
+ __const__ char *p1;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ __const__ char *p = pattern;
+ __const__ char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ unsigned char *translate = (rxb->translate
+ ? rxb->translate
+ : rx_id_translation);
+
+ /* When parsing is done, this will hold the expression tree. */
+ struct rexp_node * rexp = 0;
+
+ /* In the midst of compilation, this holds onto the regexp
+ * as first parsed while rexp goes on to acquire additional constructs.
+ */
+ struct rexp_node * orig_rexp = 0;
+ struct rexp_node * fewer_side_effects = 0;
+
+ /* This and top_expression are saved on the compile stack. */
+ struct rexp_node ** top_expression = &rexp;
+ struct rexp_node ** last_expression = top_expression;
+
+ /* Parameter to `goto append_node' */
+ struct rexp_node * append;
+
+ /* Counts open-groups as they are encountered. This is the index of the
+ * innermost group being compiled.
+ */
+ regnum_t regnum = 0;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ * which to go back if the interval is invalid.
+ */
+ __const__ char *beg_interval;
+
+ struct re_se_params * params = 0;
+ int paramc = 0; /* How many complex side effects so far? */
+
+ rx_side_effect side; /* param to `goto add_side_effect' */
+
+ bzero (validate_inv_tr, sizeof (validate_inv_tr));
+
+ rxb->rx.instruction_table = rx_id_instruction_table;
+
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = (( compile_stack_elt_t *) malloc ((INIT_COMPILE_STACK_SIZE) * sizeof ( compile_stack_elt_t)));
+ if (compile_stack.stack == 0)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ rxb->rx.cache = &default_cache;
+ rxb->syntax = syntax;
+ rxb->fastmap_accurate = 0;
+ rxb->not_bol = rxb->not_eol = 0;
+ rxb->least_subs = 0;
+
+ /* Always count groups, whether or not rxb->no_sub is set.
+ * The whole pattern is implicitly group 0, so counting begins
+ * with 1.
+ */
+ rxb->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX)
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ {
+ struct rexp_node * n
+ = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_hat);
+ if (!n)
+ return REG_ESPACE;
+ append = n;
+ goto append_node;
+ }
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ {
+ struct rexp_node * n
+ = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_dollar);
+ if (!n)
+ return REG_ESPACE;
+ append = n;
+ goto append_node;
+ }
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (pointless_if_repeated (*last_expression, params))
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!last_expression)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ * and also whether or not two or more matches is allowed.
+ */
+
+ {
+ struct rexp_node * inner_exp = *last_expression;
+ int need_sync = 0;
+
+ if (many_times_ok
+ && has_non_idempotent_epsilon_path (&rxb->rx,
+ inner_exp, params))
+ {
+ struct rexp_node * pusher
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_pushpos);
+ struct rexp_node * checker
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_chkpos);
+ struct rexp_node * pushback
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_pushback);
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * lit_t = rx_mk_r_cset (&rxb->rx, cs);
+ struct rexp_node * fake_state
+ = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
+ struct rexp_node * phase2
+ = rx_mk_r_concat (&rxb->rx, checker, fake_state);
+ struct rexp_node * popper
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_poppos);
+ struct rexp_node * star
+ = rx_mk_r_2phase_star (&rxb->rx, inner_exp, phase2);
+ struct rexp_node * a
+ = rx_mk_r_concat (&rxb->rx, pusher, star);
+ struct rexp_node * whole_thing
+ = rx_mk_r_concat (&rxb->rx, a, popper);
+ if (!(pusher && star && pushback && lit_t && fake_state
+ && lit_t && phase2 && checker && popper
+ && a && whole_thing))
+ return REG_ESPACE;
+ RX_bitset_enjoin (cs, 't');
+ *last_expression = whole_thing;
+ }
+ else
+ {
+ struct rexp_node * star =
+ (many_times_ok ? rx_mk_r_star : rx_mk_r_opt)
+ (&rxb->rx, *last_expression);
+ if (!star)
+ return REG_ESPACE;
+ *last_expression = star;
+ need_sync = has_any_se (&rxb->rx, *last_expression);
+ }
+ if (!zero_times_ok)
+ {
+ struct rexp_node * concat
+ = rx_mk_r_concat (&rxb->rx, inner_exp,
+ rx_copy_rexp (&rxb->rx,
+ *last_expression));
+ if (!concat)
+ return REG_ESPACE;
+ *last_expression = concat;
+ }
+ if (need_sync)
+ {
+ int sync_se = paramc;
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ sizeof (*params) * (1 + paramc)))
+ : ((struct re_se_params *)
+ malloc (sizeof (*params))));
+ if (!params)
+ return REG_ESPACE;
+ ++paramc;
+ params [sync_se].se = re_se_tv;
+ side = (rx_side_effect)sync_se;
+ goto add_side_effect;
+ }
+ }
+ /* The old regex.c used to optimize `.*\n'.
+ * Maybe rx should too?
+ */
+ }
+ break;
+
+
+ case '.':
+ {
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * n = rx_mk_r_cset (&rxb->rx, cs);
+ if (!(cs && n))
+ return REG_ESPACE;
+
+ rx_bitset_universe (rxb->rx.local_cset_size, cs);
+ if (!(rxb->syntax & RE_DOT_NEWLINE))
+ RX_bitset_remove (cs, '\n');
+ if (!(rxb->syntax & RE_DOT_NOT_NULL))
+ RX_bitset_remove (cs, 0);
+
+ append = n;
+ goto append_node;
+ break;
+ }
+
+
+ case '[':
+ if (p == pend) return REG_EBRACK;
+ {
+ boolean had_char_class = false;
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * node = rx_mk_r_cset (&rxb->rx, cs);
+ int is_inverted = *p == '^';
+
+ if (!(node && cs))
+ return REG_ESPACE;
+
+ /* This branch of the switch is normally exited with
+ *`goto append_node'
+ */
+ append = node;
+
+ if (is_inverted)
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) return REG_EBRACK;
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ {
+ rx_Bitset it = inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ c1);
+ rx_bitset_union (rxb->rx.local_cset_size, cs, it);
+ }
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ goto finalize_class_and_append;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ return REG_ERANGE;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (rxb, cs, &p, pend, translate, syntax,
+ inverse_translate, validate_inv_tr);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (rxb, cs, &p, pend, translate, syntax,
+ inverse_translate, validate_inv_tr);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if ((syntax & RE_CHAR_CLASSES)
+ && (c == '[') && (*p == ':'))
+ {
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) return REG_EBRACK;
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+ /* If isn't a word bracketed by `[:' and:`]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ int ch;
+ boolean is_alnum = !strcmp (str, "alnum");
+ boolean is_alpha = !strcmp (str, "alpha");
+ boolean is_blank = !strcmp (str, "blank");
+ boolean is_cntrl = !strcmp (str, "cntrl");
+ boolean is_digit = !strcmp (str, "digit");
+ boolean is_graph = !strcmp (str, "graph");
+ boolean is_lower = !strcmp (str, "lower");
+ boolean is_print = !strcmp (str, "print");
+ boolean is_punct = !strcmp (str, "punct");
+ boolean is_space = !strcmp (str, "space");
+ boolean is_upper = !strcmp (str, "upper");
+ boolean is_xdigit = !strcmp (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) return REG_EBRACK;
+
+ for (ch = 0; ch < 1 << CHARBITS; ch++)
+ {
+ if ( (is_alnum && isalnum (ch))
+ || (is_alpha && isalpha (ch))
+ || (is_blank && isblank (ch))
+ || (is_cntrl && iscntrl (ch))
+ || (is_digit && isdigit (ch))
+ || (is_graph && isgraph (ch))
+ || (is_lower && islower (ch))
+ || (is_print && isprint (ch))
+ || (is_punct && ispunct (ch))
+ || (is_space && isspace (ch))
+ || (is_upper && isupper (ch))
+ || (is_xdigit && isxdigit (ch)))
+ {
+ rx_Bitset it =
+ inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ ch);
+ rx_bitset_union (rxb->rx.local_cset_size,
+ cs, it);
+ }
+ }
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ {
+ rx_Bitset it =
+ inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ '[');
+ rx_bitset_union (rxb->rx.local_cset_size,
+ cs, it);
+ }
+ {
+ rx_Bitset it =
+ inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ ':');
+ rx_bitset_union (rxb->rx.local_cset_size,
+ cs, it);
+ }
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ {
+ rx_Bitset it = inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ c);
+ rx_bitset_union (rxb->rx.local_cset_size, cs, it);
+ }
+ }
+ }
+
+ finalize_class_and_append:
+ if (is_inverted)
+ {
+ rx_bitset_complement (rxb->rx.local_cset_size, cs);
+ if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+ RX_bitset_remove (cs, '\n');
+ }
+ goto append_node;
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) return REG_EESCAPE;
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ rxb->re_nsub++;
+ regnum++;
+ if (COMPILE_STACK_FULL)
+ {
+ ((compile_stack.stack) =
+ (compile_stack_elt_t *) realloc (compile_stack.stack, ( compile_stack.size << 1) * sizeof (
+ compile_stack_elt_t)));
+ if (compile_stack.stack == 0) return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ if (*last_expression)
+ {
+ struct rexp_node * concat
+ = rx_mk_r_concat (&rxb->rx, *last_expression, 0);
+ if (!concat)
+ return REG_ESPACE;
+ *last_expression = concat;
+ last_expression = &concat->params.pair.right;
+ }
+
+ /*
+ * These are the values to restore when we hit end of this
+ * group.
+ */
+ COMPILE_STACK_TOP.top_expression = top_expression;
+ COMPILE_STACK_TOP.last_expression = last_expression;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ compile_stack.avail++;
+
+ top_expression = last_expression;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ handle_close:
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ return REG_ERPAREN;
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+ struct rexp_node ** inner = top_expression;
+
+ compile_stack.avail--;
+ top_expression = COMPILE_STACK_TOP.top_expression;
+ last_expression = COMPILE_STACK_TOP.last_expression;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+ {
+ int left_se = paramc;
+ int right_se = paramc + 1;
+
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ (paramc + 2) * sizeof (params[0])))
+ : ((struct re_se_params *)
+ malloc (2 * sizeof (params[0]))));
+ if (!params)
+ return REG_ESPACE;
+ paramc += 2;
+
+ params[left_se].se = re_se_lparen;
+ params[left_se].op1 = this_group_regnum;
+ params[right_se].se = re_se_rparen;
+ params[right_se].op1 = this_group_regnum;
+ {
+ struct rexp_node * left
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)left_se);
+ struct rexp_node * right
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)right_se);
+ struct rexp_node * c1
+ = (*inner
+ ? rx_mk_r_concat (&rxb->rx, left, *inner) : left);
+ struct rexp_node * c2
+ = rx_mk_r_concat (&rxb->rx, c1, right);
+ if (!(left && right && c1 && c2))
+ return REG_ESPACE;
+ *inner = c2;
+ }
+ }
+ break;
+ }
+
+ case '|': /* `\|'. */
+ if ((syntax & RE_LIMITED_OPS) || (syntax & RE_NO_BK_VBAR))
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ {
+ struct rexp_node * alt
+ = rx_mk_r_alternate (&rxb->rx, *top_expression, 0);
+ if (!alt)
+ return REG_ESPACE;
+ *top_expression = alt;
+ last_expression = &alt->params.pair.right;
+ {
+ int sync_se = paramc;
+
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ (paramc + 1) * sizeof (params[0])))
+ : ((struct re_se_params *)
+ malloc (sizeof (params[0]))));
+ if (!params)
+ return REG_ESPACE;
+ ++paramc;
+
+ params[sync_se].se = re_se_tv;
+ {
+ struct rexp_node * sync
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)sync_se);
+ struct rexp_node * conc
+ = rx_mk_r_concat (&rxb->rx, sync, 0);
+
+ if (!sync || !conc)
+ return REG_ESPACE;
+
+ *last_expression = conc;
+ last_expression = &conc->params.pair.right;
+ }
+ }
+ }
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ || (p - 2 == pattern && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_EBRACE;
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') return REG_EBRACE;
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (pointless_if_repeated (*last_expression, params))
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto unfetch_interval;
+ /* was: else laststart = b; */
+ }
+
+ /* If the upper bound is zero, don't want to iterate
+ * at all.
+ */
+ if (upper_bound == 0)
+ {
+ if (*last_expression)
+ {
+ rx_free_rexp (&rxb->rx, *last_expression);
+ *last_expression = 0;
+ }
+ }
+ else
+ /* Otherwise, we have a nontrivial interval. */
+ {
+ int iter_se = paramc;
+ int end_se = paramc + 1;
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ sizeof (*params) * (2 + paramc)))
+ : ((struct re_se_params *)
+ malloc (2 * sizeof (*params))));
+ if (!params)
+ return REG_ESPACE;
+ paramc += 2;
+ params [iter_se].se = re_se_iter;
+ params [iter_se].op1 = lower_bound;
+ params[iter_se].op2 = upper_bound;
+
+ params[end_se].se = re_se_end_iter;
+ params[end_se].op1 = lower_bound;
+ params[end_se].op2 = upper_bound;
+ {
+ struct rexp_node * push0
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_push0);
+ struct rexp_node * start_one_iter
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)iter_se);
+ struct rexp_node * phase1
+ = rx_mk_r_concat (&rxb->rx, start_one_iter,
+ *last_expression);
+ struct rexp_node * pushback
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_pushback);
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * lit_t
+ = rx_mk_r_cset (&rxb->rx, cs);
+ struct rexp_node * phase2
+ = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
+ struct rexp_node * loop
+ = rx_mk_r_2phase_star (&rxb->rx, phase1, phase2);
+ struct rexp_node * push_n_loop
+ = rx_mk_r_concat (&rxb->rx, push0, loop);
+ struct rexp_node * final_test
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)end_se);
+ struct rexp_node * full_exp
+ = rx_mk_r_concat (&rxb->rx, push_n_loop, final_test);
+
+ if (!(push0 && start_one_iter && phase1
+ && pushback && lit_t && phase2
+ && loop && push_n_loop && final_test && full_exp))
+ return REG_ESPACE;
+
+ RX_bitset_enjoin(cs, 't');
+
+ *last_expression = full_exp;
+ }
+ }
+ beg_interval = 0;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ p = beg_interval;
+ beg_interval = 0;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ side = (rx_side_effect)rx_se_at_dot;
+ goto add_side_effect;
+ break;
+
+ case 's':
+ case 'S':
+ {
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * set = rx_mk_r_cset (&rxb->rx, cs);
+ if (!(cs && set))
+ return REG_ESPACE;
+ if (c == 'S')
+ rx_bitset_universe (rxb->rx.local_cset_size, cs);
+
+ PATFETCH (c);
+ {
+ int x;
+ enum syntaxcode code = syntax_spec_code [c];
+ for (x = 0; x < 256; ++x)
+ {
+
+ if (SYNTAX (x) == code)
+ {
+ rx_Bitset it =
+ inverse_translation (rxb, validate_inv_tr,
+ inverse_translate,
+ translate, x);
+ rx_bitset_xor (rxb->rx.local_cset_size, cs, it);
+ }
+ }
+ }
+ append = set;
+ goto append_node;
+ }
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ case 'W':
+ {
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * n = (cs ? rx_mk_r_cset (&rxb->rx, cs) : 0);
+ if (!(cs && n))
+ return REG_ESPACE;
+ if (c == 'W')
+ rx_bitset_universe (rxb->rx.local_cset_size ,cs);
+ {
+ int x;
+ for (x = rxb->rx.local_cset_size - 1; x > 0; --x)
+ if (SYNTAX(x) & Sword)
+ RX_bitset_toggle (cs, x);
+ }
+ append = n;
+ goto append_node;
+ }
+ break;
+
+/* With a little extra work, some of these side effects could be optimized
+ * away (basically by looking at what we already know about the surrounding
+ * chars).
+ */
+ case '<':
+ side = (rx_side_effect)re_se_wordbeg;
+ goto add_side_effect;
+ break;
+
+ case '>':
+ side = (rx_side_effect)re_se_wordend;
+ goto add_side_effect;
+ break;
+
+ case 'b':
+ side = (rx_side_effect)re_se_wordbound;
+ goto add_side_effect;
+ break;
+
+ case 'B':
+ side = (rx_side_effect)re_se_notwordbound;
+ goto add_side_effect;
+ break;
+
+ case '`':
+ side = (rx_side_effect)re_se_begbuf;
+ goto add_side_effect;
+ break;
+
+ case '\'':
+ side = (rx_side_effect)re_se_endbuf;
+ goto add_side_effect;
+ break;
+
+ add_side_effect:
+ {
+ struct rexp_node * se
+ = rx_mk_r_side_effect (&rxb->rx, side);
+ if (!se)
+ return REG_ESPACE;
+ append = se;
+ goto append_node;
+ }
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ return REG_ESUBREG;
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, c1))
+ return REG_ESUBREG;
+
+ {
+ int backref_se = paramc;
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ sizeof (*params) * (1 + paramc)))
+ : ((struct re_se_params *)
+ malloc (sizeof (*params))));
+ if (!params)
+ return REG_ESPACE;
+ ++paramc;
+ params[backref_se].se = re_se_backref;
+ params[backref_se].op1 = c1;
+ side = (rx_side_effect)backref_se;
+ goto add_side_effect;
+ }
+ break;
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ {
+ rx_Bitset cs = rx_cset(&rxb->rx);
+ struct rexp_node * match = rx_mk_r_cset (&rxb->rx, cs);
+ rx_Bitset it;
+ if (!(cs && match))
+ return REG_ESPACE;
+ it = inverse_translation (rxb, validate_inv_tr,
+ inverse_translate, translate, c);
+ rx_bitset_union (CHAR_SET_SIZE, cs, it);
+ append = match;
+
+ append_node:
+ /* This generically appends the rexp APPEND to *LAST_EXPRESSION
+ * and then parses the next character normally.
+ */
+ if (*last_expression)
+ {
+ struct rexp_node * concat
+ = rx_mk_r_concat (&rxb->rx, *last_expression, append);
+ if (!concat)
+ return REG_ESPACE;
+ *last_expression = concat;
+ last_expression = &concat->params.pair.right;
+ }
+ else
+ *last_expression = append;
+ }
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ {
+ int win_se = paramc;
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ sizeof (*params) * (1 + paramc)))
+ : ((struct re_se_params *)
+ malloc (sizeof (*params))));
+ if (!params)
+ return REG_ESPACE;
+ ++paramc;
+ params[win_se].se = re_se_win;
+ {
+ struct rexp_node * se
+ = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)win_se);
+ struct rexp_node * concat
+ = rx_mk_r_concat (&rxb->rx, rexp, se);
+ if (!(se && concat))
+ return REG_ESPACE;
+ rexp = concat;
+ }
+ }
+
+
+ /* Through the pattern now. */
+
+ if (!COMPILE_STACK_EMPTY)
+ return REG_EPAREN;
+
+ free (compile_stack.stack);
+
+ orig_rexp = rexp;
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ dbug_rxb = rxb;
+ fputs ("\n\nCompiling ", stdout);
+ fwrite (pattern, 1, size, stdout);
+ fputs (":\n", stdout);
+ rxb->se_params = params;
+ print_rexp (&rxb->rx, orig_rexp, 2, re_seprint, stdout);
+ }
+#endif
+ {
+ rx_Bitset cs = rx_cset(&rxb->rx);
+ rx_Bitset cs2 = rx_cset(&rxb->rx);
+ char * se_map = (char *) alloca (paramc);
+ struct rexp_node * new_rexp = 0;
+
+
+ bzero (se_map, paramc);
+ find_backrefs (se_map, rexp, params);
+ fewer_side_effects =
+ remove_unecessary_side_effects (&rxb->rx, se_map,
+ rx_copy_rexp (&rxb->rx, rexp), params);
+
+ speed_up_alt (&rxb->rx, rexp, 0);
+ speed_up_alt (&rxb->rx, fewer_side_effects, 1);
+
+ {
+ char * syntax_parens = rxb->syntax_parens;
+ if (syntax_parens == (char *)0x1)
+ rexp = remove_unecessary_side_effects
+ (&rxb->rx, se_map, rexp, params);
+ else if (syntax_parens)
+ {
+ int x;
+ for (x = 0; x < paramc; ++x)
+ if (( (params[x].se == re_se_lparen)
+ || (params[x].se == re_se_rparen))
+ && (!syntax_parens [params[x].op1]))
+ se_map [x] = 1;
+ rexp = remove_unecessary_side_effects
+ (&rxb->rx, se_map, rexp, params);
+ }
+ }
+
+ /* At least one more optimization would be nice to have here but i ran out
+ * of time. The idea would be to delay side effects.
+ * For example, `(abc)' is the same thing as `abc()' except that the
+ * left paren is offset by 3 (which we know at compile time).
+ * (In this comment, write that second pattern `abc(:3:)'
+ * where `(:3:' is a syntactic unit.)
+ *
+ * Trickier: `(abc|defg)' is the same as `(abc(:3:|defg(:4:))'
+ * (The paren nesting may be hard to follow -- that's an alternation
+ * of `abc(:3:' and `defg(:4:' inside (purely syntactic) parens
+ * followed by the closing paren from the original expression.)
+ *
+ * Neither the expression tree representation nor the nfa makes
+ * this very easy to write. :(
+ */
+
+ /* What we compile is different than what the parser returns.
+ * Suppose the parser returns expression R.
+ * Let R' be R with unnecessary register assignments removed
+ * (see REMOVE_UNECESSARY_SIDE_EFFECTS, above).
+ *
+ * What we will compile is the expression:
+ *
+ * m{try}R{win}\|s{try}R'{win}
+ *
+ * {try} and {win} denote side effect epsilons (see EXPLORE_FUTURE).
+ *
+ * When trying a match, we insert an `m' at the beginning of the
+ * string if the user wants registers to be filled, `s' if not.
+ */
+ new_rexp =
+ rx_mk_r_alternate
+ (&rxb->rx,
+ rx_mk_r_concat (&rxb->rx, rx_mk_r_cset (&rxb->rx, cs2), rexp),
+ rx_mk_r_concat (&rxb->rx,
+ rx_mk_r_cset (&rxb->rx, cs), fewer_side_effects));
+
+ if (!(new_rexp && cs && cs2))
+ return REG_ESPACE;
+ RX_bitset_enjoin (cs2, '\0'); /* prefixed to the rexp used for matching. */
+ RX_bitset_enjoin (cs, '\1'); /* prefixed to the rexp used for searching. */
+ rexp = new_rexp;
+ }
+
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ fputs ("\n...which is compiled as:\n", stdout);
+ print_rexp (&rxb->rx, rexp, 2, re_seprint, stdout);
+ }
+#endif
+ {
+ struct rx_nfa_state *start = 0;
+ struct rx_nfa_state *end = 0;
+
+ if (!rx_build_nfa (&rxb->rx, rexp, &start, &end))
+ return REG_ESPACE; /* */
+ else
+ {
+ void * mem = (void *)rxb->buffer;
+ unsigned long size = rxb->allocated;
+ int start_id;
+ char * perm_mem;
+ int iterator_size = paramc * sizeof (params[0]);
+
+ end->is_final = 1;
+ start->is_start = 1;
+ rx_name_nfa_states (&rxb->rx);
+ start_id = start->id;
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ fputs ("...giving the NFA: \n", stdout);
+ dbug_rxb = rxb;
+ print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
+ }
+#endif
+ if (!rx_eclose_nfa (&rxb->rx))
+ return REG_ESPACE;
+ else
+ {
+ rx_delete_epsilon_transitions (&rxb->rx);
+
+ /* For compatibility reasons, we need to shove the
+ * compiled nfa into one chunk of malloced memory.
+ */
+ rxb->rx.reserved = ( sizeof (params[0]) * paramc
+ + rx_sizeof_bitset (rxb->rx.local_cset_size));
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ dbug_rxb = rxb;
+ fputs ("...which cooks down (uncompactified) to: \n", stdout);
+ print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
+ }
+#endif
+ if (!rx_compactify_nfa (&rxb->rx, &mem, &size))
+ return REG_ESPACE;
+ rxb->buffer = mem;
+ rxb->allocated = size;
+ rxb->rx.buffer = mem;
+ rxb->rx.allocated = size;
+ perm_mem = ((char *)rxb->rx.buffer
+ + rxb->rx.allocated - rxb->rx.reserved);
+ rxb->se_params = ((struct re_se_params *)perm_mem);
+ bcopy (params, rxb->se_params, iterator_size);
+ perm_mem += iterator_size;
+ rxb->fastset = (rx_Bitset) perm_mem;
+ rxb->start = rx_id_to_nfa_state (&rxb->rx, start_id);
+ }
+ rx_bitset_null (rxb->rx.local_cset_size, rxb->fastset);
+ rxb->can_match_empty = compute_fastset (rxb, orig_rexp);
+ rxb->match_regs_on_stack =
+ registers_on_stack (rxb, orig_rexp, 0, params);
+ rxb->search_regs_on_stack =
+ registers_on_stack (rxb, fewer_side_effects, 0, params);
+ if (rxb->can_match_empty)
+ rx_bitset_universe (rxb->rx.local_cset_size, rxb->fastset);
+ rxb->is_anchored = is_anchored (orig_rexp, (rx_side_effect) re_se_hat);
+ rxb->begbuf_only = is_anchored (orig_rexp,
+ (rx_side_effect) re_se_begbuf);
+ }
+ rx_free_rexp (&rxb->rx, rexp);
+ if (params)
+ free (params);
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ dbug_rxb = rxb;
+ fputs ("...which cooks down to: \n", stdout);
+ print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
+ }
+#endif
+ }
+ return REG_NOERROR;
+}
+
+
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there.
+
+ Callers in this file index it with a reg_errcode_t cast to int (see
+ re_compile_pattern and re_comp). The REG_NOERROR slot holds a null
+ pointer rather than a string, so a successful compilation yields a
+ null message. */
+
+__const__ char * rx_error_msg[] =
+{ 0, /* REG_NOERROR */
+ "No match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "Invalid collation character", /* REG_ECOLLATE */
+ "Invalid character class name", /* REG_ECTYPE */
+ "Trailing backslash", /* REG_EESCAPE */
+ "Invalid back reference", /* REG_ESUBREG */
+ "Unmatched [ or [^", /* REG_EBRACK */
+ "Unmatched ( or \\(", /* REG_EPAREN */
+ "Unmatched \\{", /* REG_EBRACE */
+ "Invalid content of \\{\\}", /* REG_BADBR */
+ "Invalid range end", /* REG_ERANGE */
+ "Memory exhausted", /* REG_ESPACE */
+ "Invalid preceding regular expression", /* REG_BADRPT */
+ "Premature end of regular expression", /* REG_EEND */
+ "Regular expression too big", /* REG_ESIZE */
+ "Unmatched ) or \\)", /* REG_ERPAREN */
+};
+
+
+
+
+/* A 256-entry map in which every entry is 1.
+ * NOTE(review): judging by its name this is presumably a stand-in
+ * fastmap that admits every character when no accurate fastmap is
+ * available -- confirm against its users, which are not in this part
+ * of the file.
+ */
+char rx_slowmap [256] =
+{
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+/* Expand the bitset RXB->fastset into the byte-per-character array
+ * RXB->fastmap (one 0/1 entry for each of the 256 character codes) and
+ * mark the fastmap as accurate.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_blow_up_fastmap (struct re_pattern_buffer * rxb)
+#else
+RX_DECL void
+rx_blow_up_fastmap (rxb)
+     struct re_pattern_buffer * rxb;
+#endif
+{
+  int ch = 0;
+
+  while (ch < 256)
+    {
+      /* Normalize the membership test to exactly 0 or 1. */
+      rxb->fastmap [ch] = (RX_bitset_member (rxb->fastset, ch) ? 1 : 0);
+      ++ch;
+    }
+  rxb->fastmap_accurate = 1;
+}
+
+
+
+
+/* Choose the name and linkage of the two-string search routine defined
+ * below. When neither REGEX_MALLOC nor __GNUC__ is defined, the routine
+ * is compiled as the static function inner_re_search_2, and a public
+ * re_search_2 wrapper further down calls it and then calls alloca (0).
+ * Otherwise the routine is itself the public re_search_2.
+ */
+#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
+#define RE_SEARCH_2_FN inner_re_search_2
+#define RE_S2_QUAL static
+#else
+#define RE_SEARCH_2_FN re_search_2
+#define RE_S2_QUAL
+#endif
+
+/* The application closure handed to rx_search by the two-string search
+ * routine: the two strings being searched (either may be null) and
+ * their sizes. The callbacks below address the pair as one logical
+ * string, string1 followed by string2.
+ */
+struct re_search_2_closure
+{
+ __const__ unsigned char * string1;
+ int size1;
+ __const__ unsigned char * string2;
+ int size2;
+};
+
+
+/* get_burst callback for rx_search: re-seat POS on whichever of the two
+ * strings in VCLOSURE (a struct re_search_2_closure *) contains the
+ * position POS currently denotes, and set pos->end so that a burst never
+ * runs past the end of that string or past STOP.
+ *
+ * Three cases are distinguished: only string1 is present (string2 is
+ * null), only string2 is present (string1 is null), or both are present.
+ * In the last case pos->offset records how many characters precede
+ * pos->string in the logical concatenation (0 for string1, size1 for
+ * string2).
+ *
+ * Returns rx_get_burst_ok or rx_get_burst_no_more.
+ */
+static __inline__ enum rx_get_burst_return
+re_search_2_get_burst (pos, vclosure, stop)
+ struct rx_string_position * pos;
+ void * vclosure;
+ int stop;
+{
+ struct re_search_2_closure * closure;
+ closure = (struct re_search_2_closure *)vclosure;
+ if (!closure->string2)
+ {
+ /* Only string1 is present. */
+ int inset;
+
+ /* Index of the current position relative to the current string. */
+ inset = pos->pos - pos->string;
+ /* -1 (one before the start) and size1 (one past the end) are both
+ accepted here; anything further out is refused. */
+ if ((inset < -1) || (inset > closure->size1))
+ return rx_get_burst_no_more;
+ else
+ {
+ pos->pos = (__const__ unsigned char *) closure->string1 + inset;
+ pos->string = (__const__ unsigned char *) closure->string1;
+ pos->size = closure->size1;
+ pos->end = ((__const__ unsigned char *)
+ MIN(closure->string1 + closure->size1,
+ closure->string1 + stop));
+ pos->offset = 0;
+ return ((pos->pos < pos->end)
+ ? rx_get_burst_ok
+ : rx_get_burst_no_more);
+ }
+ }
+ else if (!closure->string1)
+ {
+ /* Only string2 is present.
+ NOTE(review): unlike the string1-only branch above, INSET is not
+ range-checked here -- confirm whether that is intentional. */
+ int inset;
+
+ inset = pos->pos - pos->string;
+ pos->pos = (__const__ unsigned char *) closure->string2 + inset;
+ pos->string = (__const__ unsigned char *) closure->string2;
+ pos->size = closure->size2;
+ pos->end = ((__const__ unsigned char *)
+ MIN(closure->string2 + closure->size2,
+ closure->string2 + stop));
+ pos->offset = 0;
+ return ((pos->pos < pos->end)
+ ? rx_get_burst_ok
+ : rx_get_burst_no_more);
+ }
+ else
+ {
+ /* Both strings are present. */
+ int inset;
+
+ /* Index into the logical concatenation: pos->offset accounts for the
+ characters that precede the current string. */
+ inset = pos->pos - pos->string + pos->offset;
+ if (inset < closure->size1)
+ {
+ /* The position falls in string1. This branch reports
+ rx_get_burst_ok without comparing pos->pos against pos->end. */
+ pos->pos = (__const__ unsigned char *) closure->string1 + inset;
+ pos->string = (__const__ unsigned char *) closure->string1;
+ pos->size = closure->size1;
+ pos->end = ((__const__ unsigned char *)
+ MIN(closure->string1 + closure->size1,
+ closure->string1 + stop));
+ pos->offset = 0;
+ return rx_get_burst_ok;
+ }
+ else
+ {
+ /* The position falls in string2; rebase INSET and STOP by size1. */
+ pos->pos = ((__const__ unsigned char *)
+ closure->string2 + inset - closure->size1);
+ pos->string = (__const__ unsigned char *) closure->string2;
+ pos->size = closure->size2;
+ pos->end = ((__const__ unsigned char *)
+ MIN(closure->string2 + closure->size2,
+ closure->string2 + stop - closure->size1));
+ pos->offset = closure->size1;
+ return ((pos->pos < pos->end)
+ ? rx_get_burst_ok
+ : rx_get_burst_no_more);
+ }
+ }
+}
+
+
+/* back_check callback for rx_search: verify a back reference.
+ *
+ * LPAREN and RPAREN are the start and end indices (in the logical
+ * concatenation of the two strings) of the text captured by the group
+ * being referenced. Starting one character after POS, the input must
+ * repeat that text, comparing through TRANSLATE. VCLOSURE and STOP are
+ * passed on to re_search_2_get_burst, which is used to hop between the
+ * two strings.
+ *
+ * Returns rx_back_check_pass, leaving POS on the last character that was
+ * compared, or rx_back_check_fail.
+ */
+static __inline__ enum rx_back_check_return
+re_search_2_back_check (pos, lparen, rparen, translate, vclosure, stop)
+     struct rx_string_position * pos;
+     int lparen;
+     int rparen;
+     unsigned char * translate;
+     void * vclosure;
+     int stop;
+{
+  struct rx_string_position there;
+  struct rx_string_position past;
+
+  /* THERE walks over the captured text, starting at LPAREN. */
+  there = *pos;
+  there.pos = there.string + lparen - there.offset;
+  re_search_2_get_burst (&there, vclosure, stop);
+
+  /* PAST marks the end of the captured text. It is a fresh copy of *POS,
+   * so the index must be rebased with its own offset. THERE may already
+   * have been re-seated on the other string by the burst above, which
+   * would make there.offset the wrong base whenever the group begins in
+   * string1 and POS lies in string2.
+   */
+  past = *pos;
+  past.pos = past.string + rparen - past.offset;
+  re_search_2_get_burst (&past, vclosure, stop);
+
+  /* Step POS onto the first character to be compared. */
+  ++pos->pos;
+  re_search_2_get_burst (pos, vclosure, stop);
+
+  while (   (there.pos != past.pos)
+         && (pos->pos != pos->end))
+    if (TRANSLATE(*there.pos) != TRANSLATE(*pos->pos))
+      return rx_back_check_fail;
+    else
+      {
+        ++there.pos;
+        ++pos->pos;
+        /* Refill whichever cursor ran off the end of its burst. */
+        if (there.pos == there.end)
+          re_search_2_get_burst (&there, vclosure, stop);
+        if (pos->pos == pos->end)
+          re_search_2_get_burst (pos, vclosure, stop);
+      }
+
+  /* The input ran out before the whole captured text was matched. */
+  if (there.pos != past.pos)
+    return rx_back_check_fail;
+
+  /* Back up so that POS again denotes the last character consumed. */
+  --pos->pos;
+  re_search_2_get_burst (pos, vclosure, stop);
+  return rx_back_check_pass;
+}
+
+/* fetch_char callback for rx_search: return a character near POS.
+ *
+ * With OFFSET == 0 the result is the character at pos->pos. If pos->pos
+ * lies before pos->string, the result is instead the last character of
+ * string1 when POS is seated on string2 and string1 is non-empty, and 0
+ * otherwise.
+ *
+ * With any other OFFSET the result is pos->pos[1], except that when
+ * pos->pos == pos->end the first character of string2 is returned.
+ *
+ * STOP is not used.
+ *
+ * NOTE(review): the `pos->pos == pos->end' case dereferences
+ * closure->string2 without a null check, although re_match passes a null
+ * string2 -- confirm that this path cannot be reached in that
+ * configuration.
+ */
+static __inline__ int
+re_search_2_fetch_char (pos, offset, app_closure, stop)
+ struct rx_string_position * pos;
+ int offset;
+ void * app_closure;
+ int stop;
+{
+ struct re_search_2_closure * closure;
+ closure = (struct re_search_2_closure *)app_closure;
+ if (offset == 0)
+ {
+ if (pos->pos >= pos->string)
+ return *pos->pos;
+ else
+ {
+ /* Before the start of the current string: look back into string1
+ if that is what precedes it. */
+ if ( (pos->string == closure->string2)
+ && (closure->string1)
+ && (closure->size1))
+ return closure->string1[closure->size1 - 1];
+ else
+ return 0; /* sure, why not. */
+ }
+ }
+ if (pos->pos == pos->end)
+ return *closure->string2;
+ else
+ return pos->pos[1];
+}
+
+
+/* Two-string search entry point (named re_search_2 or inner_re_search_2,
+ * see RE_SEARCH_2_FN). Packages STRING1/SIZE1 and STRING2/SIZE2 into a
+ * closure and hands the work to rx_search together with the three
+ * re_search_2_* callbacks; STARTPOS, RANGE, STOP and REGS are passed
+ * through unchanged and the total size is SIZE1 + SIZE2.
+ *
+ * Returns -2 if rx_search reports an error, -1 if it reports failure
+ * (hard or soft), and otherwise rx_search's own result. A continuation
+ * result is impossible here (no save state is supplied) and aborts.
+ */
+#ifdef __STDC__
+RE_S2_QUAL int
+RE_SEARCH_2_FN (struct re_pattern_buffer *rxb,
+                __const__ char * string1, int size1,
+                __const__ char * string2, int size2,
+                int startpos, int range,
+                struct re_registers *regs,
+                int stop)
+#else
+RE_S2_QUAL int
+RE_SEARCH_2_FN (rxb,
+                string1, size1, string2, size2, startpos, range, regs, stop)
+     struct re_pattern_buffer *rxb;
+     __const__ char * string1;
+     int size1;
+     __const__ char * string2;
+     int size2;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+     int stop;
+#endif
+{
+  struct re_search_2_closure strings;
+  int outcome;
+
+  strings.string1 = (__const__ unsigned char *) string1;
+  strings.string2 = (__const__ unsigned char *) string2;
+  strings.size1 = size1;
+  strings.size2 = size2;
+
+  outcome = rx_search (rxb, startpos, range, stop, size1 + size2,
+                       re_search_2_get_burst,
+                       re_search_2_back_check,
+                       re_search_2_fetch_char,
+                       (void *)&strings,
+                       regs,
+                       0,
+                       0);
+
+  if (outcome == rx_search_continuation)
+    abort ();
+  if (outcome == rx_search_error)
+    return -2;
+  if (outcome == rx_search_soft_fail || outcome == rx_search_fail)
+    return -1;
+  return outcome;
+}
+
+/* Export rx_search to callers outside this file.
+ *
+ * Every argument is forwarded to rx_search unchanged and in the same
+ * order, and rx_search's result is returned as is. Unlike re_search_2,
+ * the caller supplies its own get_burst / back_check / fetch_char
+ * callbacks and closure, and may pass RESUME_STATE and SAVE_STATE.
+ */
+
+int
+re_rx_search (rxb, startpos, range, stop, total_size,
+ get_burst, back_check, fetch_char,
+ app_closure, regs, resume_state, save_state)
+ struct re_pattern_buffer * rxb;
+ int startpos;
+ int range;
+ int stop;
+ int total_size;
+ rx_get_burst_fn get_burst;
+ rx_back_check_fn back_check;
+ rx_fetch_char_fn fetch_char;
+ void * app_closure;
+ struct re_registers * regs;
+ struct rx_search_state * resume_state;
+ struct rx_search_state * save_state;
+{
+ return rx_search (rxb, startpos, range, stop, total_size,
+ get_burst, back_check, fetch_char, app_closure,
+ regs, resume_state, save_state);
+}
+
+/* Public re_search_2 for builds where neither REGEX_MALLOC nor __GNUC__
+ * is defined. In that configuration the real search routine is the
+ * static inner_re_search_2; this wrapper calls it with the same
+ * arguments, then calls alloca (0), and returns the search result.
+ * NOTE(review): the alloca (0) call is presumably there so that an
+ * emulated (malloc-based) alloca can reclaim storage allocated during
+ * the search -- confirm against the alloca implementation in use.
+ */
+#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
+#ifdef __STDC__
+int
+re_search_2 (struct re_pattern_buffer *rxb,
+ __const__ char * string1, int size1,
+ __const__ char * string2, int size2,
+ int startpos, int range,
+ struct re_registers *regs,
+ int stop)
+#else
+int
+re_search_2 (rxb, string1, size1, string2, size2, startpos, range, regs, stop)
+ struct re_pattern_buffer *rxb;
+ __const__ char * string1;
+ int size1;
+ __const__ char * string2;
+ int size2;
+ int startpos;
+ int range;
+ struct re_registers *regs;
+ int stop;
+#endif
+{
+ int ret;
+ ret = inner_re_search_2 (rxb, string1, size1, string2, size2, startpos,
+ range, regs, stop);
+ alloca (0);
+ return ret;
+}
+#endif
+
+
+/* Like re_search_2, above, but only one string is specified, and
+ * doesn't let you say where to stop matching.
+ *
+ * STRING is handed to re_search_2 as the second string (the first is
+ * null with size 0), and SIZE is used as the stop position. The return
+ * value is whatever re_search_2 returns.
+ */
+
+#ifdef __STDC__
+int
+re_search (struct re_pattern_buffer * rxb, __const__ char *string,
+ int size, int startpos, int range,
+ struct re_registers *regs)
+#else
+int
+re_search (rxb, string, size, startpos, range, regs)
+ struct re_pattern_buffer * rxb;
+ __const__ char * string;
+ int size;
+ int startpos;
+ int range;
+ struct re_registers *regs;
+#endif
+{
+ return re_search_2 (rxb, 0, 0, string, size, startpos, range, regs, size);
+}
+
+/* Match the compiled pattern RXB against the two strings at position POS
+ * by calling re_search_2 with a range of 1.
+ *
+ * If REGS is null, a one-register scratch set living on this function's
+ * stack is used instead, and RXB->regs_allocated is switched to
+ * REGS_FIXED for the duration of the call and restored afterwards.
+ *
+ * Returns the negative result of re_search_2 on failure, otherwise the
+ * length of the match (end[0] - start[0]).
+ */
+#ifdef __STDC__
+int
+re_match_2 (struct re_pattern_buffer * rxb,
+            __const__ char * string1, int size1,
+            __const__ char * string2, int size2,
+            int pos, struct re_registers *regs, int stop)
+#else
+int
+re_match_2 (rxb, string1, size1, string2, size2, pos, regs, stop)
+     struct re_pattern_buffer * rxb;
+     __const__ char * string1;
+     int size1;
+     __const__ char * string2;
+     int size2;
+     int pos;
+     struct re_registers *regs;
+     int stop;
+#endif
+{
+  struct re_registers scratch_regs;
+  regoff_t scratch_start;
+  regoff_t scratch_end;
+  struct re_registers * active_regs;
+  int saved_allocation;
+  int result;
+
+  saved_allocation = rxb->regs_allocated;
+  active_regs = regs;
+
+  if (active_regs == 0)
+    {
+      /* The caller wants no registers, but the match length is computed
+         from register 0, so provide a private one. */
+      scratch_regs.start = &scratch_start;
+      scratch_regs.end = &scratch_end;
+      scratch_regs.num_regs = 1;
+      active_regs = &scratch_regs;
+      rxb->regs_allocated = REGS_FIXED;
+    }
+
+  result = re_search_2 (rxb, string1, size1, string2, size2,
+                        pos, 1, active_regs, stop);
+
+  if (regs == 0)
+    rxb->regs_allocated = saved_allocation;
+
+  return ((result < 0)
+          ? result
+          : (active_regs->end[0] - active_regs->start[0]));
+}
+
+/* re_match is like re_match_2 except it takes only a single string.
+ *
+ * STRING is handed to re_match_2 as the first string (the second is
+ * null with size 0), and SIZE is used as the stop position. The return
+ * value is whatever re_match_2 returns.
+ */
+
+#ifdef __STDC__
+int
+re_match (struct re_pattern_buffer * rxb,
+ __const__ char * string,
+ int size, int pos,
+ struct re_registers *regs)
+#else
+int
+re_match (rxb, string, size, pos, regs)
+ struct re_pattern_buffer * rxb;
+ __const__ char *string;
+ int size;
+ int pos;
+ struct re_registers *regs;
+#endif
+{
+ return re_match_2 (rxb, string, size, 0, 0, pos, regs, size);
+}
+
+
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations.
+
+ The initial value is RE_SYNTAX_EMACS. re_compile_pattern and re_comp
+ pass the current value to rx_compile; regcomp does not use it. */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+
+
+/* Select the regexp syntax used by subsequent compilations that consult
+   the global `re_syntax_options'. Different utilities have historically
+   used different, incompatible syntaxes; this lets each pick its own.
+
+   SYNTAX is a bit mask built from the syntax bits defined in regex.h.
+   The previously selected syntax is returned. */
+
+#ifdef __STDC__
+reg_syntax_t
+re_set_syntax (reg_syntax_t syntax)
+#else
+reg_syntax_t
+re_set_syntax (syntax)
+     reg_syntax_t syntax;
+#endif
+{
+  reg_syntax_t previous;
+
+  previous = re_syntax_options;
+  re_syntax_options = syntax;
+
+  return previous;
+}
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information. STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data: BUFP is marked REGS_UNALLOCATED and REGS is emptied
+   (zero registers, null START and END); STARTS and ENDS are ignored.
+   Otherwise BUFP is marked REGS_REALLOCATE.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data. */
+
+#ifdef __STDC__
+void
+re_set_registers (struct re_pattern_buffer *bufp,
+                  struct re_registers *regs,
+                  unsigned num_regs,
+                  regoff_t * starts, regoff_t * ends)
+#else
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t * starts;
+     regoff_t * ends;
+#endif
+{
+  if (num_regs)
+    {
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      /* START and END are pointers, so clear them with a null pointer of
+         the right type rather than an integer cast to regoff_t. */
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
+
+
+
+
+/* Count the entries of LIST whose `car', viewed as a long, is not
+ * negative. A null LIST yields 0.
+ */
+#ifdef __STDC__
+static int
+cplx_se_sublist_len (struct rx_se_list * list)
+#else
+static int
+cplx_se_sublist_len (list)
+     struct rx_se_list * list;
+#endif
+{
+  struct rx_se_list * node;
+  int count = 0;
+
+  for (node = list; node; node = node->cdr)
+    if ((long)node->car >= 0)
+      ++count;
+
+  return count;
+}
+
+
+/* For rx->se_list_cmp
+ *
+ * Ordering function for two side-effect lists A and B (RX is unused).
+ * Only entries counted by cplx_se_sublist_len -- those whose `car',
+ * viewed as a long, is non-negative -- take part in the comparison.
+ *
+ * - If neither list has such entries, the lists are ordered by their
+ * addresses (0 when A and B are the same list).
+ * - If exactly one list has none, that list sorts first.
+ * - Otherwise the counted entries of each list are copied, in reverse
+ * list order, into a stack array and the arrays are compared element
+ * by element; the first differing pair decides. This branch returns
+ * -1 or 1, never 0.
+ */
+
+#ifdef __STDC__
+static int
+posix_se_list_order (struct rx * rx,
+ struct rx_se_list * a, struct rx_se_list * b)
+#else
+static int
+posix_se_list_order (rx, a, b)
+ struct rx * rx;
+ struct rx_se_list * a;
+ struct rx_se_list * b;
+#endif
+{
+ int al = cplx_se_sublist_len (a);
+ int bl = cplx_se_sublist_len (b);
+
+ if (!al && !bl)
+ return ((a == b)
+ ? 0
+ : ((a < b) ? -1 : 1));
+
+ else if (!al)
+ return -1;
+
+ else if (!bl)
+ return 1;
+
+ else
+ {
+ /* One extra slot in each array holds a terminating sentinel. */
+ rx_side_effect * av = ((rx_side_effect *)
+ alloca (sizeof (rx_side_effect) * (al + 1)));
+ rx_side_effect * bv = ((rx_side_effect *)
+ alloca (sizeof (rx_side_effect) * (bl + 1)));
+ struct rx_se_list * ap = a;
+ struct rx_se_list * bp = b;
+ int ai, bi;
+
+ /* Fill AV from the back: the first counted entry of the list lands in
+ the last slot before the sentinel. */
+ for (ai = al - 1; ai >= 0; --ai)
+ {
+ while ((long)ap->car < 0)
+ ap = ap->cdr;
+ av[ai] = ap->car;
+ ap = ap->cdr;
+ }
+ av[al] = (rx_side_effect)-2;
+ for (bi = bl - 1; bi >= 0; --bi)
+ {
+ while ((long)bp->car < 0)
+ bp = bp->cdr;
+ bv[bi] = bp->car;
+ bp = bp->cdr;
+ }
+ /* The two sentinels differ from each other (and, being negative, from
+ every counted entry), so the scan below always stops at or before
+ the end of the shorter array. */
+ bv[bl] = (rx_side_effect)-1;
+
+ {
+ int ret;
+ int x = 0;
+ while (av[x] == bv[x])
+ ++x;
+ ret = (((unsigned *)(av[x]) < (unsigned *)(bv[x])) ? -1 : 1);
+ return ret;
+ }
+ }
+}
+
+
+
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+   compiles PATTERN (of length LENGTH) and puts the result in RXB.
+   Returns 0 if the pattern was valid, otherwise an error string
+   (the rx_error_msg entry for the error code).
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+   are set in RXB on entry. The syntax used is the current value of
+   `re_syntax_options'.
+
+   We call rx_compile to do the actual compilation. */
+
+#ifdef __STDC__
+__const__ char *
+re_compile_pattern (__const__ char *pattern,
+                    int length,
+                    struct re_pattern_buffer * rxb)
+#else
+__const__ char *
+re_compile_pattern (pattern, length, rxb)
+     __const__ char *pattern;
+     int length;
+     struct re_pattern_buffer * rxb;
+#endif
+{
+  reg_errcode_t ret;
+
+  /* GNU code is written to assume at least RE_NREGS registers will be set
+     (and at least one extra will be -1). */
+  rxb->regs_allocated = REGS_UNALLOCATED;
+
+  /* And GNU code determines whether or not to get register information
+     by passing null for the REGS argument to re_match, etc., not by
+     setting no_sub. */
+  rxb->no_sub = 0;
+
+  rxb->rx.local_cset_size = 256;
+
+  /* Match anchors at newline. */
+  rxb->newline_anchor = 1;
+
+  /* Reset the per-compilation state before handing the buffer to
+     rx_compile. `rx.start' is cleared here as well, exactly as re_comp
+     and regcomp do, so that a reused or uninitialized buffer does not
+     carry a stale value into the new compilation. */
+  rxb->re_nsub = 0;
+  rxb->start = 0;
+  rxb->se_params = 0;
+  rxb->rx.nodec = 0;
+  rxb->rx.epsnodec = 0;
+  rxb->rx.instruction_table = 0;
+  rxb->rx.nfa_states = 0;
+  rxb->rx.start = 0;
+  rxb->rx.se_list_cmp = posix_se_list_order;
+  rxb->rx.start_set = 0;
+
+  ret = rx_compile (pattern, length, re_syntax_options, rxb);
+  alloca (0);
+  return rx_error_msg[(int) ret];
+}
+
+
+
+/* Fill in RXB's fastmap by calling rx_blow_up_fastmap, which expands
+ * RXB->fastset into RXB->fastmap and marks the fastmap accurate.
+ * Always returns 0.
+ */
+#ifdef __STDC__
+int
+re_compile_fastmap (struct re_pattern_buffer * rxb)
+#else
+int
+re_compile_fastmap (rxb)
+ struct re_pattern_buffer * rxb;
+#endif
+{
+ rx_blow_up_fastmap (rxb);
+ return 0;
+}
+
+
+
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them if this is an Emacs or POSIX compilation. */
+
+#if (!defined (emacs) && !defined (_POSIX_SOURCE)) || defined(USE_BSD_REGEX)
+
+/* The single pattern buffer shared by re_comp and re_exec (the BSD
+   interface has exactly one). */
+static struct re_pattern_buffer rx_comp_buf;
+
+/* Compile S into the shared BSD pattern buffer using the current
+   `re_syntax_options'.
+
+   A null or empty S means "reuse the previous pattern": this returns 0
+   if one has been compiled and an error string otherwise. For any other
+   S the result is 0 on success or an error message string. */
+#ifdef __STDC__
+char *
+re_comp (__const__ char *s)
+#else
+char *
+re_comp (s)
+     __const__ char *s;
+#endif
+{
+  reg_errcode_t status;
+
+  if (s == 0 || s[0] == '\0')
+    return (rx_comp_buf.buffer
+            ? 0
+            : "No previous regular expression");
+
+  /* The fastmap is allocated once and kept for later compilations. */
+  if (rx_comp_buf.fastmap == 0)
+    {
+      rx_comp_buf.fastmap = (char *) malloc (1 << CHARBITS);
+      if (rx_comp_buf.fastmap == 0)
+        return "Memory exhausted";
+    }
+
+  /* Since `re_exec' always passes NULL for the `regs' argument, we
+     don't need to initialize the pattern buffer fields which affect it. */
+
+  /* Reset the state belonging to any earlier compilation. */
+  rx_comp_buf.rx.local_cset_size = 256;
+  rx_comp_buf.rx.se_list_cmp = posix_se_list_order;
+  rx_comp_buf.rx.instruction_table = 0;
+  rx_comp_buf.rx.nfa_states = 0;
+  rx_comp_buf.rx.start_set = 0;
+  rx_comp_buf.rx.start = 0;
+  rx_comp_buf.rx.nodec = 0;
+  rx_comp_buf.rx.epsnodec = 0;
+  rx_comp_buf.se_params = 0;
+  rx_comp_buf.start = 0;
+  rx_comp_buf.re_nsub = 0;
+  rx_comp_buf.fastmap_accurate = 0;
+
+  /* Match anchors at newlines. */
+  rx_comp_buf.newline_anchor = 1;
+
+  status = rx_compile (s, strlen (s), re_syntax_options, &rx_comp_buf);
+  alloca (0);
+
+  /* Yes, we're discarding `__const__' here. */
+  return (char *) rx_error_msg[(int) status];
+}
+
+
+/* Search the null-terminated string S for the pattern most recently
+   compiled by re_comp, trying every starting position from 0 through
+   the length of S. Returns 1 if re_search reports a match and 0
+   otherwise (including when re_search reports an error). */
+#ifdef __STDC__
+int
+re_exec (__const__ char *s)
+#else
+int
+re_exec (s)
+     __const__ char *s;
+#endif
+{
+  __const__ int len = strlen (s);
+  int where;
+
+  where = re_search (&rx_comp_buf, s, len, 0, len, (struct re_registers *) 0);
+  return where >= 0;
+}
+#endif /* not emacs and not _POSIX_SOURCE */
+
+
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#if !defined(emacs)
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' and `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+
+/* POSIX regcomp: compile the null-terminated PATTERN into PREG according
+   to CFLAGS (REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NOSUB).
+
+   Returns 0 on success, otherwise a reg_errcode_t value cast to int.
+   REG_ESPACE is returned if the fastmap or the REG_ICASE translate
+   table cannot be allocated; in the latter case the fastmap that was
+   already allocated is released again, so nothing leaks. */
+#ifdef __STDC__
+int
+regcomp (regex_t * preg, __const__ char * pattern, int cflags)
+#else
+int
+regcomp (preg, pattern, cflags)
+     regex_t * preg;
+     __const__ char * pattern;
+     int cflags;
+#endif
+{
+  reg_errcode_t ret;
+  unsigned syntax
+    = cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
+
+  /* rx_compile will allocate the space for the compiled pattern. */
+  preg->buffer = 0;
+  preg->allocated = 0;
+  preg->fastmap = malloc (256);
+  if (!preg->fastmap)
+    return REG_ESPACE;
+  preg->fastmap_accurate = 0;
+
+  if (cflags & REG_ICASE)
+    {
+      unsigned i;
+
+      preg->translate = (unsigned char *) malloc (256);
+      if (!preg->translate)
+        {
+          /* Don't leak the fastmap allocated just above. */
+          free (preg->fastmap);
+          preg->fastmap = 0;
+          return (int) REG_ESPACE;
+        }
+
+      /* Map uppercase characters to corresponding lowercase ones. */
+      for (i = 0; i < CHAR_SET_SIZE; i++)
+        preg->translate[i] = isupper (i) ? tolower (i) : i;
+    }
+  else
+    preg->translate = 0;
+
+  /* If REG_NEWLINE is set, newlines are treated differently. */
+  if (cflags & REG_NEWLINE)
+    { /* REG_NEWLINE implies neither . nor [^...] match newline. */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      /* It also changes the matching behavior. */
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+
+  preg->no_sub = !!(cflags & REG_NOSUB);
+
+  /* POSIX says a null character in the pattern terminates it, so we
+     can use strlen here in compiling the pattern. */
+  preg->re_nsub = 0;
+  preg->start = 0;
+  preg->se_params = 0;
+  preg->syntax_parens = 0;
+  preg->rx.nodec = 0;
+  preg->rx.epsnodec = 0;
+  preg->rx.instruction_table = 0;
+  preg->rx.nfa_states = 0;
+  preg->rx.local_cset_size = 256;
+  preg->rx.start = 0;
+  preg->rx.se_list_cmp = posix_se_list_order;
+  preg->rx.start_set = 0;
+  ret = rx_compile (pattern, strlen (pattern), syntax, preg);
+  alloca (0);
+
+  /* POSIX doesn't distinguish between an unmatched open-group and an
+     unmatched close-group: both are REG_EPAREN. */
+  if (ret == REG_ERPAREN) ret = REG_EPAREN;
+
+  return (int) ret;
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match and REG_NOMATCH if not.  If the
+   temporary register arrays cannot be allocated, no search is made
+   and we return REG_ESPACE.  */
+
+#ifdef __STDC__
+int
+regexec (__const__ regex_t *preg, __const__ char *string,
+	 size_t nmatch, regmatch_t pmatch[],
+	 int eflags)
+#else
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+     __const__ regex_t *preg;
+     __const__ char *string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+#endif
+{
+  int ret;
+  struct re_registers regs;
+  regex_t private_preg;
+  int len = strlen (string);
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+  /* PREG is const-qualified, so the per-call settings below are made
+     on a local copy.  */
+  private_preg = *preg;
+
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+  /* The user has told us exactly how many registers to return
+   * information about, via `nmatch'.  We have to pass that on to the
+   * matching routines.
+   */
+  private_preg.regs_allocated = REGS_FIXED;
+
+  if (want_reg_info)
+    {
+      regs.num_regs = nmatch;
+      regs.start = (regoff_t *) malloc (nmatch * sizeof (regoff_t));
+      regs.end = (regoff_t *) malloc (nmatch * sizeof (regoff_t));
+      if (regs.start == 0 || regs.end == 0)
+	{
+	  /* Release whichever array was obtained, and report the
+	     failure as out-of-memory rather than as `no match'.  */
+	  if (regs.start != 0)
+	    free (regs.start);
+	  if (regs.end != 0)
+	    free (regs.end);
+	  return (int) REG_ESPACE;
+	}
+    }
+
+  /* Perform the searching operation.  */
+  ret = re_search (&private_preg,
+		   string, len,
+		   /* start: */ 0,
+		   /* range: */ len,
+		   want_reg_info ? &regs : (struct re_registers *) 0);
+
+  /* Copy the register information to the POSIX structure.  */
+  if (want_reg_info)
+    {
+      if (ret >= 0)
+	{
+	  /* Same type as NMATCH, so the comparison cannot wrap.  */
+	  size_t r;
+
+	  for (r = 0; r < nmatch; r++)
+	    {
+	      pmatch[r].rm_so = regs.start[r];
+	      pmatch[r].rm_eo = regs.end[r];
+	    }
+	}
+
+      /* If we needed the temporary register info, free the space now.  */
+      free (regs.start);
+      free (regs.end);
+    }
+
+  /* We want zero return to mean success, unlike `re_search'.  */
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Copy the message for ERRCODE (as returned by regcomp or regexec)
+   into ERRBUF, which holds ERRBUF_SIZE bytes.  The text comes from
+   rx_error_msg[ERRCODE]; a zero entry there stands for "Success".
+   When the message does not fit it is truncated and still
+   0-terminated; when ERRBUF_SIZE is 0 nothing is written.  The return
+   value is the size of the full message including its terminating 0.
+   PREG is not used.  */
+
+#ifdef __STDC__
+size_t
+regerror (int errcode, __const__ regex_t *preg,
+	  char *errbuf, size_t errbuf_size)
+#else
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+     int errcode;
+     __const__ regex_t *preg;
+     char *errbuf;
+     size_t errbuf_size;
+#endif
+{
+  __const__ char *text = rx_error_msg[errcode];
+  size_t needed;
+
+  if (text == 0)
+    text = "Success";
+  needed = strlen (text) + 1;	/* Counts the terminating 0.  */
+
+  /* No room at all: just report the size required.  */
+  if (errbuf_size == 0)
+    return needed;
+
+  if (needed <= errbuf_size)
+    strcpy (errbuf, text);
+  else
+    {
+      strncpy (errbuf, text, errbuf_size - 1);
+      errbuf[errbuf_size - 1] = 0;
+    }
+
+  return needed;
+}
+
+
+/* Give back the `buffer', `fastmap' and `translate' storage held by
+   PREG, then zero those pointers together with `allocated' and
+   `fastmap_accurate'.  */
+
+#ifdef __STDC__
+void
+regfree (regex_t *preg)
+#else
+void
+regfree (preg)
+     regex_t *preg;
+#endif
+{
+  /* Each pointer is tested first, so free is never handed a null.  */
+  if (preg->buffer)
+    free (preg->buffer);
+  if (preg->fastmap)
+    free (preg->fastmap);
+  if (preg->translate)
+    free (preg->translate);
+
+  preg->buffer = 0;
+  preg->allocated = 0;
+  preg->fastmap = 0;
+  preg->fastmap_accurate = 0;
+  preg->translate = 0;
+}
+
+#endif /* not emacs */
+
+
+
+
+
diff --git a/lib/rx.h b/lib/rx.h
new file mode 100644
index 0000000..b85c92a
--- /dev/null
+++ b/lib/rx.h
@@ -0,0 +1,3732 @@
+#if !defined(RXH) || defined(RX_WANT_SE_DEFS)
+#define RXH
+
+/* Copyright (C) 1992, 1993 Free Software Foundation, Inc.
+
+This file is part of the librx library.
+
+Librx is free software; you can redistribute it and/or modify it under
+the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+Librx is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this software; see the file COPYING.LIB. If not,
+write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
+02139, USA. */
+/* t. lord Wed Sep 23 18:20:57 1992 */
+
+
+
+
+
+
+
+
+#ifndef RX_WANT_SE_DEFS
+
+/* This page: Bitsets */
+
+#ifndef RX_subset
+typedef unsigned int RX_subset;
+#define RX_subset_bits (32)
+#define RX_subset_mask (RX_subset_bits - 1)
+#endif
+
+typedef RX_subset * rx_Bitset;
+
+#ifdef __STDC__
+typedef void (*rx_bitset_iterator) (void *, int member_index);
+#else
+typedef void (*rx_bitset_iterator) ();
+#endif
+
+/* Bitset access macros.  A bitset B is an array of RX_subset words and
+ * N is a bit (member) index.
+ * NOTE(review): rx_subset_singletons is not declared in this part of
+ * the file; presumably it holds one single-bit mask per bit position
+ * of an RX_subset -- confirm.
+ */
+
+/* Index of the RX_subset word that holds bit N. */
+#define rx_bitset_subset(N) ((N) / RX_subset_bits)
+/* The whole RX_subset word of B that holds bit N. */
+#define rx_bitset_subset_val(B,N) ((B)[rx_bitset_subset(N)])
+/* Apply operator OP between the word of B holding bit N and the
+ * rx_subset_singletons entry selected by N's position in that word.
+ */
+#define RX_bitset_access(B,N,OP) \
+ ((B)[rx_bitset_subset(N)] OP rx_subset_singletons[(N) & RX_subset_mask])
+/* The four operations built on RX_bitset_access: test (&), add (|=),
+ * delete (&= ~) and flip (^=) member N of B.
+ */
+#define RX_bitset_member(B,N) RX_bitset_access(B, N, &)
+#define RX_bitset_enjoin(B,N) RX_bitset_access(B, N, |=)
+#define RX_bitset_remove(B,N) RX_bitset_access(B, N, &= ~)
+#define RX_bitset_toggle(B,N) RX_bitset_access(B, N, ^= )
+/* Number of RX_subset words needed for N bits (rounded up). */
+#define rx_bitset_numb_subsets(N) (((N) + RX_subset_bits - 1) / RX_subset_bits)
+/* Size in bytes of a bitset able to hold N bits. */
+#define rx_sizeof_bitset(N) (rx_bitset_numb_subsets(N) * sizeof(RX_subset))
+
+
+
+/* This page: Splay trees. */
+
+#ifdef __STDC__
+typedef int (*rx_sp_comparer) (void * a, void * b);
+#else
+typedef int (*rx_sp_comparer) ();
+#endif
+
+struct rx_sp_node
+{
+ void * key;
+ void * data;
+ struct rx_sp_node * kids[2];
+};
+
+#ifdef __STDC__
+typedef void (*rx_sp_key_data_freer) (struct rx_sp_node *);
+#else
+typedef void (*rx_sp_key_data_freer) ();
+#endif
+
+
+/* giant inflatable hash trees */
+
+struct rx_hash_item
+{
+ struct rx_hash_item * next_same_hash;
+ struct rx_hash * table;
+ unsigned long hash;
+ void * data;
+ void * binding;
+};
+
+struct rx_hash
+{
+ struct rx_hash * parent;
+ int refs;
+ struct rx_hash * children[13];
+ struct rx_hash_item * buckets [13];
+ int bucket_size [13];
+};
+
+struct rx_hash_rules;
+
+#ifdef __STDC__
+/* should return like == */
+typedef int (*rx_hash_eq)(void *, void *);
+typedef struct rx_hash * (*rx_alloc_hash)(struct rx_hash_rules *);
+typedef void (*rx_free_hash)(struct rx_hash *,
+ struct rx_hash_rules *);
+typedef struct rx_hash_item * (*rx_alloc_hash_item)(struct rx_hash_rules *,
+ void *);
+typedef void (*rx_free_hash_item)(struct rx_hash_item *,
+ struct rx_hash_rules *);
+#else
+typedef int (*rx_hash_eq)();
+typedef struct rx_hash * (*rx_alloc_hash)();
+typedef void (*rx_free_hash)();
+typedef struct rx_hash_item * (*rx_alloc_hash_item)();
+typedef void (*rx_free_hash_item)();
+#endif
+
+struct rx_hash_rules
+{
+ rx_hash_eq eq;
+ rx_alloc_hash hash_alloc;
+ rx_free_hash free_hash;
+ rx_alloc_hash_item hash_item_alloc;
+ rx_free_hash_item free_hash_item;
+};
+
+
+/* Forward declarations */
+
+struct rx_cache;
+struct rx_superset;
+struct rx;
+struct rx_se_list;
+
+
+
+/*
+ * GLOSSARY
+ *
+ * regexp
+ * regular expression
+ * expression
+ * pattern - a `regular' expression. The expression
+ * need not be formally regular -- it can contain
+ * constructs that don't correspond to purely regular
+ * expressions.
+ *
+ * buffer
+ * string - the string (or strings) being searched or matched.
+ *
+ * pattern buffer - a structure of type `struct re_pattern_buffer'
+ * This in turn contains a `struct rx', which holds the
+ * NFA compiled from a pattern, as well as some of the state
+ * of a matcher using the pattern.
+ *
+ * NFA - nondeterministic finite automata. Some people
+ * use this term to refer to a member of the class of
+ * regular automata (those corresponding to a regular
+ * language). However, in this code, the meaning is
+ * more general. The automata used by Rx are comparable
+ * in power to what are usually called `push down automata'.
+ *
+ * Two NFA are built by rx for every pattern. One is built
+ * by the compiler. The other is built from the first, on
+ * the fly, by the matcher. The latter is called the `superstate
+ * NFA' because its states correspond to sets of states from
+ * the first NFA. (Joe Keane gets credit for the name
+ * `superstate NFA').
+ *
+ * NFA edges
+ * epsilon edges
+ * side-effect edges - The NFA compiled from a pattern can have three
+ * kinds of edges. Epsilon edges can be taken freely anytime
+ * their source state is reached. Character set edges can be
+ * taken when their source state is reached and when the next
+ * character in the buffer is a member of the set. Side effect
+ * edges imply a transition that can only be taken after the
+ * indicated side effect has been successfully accomplished.
+ * Some examples of side effects are:
+ *
+ * Storing the current match position to record the
+ * location of a parenthesized subexpression.
+ *
+ * Advancing the matcher over N characters if they
+ * match the N characters previously matched by a
+ * parenthesized subexpression.
+ *
+ * Both of those kinds of edges occur in the NFA generated
+ * by the pattern: \(.\)\1
+ *
+ * Epsilon and side effect edges are similar. Unfortunately,
+ * some of the code uses the name `epsilon edge' to mean
+ * both epsilon and side effect edges. For example, the
+ * function has_non_idempotent_epsilon_path computes the existence
+ * of a non-trivial path containing only a mix of epsilon and
+ * side effect edges. In that case `nonidempotent epsilon' is being
+ * used to mean `side effect'.
+ */
+
+
+
+
+
+/* LOW LEVEL PATTERN BUFFERS */
+
+/* Suppose that from some NFA state, more than one path through
+ * side-effect edges is possible. In what order should the paths
+ * be tried? A function of type rx_se_list_order answers that
+ * question. It compares two lists of side effects, and says
+ * which list comes first.
+ */
+
+#ifdef __STDC__
+typedef int (*rx_se_list_order) (struct rx *,
+ struct rx_se_list *,
+ struct rx_se_list *);
+#else
+typedef int (*rx_se_list_order) ();
+#endif
+
+
+
+/* Struct RX holds a compiled regular expression - that is, an nfa
+ * ready to be converted on demand to a more efficient superstate nfa.
+ * This is for the low level interface. The high-level interfaces enclose
+ * this in a `struct re_pattern_buffer'.
+ */
+struct rx
+{
+ /* The compiler assigns a unique id to every pattern.
+ * Like sequence numbers in X, there is a subtle bug here
+ * if you use Rx in a system that runs for a long time.
+ * But, because of the way the caches work out, it is almost
+ * impossible to trigger the Rx version of this bug.
+ *
+ * The id is used to validate superstates found in a cache
+ * of superstates. It isn't sufficient to let a superstate
+ * point back to the rx for which it was compiled -- the caller
+ * may be re-using a `struct rx' in which case the superstate
+ * is not really valid. So instead, superstates are validated
+ * by checking the sequence number of the pattern for which
+ * they were built.
+ */
+ int rx_id;
+
+ /* This is memory mgt. state for superstates. This may be
+ * shared by more than one struct rx.
+ */
+ struct rx_cache * cache;
+
+ /* Every regex defines the size of its own character set.
+ * A superstate has an array of this size, with each element
+ * a `struct rx_inx'. So, don't make this number too large.
+ * In particular, don't make it 2^16.
+ */
+ int local_cset_size;
+
+ /* After the NFA is built, it is copied into a contiguous region
+ * of memory (mostly for compatibility with GNU regex).
+ * Here is that region, and its size:
+ */
+ void * buffer;
+ unsigned long allocated;
+
+ /* Clients of RX can ask for some extra storage in the space pointed
+ * to by BUFFER. The field RESERVED is an input parameter to the
+ * compiler. After compilation, this much space will be available
+ * at (buffer + allocated - reserved)
+ */
+ unsigned long reserved;
+
+ /* --------- The remaining fields are for internal use only. --------- */
+ /* --------- But! they must be initialized to 0. --------- */
+
+ /* NODEC is the number of nodes in the NFA with non-epsilon
+ * transitions.
+ */
+ int nodec;
+
+ /* EPSNODEC is the number of nodes with only epsilon transitions. */
+ int epsnodec;
+
+ /* The sum (NODEC + EPSNODEC) is the total number of states in the
+ * compiled NFA.
+ */
+
+ /* Lists of side effects as stored in the NFA are `hash consed'..meaning
+ * that lists with the same elements are ==. During compilation,
+ * this table facilitates hash-consing.
+ */
+ struct rx_hash se_list_memo;
+
+ /* Lists of NFA states are also hashed.
+ */
+ struct rx_hash set_list_memo;
+
+
+
+
+ /* The compiler and matcher must build a number of instruction frames.
+ * The format of these frames is fixed (c.f. struct rx_inx). The values
+ * of the instructions are not fixed.
+ *
+ * An enumerated type (enum rx_opcode) defines the set of instructions
+ * that the compiler or matcher might generate. When filling an instruction
+ * frame, the INX field is found by indexing this instruction table
+ * with an opcode:
+ */
+ void ** instruction_table;
+
+ /* The list of all states in an NFA.
+ * During compilation, the NEXT field of NFA states links this list.
+ * After compilation, all the states are compacted into an array,
+ * ordered by state id numbers. At that time, this points to the base
+ * of that array.
+ */
+ struct rx_nfa_state *nfa_states;
+
+ /* Every nfa begins with one distinguished starting state:
+ */
+ struct rx_nfa_state *start;
+
+ /* This orders the search through super-nfa paths.
+ * See the comment near the typedef of rx_se_list_order.
+ */
+ rx_se_list_order se_list_cmp;
+
+ struct rx_superset * start_set;
+};
+
+
+
+
+/* SYNTAX TREES */
+
+/* Compilation is in stages.
+ *
+ * In the first stage, a pattern specified by a string is
+ * translated into a syntax tree. Later stages will convert
+ * the syntax tree into an NFA optimized for conversion to a
+ * superstate-NFA.
+ *
+ * This page is about syntax trees.
+ */
+
+enum rexp_node_type
+{
+ r_cset, /* Match from a character set. `a' or `[a-z]'*/
+ r_concat, /* Concat two subexpressions. `ab' */
+ r_alternate, /* Choose one of two subexpressions. `a\|b' */
+ r_opt, /* Optional subexpression. `a?' */
+ r_star, /* Repeated subexpression. `a*' */
+
+
+ /* A 2phase-star is a variation on a repeated subexpression.
+ * In this case, there are two subexpressions. The first, if matched,
+ * begins a repetition (otherwise, the whole expression matches the
+ * empty string).
+ *
+ * After matching the first subexpression, a 2phase star either finishes,
+ * or matches the second subexpression. If the second subexpression is
+ * matched, then the whole construct repeats.
+ *
+ * 2phase stars are used in two circumstances. First, they
+ * are used as part of the implementation of POSIX intervals (counted
+ * repetitions). Second, they are used to implement proper star
+ * semantics when the repeated subexpression contains paths of
+ * only side effects. See rx_compile for more information.
+ */
+ r_2phase_star,
+
+
+ /* c.f. "typedef void * rx_side_effect" */
+ r_side_effect,
+
+ /* This is an extension type: It is for transient use in source->source
+ * transformations (implemented over syntax trees).
+ */
+ r_data
+};
+
+/* A side effect is a matcher-specific action associated with
+ * transitions in the NFA. The details of side effects are up
+ * to the matcher. To the compiler and superstate constructors
+ * side effects are opaque:
+ */
+
+typedef void * rx_side_effect;
+
+/* Nodes in a syntax tree are of this type.  TYPE says what kind of
+ * node this is; PARAMS carries the node's payload.
+ * NOTE(review): which member of PARAMS goes with which node type is
+ * not stated here.  Going by the names, presumably `cset' belongs to
+ * r_cset, `side_effect' to r_side_effect, `data' to r_data, and
+ * `pair' holds the subexpression(s) of the remaining types -- confirm
+ * against the compiler.
+ */
+struct rexp_node
+{
+ enum rexp_node_type type;
+ union
+ {
+ rx_Bitset cset;
+ rx_side_effect side_effect;
+ struct
+ {
+ struct rexp_node *left;
+ struct rexp_node *right;
+ } pair;
+ void * data;
+ } params;
+};
+
+
+
+/* NFA
+ *
+ * A syntax tree is compiled into an NFA. This page defines the structure
+ * of that NFA.
+ */
+
+struct rx_nfa_state
+{
+ /* These are kept in a list as the NFA is being built. */
+ struct rx_nfa_state *next;
+
+ /* After the NFA is built, states are given integer id's.
+ * States whose outgoing transitions are all either epsilon or
+ * side effect edges are given ids less than 0. Other states
+ * are given successive non-negative ids starting from 0.
+ */
+ int id;
+
+ /* The list of NFA edges that go from this state to some other. */
+ struct rx_nfa_edge *edges;
+
+ /* If you land in this state, then you implicitly land
+ * in all other states reachable by only epsilon transitions.
+ * Call the set of maximal paths to such states the epsilon closure
+ * of this state.
+ *
+ * There may be other states that are reachable by a mixture of
+ * epsilon and side effect edges. Consider the set of maximal paths
+ * of that sort from this state. Call it the epsilon-side-effect
+ * closure of the state.
+ *
+ * The epsilon closure of the state is a subset of the epsilon-side-
+ * effect closure. It consists of all the paths that contain
+ * no side effects -- only epsilon edges.
+ *
+ * The paths in the epsilon-side-effect closure can be partitioned
+ * into equivalence sets. Two paths are equivalent if they have the
+ * same set of side effects, in the same order. The epsilon-closure
+ * is one of these equivalence sets. Let's call these equivalence
+ * sets: observably equivalent path sets. That name is chosen
+ * because equivalence of two paths means they cause the same side
+ * effects -- so they lead to the same subsequent observations other
+ * than that they may wind up in different target states.
+ *
+ * The superstate nfa, which is derived from this nfa, is based on
+ * the observation that all of the paths in an observably equivalent
+ * path set can be explored at the same time, provided that the
+ * matcher keeps track not of a single nfa state, but of a set of
+ * states. In particular, after following all the paths in an
+ * observably equivalent set, you wind up at a set of target states.
+ * That set of target states corresponds to one state in the
+ * superstate NFA.
+ *
+ * Statically, before matching begins, it is convenient to analyze the
+ * nfa. Each state is labeled with a list of the observably
+ * equivalent path sets whose union covers all the
+ * epsilon-side-effect paths beginning in this state. This list is
+ * called the possible futures of the state.
+ *
+ * A trivial example is this NFA:
+ * s1
+ * A ---> B
+ *
+ * s2
+ * ---> C
+ *
+ * epsilon s1
+ * ---------> D ------> E
+ *
+ *
+ * In this example, A has two possible futures.
+ * One invokes the side effect `s1' and contains two paths,
+ * one ending in state B, the other in state E.
+ * The other invokes the side effect `s2' and contains only
+ * one path, landing in state C.
+ */
+ struct rx_possible_future *futures;
+
+
+ /* There are exactly two distinguished states in every NFA: */
+ unsigned int is_final:1;
+ unsigned int is_start:1;
+
+ /* These are used during NFA construction... */
+ unsigned int eclosure_needed:1;
+ unsigned int mark:1;
+};
+
+
+/* An edge in an NFA is typed: */
+enum rx_nfa_etype
+{
+ /* A cset edge is labeled with a set of characters one of which
+ * must be matched for the edge to be taken.
+ */
+ ne_cset,
+
+ /* An epsilon edge is taken whenever its starting state is
+ * reached.
+ */
+ ne_epsilon,
+
+ /* A side effect edge is taken whenever its starting state is
+ * reached. Side effects may cause the match to fail or the
+ * position of the matcher to advance.
+ */
+ ne_side_effect /* A special kind of epsilon. */
+};
+
+/* One edge (transition) of the NFA. */
+struct rx_nfa_edge
+{
+ /* Next edge in a list.  NOTE(review): presumably the list hanging
+ * off the `edges' field of the state the edge leaves -- confirm.
+ */
+ struct rx_nfa_edge *next;
+ /* The kind of edge; see enum rx_nfa_etype. */
+ enum rx_nfa_etype type;
+ /* The state this edge leads to. */
+ struct rx_nfa_state *dest;
+ /* Label of the edge.  NOTE(review): presumably `cset' is meaningful
+ * for ne_cset edges and `side_effect' for ne_side_effect edges,
+ * with neither used by ne_epsilon -- confirm.
+ */
+ union
+ {
+ rx_Bitset cset;
+ rx_side_effect side_effect;
+ } params;
+};
+
+
+
+/* A possible future consists of a list of side effects
+ * and a set of destination states. Below are their
+ * representations. These structures are hash-consed which
+ * means that lists with the same elements share a representation
+ * (their addresses are ==).
+ */
+
+struct rx_nfa_state_set
+{
+ struct rx_nfa_state * car;
+ struct rx_nfa_state_set * cdr;
+};
+
+struct rx_se_list
+{
+ rx_side_effect car;
+ struct rx_se_list * cdr;
+};
+
+struct rx_possible_future
+{
+ struct rx_possible_future *next;
+ struct rx_se_list * effects;
+ struct rx_nfa_state_set * destset;
+};
+
+
+
+/* This begins the description of the superstate NFA.
+ *
+ * The superstate NFA corresponds to the NFA in these ways:
+ *
+ * Every superstate NFA state SUPER corresponds to a set of NFA states,
+ * nfa_states(SUPER).
+ *
+ * Superstate edges correspond to NFA paths.
+ *
+ * The superstate has no epsilon transitions;
+ * every edge has a character label, and a (possibly empty) side
+ * effect label. The side effect label corresponds to a list of
+ * side effects that occur in the NFA. These parts are referred
+ * to as: superedge_character(EDGE) and superedge_sides(EDGE).
+ *
+ * For a superstate edge EDGE starting in some superstate SUPER,
+ * the following is true (in pseudo-notation :-):
+ *
+ * exists DEST in nfa_states s.t.
+ * exists nfaEDGE in nfa_edges s.t.
+ * origin (nfaEDGE) == DEST
+ * && origin (nfaEDGE) is a member of nfa_states(SUPER)
+ * && exists PF in possible_futures(dest(nfaEDGE)) s.t.
+ * sides_of_possible_future (PF) == superedge_sides (EDGE)
+ *
+ * also:
+ *
+ * let SUPER2 := superedge_destination(EDGE)
+ * nfa_states(SUPER2)
+ * == union of all nfa state sets S s.t.
+ * exists PF in possible_futures(dest(nfaEDGE)) s.t.
+ * sides_of_possible_future (PF) == superedge_sides (EDGE)
+ * && S == dests_of_possible_future (PF) }
+ *
+ * Or in english, every superstate is a set of nfa states. A given
+ * character and a superstate implies many transitions in the NFA --
+ * those that begin with an edge labeled with that character from a
+ * state in the set corresponding to the superstate.
+ *
+ * The destinations of those transitions each have a set of possible
+ * futures. A possible future is a list of side effects and a set of
+ * destination NFA states. Two sets of possible futures can be
+ * `merged' by combining all pairs of possible futures that have the
+ * same side effects. A pair is combined by creating a new future
+ * with the same side effect but the union of the two destination sets.
+ * In this way, all the possible futures suggested by a superstate
+ * and a character can be merged into a set of possible futures where
+ * no two elements of the set have the same set of side effects.
+ *
+ * The destination of a possible future, being a set of NFA states,
+ * corresponds to a supernfa state. So, the merged set of possible
+ * futures we just created can serve as a set of edges in the
+ * supernfa.
+ *
+ * The representation of the superstate nfa and the nfa is critical.
+ * The nfa has to be compact, but has to facilitate the rapid
+ * computation of missing superstates. The superstate nfa has to
+ * be fast to interpret, lazily constructed, and bounded in space.
+ *
+ * To facilitate interpretation, the superstate data structures are
+ * peppered with `instruction frames'. There is an instruction set
+ * defined below which matchers using the supernfa must be able to
+ * interpret.
+ *
+ * We'd like to make it possible but not mandatory to use code
+ * addresses to represent instructions (c.f. gcc's computed goto).
+ * Therefore, we define an enumerated type of opcodes, and when
+ * writing one of these instructions into a data structure, use
+ * the opcode as an index into a table of instruction values.
+ *
+ * Here are the opcodes that occur in the superstate nfa:
+ */
+
+
+/* Every superstate contains a table of instruction frames indexed
+ * by characters. A normal `move' in a matcher is to fetch the next
+ * character and use it as an index into a superstate's transition
+ * table.
+ *
+ * In the fastest case, only one edge follows from that character.
+ * In other cases there is more work to do.
+ *
+ * The descriptions of the opcodes refer to data structures that are
+ * described further below.
+ */
+
+enum rx_opcode
+{
+ /*
+ * BACKTRACK_POINT is invoked when a character transition in
+ * a superstate leads to more than one edge. In that case,
+ * the edges have to be explored independently using a backtracking
+ * strategy.
+ *
+ * A BACKTRACK_POINT instruction is stored in a superstate's
+ * transition table for some character when it is known that that
+ * character crosses more than one edge. On encountering this
+ * instruction, the matcher saves enough state to backtrack to this
+ * point in the match later.
+ */
+ rx_backtrack_point = 0, /* data is (struct transition_class *) */
+
+ /*
+ * RX_DO_SIDE_EFFECTS evaluates the side effects of an epsilon path.
+ * There is one occurrence of this instruction per rx_distinct_future.
+ * This instruction is skipped if a rx_distinct_future has no side effects.
+ */
+ rx_do_side_effects = rx_backtrack_point + 1,
+
+ /* data is (struct rx_distinct_future *) */
+
+ /*
+ * RX_CACHE_MISS instructions are stored in rx_distinct_futures whose
+ * destination superstate has been reclaimed (or was never built).
+ * It recomputes the destination superstate.
+ * RX_CACHE_MISS is also stored in a superstate transition table before
+ * any of its edges have been built.
+ */
+ rx_cache_miss = rx_do_side_effects + 1,
+ /* data is (struct rx_distinct_future *) */
+
+ /*
+ * RX_NEXT_CHAR is called to consume the next character and take the
+ * corresponding transition. This is the only instruction that uses
+ * the DATA field of the instruction frame instead of DATA_2.
+ * (see EXPLORE_FUTURE in regex.c).
+ */
+ rx_next_char = rx_cache_miss + 1, /* data is (struct superstate *) */
+
+ /* RX_BACKTRACK indicates that a transition fails.
+ */
+ rx_backtrack = rx_next_char + 1, /* no data */
+
+ /*
+ * RX_ERROR_INX is stored only in places that should never be executed.
+ */
+ rx_error_inx = rx_backtrack + 1, /* Not supposed to occur. */
+
+ rx_num_instructions = rx_error_inx + 1
+};
+
+/* An id_instruction_table holds the values stored in instruction
+ * frames. The table is indexed by the enums declared above.
+ */
+extern void * rx_id_instruction_table[rx_num_instructions];
+
+/* The heart of the matcher is a `word-code-interpreter'
+ * (like a byte-code interpreter, except that instructions
+ * are a full word wide).
+ *
+ * Instructions are not stored in a vector of code, instead,
+ * they are scattered throughout the data structures built
+ * by the regexp compiler and the matcher. One word-code instruction,
+ * together with the arguments to that instruction, constitute
+ * an instruction frame (struct rx_inx).
+ *
+ * This structure type is padded by hand to a power of 2 because
+ * in one of the dominant cases, we dispatch by indexing a table
+ * of instruction frames. If that indexing can be accomplished
+ * by just a shift of the index, we're happy.
+ *
+ * Instructions take at most one argument, but there are two
+ * slots in an instruction frame that might hold that argument.
+ * These are called data and data_2. The data slot is only
+ * used for one instruction (RX_NEXT_CHAR). For all other
+ * instructions, data should be set to 0.
+ *
+ * RX_NEXT_CHAR is the most important instruction by far.
+ * By reserving the data field for its exclusive use,
+ * instruction dispatch is sped up in that case. There is
+ * no need to fetch both the instruction and the data,
+ * only the data is needed. In other words, a `cycle' begins
+ * by fetching the field data. If that is non-0, then it must
+ * be the destination state of a next_char transition, so
+ * make that value the current state, advance the match position
+ * by one character, and start a new cycle. On the other hand,
+ * if data is 0, fetch the instruction and do a more complicated
+ * dispatch on that.
+ */
+
+/* One instruction frame: an instruction together with its (at most
+ * one) argument.
+ */
+struct rx_inx
+{
+ /* Argument slot reserved for RX_NEXT_CHAR (the destination state);
+ * 0 for every other instruction.
+ */
+ void * data;
+ /* Argument slot used by the instructions other than RX_NEXT_CHAR. */
+ void * data_2;
+ /* The instruction value, obtained by indexing an instruction table
+ * with an opcode.
+ */
+ void * inx;
+ /* NOTE(review): no use is visible here; presumably the hand padding
+ * that brings the frame to a power-of-2 size -- confirm.
+ */
+ void * fnord;
+};
+
+#ifndef RX_TAIL_ARRAY
+#define RX_TAIL_ARRAY 1
+#endif
+
+/* A superstate corresponds to a set of nfa states. Those sets are
+ * represented by STRUCT RX_SUPERSET. The constructors
+ * guarantee that only one (shared) structure is created for a given set.
+ */
+struct rx_superset
+{
+ int refs; /* This is a reference counted structure. */
+
+ /* We keep these sets in a cache because (in an unpredictable way),
+ * the same set is often created again and again. But that is also
+ * problematic -- compatibility with POSIX and GNU regex requires
+ * that we not be able to tell when a program discards a particular
+ * NFA (thus invalidating the supersets created from it).
+ *
+ * But when a cache hit appears to occur, we will have in hand the
+ * nfa for which it may have happened. That is why every nfa is given
+ * its own sequence number. On a cache hit, the cache is validated
+ * by comparing the nfa sequence number to this field:
+ */
+ int id;
+
+ struct rx_nfa_state * car; /* May or may not be a valid addr. */
+ struct rx_superset * cdr;
+
+ /* If the corresponding superstate exists: */
+ struct rx_superstate * superstate;
+
+
+ /* There is another bookkeeping problem. It is expensive to
+ * compute the starting nfa state set for an nfa. So, once computed,
+ * it is cached in the `struct rx'.
+ *
+ * But, the state set can be flushed from the superstate cache.
+ * When that happens, we can't know if the corresponding `struct rx'
+ * is still alive or if it has been freed or re-used by the program.
+ * So, the cached pointer to this set in a struct rx might be invalid
+ * and we need a way to validate it.
+ *
+ * Fortunately, even if this set is flushed from the cache, it is
+ * not freed. It just goes on the free-list of supersets.
+ * So we can still examine it.
+ *
+ * So to validate a starting set memo, check to see if the
+ * starts_for field still points back to the struct rx in question,
+ * and if the ID matches the rx sequence number.
+ */
+ struct rx * starts_for;
+
+ /* This is used to link into a hash bucket so these objects can
+ * be `hash-consed'.
+ */
+ struct rx_hash_item hash_item;
+};
+
+#define rx_protect_superset(RX,CON) (++(CON)->refs)
+
+/* The terminology may be confusing (rename this structure?).
+ * Every character occurs in at most one rx_super_edge per super-state.
+ * But, that structure might have more than one option, indicating a point
+ * of non-determinism.
+ *
+ * In other words, this structure holds a list of superstate edges
+ * sharing a common starting state and character label. The edges
+ * are in the field OPTIONS. All superstate edges sharing the same
+ * starting state and character are in this list.
+ */
+struct rx_super_edge
+{
+ struct rx_super_edge *next; /* Presumably chains the EDGES list of a superstate -- confirm. */
+ struct rx_inx rx_backtrack_frame;
+ int cset_size;
+ rx_Bitset cset; /* Presumably the characters labelling these edges -- confirm. */
+ struct rx_distinct_future *options; /* The edges themselves, as described above. */
+};
+
+/* A superstate is a set of nfa states (RX_SUPERSET) along
+ * with a transition table. Superstates are built on demand and reclaimed
+ * without warning. To protect a superstate from this ghastly fate,
+ * use rx_lock_superstate (and rx_unlock_superstate to release it again).
+ */
+struct rx_superstate
+{
+ int rx_id; /* c.f. the id field of rx_superset */
+ int locks; /* protection from reclamation */
+
+ /* Within a superstate cache, all the superstates are kept in a big
+ * queue. The tail of the queue is the state most likely to be
+ * reclaimed. The *recyclable fields hold the queue position of
+ * this state.
+ */
+ struct rx_superstate * next_recyclable;
+ struct rx_superstate * prev_recyclable;
+
+ /* The supernfa edges that exist in the cache and that have
+ * this state as their destination are kept in this list:
+ */
+ struct rx_distinct_future * transition_refs;
+
+ /* The list of nfa states corresponding to this superstate: */
+ struct rx_superset * contents;
+
+ /* The list of edges in the cache beginning from this state. */
+ struct rx_super_edge * edges;
+
+ /* A tail of the recyclable queue is marked as semifree. A semifree
+ * state has no incoming next_char transitions -- any transition
+ * into a semifree state causes a complex dispatch with the side
+ * effect of rescuing the state from its semifree state.
+ *
+ * An alternative to this might be to make next_char more expensive,
+ * and to move a state to the head of the recyclable queue whenever
+ * it is entered. That way, popular states would never be recycled.
+ *
+ * But unilaterally making next_char more expensive actually loses.
+ * So, incoming transitions are only made expensive for states near
+ * the tail of the recyclable queue. The more cache contention
+ * there is, the more frequently a state will have to prove itself
+ * and be moved back to the front of the queue. If there is less
+ * contention, then popular states just aggregate in the front of
+ * the queue and stay there.
+ */
+ int is_semifree;
+
+
+ /* This keeps track of the size of the transition table for this
+ * state. There is a half-hearted attempt to support variable sized
+ * superstates.
+ */
+ int trans_size;
+
+ /* Indexed by characters... */
+ struct rx_inx transitions[RX_TAIL_ARRAY];
+};
+
+
+/* A list of distinct futures define the edges that leave from a
+ * given superstate on a given character. c.f. rx_super_edge.
+ */
+
+struct rx_distinct_future
+{
+ struct rx_distinct_future * next_same_super_edge[2];
+ struct rx_distinct_future * next_same_dest; /* Presumably links the TRANSITION_REFS list */
+ struct rx_distinct_future * prev_same_dest; /* of the destination state -- confirm. */
+ struct rx_superstate * present; /* source state */
+ struct rx_superstate * future; /* destination state */
+ struct rx_super_edge * edge; /* Presumably the rx_super_edge listing this future -- confirm. */
+
+
+ /* The future_frame holds the instruction that should be executed
+ * after all the side effects are done, when it is time to complete
+ * the transition to the next state.
+ *
+ * Normally this is a next_char instruction, but it may be a
+ * cache_miss instruction as well, depending on whether or not
+ * the superstate is in the cache and semifree.
+ *
+ * If this is the only future for a given superstate/char, and
+ * if there are no side effects to be performed, this frame is
+ * not used (directly) at all. Instead, its contents are copied
+ * into the transition table of the starting state of this dist. future.
+ */
+ struct rx_inx future_frame;
+
+ struct rx_inx side_effects_frame;
+ struct rx_se_list * effects; /* Presumably the side effects to perform first -- confirm. */
+};
+
+#define rx_lock_superstate(R,S) ((S)->locks++) /* R is unused; yields the OLD lock count. */
+#define rx_unlock_superstate(R,S) (--(S)->locks) /* R is unused; yields the NEW lock count. */
+
+
+/* This page destined for rx.h */
+
+struct rx_blocklist
+{
+ struct rx_blocklist * next; /* Next block in the chain. */
+ int bytes; /* Presumably the size of this block -- confirm. */
+};
+
+struct rx_freelist
+{
+ struct rx_freelist * next; /* Next free object; the free_* fields of rx_cache head such lists. */
+};
+
+struct rx_cache; /* Forward declaration, needed by the prototype just below. */
+
+#ifdef __STDC__
+typedef void (*rx_morecore_fn)(struct rx_cache *);
+#else /* Pre-ANSI compiler: same type, declared without a prototype. */
+typedef void (*rx_morecore_fn)();
+#endif
+
+/* You use this to control the allocation of superstate data
+ * during matching. Most of it should be initialized to 0.
+ *
+ * A MORECORE function is necessary. It should allocate
+ * a new block of memory or return 0.
+ * A default that uses malloc is called `rx_morecore'.
+ *
+ * The number of SUPERSTATES_ALLOWED indirectly limits how much memory
+ * the system will try to allocate. The default is 128. Batch style
+ * applications that are very regexp intensive should use as high a number
+ * as possible without thrashing.
+ *
+ * The LOCAL_CSET_SIZE is the number of characters in a character set.
+ * It is therefore the number of entries in a superstate transition table.
+ * Generally, it should be 256. If your character set has 16 bits,
+ * it is better to translate your regexps into equivalent 8 bit patterns.
+ */
+
+struct rx_cache
+{
+ struct rx_hash_rules superset_hash_rules;
+
+ /* Objects are allocated by incrementing a pointer that
+ * scans across rx_blocklists.
+ */
+ struct rx_blocklist * memory;
+ struct rx_blocklist * memory_pos;
+ int bytes_left;
+ char * memory_addr;
+ rx_morecore_fn morecore; /* The MORECORE function described above. */
+
+ /* Freelists. */
+ struct rx_freelist * free_superstates;
+ struct rx_freelist * free_transition_classes;
+ struct rx_freelist * free_discernable_futures;
+ struct rx_freelist * free_supersets;
+ struct rx_freelist * free_hash;
+
+ /* Two sets of superstates -- those that are semifreed, and those
+ * that are being used.
+ */
+ struct rx_superstate * lru_superstate;
+ struct rx_superstate * semifree_superstate;
+
+ struct rx_superset * empty_superset;
+
+ int superstates;
+ int semifree_superstates;
+ int hits; /* Presumably cache hit/miss statistics -- confirm. */
+ int misses;
+ int superstates_allowed; /* SUPERSTATES_ALLOWED, described above. */
+
+ int local_cset_size; /* LOCAL_CSET_SIZE: entries per transition table. */
+ void ** instruction_table;
+
+ struct rx_hash superset_table;
+};
+
+
+
+/* The lowest-level search function supports arbitrarily fragmented
+ * strings and (optionally) suspendable/resumable searches.
+ *
+ * Callers have to provide a few hooks.
+ */
+
+#ifndef __GNUC__ /* GCC itself accepts __const__ as a keyword. */
+#ifdef __STDC__
+#define __const__ const
+#else
+#define __const__ /* Not an ANSI compiler: the qualifier is dropped. */
+#endif
+#endif
+
+/* This holds a matcher position within a string that may be supplied
+ * in fragments (`bursts'). */
+struct rx_string_position
+{
+ __const__ unsigned char * pos; /* The current pos. */
+ __const__ unsigned char * string; /* The current string burst. */
+ __const__ unsigned char * end; /* First invalid position >= POS. */
+ int offset; /* Integer address of the current burst. */
+ int size; /* Current string's size. */
+ int search_direction; /* 1 or -1 */
+ int search_end; /* First position to not try. */
+};
+
+
+enum rx_get_burst_return
+{
+ rx_get_burst_continuation, /* Suspended; the reentering search call retries get_burst. */
+ rx_get_burst_error,
+ rx_get_burst_ok,
+ rx_get_burst_no_more /* Presumably: no burst is available at that position -- confirm. */
+};
+
+
+/* A call to GET_BURST should make POS valid. It might be invalid
+ * if the STRING field doesn't point to a burst that actually
+ * contains POS.
+ *
+ * GET_BURST should take a clue from SEARCH_DIRECTION (1 or -1) as to
+ * whether or not to pad to the left. Padding to the right is always
+ * appropriate, but need not go past the point indicated by STOP.
+ *
+ * If a continuation is returned, then the reentering call to
+ * a search function will retry the get_burst.
+ *
+ * NOTE(review): APP_CLOSURE looks like an opaque pointer for the
+ * hook's own use -- confirm against the callers in rx.c.
+ */
+
+#ifdef __STDC__
+typedef enum rx_get_burst_return
+ (*rx_get_burst_fn) (struct rx_string_position * pos,
+ void * app_closure,
+ int stop);
+
+#else
+typedef enum rx_get_burst_return (*rx_get_burst_fn) ();
+#endif
+
+
+enum rx_back_check_return
+{
+ rx_back_check_continuation, /* Suspended; the reentering search call retries back_check. */
+ rx_back_check_error,
+ rx_back_check_pass, /* The text at POS matched [LPAREN..RPAREN]. */
+ rx_back_check_fail
+};
+
+/* Back_check should advance the position it is passed
+ * over rparen - lparen characters and return pass iff
+ * the characters starting at POS match those indexed
+ * by [LPAREN..RPAREN].
+ *
+ * If a continuation is returned, then the reentering call to
+ * a search function will retry the back_check.
+ *
+ * NOTE(review): TRANSLATE is presumably the translate table of the
+ * pattern buffer (or 0 for none) -- confirm against rx.c.
+ */
+
+#ifdef __STDC__
+typedef enum rx_back_check_return
+ (*rx_back_check_fn) (struct rx_string_position * pos,
+ int lparen,
+ int rparen,
+ unsigned char * translate,
+ void * app_closure,
+ int stop);
+
+#else
+typedef enum rx_back_check_return (*rx_back_check_fn) ();
+#endif
+
+
+
+
+/* A call to fetch_char should return the character at POS or POS + 1.
+ * Returning continuations here isn't supported. OFFSET is either 0 or 1
+ * and indicates which of the two characters is desired.
+ */
+
+#ifdef __STDC__
+typedef int (*rx_fetch_char_fn) (struct rx_string_position * pos,
+ int offset,
+ void * app_closure,
+ int stop);
+#else
+typedef int (*rx_fetch_char_fn) ();
+#endif
+
+
+enum rx_search_return
+{
+ rx_search_continuation = -4, /* suspended; the search function can be reentered */
+ rx_search_error = -3,
+ rx_search_soft_fail = -2, /* failed by running out of string */
+ rx_search_fail = -1 /* failed only by reaching failure states */
+ /* return values >= 0 indicate the position of a successful match */
+};
+
+
+
+
+
+
+/* regex.h
+ *
+ * The remaining declarations replace regex.h.
+ */
+
+/* This is an array of error messages corresponding to the error codes
+ * (reg_errcode_t); presumably it is indexed by the code -- confirm.
+ */
+extern __const__ char *re_error_msg[];
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c.
+ NOTE(review): that table presumably follows the order of these
+ enumerators, so reordering them matters too -- confirm. */
+typedef enum
+{
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* The regex.c support, as a client of rx, defines a set of possible
+ * side effects that can be added to the edge labels of nfa edges.
+ * Here is the list of side effects in use.
+ *
+ * The enumerators are not spelled out here. "rx.h" is included with
+ * RX_WANT_SE_DEFS defined, and every RX_DEF_SE / RX_DEF_CPLX_SE entry
+ * seen during that inclusion expands to `NAME VALUE,' (the IDEM
+ * argument is dropped by both macros).
+ */
+
+enum re_side_effects
+{
+#define RX_WANT_SE_DEFS 1
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#define RX_DEF_SE(IDEM, NAME, VALUE) NAME VALUE,
+#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) NAME VALUE,
+#include "rx.h"
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#undef RX_WANT_SE_DEFS
+ re_floogle_flap = 65533 /* Closes the list after the generated `NAME VALUE,' entries; NOTE(review): purpose of the value is not evident here. */
+};
+
+/* These hold parameters for the kinds of side effects that are possible
+ * in the supported pattern languages. These include things like the
+ * numeric bounds of {} operators and the index of paren registers for
+ * subexpression measurement or backreferencing.
+ */
+struct re_se_params
+{
+ enum re_side_effects se; /* Which side effect this entry parameterizes. */
+ int op1; /* Operands of SE, e.g. {} bounds or a paren index. */
+ int op2;
+};
+
+typedef unsigned reg_syntax_t; /* A mask of the RE_* syntax bits defined further down. */
+
+struct re_pattern_buffer
+{
+ struct rx rx;
+ reg_syntax_t syntax; /* See below for syntax bit definitions. */
+
+ unsigned int no_sub:1; /* If set, don't return register offsets. */
+ unsigned int not_bol:1; /* If set, the anchors ('^' and '$') don't */
+ unsigned int not_eol:1; /* match at the ends of the string. */
+ unsigned int newline_anchor:1;/* If true, an anchor at a newline matches.*/
+ unsigned int least_subs:1; /* If set, and returning registers, return
+ * as few values as possible. Only
+ * backreferenced groups and group 0 (the whole
+ * match) will be returned.
+ */
+
+ /* If true, this says that the matcher should keep registers on its
+ * backtracking stack. For many patterns, we can easily determine that
+ * this isn't necessary.
+ */
+ unsigned int match_regs_on_stack:1;
+ unsigned int search_regs_on_stack:1;
+
+ /* is_anchored and begbuf_only are filled in by rx_compile. */
+ unsigned int is_anchored:1; /* Anchored by ^? */
+ unsigned int begbuf_only:1; /* Anchored to char position 0? */
+
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ * for `max (RE_NREGS, re_nsub + 1)' groups.
+ * If REGS_REALLOCATE, reallocate space if necessary.
+ * If REGS_FIXED, use what's there.
+ */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned int regs_allocated:2;
+
+
+ /* Either a translate table to apply to all characters before
+ * comparing them, or zero for no translation. The translation
+ * is applied to a pattern when it is compiled and to a string
+ * when it is matched.
+ */
+ unsigned char * translate;
+
+ /* If this is a valid pointer, it tells rx not to store the extents of
+ * certain subexpressions (those corresponding to non-zero entries).
+ * Passing 0x1 is the same as passing an array of all ones. Passing 0x0
+ * is the same as passing an array of all zeros.
+ * The array should contain as many entries as there are subexps in the
+ * regexp.
+ *
+ * For POSIX compatibility, when using regcomp and regexec this field
+ * is zeroed and ignored.
+ */
+ char * syntax_parens;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ void * buffer; /* Malloced memory for the nfa. */
+ unsigned long allocated; /* Size of that memory. */
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ * the fastmap, if there is one, to skip over impossible
+ * starting points for matches. */
+ char *fastmap;
+
+ unsigned int fastmap_accurate:1; /* These three are internal. */
+ unsigned int can_match_empty:1;
+ struct rx_nfa_state * start; /* The nfa starting state. */
+
+ /* This is the list of iterator bounds for {lo,hi} constructs.
+ * The memory pointed to is part of the rx->buffer.
+ */
+ struct re_se_params *se_params;
+
+ /* This is a bitset representation of the fastmap.
+ * This is a true fastmap that already takes the translate
+ * table into account.
+ */
+ rx_Bitset fastset;
+};
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match.
+ START and END are parallel arrays (a structure of arrays, unlike
+ the POSIX regmatch_t). */
+struct re_registers
+{
+ unsigned num_regs; /* Presumably the length of START and END -- confirm. */
+ regoff_t *start;
+ regoff_t *end;
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* POSIX specification for registers. Besides using different names
+ from `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change.
+
+ NOTE(review): eighteen bits are defined (bit 0 through bit 17), all
+ built by shifting the int constant 1, so this relies on int being
+ wider than 16 bits. */
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating lower
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when the ending range point collates lower than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps.
+ NOTE(review): presumably this is what re_set_syntax changes -- confirm. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!)
+ RE_SYNTAX_POSIX_AWK and RE_SYNTAX_SED name macros that are only
+ defined further down; that is fine, because a macro body is not
+ expanded until the macro is used. */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1)
+
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ * `re_match_2' returns information about at least this many registers
+ * the first time a `regs' structure is passed.
+ *
+ * Also, this is the greatest number of backreferenced subexpressions
+ * allowed in a pattern being matched without caller-supplied registers.
+ */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+/* NOTE(review): rx_cache_bound is presumably the tunable limit on the
+ * superstate cache -- confirm against rx.c.
+ */
+extern int rx_cache_bound;
+/* Declared with __const__ rather than a bare `const', like every other
+ * declaration in this header, so that the header still parses with
+ * compilers for which __const__ expands to nothing.
+ */
+extern __const__ char *rx_version_string;
+
+
+
+#ifdef RX_WANT_RX_DEFS
+
+/* These are the declarations for the interesting subsystems and lower
+ * layers of rx. Everything which doesn't have a public counterpart in
+ * regex.c is declared here.
+ */
+
+
+#ifdef __STDC__ /* Type of the FREEFN argument of rx_free_hash_table. */
+typedef void (*rx_hash_freefn) (struct rx_hash_item * it);
+#else /* ndef __STDC__ */
+typedef void (*rx_hash_freefn) ();
+#endif /* ndef __STDC__ */
+
+
+
+
+#ifdef __STDC__ /* ANSI prototypes; the #else branch repeats every name without parameters, so keep both lists in step. */
+RX_DECL int rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL int rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL int rx_bitset_empty (int size, rx_Bitset set);
+RX_DECL void rx_bitset_null (int size, rx_Bitset b);
+RX_DECL void rx_bitset_universe (int size, rx_Bitset b);
+RX_DECL void rx_bitset_complement (int size, rx_Bitset b);
+RX_DECL void rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_union (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_intersection (int size,
+ rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_revdifference (int size,
+ rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL unsigned long rx_bitset_hash (int size, rx_Bitset b);
+RX_DECL struct rx_hash_item * rx_hash_find (struct rx_hash * table,
+ unsigned long hash,
+ void * value,
+ struct rx_hash_rules * rules);
+RX_DECL struct rx_hash_item * rx_hash_store (struct rx_hash * table,
+ unsigned long hash,
+ void * value,
+ struct rx_hash_rules * rules);
+RX_DECL void rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules);
+RX_DECL void rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
+ struct rx_hash_rules * rules);
+RX_DECL rx_Bitset rx_cset (struct rx *rx);
+RX_DECL rx_Bitset rx_copy_cset (struct rx *rx, rx_Bitset a);
+RX_DECL void rx_free_cset (struct rx * rx, rx_Bitset c);
+RX_DECL struct rexp_node * rexp_node (struct rx *rx,
+ enum rexp_node_type type);
+RX_DECL struct rexp_node * rx_mk_r_cset (struct rx * rx,
+ rx_Bitset b);
+RX_DECL struct rexp_node * rx_mk_r_concat (struct rx * rx,
+ struct rexp_node * a,
+ struct rexp_node * b);
+RX_DECL struct rexp_node * rx_mk_r_alternate (struct rx * rx,
+ struct rexp_node * a,
+ struct rexp_node * b);
+RX_DECL struct rexp_node * rx_mk_r_opt (struct rx * rx,
+ struct rexp_node * a);
+RX_DECL struct rexp_node * rx_mk_r_star (struct rx * rx,
+ struct rexp_node * a);
+RX_DECL struct rexp_node * rx_mk_r_2phase_star (struct rx * rx,
+ struct rexp_node * a,
+ struct rexp_node * b);
+RX_DECL struct rexp_node * rx_mk_r_side_effect (struct rx * rx,
+ rx_side_effect a);
+RX_DECL struct rexp_node * rx_mk_r_data (struct rx * rx,
+ void * a);
+RX_DECL void rx_free_rexp (struct rx * rx, struct rexp_node * node);
+RX_DECL struct rexp_node * rx_copy_rexp (struct rx *rx,
+ struct rexp_node *node);
+RX_DECL struct rx_nfa_state * rx_nfa_state (struct rx *rx);
+RX_DECL void rx_free_nfa_state (struct rx_nfa_state * n);
+RX_DECL struct rx_nfa_state * rx_id_to_nfa_state (struct rx * rx,
+ int id);
+RX_DECL struct rx_nfa_edge * rx_nfa_edge (struct rx *rx,
+ enum rx_nfa_etype type,
+ struct rx_nfa_state *start,
+ struct rx_nfa_state *dest);
+RX_DECL void rx_free_nfa_edge (struct rx_nfa_edge * e);
+RX_DECL void rx_free_nfa (struct rx *rx);
+RX_DECL int rx_build_nfa (struct rx *rx,
+ struct rexp_node *rexp,
+ struct rx_nfa_state **start,
+ struct rx_nfa_state **end);
+RX_DECL void rx_name_nfa_states (struct rx *rx);
+RX_DECL int rx_eclose_nfa (struct rx *rx);
+RX_DECL void rx_delete_epsilon_transitions (struct rx *rx);
+RX_DECL int rx_compactify_nfa (struct rx *rx,
+ void **mem, unsigned long *size);
+RX_DECL void rx_release_superset (struct rx *rx,
+ struct rx_superset *set);
+RX_DECL struct rx_superset * rx_superset_cons (struct rx * rx,
+ struct rx_nfa_state *car, struct rx_superset *cdr);
+RX_DECL struct rx_superset * rx_superstate_eclosure_union
+ (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl);
+RX_DECL struct rx_superstate * rx_superstate (struct rx *rx,
+ struct rx_superset *set);
+RX_DECL struct rx_inx * rx_handle_cache_miss
+ (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data);
+RX_DECL reg_errcode_t rx_compile (__const__ char *pattern, int size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer * rxb);
+RX_DECL void rx_blow_up_fastmap (struct re_pattern_buffer * rxb);
+#else /* STDC */
+RX_DECL int rx_bitset_is_equal ();
+RX_DECL int rx_bitset_is_subset ();
+RX_DECL int rx_bitset_empty ();
+RX_DECL void rx_bitset_null ();
+RX_DECL void rx_bitset_universe ();
+RX_DECL void rx_bitset_complement ();
+RX_DECL void rx_bitset_assign ();
+RX_DECL void rx_bitset_union ();
+RX_DECL void rx_bitset_intersection ();
+RX_DECL void rx_bitset_difference ();
+RX_DECL void rx_bitset_revdifference ();
+RX_DECL void rx_bitset_xor ();
+RX_DECL unsigned long rx_bitset_hash ();
+RX_DECL struct rx_hash_item * rx_hash_find ();
+RX_DECL struct rx_hash_item * rx_hash_store ();
+RX_DECL void rx_hash_free ();
+RX_DECL void rx_free_hash_table ();
+RX_DECL rx_Bitset rx_cset ();
+RX_DECL rx_Bitset rx_copy_cset ();
+RX_DECL void rx_free_cset ();
+RX_DECL struct rexp_node * rexp_node ();
+RX_DECL struct rexp_node * rx_mk_r_cset ();
+RX_DECL struct rexp_node * rx_mk_r_concat ();
+RX_DECL struct rexp_node * rx_mk_r_alternate ();
+RX_DECL struct rexp_node * rx_mk_r_opt ();
+RX_DECL struct rexp_node * rx_mk_r_star ();
+RX_DECL struct rexp_node * rx_mk_r_2phase_star ();
+RX_DECL struct rexp_node * rx_mk_r_side_effect ();
+RX_DECL struct rexp_node * rx_mk_r_data ();
+RX_DECL void rx_free_rexp ();
+RX_DECL struct rexp_node * rx_copy_rexp ();
+RX_DECL struct rx_nfa_state * rx_nfa_state ();
+RX_DECL void rx_free_nfa_state ();
+RX_DECL struct rx_nfa_state * rx_id_to_nfa_state ();
+RX_DECL struct rx_nfa_edge * rx_nfa_edge ();
+RX_DECL void rx_free_nfa_edge ();
+RX_DECL void rx_free_nfa ();
+RX_DECL int rx_build_nfa ();
+RX_DECL void rx_name_nfa_states ();
+RX_DECL int rx_eclose_nfa ();
+RX_DECL void rx_delete_epsilon_transitions ();
+RX_DECL int rx_compactify_nfa ();
+RX_DECL void rx_release_superset ();
+RX_DECL struct rx_superset * rx_superset_cons ();
+RX_DECL struct rx_superset * rx_superstate_eclosure_union ();
+RX_DECL struct rx_superstate * rx_superstate ();
+RX_DECL struct rx_inx * rx_handle_cache_miss ();
+RX_DECL reg_errcode_t rx_compile ();
+RX_DECL void rx_blow_up_fastmap ();
+#endif /* STDC */
+
+
+#endif /* RX_WANT_RX_DEFS */
+
+
+
+#ifdef __STDC__ /* Public GNU (re_*) and POSIX (reg*) entry points; the #else branch repeats them without parameters. */
+extern int re_search_2 (struct re_pattern_buffer *rxb,
+ __const__ char * string1, int size1,
+ __const__ char * string2, int size2,
+ int startpos, int range,
+ struct re_registers *regs,
+ int stop);
+extern int re_search (struct re_pattern_buffer * rxb, __const__ char *string,
+ int size, int startpos, int range,
+ struct re_registers *regs);
+extern int re_match_2 (struct re_pattern_buffer * rxb,
+ __const__ char * string1, int size1,
+ __const__ char * string2, int size2,
+ int pos, struct re_registers *regs, int stop);
+extern int re_match (struct re_pattern_buffer * rxb,
+ __const__ char * string,
+ int size, int pos,
+ struct re_registers *regs);
+extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);
+extern void re_set_registers (struct re_pattern_buffer *bufp,
+ struct re_registers *regs,
+ unsigned num_regs,
+ regoff_t * starts, regoff_t * ends);
+extern __const__ char * re_compile_pattern (__const__ char *pattern,
+ int length,
+ struct re_pattern_buffer * rxb);
+extern int re_compile_fastmap (struct re_pattern_buffer * rxb);
+extern char * re_comp (__const__ char *s);
+extern int re_exec (__const__ char *s);
+extern int regcomp (regex_t * preg, __const__ char * pattern, int cflags);
+extern int regexec (__const__ regex_t *preg, __const__ char *string,
+ size_t nmatch, regmatch_t pmatch[],
+ int eflags);
+extern size_t regerror (int errcode, __const__ regex_t *preg,
+ char *errbuf, size_t errbuf_size);
+extern void regfree (regex_t *preg);
+
+#else /* STDC */
+extern int re_search_2 ();
+extern int re_search ();
+extern int re_match_2 ();
+extern int re_match ();
+extern reg_syntax_t re_set_syntax ();
+extern void re_set_registers ();
+extern __const__ char * re_compile_pattern ();
+extern int re_compile_fastmap ();
+extern char * re_comp ();
+extern int re_exec ();
+extern int regcomp ();
+extern int regexec ();
+extern size_t regerror ();
+extern void regfree ();
+
+#endif /* STDC */
+
+
+
+#ifdef RX_WANT_RX_DEFS
+
+struct rx_counter_frame /* One entry on search_state.counter_stack. */
+{
+ int tag; /* The re_se_push0 side effect stores re_se_iter here. */
+ int val; /* Counter value; re_se_push0 starts it at 0. */
+ struct rx_counter_frame * inherited_from; /* If this is a copy. */
+ struct rx_counter_frame * cdr; /* Frame that was on top of the counter stack when this one was pushed (0 if none). */
+};
+
+struct rx_backtrack_frame
+{
+ char * counter_stack_sp; /* presumably the counter stack's sp at the backtrack point -- TODO confirm */
+
+ /* A frame is used to save the matcher's state when it crosses a
+ * backtracking point. The `stk_' fields correspond to variables
+ * in re_search_2 (just strip off the `stk_'). They are documented
+ * there. NOTE(review): in this file the same names are fields of struct rx_search_state -- confirm which is meant.
+ */
+ struct rx_superstate * stk_super;
+ unsigned int stk_c;
+ struct rx_string_position stk_test_pos;
+ int stk_last_l;
+ int stk_last_r;
+ int stk_test_ret;
+
+ /* This is the list of options left to explore at the backtrack
+ * point for which this frame was created.
+ */
+ struct rx_distinct_future * df;
+ struct rx_distinct_future * first_df;
+
+#ifdef RX_DEBUG
+ int stk_line_no;
+#endif /* RX_DEBUG */
+};
+
+struct rx_stack_chunk /* Header at the start of each chunk managed by the PUSH and POP macros. */
+{
+ struct rx_stack_chunk * next_chunk; /* Chunk that was current when this one was pushed, or the next chunk on search_state.free_chunks. */
+ int bytes_left; /* Bytes of this chunk not yet handed out by PUSH. */
+ char * sp; /* Start of the most recently pushed frame in this chunk. */
+};
+
+enum rx_outer_entry /* Entry and resume points of the outer loop in rx_search (its `pc'). */
+{
+ rx_outer_start, /* Fresh search: check arguments, set up registers and the starting superstate. */
+ rx_outer_fastmap, /* Call (or resume) fastmap_search. */
+ rx_outer_test, /* Run (or resume) the match tester. */
+ rx_outer_restore_pos /* Call (or retry) get_burst on outer_pos after a failed test advanced the position. */
+};
+
+enum rx_fastmap_return /* Values returned by fastmap_search. */
+{
+ rx_fastmap_continuation, /* get_burst asked for a continuation; the resume point has been saved. */
+ rx_fastmap_error, /* Unknown resume point, or get_burst reported an error. */
+ rx_fastmap_ok, /* Stopped at a fastmap hit, or a forward search ran out of string. */
+ rx_fastmap_fail /* A backward search ran out of string. */
+};
+
+enum rx_fastmap_entry /* Resume points inside fastmap_search (stored in fastmap_resume_pt). */
+{
+ rx_fastmap_start, /* Install the sentinel and scan. */
+ rx_fastmap_string_break /* Re-enter at the get_burst call made when a string bound is hit. */
+};
+
+enum rx_test_return /* Values of `test_state' that rx_search examines after a test match. */
+{
+ rx_test_continuation, /* rx_search returns a continuation that resumes at rx_outer_test. */
+ rx_test_error, /* rx_search sets ret_val to rx_search_error and finishes. */
+ rx_test_fail, /* rx_search steps one position in the search direction and goes on. */
+ rx_test_ok /* rx_search finishes. */
+};
+
+enum rx_test_internal_return /* Values the match tester stores in search_state.test_ret. */
+{
+ rx_test_internal_error, /* Stored on a get_burst error or when rx_handle_cache_miss fails. */
+ rx_test_found_first, /* presumably a match found while first_found is set -- not used in the code visible here; confirm */
+ rx_test_line_finished /* Initial value; also stored when get_burst reports no more string. */
+};
+
+enum rx_test_match_entry /* Resume points inside the match tester (its `test_pc'). */
+{
+ rx_test_start, /* Fresh test: initialize the test-match state. */
+ rx_test_cache_hit_loop, /* Resume at resume_continuation_1, the get_burst call in the cached-transition loop. */
+ rx_test_backreference_check, /* Resume at resume_continuation_2. */
+ rx_test_backtrack_return /* Resume at resume_continuation_3. */
+};
+
+struct rx_search_state
+{
+ /* Two groups of registers are kept. The group with the register state
+ * of the current test match, and the group that holds the state at the end
+ * of the best known match, if any.
+ *
+ * For some patterns, there may also be registers saved on the stack.
+ */
+ unsigned num_regs; /* Includes an element for register zero. */
+ regoff_t * lparen; /* scratch space for register returns */
+ regoff_t * rparen;
+ regoff_t * best_lpspace; /* in case the user doesn't want these */
+ regoff_t * best_rpspace; /* values, we still need space to store
+ * them. Normally, this memory is unused
+ * and the space pointed to by REGS is
+ * used instead.
+ */
+
+ int last_l; /* Highest index of a valid lparen. */
+ int last_r; /* Its dual. */
+
+ int * best_lparen; /* This contains the best known register */
+ int * best_rparen; /* assignments.
+ * This may point to the same mem as
+ * best_lpspace, or it might point to memory
+ * passed by the caller. NOTE(review): declared int * but assigned regoff_t * values; assumes regoff_t is int -- confirm.
+ */
+ int best_last_l; /* best_last_l:best_lparen::last_l:lparen */
+ int best_last_r;
+
+
+ unsigned char * translate; /* rxb->translate, or rx_id_translation when that is null. */
+
+ struct rx_string_position outer_pos; /* Position of the outer (fastmap) loop; filled in by get_burst. */
+
+ struct rx_superstate * start_super; /* Starting superstate; locked for the duration of the search. */
+ int nfa_choice; /* '\0' or '\1': which compiled nfa to run; used as the first character fed to the matcher. */
+ int first_found; /* If true, return after finding any match. */
+ int ret_val; /* Value rx_search returns when it reaches `finish'. */
+
+ /* For continuations... */
+ enum rx_outer_entry outer_search_resume_pt;
+ struct re_pattern_buffer * saved_rxb;
+ int saved_startpos;
+ int saved_range;
+ int saved_stop;
+ int saved_total_size;
+ rx_get_burst_fn saved_get_burst;
+ rx_back_check_fn saved_back_check;
+ struct re_registers * saved_regs;
+
+ /**
+ ** state for fastmap
+ **/
+ char * fastmap; /* rxb->fastmap, or rx_slowmap when that is null. */
+ int fastmap_chr; /* Character whose fastmap entry is currently forced on as a sentinel, or -1. */
+ int fastmap_val; /* Fastmap entry read for fastmap_chr just before the sentinel was written. */
+
+ /* for continuations in the fastmap procedure: */
+ enum rx_fastmap_entry fastmap_resume_pt;
+
+ /**
+ ** state for test_match
+ **/
+
+ /* The current superNFA position of the matcher. */
+ struct rx_superstate * super;
+
+ /* The matcher interprets a series of instruction frames.
+ * This is the `instruction counter' for the interpretation.
+ */
+ struct rx_inx * ifr;
+
+ /* We insert a ghost character in the string to prime
+ * the nfa. test_pos.pos, test_pos.str_half, and test_pos.end_half
+ * keep track of the test-match position and string-half.
+ */
+ unsigned char c;
+
+ /* Position within the string. */
+ struct rx_string_position test_pos;
+
+ struct rx_stack_chunk * counter_stack;
+ struct rx_stack_chunk * backtrack_stack;
+ int backtrack_frame_bytes; /* sizeof (struct rx_backtrack_frame), plus room for saved registers when rxb->match_regs_on_stack. */
+ int chunk_bytes; /* Size of each stack chunk: 64 backtrack frames. */
+ struct rx_stack_chunk * free_chunks; /* Chunks released by RX_STACK_FREE in the alloca build; PUSH reuses them. */
+
+ /* To return from this function, set test_ret and
+ * `goto test_do_return'.
+ *
+ * Possible return values are (NOTE(review): the visible code stores enum rx_test_internal_return values here, not these numbers -- confirm which is current):
+ * 1 --- end of string while the superNFA is still going
+ * 0 --- internal error (out of memory)
+ * -1 --- search completed by reaching the superNFA fail state
+ * -2 --- a match was found, maybe not the longest.
+ *
+ * When the search is complete (-1), best_last_r indicates whether
+ * a match was found.
+ *
+ * -2 is returned only if search_state.first_found is non-zero.
+ *
+ * if search_state.first_found is non-zero, a return of -1 indicates no match,
+ * otherwise, best_last_r has to be checked.
+ */
+ int test_ret;
+
+ int could_have_continued; /* Set to 1 when the tester stops because get_burst reports no more string. */
+
+#ifdef RX_DEBUG
+ int backtrack_depth;
+ /* There is a search tree with every node a set of deterministic
+ * transitions in the super nfa. Every branch of a
+ * backtrack point is an edge in the tree.
+ * This counts up a pre-order of nodes in that tree.
+ * It's saved on the search stack and printed when debugging.
+ */
+ int line_no;
+ int lines_found;
+#endif /* RX_DEBUG */
+
+
+ /* For continuations within the match tester */
+ enum rx_test_match_entry test_match_resume_pt;
+ struct rx_inx * saved_next_tr_table;
+ struct rx_inx * saved_this_tr_table;
+ int saved_reg;
+ struct rx_backtrack_frame * saved_bf;
+
+};
+
+
+extern char rx_slowmap[]; /* Fastmap that init_fastmap falls back to when rxb->fastmap is null. */
+extern unsigned char rx_id_translation[]; /* Translate table rx_search uses when rxb->translate is null. */
+
+static __inline__ void /* Point SEARCH_STATE at the fastmap to use for RXB and reset its sentinel and resume fields. */
+init_fastmap (rxb, search_state)
+ struct re_pattern_buffer * rxb;
+ struct rx_search_state * search_state;
+{
+ search_state->fastmap = (rxb->fastmap
+ ? (char *)rxb->fastmap
+ : (char *)rx_slowmap); /* no fastmap buffer on RXB: fall back to rx_slowmap */
+ /* Update the fastmap now if not correct already.
+ * When the regexp was compiled, the fastmap was computed
+ * and stored in a bitset. This expands the bitset into a
+ * character array containing 1s and 0s.
+ */
+ if ((search_state->fastmap == rxb->fastmap) && !rxb->fastmap_accurate)
+ rx_blow_up_fastmap (rxb);
+ search_state->fastmap_chr = -1; /* no sentinel installed yet; uninit_fastmap tests for >= 0 */
+ search_state->fastmap_val = 0;
+ search_state->fastmap_resume_pt = rx_fastmap_start;
+}
+
+static __inline__ void /* Undo the sentinel that fastmap_search left in SEARCH_STATE's fastmap. */
+uninit_fastmap (rxb, search_state)
+ struct re_pattern_buffer * rxb; /* not used in this function */
+ struct rx_search_state * search_state;
+{
+ /* Unset the fastmap sentinel: write fastmap_val back into the entry for fastmap_chr. A negative fastmap_chr means no sentinel is installed. */
+ if (search_state->fastmap_chr >= 0)
+ search_state->fastmap[search_state->fastmap_chr]
+ = search_state->fastmap_val;
+}
+
+static __inline__ int /* Move search_state->outer_pos to the next character set in the fastmap; returns an enum rx_fastmap_return value. */
+fastmap_search (rxb, stop, get_burst, app_closure, search_state)
+ struct re_pattern_buffer * rxb; /* not referenced in this function */
+ int stop; /* passed through to get_burst */
+ rx_get_burst_fn get_burst; /* called to load the next stretch of string into outer_pos */
+ void * app_closure; /* passed through to get_burst */
+ struct rx_search_state * search_state;
+{
+ enum rx_fastmap_entry pc;
+
+ if (0) /* never entered from the top; reached only by `goto return_continuation' */
+ {
+ return_continuation:
+ search_state->fastmap_resume_pt = pc;
+ return rx_fastmap_continuation;
+ }
+
+ pc = search_state->fastmap_resume_pt;
+
+ switch (pc)
+ {
+ default:
+ return rx_fastmap_error;
+ case rx_fastmap_start:
+ init_fastmap_sentinal:
+ /* For the sake of fast fastmapping, set a sentinel in the fastmap.
+ * This sentinel will trap the fastmap loop when it reaches the last
+ * valid character in a string half.
+ *
+ * This must be reset when the fastmap/search loop crosses a string
+ * boundary, and before returning to the caller. So sometimes,
+ * the fastmap loop is restarted with `continue', other times by
+ * `goto init_fastmap_sentinal'. NOTE(review): nothing here restores the previous sentinel before a new one is installed, so fastmap_val can pick up a stale 1 -- confirm.
+ */
+ if (search_state->outer_pos.size) /* a burst is loaded: force its boundary character on in the fastmap */
+ {
+ search_state->fastmap_chr = ((search_state->outer_pos.search_direction == 1)
+ ? *(search_state->outer_pos.end - 1)
+ : *search_state->outer_pos.string); /* forward: last character of the burst; backward: first */
+ search_state->fastmap_val
+ = search_state->fastmap[search_state->fastmap_chr];
+ search_state->fastmap[search_state->fastmap_chr] = 1;
+ }
+ else
+ {
+ search_state->fastmap_chr = -1; /* nothing loaded, so no sentinel */
+ search_state->fastmap_val = 0;
+ }
+
+ if (search_state->outer_pos.pos >= search_state->outer_pos.end)
+ goto fastmap_hit_bound;
+ else
+ {
+ if (search_state->outer_pos.search_direction == 1)
+ {
+ if (search_state->fastmap_val) /* the boundary character was already set in the fastmap: every stop is a hit */
+ {
+ for (;;)
+ {
+ while (!search_state->fastmap[*search_state->outer_pos.pos])
+ ++search_state->outer_pos.pos;
+ return rx_fastmap_ok;
+ }
+ }
+ else
+ {
+ for (;;)
+ {
+ while (!search_state->fastmap[*search_state->outer_pos.pos])
+ ++search_state->outer_pos.pos;
+ if (*search_state->outer_pos.pos != search_state->fastmap_chr) /* stopped on a character other than the sentinel: a hit */
+ return rx_fastmap_ok;
+ else
+ {
+ ++search_state->outer_pos.pos; /* stopped only because of the sentinel: step past it */
+ if (search_state->outer_pos.pos == search_state->outer_pos.end)
+ goto fastmap_hit_bound;
+ }
+ }
+ }
+ }
+ else
+ {
+ __const__ unsigned char * bound;
+ bound = search_state->outer_pos.string - 1; /* one before the first character of the burst */
+ if (search_state->fastmap_val)
+ {
+ for (;;)
+ {
+ while (!search_state->fastmap[*search_state->outer_pos.pos])
+ --search_state->outer_pos.pos;
+ return rx_fastmap_ok;
+ }
+ }
+ else
+ {
+ for (;;)
+ {
+ while (!search_state->fastmap[*search_state->outer_pos.pos])
+ --search_state->outer_pos.pos;
+ if ((*search_state->outer_pos.pos != search_state->fastmap_chr) || search_state->fastmap_val) /* fastmap_val is 0 in this branch, so the second test never fires */
+ return rx_fastmap_ok;
+ else
+ {
+ --search_state->outer_pos.pos;
+ if (search_state->outer_pos.pos == bound)
+ goto fastmap_hit_bound;
+ }
+ }
+ }
+ }
+ }
+
+ case rx_fastmap_string_break: /* also the resume point after a continuation */
+ fastmap_hit_bound:
+ {
+ /* If we hit a bound, it may be time to fetch another burst
+ * of string, or it may be time to return a continuation to
+ * the caller, or it might be time to fail.
+ */
+
+ int burst_state;
+ burst_state = get_burst (&search_state->outer_pos, app_closure, stop);
+ switch (burst_state)
+ {
+ default:
+ case rx_get_burst_error:
+ return rx_fastmap_error;
+ case rx_get_burst_continuation:
+ {
+ pc = rx_fastmap_string_break;
+ goto return_continuation;
+ }
+ case rx_get_burst_ok:
+ goto init_fastmap_sentinal;
+ case rx_get_burst_no_more:
+ /* ...not a string split, simply no more string.
+ *
+ * When searching backward, running out of string
+ * is reason to quit.
+ *
+ * When searching forward, we allow the possibility
+ * of an (empty) match after the last character in the
+ * virtual string. So, fall through to the matcher
+ */
+ return ( (search_state->outer_pos.search_direction == 1)
+ ? rx_fastmap_ok
+ : rx_fastmap_fail);
+ }
+ }
+ }
+
+}
+
+
+
+#ifdef emacs
+/* The `emacs' switch turns on certain matching commands
+ * that make sense only in Emacs.
+ */
+#include "config.h"
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+#endif /* emacs */
+
+/* Setting RX_MEMDBUG is useful if you have dbmalloc. Maybe with similar
+ * packages too.
+ */
+#ifdef RX_MEMDBUG
+#include <malloc.h>
+#endif /* RX_MEMDBUG */
+
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+ * `BSTRING', as far as I know, and neither of them use this code.
+ */
+#if HAVE_STRING_H || STDC_HEADERS
+#include <string.h>
+
+#ifndef bcmp
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif
+
+#ifndef bcopy
+#define bcopy(s, d, n) memcpy ((d), (s), (n))
+#endif
+
+#ifndef bzero
+#define bzero(s, n) memset ((s), 0, (n))
+#endif
+
+#else /* HAVE_STRING_H || STDC_HEADERS */
+#include <strings.h>
+#endif /* not (HAVE_STRING_H || STDC_HEADERS) */
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else /* not STDC_HEADERS */
+char *malloc ();
+char *realloc ();
+#endif /* not STDC_HEADERS */
+
+
+
+
+/* How many characters in the character set. */
+#define CHAR_SET_SIZE (1 << CHARBITS)
+
+#ifndef emacs
+/* Define the syntax basics for \<, \>, etc.
+ * This must be nonzero for the wordchar and notwordchar pattern
+ * commands in re_match_2.
+ */
+#ifndef Sword
+#define Sword 1
+#endif /* Sword */
+#define SYNTAX(c) re_syntax_table[c] /* LETTER_P compares this against Sword. */
+RX_DECL char re_syntax_table[CHAR_SET_SIZE]; /* Indexed by character through SYNTAX. */
+#endif /* not emacs */
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ * of `string1' and `string2'. If only one string, it's `string2'. Both expand to uses of the local `search_state' (AT_STRINGS_END also `total_size'): BEG compares the test position with -1, END with total_size - 1.
+ */
+
+#define AT_STRINGS_BEG() \
+ ( -1 \
+ == ((search_state.test_pos.pos - search_state.test_pos.string) \
+ + search_state.test_pos.offset))
+
+#define AT_STRINGS_END() \
+ ( (total_size - 1) \
+ == ((search_state.test_pos.pos - search_state.test_pos.string) \
+ + search_state.test_pos.offset))
+
+
+/* Test if the character that fetch_char returns for POS and OFF has
+ * syntax class Sword, i.e. is word-constituent. NOTE(review): the two
+ * special cases (past the end of string1, look at the first character in
+ * string2; before the beginning of string2, look at the last character in
+ * string1) are presumably handled inside fetch_char -- confirm. Expands to uses of the locals `fetch_char', `app_closure' and `stop'.
+ * Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG ().
+ */
+#define LETTER_P(POS,OFF) \
+ ( SYNTAX (fetch_char(POS, OFF, app_closure, stop)) \
+ == Sword)
+
+/* Test if the character at D and the one after D differ with respect
+ * to being word-constituent. Also true at either end of the virtual string (AT_STRINGS_BEG or AT_STRINGS_END).
+ */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d,0) != LETTER_P (d, 1))
+
+
+#ifdef RX_SUPPORT_CONTINUATIONS /* heap chunks; presumably because they must survive a return from rx_search -- confirm */
+#define RX_STACK_ALLOC(BYTES) malloc(BYTES)
+#define RX_STACK_FREE(MEM) free(MEM)
+#else /* alloca build: RX_STACK_FREE pushes the chunk on search_state.free_chunks, where PUSH looks first. It expands to two statements, so use it only inside braces. */
+#define RX_STACK_ALLOC(BYTES) alloca(BYTES)
+#define RX_STACK_FREE(MEM) \
+ ((struct rx_stack_chunk *)MEM)->next_chunk = search_state.free_chunks; \
+ search_state.free_chunks = ((struct rx_stack_chunk *)MEM);
+
+#endif /* RX_SUPPORT_CONTINUATIONS */
+
+#define PUSH(CHUNK_VAR,BYTES) /* Reserve BYTES on the chunked stack CHUNK_VAR; afterwards CHUNK_VAR->sp is the start of the new frame. Expands to an if/else statement that uses the local `search_state' and the label `test_do_return'. */ \
+ if (!CHUNK_VAR || (CHUNK_VAR->bytes_left < (BYTES))) /* no chunk yet, or no room left in it */ \
+ { \
+ struct rx_stack_chunk * new_chunk; \
+ if (search_state.free_chunks) /* reuse a released chunk first */ \
+ { \
+ new_chunk = search_state.free_chunks; \
+ search_state.free_chunks = search_state.free_chunks->next_chunk; \
+ } \
+ else \
+ { \
+ new_chunk = (struct rx_stack_chunk *)RX_STACK_ALLOC(search_state.chunk_bytes); \
+ if (!new_chunk) /* allocation failed */ \
+ { \
+ search_state.ret_val = 0; \
+ goto test_do_return; \
+ } \
+ } \
+ new_chunk->sp = (char *)new_chunk + sizeof (struct rx_stack_chunk); /* first frame starts right after the chunk header */ \
+ new_chunk->bytes_left = (search_state.chunk_bytes \
+ - (BYTES) \
+ - sizeof (struct rx_stack_chunk)); \
+ new_chunk->next_chunk = CHUNK_VAR; \
+ CHUNK_VAR = new_chunk; \
+ } \
+ else \
+ (CHUNK_VAR->sp += (BYTES)), (CHUNK_VAR->bytes_left -= (BYTES))
+
+#define POP(CHUNK_VAR,BYTES) /* Drop the top BYTES-sized frame from CHUNK_VAR. Expands to an if/else statement; CHUNK_VAR must be non-null. */ \
+ if (CHUNK_VAR->sp == ((char *)CHUNK_VAR + sizeof(*CHUNK_VAR))) /* top frame is the first in its chunk: release the whole chunk */ \
+ { \
+ struct rx_stack_chunk * new_chunk = CHUNK_VAR->next_chunk; \
+ RX_STACK_FREE(CHUNK_VAR); \
+ CHUNK_VAR = new_chunk; \
+ } \
+ else \
+ (CHUNK_VAR->sp -= BYTES), (CHUNK_VAR->bytes_left += BYTES)
+
+
+
+#define SRCH_TRANSLATE(C) search_state.translate[(unsigned char) (C)] /* Map C through the translate table chosen in rx_search; uses the local `search_state'. */
+
+
+
+
+#ifdef __STDC__
+RX_DECL __inline__ int
+rx_search (struct re_pattern_buffer * rxb,
+ int startpos,
+ int range,
+ int stop,
+ int total_size,
+ rx_get_burst_fn get_burst,
+ rx_back_check_fn back_check,
+ rx_fetch_char_fn fetch_char,
+ void * app_closure,
+ struct re_registers * regs,
+ struct rx_search_state * resume_state,
+ struct rx_search_state * save_state)
+#else
+RX_DECL __inline__ int
+rx_search (rxb, startpos, range, stop, total_size,
+ get_burst, back_check, fetch_char,
+ app_closure, regs, resume_state, save_state)
+ struct re_pattern_buffer * rxb;
+ int startpos;
+ int range;
+ int stop;
+ int total_size;
+ rx_get_burst_fn get_burst;
+ rx_back_check_fn back_check;
+ rx_fetch_char_fn fetch_char;
+ void * app_closure;
+ struct re_registers * regs;
+ struct rx_search_state * resume_state;
+ struct rx_search_state * save_state;
+#endif
+{
+ int pc;
+ int test_state;
+ struct rx_search_state search_state;
+
+ search_state.free_chunks = 0;
+ if (!resume_state)
+ pc = rx_outer_start;
+ else
+ {
+ search_state = *resume_state;
+ regs = search_state.saved_regs;
+ rxb = search_state.saved_rxb;
+ startpos = search_state.saved_startpos;
+ range = search_state.saved_range;
+ stop = search_state.saved_stop;
+ total_size = search_state.saved_total_size;
+ get_burst = search_state.saved_get_burst;
+ back_check = search_state.saved_back_check;
+ pc = search_state.outer_search_resume_pt;
+ if (0)
+ {
+ return_continuation:
+ if (save_state)
+ {
+ *save_state = search_state;
+ save_state->saved_regs = regs;
+ save_state->saved_rxb = rxb;
+ save_state->saved_startpos = startpos;
+ save_state->saved_range = range;
+ save_state->saved_stop = stop;
+ save_state->saved_total_size = total_size;
+ save_state->saved_get_burst = get_burst;
+ save_state->saved_back_check = back_check;
+ save_state->outer_search_resume_pt = pc;
+ }
+ return rx_search_continuation;
+ }
+ }
+
+ switch (pc)
+ {
+ case rx_outer_start:
+ search_state.ret_val = rx_search_fail;
+ ( search_state.lparen
+ = search_state.rparen
+ = search_state.best_lpspace
+ = search_state.best_rpspace
+ = 0);
+
+ /* figure the number of registers we may need for use in backreferences.
+ * the number here includes an element for register zero.
+ */
+ search_state.num_regs = rxb->re_nsub + 1;
+
+
+ /* check for out-of-range startpos. */
+ if ((startpos < 0) || (startpos > total_size))
+ return rx_search_fail;
+
+ /* fix up range if it might eventually take us outside the string. */
+ {
+ int endpos;
+ endpos = startpos + range;
+ if (endpos < -1)
+ range = (-1 - startpos);
+ else if (endpos > (total_size + 1))
+ range = total_size - startpos;
+ }
+
+ /* if the search isn't to be a backwards one, don't waste time in a
+ * long search for a pattern that says it is anchored.
+ */
+ if (rxb->begbuf_only && (range > 0))
+ {
+ if (startpos > 0)
+ return rx_search_fail;
+ else
+ range = 1;
+ }
+
+ /* decide whether to use internal or user-provided reg buffers. */
+ if (!regs || rxb->no_sub)
+ {
+ search_state.best_lpspace =
+ (regoff_t *)REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
+ search_state.best_rpspace =
+ (regoff_t *)REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
+ search_state.best_lparen = search_state.best_lpspace;
+ search_state.best_rparen = search_state.best_rpspace;
+ }
+ else
+ {
+ /* have the register data arrays been allocated? */
+ if (rxb->regs_allocated == REGS_UNALLOCATED)
+ { /* no. so allocate them with malloc. we need one
+ extra element beyond `search_state.num_regs' for the `-1' marker
+ gnu code uses. */
+ regs->num_regs = MAX (RE_NREGS, rxb->re_nsub + 1);
+ regs->start = ((regoff_t *)
+ malloc (regs->num_regs * sizeof ( regoff_t)));
+ regs->end = ((regoff_t *)
+ malloc (regs->num_regs * sizeof ( regoff_t)));
+ if (regs->start == 0 || regs->end == 0)
+ return rx_search_error;
+ rxb->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (rxb->regs_allocated == REGS_REALLOCATE)
+ { /* yes. if we need more elements than were already
+ allocated, reallocate them. if we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < search_state.num_regs + 1)
+ {
+ regs->num_regs = search_state.num_regs + 1;
+ regs->start = ((regoff_t *)
+ realloc (regs->start,
+ regs->num_regs * sizeof (regoff_t)));
+ regs->end = ((regoff_t *)
+ realloc (regs->end,
+ regs->num_regs * sizeof ( regoff_t)));
+ if (regs->start == 0 || regs->end == 0)
+ return rx_search_error;
+ }
+ }
+ else if (rxb->regs_allocated != REGS_FIXED)
+ return rx_search_error;
+
+ if (regs->num_regs < search_state.num_regs + 1)
+ {
+ search_state.best_lpspace =
+ ((regoff_t *)
+ REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)));
+ search_state.best_rpspace =
+ ((regoff_t *)
+ REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)));
+ search_state.best_lparen = search_state.best_lpspace;
+ search_state.best_rparen = search_state.best_rpspace;
+ }
+ else
+ {
+ search_state.best_lparen = regs->start;
+ search_state.best_rparen = regs->end;
+ }
+ }
+
+ search_state.lparen =
+ (regoff_t *) REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
+ search_state.rparen =
+ (regoff_t *) REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
+
+ if (! ( search_state.best_rparen
+ && search_state.best_lparen
+ && search_state.lparen && search_state.rparen))
+ return rx_search_error;
+
+ search_state.best_last_l = search_state.best_last_r = -1;
+
+ search_state.translate = (rxb->translate
+ ? rxb->translate
+ : rx_id_translation);
+
+
+
+ /*
+ * two nfa's were compiled.
+ * `0' is complete.
+ * `1' faster but gets registers wrong and ends too soon.
+ */
+ search_state.nfa_choice = (regs && !rxb->least_subs) ? '\0' : '\1';
+
+ /* we have the option to look for the best match or the first
+ * one we can find. if the user isn't asking for register information,
+ * we don't need to find the best match.
+ */
+ search_state.first_found = !regs;
+
+ if (range >= 0)
+ {
+ search_state.outer_pos.search_end = startpos + range;
+ search_state.outer_pos.search_direction = 1;
+ }
+ else
+ {
+ search_state.outer_pos.search_end = startpos + range;
+ search_state.outer_pos.search_direction = -1;
+ }
+
+ /* the vacuous search always turns up nothing. */
+ if ((search_state.outer_pos.search_direction == 1)
+ ? (startpos > search_state.outer_pos.search_end)
+ : (startpos < search_state.outer_pos.search_end))
+ return rx_search_fail;
+
+ /* now we build the starting state of the supernfa. */
+ {
+ struct rx_superset * start_contents;
+ struct rx_nfa_state_set * start_nfa_set;
+
+ /* we presume here that the nfa start state has only one
+ * possible future with no side effects.
+ */
+ start_nfa_set = rxb->start->futures->destset;
+ if ( rxb->rx.start_set
+ && (rxb->rx.start_set->starts_for == &rxb->rx))
+ start_contents = rxb->rx.start_set;
+ else
+ {
+ start_contents =
+ rx_superstate_eclosure_union (&rxb->rx,
+ rx_superset_cons (&rxb->rx, 0, 0),
+ start_nfa_set);
+
+ if (!start_contents)
+ return rx_search_fail;
+
+ start_contents->starts_for = &rxb->rx;
+ rxb->rx.start_set = start_contents;
+ }
+ if ( start_contents->superstate
+ && (start_contents->superstate->rx_id == rxb->rx.rx_id))
+ {
+ search_state.start_super = start_contents->superstate;
+ rx_lock_superstate (&rxb->rx, search_state.start_super);
+ }
+ else
+ {
+ rx_protect_superset (&rxb->rx, start_contents);
+
+ search_state.start_super = rx_superstate (&rxb->rx, start_contents);
+ if (!search_state.start_super)
+ return rx_search_fail;
+ rx_lock_superstate (&rxb->rx, search_state.start_super);
+ rx_release_superset (&rxb->rx, start_contents);
+ }
+ }
+
+
+ /* The outer_pos tracks the position within the strings
+ * as seen by loop that calls fastmap_search.
+ *
+ * The caller supplied get_burst function actually
+ * gives us pointers to chars.
+ *
+ * Communication with the get_burst function is through an
+ * rx_string_position structure. Here, the structure for
+ * outer_pos is initialized. It is set to point to the
+ * NULL string, at an offset of STARTPOS. STARTPOS is out
+ * of range of the NULL string, so the first call to
+ * getburst will patch up the rx_string_position to point
+ * to valid characters.
+ */
+
+ ( search_state.outer_pos.string
+ = search_state.outer_pos.end
+ = 0);
+
+ search_state.outer_pos.offset = 0;
+ search_state.outer_pos.size = 0;
+ search_state.outer_pos.pos = (unsigned char *)startpos;
+ init_fastmap (rxb, &search_state);
+
+ search_state.fastmap_resume_pt = rx_fastmap_start;
+ case rx_outer_fastmap:
+ /* do { */
+ pseudo_do:
+ {
+ {
+ int fastmap_state;
+ fastmap_state = fastmap_search (rxb, stop, get_burst, app_closure,
+ &search_state);
+ switch (fastmap_state)
+ {
+ case rx_fastmap_continuation:
+ pc = rx_outer_fastmap;
+ goto return_continuation;
+ case rx_fastmap_fail:
+ goto finish;
+ case rx_fastmap_ok:
+ break;
+ }
+ }
+
+ /* now the fastmap loop has brought us to a plausible
+ * starting point for a match. so, it's time to run the
+ * nfa and see if a match occured.
+ */
+ startpos = ( search_state.outer_pos.pos
+ - search_state.outer_pos.string
+ + search_state.outer_pos.offset);
+#if 0
+/*|*/ if ((range > 0) && (startpos == search_state.outer_pos.search_end))
+/*|*/ goto finish;
+#endif
+ }
+
+ search_state.test_match_resume_pt = rx_test_start;
+ /* do interrupted for entry point... */
+ case rx_outer_test:
+ /* ...do continued */
+ {
+ goto test_match;
+ test_returns_to_search:
+ switch (test_state)
+ {
+ case rx_test_continuation:
+ pc = rx_outer_test;
+ goto return_continuation;
+ case rx_test_error:
+ search_state.ret_val = rx_search_error;
+ goto finish;
+ case rx_test_fail:
+ break;
+ case rx_test_ok:
+ goto finish;
+ }
+ search_state.outer_pos.pos += search_state.outer_pos.search_direction;
+ startpos += search_state.outer_pos.search_direction;
+#if 0
+/*|*/ if (search_state.test_pos.pos < search_state.test_pos.end)
+/*|*/ break;
+#endif
+ }
+ /* do interrupted for entry point... */
+ case rx_outer_restore_pos:
+ {
+ int x;
+ x = get_burst (&search_state.outer_pos, app_closure, stop);
+ switch (x)
+ {
+ case rx_get_burst_continuation:
+ pc = rx_outer_restore_pos;
+ goto return_continuation;
+ case rx_get_burst_error:
+ search_state.ret_val = rx_search_error;
+ goto finish;
+ case rx_get_burst_no_more:
+ if (rxb->can_match_empty)
+ break;
+ goto finish;
+ case rx_get_burst_ok:
+ break;
+ }
+ } /* } while (...see below...) */
+
+ if ((search_state.outer_pos.search_direction == 1)
+ ? (startpos <= search_state.outer_pos.search_end)
+ : (startpos > search_state.outer_pos.search_end))
+ goto pseudo_do;
+
+
+ finish:
+ uninit_fastmap (rxb, &search_state);
+ if (search_state.start_super)
+ rx_unlock_superstate (&rxb->rx, search_state.start_super);
+
+#ifdef regex_malloc
+ if (search_state.lparen) free (search_state.lparen);
+ if (search_state.rparen) free (search_state.rparen);
+ if (search_state.best_lpspace) free (search_state.best_lpspace);
+ if (search_state.best_rpspace) free (search_state.best_rpspace);
+#endif
+ return search_state.ret_val;
+ }
+
+
+ test_match:
+ {
+ enum rx_test_match_entry test_pc;
+ int inx;
+ test_pc = search_state.test_match_resume_pt;
+ if (test_pc == rx_test_start)
+ {
+#ifdef RX_DEBUG
+ search_state.backtrack_depth = 0;
+#endif
+ search_state.last_l = search_state.last_r = 0;
+ search_state.lparen[0] = startpos;
+ search_state.super = search_state.start_super;
+ search_state.c = search_state.nfa_choice;
+ search_state.test_pos.pos = search_state.outer_pos.pos - 1;
+ search_state.test_pos.string = search_state.outer_pos.string;
+ search_state.test_pos.end = search_state.outer_pos.end;
+ search_state.test_pos.offset = search_state.outer_pos.offset;
+ search_state.test_pos.size = search_state.outer_pos.size;
+ search_state.test_pos.search_direction = 1;
+ search_state.counter_stack = 0;
+ search_state.backtrack_stack = 0;
+ search_state.backtrack_frame_bytes =
+ (sizeof (struct rx_backtrack_frame)
+ + (rxb->match_regs_on_stack
+ ? sizeof (regoff_t) * (search_state.num_regs + 1) * 2
+ : 0));
+ search_state.chunk_bytes = search_state.backtrack_frame_bytes * 64;
+ search_state.test_ret = rx_test_line_finished;
+ search_state.could_have_continued = 0;
+ }
+ /* This is while (1)...except that the body of the loop is interrupted
+ * by some alternative entry points.
+ */
+ pseudo_while_1:
+ switch (test_pc)
+ {
+ case rx_test_cache_hit_loop:
+ goto resume_continuation_1;
+ case rx_test_backreference_check:
+ goto resume_continuation_2;
+ case rx_test_backtrack_return:
+ goto resume_continuation_3;
+ case rx_test_start:
+#ifdef RX_DEBUG
+ /* There is a search tree with every node as set of deterministic
+ * transitions in the super nfa. For every branch of a
+ * backtrack point is an edge in the tree.
+ * This counts up a pre-order of nodes in that tree.
+ * It's saved on the search stack and printed when debugging.
+ */
+ search_state.line_no = 0;
+ search_state.lines_found = 0;
+#endif
+
+ top_of_cycle:
+ /* A superstate is basicly a transition table, indexed by
+ * characters from the string being tested, and containing
+ * RX_INX (`instruction frame') structures.
+ */
+ search_state.ifr = &search_state.super->transitions [search_state.c];
+
+ recurse_test_match:
+ /* This is the point to which control is sent when the
+ * test matcher `recurses'. Before jumping here, some variables
+ * need to be saved on the stack and the next instruction frame
+ * has to be computed.
+ */
+
+ restart:
+ /* Some instructions don't advance the matcher, but just
+ * carry out some side effects and fetch a new instruction.
+ * To dispatch that new instruction, `goto restart'.
+ */
+
+ {
+ struct rx_inx * next_tr_table;
+ struct rx_inx * this_tr_table;
+ /* The fastest route through the loop is when the instruction
+ * is RX_NEXT_CHAR. This case is detected when SEARCH_STATE.IFR->DATA
+ * is non-zero. In that case, it points to the next
+ * superstate.
+ *
+ * This allows us to not bother fetching the bytecode.
+ */
+ next_tr_table = (struct rx_inx *)search_state.ifr->data;
+ this_tr_table = search_state.super->transitions;
+ while (next_tr_table)
+ {
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ {
+ struct rx_superset * setp;
+
+ fprintf (stderr, "%d %d>> re_next_char @ %d (%d)",
+ search_state.line_no,
+ search_state.backtrack_depth,
+ (search_state.test_pos.pos - search_state.test_pos.string
+ + search_state.test_pos.offset), search_state.c);
+
+ search_state.super =
+ ((struct rx_superstate *)
+ ((char *)this_tr_table
+ - ((unsigned long)
+ ((struct rx_superstate *)0)->transitions)));
+
+ setp = search_state.super->contents;
+ fprintf (stderr, " superstet (rx=%d, &=%x: ",
+ rxb->rx.rx_id, setp);
+ while (setp)
+ {
+ fprintf (stderr, "%d ", setp->id);
+ setp = setp->cdr;
+ }
+ fprintf (stderr, "\n");
+ }
+#endif
+ this_tr_table = next_tr_table;
+ ++search_state.test_pos.pos;
+ if (search_state.test_pos.pos == search_state.test_pos.end)
+ {
+ int burst_state;
+ try_burst_1:
+ burst_state = get_burst (&search_state.test_pos,
+ app_closure, stop);
+ switch (burst_state)
+ {
+ case rx_get_burst_continuation:
+ search_state.saved_this_tr_table = this_tr_table;
+ search_state.saved_next_tr_table = next_tr_table;
+ test_pc = rx_test_cache_hit_loop;
+ goto test_return_continuation;
+
+ resume_continuation_1:
+ /* Continuation one jumps here to do its work: */
+ search_state.saved_this_tr_table = this_tr_table;
+ search_state.saved_next_tr_table = next_tr_table;
+ goto try_burst_1;
+
+ case rx_get_burst_ok:
+ /* get_burst succeeded...keep going */
+ break;
+
+ case rx_get_burst_no_more:
+ search_state.test_ret = rx_test_line_finished;
+ search_state.could_have_continued = 1;
+ goto test_do_return;
+
+ case rx_get_burst_error:
+ /* An error... */
+ search_state.test_ret = rx_test_internal_error;
+ goto test_do_return;
+ }
+ }
+ search_state.c = *search_state.test_pos.pos;
+ search_state.ifr = this_tr_table + search_state.c;
+ next_tr_table = (struct rx_inx *)search_state.ifr->data;
+ } /* Fast loop through cached transition tables */
+
+ /* Here when we ran out of cached next-char transitions.
+ * So, it will be necessary to do a more expensive
+ * dispatch on the current instruction. The superstate
+ * pointer is allowed to become invalid during next-char
+ * transitions -- now we must bring it up to date.
+ */
+ search_state.super =
+ ((struct rx_superstate *)
+ ((char *)this_tr_table
+ - ((unsigned long)
+ ((struct rx_superstate *)0)->transitions)));
+ }
+
+ /* We've encountered an instruction other than next-char.
+ * Dispatch that instruction:
+ */
+ inx = (int)search_state.ifr->inx;
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ {
+ struct rx_superset * setp = search_state.super->contents;
+
+ fprintf (stderr, "%d %d>> %s @ %d (%d)", search_state.line_no,
+ search_state.backtrack_depth,
+ inx_names[inx],
+ (search_state.test_pos.pos - search_state.test_pos.string
+ + (test_pos.half == 0 ? 0 : size1)), search_state.c);
+
+ fprintf (stderr, " superstet (rx=%d, &=%x: ",
+ rxb->rx.rx_id, setp);
+ while (setp)
+ {
+ fprintf (stderr, "%d ", setp->id);
+ setp = setp->cdr;
+ }
+ fprintf (stderr, "\n");
+ }
+#endif
+ switch ((enum rx_opcode)inx)
+ {
+ case rx_do_side_effects:
+
+ /* RX_DO_SIDE_EFFECTS occurs when we cross epsilon
+ * edges associated with parentheses, backreferencing, etc.
+ */
+ {
+ struct rx_distinct_future * df =
+ (struct rx_distinct_future *)search_state.ifr->data_2;
+ struct rx_se_list * el = df->effects;
+ /* Side effects come in lists. This walks down
+ * a list, dispatching.
+ */
+ while (el)
+ {
+ long effect;
+ effect = (long)el->car;
+ if (effect < 0)
+ {
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ {
+ struct rx_superset * setp = search_state.super->contents;
+
+ fprintf (stderr, "....%d %d>> %s\n", search_state.line_no,
+ search_state.backtrack_depth,
+ efnames[-effect]);
+ }
+#endif
+ switch ((enum re_side_effects) effect)
+
+ {
+ case re_se_pushback:
+ search_state.ifr = &df->future_frame;
+ if (!search_state.ifr->data)
+ {
+ struct rx_superstate * sup;
+ sup = search_state.super;
+ rx_lock_superstate (rx, sup);
+ if (!rx_handle_cache_miss (&rxb->rx,
+ search_state.super,
+ search_state.c,
+ (search_state.ifr
+ ->data_2)))
+ {
+ rx_unlock_superstate (rx, sup);
+ search_state.test_ret = rx_test_internal_error;
+ goto test_do_return;
+ }
+ rx_unlock_superstate (rx, sup);
+ }
+ /* --search_state.test_pos.pos; */
+ search_state.c = 't';
+ search_state.super
+ = ((struct rx_superstate *)
+ ((char *)search_state.ifr->data
+ - (long)(((struct rx_superstate *)0)
+ ->transitions)));
+ goto top_of_cycle;
+ break;
+ case re_se_push0:
+ {
+ struct rx_counter_frame * old_cf
+ = (search_state.counter_stack
+ ? ((struct rx_counter_frame *)
+ search_state.counter_stack->sp)
+ : 0);
+ struct rx_counter_frame * cf;
+ PUSH (search_state.counter_stack,
+ sizeof (struct rx_counter_frame));
+ cf = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ cf->tag = re_se_iter;
+ cf->val = 0;
+ cf->inherited_from = 0;
+ cf->cdr = old_cf;
+ break;
+ }
+ case re_se_fail:
+ goto test_do_return;
+ case re_se_begbuf:
+ if (!AT_STRINGS_BEG ())
+ goto test_do_return;
+ break;
+ case re_se_endbuf:
+ if (!AT_STRINGS_END ())
+ goto test_do_return;
+ break;
+ case re_se_wordbeg:
+ if ( LETTER_P (&search_state.test_pos, 1)
+ && ( AT_STRINGS_BEG()
+ || !LETTER_P (&search_state.test_pos, 0)))
+ break;
+ else
+ goto test_do_return;
+ case re_se_wordend:
+ if ( !AT_STRINGS_BEG ()
+ && LETTER_P (&search_state.test_pos, 0)
+ && (AT_STRINGS_END ()
+ || !LETTER_P (&search_state.test_pos, 1)))
+ break;
+ else
+ goto test_do_return;
+ case re_se_wordbound:
+ if (AT_WORD_BOUNDARY (&search_state.test_pos))
+ break;
+ else
+ goto test_do_return;
+ case re_se_notwordbound:
+ if (!AT_WORD_BOUNDARY (&search_state.test_pos))
+ break;
+ else
+ goto test_do_return;
+ case re_se_hat:
+ if (AT_STRINGS_BEG ())
+ {
+ if (rxb->not_bol)
+ goto test_do_return;
+ else
+ break;
+ }
+ else
+ {
+ char pos_c = *search_state.test_pos.pos;
+ if ( (SRCH_TRANSLATE (pos_c)
+ == SRCH_TRANSLATE('\n'))
+ && rxb->newline_anchor)
+ break;
+ else
+ goto test_do_return;
+ }
+ case re_se_dollar:
+ if (AT_STRINGS_END ())
+ {
+ if (rxb->not_eol)
+ goto test_do_return;
+ else
+ break;
+ }
+ else
+ {
+ if ( ( SRCH_TRANSLATE (fetch_char
+ (&search_state.test_pos, 1,
+ app_closure, stop))
+ == SRCH_TRANSLATE ('\n'))
+ && rxb->newline_anchor)
+ break;
+ else
+ goto test_do_return;
+ }
+
+ case re_se_try:
+ /* This is the first side effect in every
+ * expression.
+ *
+ * FOR NO GOOD REASON...get rid of it...
+ */
+ break;
+
+ case re_se_pushpos:
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+ struct rx_counter_frame * old_cf
+ = (search_state.counter_stack
+ ? ((struct rx_counter_frame *)
+ search_state.counter_stack->sp)
+ : 0);
+ struct rx_counter_frame * cf;
+ PUSH(search_state.counter_stack,
+ sizeof (struct rx_counter_frame));
+ cf = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ cf->tag = re_se_pushpos;
+ cf->val = urhere;
+ cf->inherited_from = 0;
+ cf->cdr = old_cf;
+ break;
+ }
+
+ case re_se_chkpos:
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+ struct rx_counter_frame * cf
+ = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ if (cf->val == urhere)
+ goto test_do_return;
+ cf->val = urhere;
+ break;
+ }
+ break;
+
+ case re_se_poppos:
+ POP(search_state.counter_stack,
+ sizeof (struct rx_counter_frame));
+ break;
+
+
+ case re_se_at_dot:
+ case re_se_syntax:
+ case re_se_not_syntax:
+#ifdef emacs
+ /*
+ * this release lacks emacs support
+ */
+#endif
+ break;
+ case re_se_win:
+ case re_se_lparen:
+ case re_se_rparen:
+ case re_se_backref:
+ case re_se_iter:
+ case re_se_end_iter:
+ case re_se_tv:
+ case re_floogle_flap:
+ search_state.ret_val = 0;
+ goto test_do_return;
+ }
+ }
+ else
+ {
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ fprintf (stderr, "....%d %d>> %s %d %d\n", search_state.line_no,
+ search_state.backtrack_depth,
+ efnames2[rxb->se_params [effect].se],
+ rxb->se_params [effect].op1,
+ rxb->se_params [effect].op2);
+#endif
+ switch (rxb->se_params [effect].se)
+ {
+ case re_se_win:
+ /* This side effect indicates that we've
+ * found a match, though not necessarily the
+ * best match. This is a fancy assignment to
+ * register 0 unless the caller didn't
+ * care about registers. In which case,
+ * this stops the match.
+ */
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+
+ if ( (search_state.best_last_r < 0)
+ || (urhere + 1 > search_state.best_rparen[0]))
+ {
+ /* Record the best known and keep
+ * looking.
+ */
+ int x;
+ for (x = 0; x <= search_state.last_l; ++x)
+ search_state.best_lparen[x] = search_state.lparen[x];
+ search_state.best_last_l = search_state.last_l;
+ for (x = 0; x <= search_state.last_r; ++x)
+ search_state.best_rparen[x] = search_state.rparen[x];
+ search_state.best_rparen[0] = urhere + 1;
+ search_state.best_last_r = search_state.last_r;
+ }
+ /* If we're not reporting the match-length
+ * or other register info, we need look no
+ * further.
+ */
+ if (search_state.first_found)
+ {
+ search_state.test_ret = rx_test_found_first;
+ goto test_do_return;
+ }
+ }
+ break;
+ case re_se_lparen:
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+
+ int reg = rxb->se_params [effect].op1;
+#if 0
+ if (reg > search_state.last_l)
+#endif
+ {
+ search_state.lparen[reg] = urhere + 1;
+ /* In addition to making this assignment,
+ * we now know that lower numbered regs
+ * that haven't already been assigned,
+ * won't be. We make sure they're
+ * filled with -1, so they can be
+ * recognized as unassigned.
+ */
+ if (search_state.last_l < reg)
+ while (++search_state.last_l < reg)
+ search_state.lparen[search_state.last_l] = -1;
+ }
+ break;
+ }
+
+ case re_se_rparen:
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+ int reg = rxb->se_params [effect].op1;
+ search_state.rparen[reg] = urhere + 1;
+ if (search_state.last_r < reg)
+ {
+ while (++search_state.last_r < reg)
+ search_state.rparen[search_state.last_r]
+ = -1;
+ }
+ break;
+ }
+
+ case re_se_backref:
+ {
+ int reg = rxb->se_params [effect].op1;
+ if ( reg > search_state.last_r
+ || search_state.rparen[reg] < 0)
+ goto test_do_return;
+
+ {
+ int backref_status;
+ check_backreference:
+ backref_status
+ = back_check (&search_state.test_pos,
+ search_state.lparen[reg],
+ search_state.rparen[reg],
+ search_state.translate,
+ app_closure,
+ stop);
+ switch (backref_status)
+ {
+ case rx_back_check_continuation:
+ search_state.saved_reg = reg;
+ test_pc = rx_test_backreference_check;
+ goto test_return_continuation;
+ resume_continuation_2:
+ reg = search_state.saved_reg;
+ goto check_backreference;
+ case rx_back_check_fail:
+ /* Fail */
+ goto test_do_return;
+ case rx_back_check_pass:
+ /* pass --
+ * test_pos now advanced to last
+ * char matched by backref
+ */
+ break;
+ }
+ }
+ break;
+ }
+ case re_se_iter:
+ {
+ struct rx_counter_frame * csp
+ = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ if (csp->val == rxb->se_params[effect].op2)
+ goto test_do_return;
+ else
+ ++csp->val;
+ break;
+ }
+ case re_se_end_iter:
+ {
+ struct rx_counter_frame * csp
+ = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ if (csp->val < rxb->se_params[effect].op1)
+ goto test_do_return;
+ else
+ {
+ struct rx_counter_frame * source = csp;
+ while (source->inherited_from)
+ source = source->inherited_from;
+ if (!source || !source->cdr)
+ {
+ POP(search_state.counter_stack,
+ sizeof(struct rx_counter_frame));
+ }
+ else
+ {
+ source = source->cdr;
+ csp->val = source->val;
+ csp->tag = source->tag;
+ csp->cdr = 0;
+ csp->inherited_from = source;
+ }
+ }
+ break;
+ }
+ case re_se_tv:
+ /* is a noop */
+ break;
+ case re_se_try:
+ case re_se_pushback:
+ case re_se_push0:
+ case re_se_pushpos:
+ case re_se_chkpos:
+ case re_se_poppos:
+ case re_se_at_dot:
+ case re_se_syntax:
+ case re_se_not_syntax:
+ case re_se_begbuf:
+ case re_se_hat:
+ case re_se_wordbeg:
+ case re_se_wordbound:
+ case re_se_notwordbound:
+ case re_se_wordend:
+ case re_se_endbuf:
+ case re_se_dollar:
+ case re_se_fail:
+ case re_floogle_flap:
+ search_state.ret_val = 0;
+ goto test_do_return;
+ }
+ }
+ el = el->cdr;
+ }
+ /* Now the side effects are done,
+ * so get the next instruction.
+ * and move on.
+ */
+ search_state.ifr = &df->future_frame;
+ goto restart;
+ }
+
+ case rx_backtrack_point:
+ {
+ /* A backtrack point indicates that we've reached a
+ * non-determinism in the superstate NFA. This is a
+ * loop that exhaustively searches the possibilities.
+ *
+ * A backtracking strategy is used. We keep track of what
+ * registers are valid so we can erase side effects.
+ *
+ * First, make sure there is some stack space to hold
+ * our state.
+ */
+
+ struct rx_backtrack_frame * bf;
+
+ PUSH(search_state.backtrack_stack,
+ search_state.backtrack_frame_bytes);
+#ifdef RX_DEBUG_0
+ ++search_state.backtrack_depth;
+#endif
+
+ bf = ((struct rx_backtrack_frame *)
+ search_state.backtrack_stack->sp);
+ {
+ bf->stk_super = search_state.super;
+ /* We prevent the current superstate from being
+ * deleted from the superstate cache.
+ */
+ rx_lock_superstate (&rxb->rx, search_state.super);
+#ifdef RX_DEBUG_0
+ bf->stk_search_state.line_no = search_state.line_no;
+#endif
+ bf->stk_c = search_state.c;
+ bf->stk_test_pos = search_state.test_pos;
+ bf->stk_last_l = search_state.last_l;
+ bf->stk_last_r = search_state.last_r;
+ bf->df = ((struct rx_super_edge *)
+ search_state.ifr->data_2)->options;
+ bf->first_df = bf->df;
+ bf->counter_stack_sp = (search_state.counter_stack
+ ? search_state.counter_stack->sp
+ : 0);
+ bf->stk_test_ret = search_state.test_ret;
+ if (rxb->match_regs_on_stack)
+ {
+ int x;
+ regoff_t * stk =
+ (regoff_t *)((char *)bf + sizeof (*bf));
+ for (x = 0; x <= search_state.last_l; ++x)
+ stk[x] = search_state.lparen[x];
+ stk += x;
+ for (x = 0; x <= search_state.last_r; ++x)
+ stk[x] = search_state.rparen[x];
+ }
+ }
+
+ /* Here is a while loop whose body is mainly a function
+ * call and some code to handle a return from that
+ * function.
+ *
+ * From here on for the rest of `case backtrack_point' it
+ * is unsafe to assume that the search_state copies of
+ * variables saved on the backtracking stack are valid
+ * -- so read their values from the backtracking stack.
+ *
+ * This lets us use one generation fewer stack saves in
+ * the call-graph of a search.
+ */
+
+ while_non_det_options:
+#ifdef RX_DEBUG_0
+ ++search_state.lines_found;
+ if (rx_debug_trace)
+ fprintf (stderr, "@@@ %d calls %d @@@\n",
+ search_state.line_no, search_state.lines_found);
+
+ search_state.line_no = search_state.lines_found;
+#endif
+
+ if (bf->df->next_same_super_edge[0] == bf->first_df)
+ {
+ /* This is a tail-call optimization -- we don't recurse
+ * for the last of the possible futures.
+ */
+ search_state.ifr = (bf->df->effects
+ ? &bf->df->side_effects_frame
+ : &bf->df->future_frame);
+
+ rx_unlock_superstate (&rxb->rx, search_state.super);
+ POP(search_state.backtrack_stack,
+ search_state.backtrack_frame_bytes);
+#ifdef RX_DEBUG
+ --search_state.backtrack_depth;
+#endif
+ goto restart;
+ }
+ else
+ {
+ if (search_state.counter_stack)
+ {
+ struct rx_counter_frame * old_cf
+ = ((struct rx_counter_frame *)search_state.counter_stack->sp);
+ struct rx_counter_frame * cf;
+ PUSH(search_state.counter_stack, sizeof (struct rx_counter_frame));
+ cf = ((struct rx_counter_frame *)search_state.counter_stack->sp);
+ cf->tag = old_cf->tag;
+ cf->val = old_cf->val;
+ cf->inherited_from = old_cf;
+ cf->cdr = 0;
+ }
+ /* `Call' this test-match block */
+ search_state.ifr = (bf->df->effects
+ ? &bf->df->side_effects_frame
+ : &bf->df->future_frame);
+ goto recurse_test_match;
+ }
+
+ /* Returns in this block are accomplished by
+ * goto test_do_return. There are two cases.
+ * If there is some search-stack left,
+ * then it is a return from a `recursive' call.
+ * If there is no search-stack left, then
+ * we should return to the fastmap/search loop.
+ */
+
+ test_do_return:
+
+ if (!search_state.backtrack_stack)
+ {
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ fprintf (stderr, "!!! %d bails returning %d !!!\n",
+ search_state.line_no, search_state.test_ret);
+#endif
+
+ /* No more search-stack -- this test is done. */
+ if (search_state.test_ret != rx_test_internal_error)
+ goto return_from_test_match;
+ else
+ goto error_in_testing_match;
+ }
+
+ /* Returning from a recursive call to
+ * the test match block:
+ */
+
+ bf = ((struct rx_backtrack_frame *)
+ search_state.backtrack_stack->sp);
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ fprintf (stderr, "+++ %d returns %d (to %d)+++\n",
+ search_state.line_no,
+ search_state.test_ret,
+ bf->stk_search_state.line_no);
+#endif
+
+ while (search_state.counter_stack
+ && (!bf->counter_stack_sp
+ || (bf->counter_stack_sp
+ != search_state.counter_stack->sp)))
+ {
+ POP(search_state.counter_stack,
+ sizeof (struct rx_counter_frame));
+ }
+
+ if (search_state.test_ret == rx_test_internal_error)
+ {
+ POP (search_state.backtrack_stack,
+ search_state.backtrack_frame_bytes);
+ search_state.test_ret = rx_test_internal_error;
+ goto test_do_return;
+ }
+
+ /* If a non-longest match was found and that is good
+ * enough, return immediately.
+ */
+ if ( (search_state.test_ret == rx_test_found_first)
+ && search_state.first_found)
+ {
+ rx_unlock_superstate (&rxb->rx, bf->stk_super);
+ POP (search_state.backtrack_stack,
+ search_state.backtrack_frame_bytes);
+ goto test_do_return;
+ }
+
+ search_state.test_ret = bf->stk_test_ret;
+ search_state.last_l = bf->stk_last_l;
+ search_state.last_r = bf->stk_last_r;
+ bf->df = bf->df->next_same_super_edge[0];
+ search_state.super = bf->stk_super;
+ search_state.c = bf->stk_c;
+#ifdef RX_DEBUG_0
+ search_state.line_no = bf->stk_search_state.line_no;
+#endif
+
+ if (rxb->match_regs_on_stack)
+ {
+ int x;
+ regoff_t * stk =
+ (regoff_t *)((char *)bf + sizeof (*bf));
+ for (x = 0; x <= search_state.last_l; ++x)
+ search_state.lparen[x] = stk[x];
+ stk += x;
+ for (x = 0; x <= search_state.last_r; ++x)
+ search_state.rparen[x] = stk[x];
+ }
+
+ {
+ int x;
+ try_burst_2:
+ x = get_burst (&bf->stk_test_pos, app_closure, stop);
+ switch (x)
+ {
+ case rx_get_burst_continuation:
+ search_state.saved_bf = bf;
+ test_pc = rx_test_backtrack_return;
+ goto test_return_continuation;
+ resume_continuation_3:
+ bf = search_state.saved_bf;
+ goto try_burst_2;
+ case rx_get_burst_no_more:
+ /* Since we've been here before, it is some kind of
+ * error that we can't return.
+ */
+ case rx_get_burst_error:
+ search_state.test_ret = rx_test_internal_error;
+ goto test_do_return;
+ case rx_get_burst_ok:
+ break;
+ }
+ }
+ search_state.test_pos = bf->stk_test_pos;
+ goto while_non_det_options;
+ }
+
+
+ case rx_cache_miss:
+ /* Because the superstate NFA is lazily constructed,
+ * and in fact may erode from underneath us, we sometimes
+ * have to construct the next instruction from the hard way.
+ * This invokes one step in the lazy-conversion.
+ */
+ search_state.ifr = rx_handle_cache_miss (&rxb->rx,
+ search_state.super,
+ search_state.c,
+ search_state.ifr->data_2);
+ if (!search_state.ifr)
+ {
+ search_state.test_ret = rx_test_internal_error;
+ goto test_do_return;
+ }
+ goto restart;
+
+ case rx_backtrack:
+ /* RX_BACKTRACK means that we've reached the empty
+ * superstate, indicating that match can't succeed
+ * from this point.
+ */
+ goto test_do_return;
+
+ case rx_next_char:
+ case rx_error_inx:
+ case rx_num_instructions:
+ search_state.ret_val = 0;
+ goto test_do_return;
+ }
+ goto pseudo_while_1;
+ }
+
+ /* Healthy exits from the test-match loop do a
+ * `goto return_from_test_match' On the other hand,
+ * we might end up here.
+ */
+ error_in_testing_match:
+ test_state = rx_test_error;
+ goto test_returns_to_search;
+
+ /***** fastmap/search loop body
+ * considering the results testing for a match
+ */
+
+ return_from_test_match:
+
+ if (search_state.best_last_l >= 0)
+ {
+ if (regs && (regs->start != search_state.best_lparen))
+ {
+ bcopy (search_state.best_lparen, regs->start,
+ regs->num_regs * sizeof (int));
+ bcopy (search_state.best_rparen, regs->end,
+ regs->num_regs * sizeof (int));
+ }
+ if (regs && !rxb->no_sub)
+ {
+ int q;
+ int bound = (regs->num_regs > search_state.num_regs
+ ? regs->num_regs
+ : search_state.num_regs);
+ regoff_t * s = regs->start;
+ regoff_t * e = regs->end;
+ for (q = search_state.best_last_l + 1; q < bound; ++q)
+ s[q] = e[q] = -1;
+ }
+ search_state.ret_val = search_state.best_lparen[0];
+ test_state = rx_test_ok;
+ goto test_returns_to_search;
+ }
+ else
+ {
+ test_state = rx_test_fail;
+ goto test_returns_to_search;
+ }
+
+ test_return_continuation:
+ search_state.test_match_resume_pt = test_pc;
+ test_state = rx_test_continuation;
+ goto test_returns_to_search;
+ }
+}
+
+
+
+#endif /* RX_WANT_RX_DEFS */
+
+
+
+#else /* RX_WANT_SE_DEFS */
+ /* Integers are used to represent side effects.
+ *
+ * Simple side effects are given negative integer names by these enums.
+ *
+ * Non-negative names are reserved for complex effects.
+ *
+ * Complex effects are those that take arguments. For example,
+ * a register assignment associated with a group is complex because
+ * it requires an argument to tell which group is being matched.
+ *
+ * The integer name of a complex effect is an index into rxb->se_params.
+ */
+
+ RX_DEF_SE(1, re_se_try, = -1) /* Epsilon from start state */
+
+ RX_DEF_SE(0, re_se_pushback, = re_se_try - 1)
+ RX_DEF_SE(0, re_se_push0, = re_se_pushback -1)
+ RX_DEF_SE(0, re_se_pushpos, = re_se_push0 - 1)
+ RX_DEF_SE(0, re_se_chkpos, = re_se_pushpos -1)
+ RX_DEF_SE(0, re_se_poppos, = re_se_chkpos - 1)
+
+ RX_DEF_SE(1, re_se_at_dot, = re_se_poppos - 1) /* Emacs only */
+ RX_DEF_SE(0, re_se_syntax, = re_se_at_dot - 1) /* Emacs only */
+ RX_DEF_SE(0, re_se_not_syntax, = re_se_syntax - 1) /* Emacs only */
+
+ RX_DEF_SE(1, re_se_begbuf, = re_se_not_syntax - 1) /* match beginning of buffer */
+ RX_DEF_SE(1, re_se_hat, = re_se_begbuf - 1) /* match beginning of line */
+
+ RX_DEF_SE(1, re_se_wordbeg, = re_se_hat - 1)
+ RX_DEF_SE(1, re_se_wordbound, = re_se_wordbeg - 1)
+ RX_DEF_SE(1, re_se_notwordbound, = re_se_wordbound - 1)
+
+ RX_DEF_SE(1, re_se_wordend, = re_se_notwordbound - 1)
+ RX_DEF_SE(1, re_se_endbuf, = re_se_wordend - 1)
+
+ /* This fails except at the end of a line.
+ * It deserves to go here since it is typically one of the last steps
+ * in a match.
+ */
+ RX_DEF_SE(1, re_se_dollar, = re_se_endbuf - 1)
+
+ /* Simple effects: */
+ RX_DEF_SE(1, re_se_fail, = re_se_dollar - 1)
+
+ /* Complex effects. These are used in the 'se' field of
+ * a struct re_se_params. Indexes into the se array
+ * are stored as instructions on nfa edges.
+ */
+ RX_DEF_CPLX_SE(1, re_se_win, = 0)
+ RX_DEF_CPLX_SE(1, re_se_lparen, = re_se_win + 1)
+ RX_DEF_CPLX_SE(1, re_se_rparen, = re_se_lparen + 1)
+ RX_DEF_CPLX_SE(0, re_se_backref, = re_se_rparen + 1)
+ RX_DEF_CPLX_SE(0, re_se_iter, = re_se_backref + 1)
+ RX_DEF_CPLX_SE(0, re_se_end_iter, = re_se_iter + 1)
+ RX_DEF_CPLX_SE(0, re_se_tv, = re_se_end_iter + 1)
+
+#endif
+
+#endif
diff --git a/lib/scanners.c b/lib/scanners.c
new file mode 100644
index 0000000..4f50245
--- /dev/null
+++ b/lib/scanners.c
@@ -0,0 +1,1201 @@
+/* scanners.c -- language map parsing and per-language token scanners
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <getopt.h>
+
+#include <config.h>
+#include "system.h"
+#include "error.h"
+#include "strxtra.h"
+#include "token.h"
+#include "alloc.h"
+#include "scanners.h"
+
+#define DEBUG(args) /* printf args */
+
+/* Obstack from which parse_language_map_file allocates `struct lang_args'
+   records and copies of their pattern and argument strings.  */
+struct obstack lang_args_obstack;
+/* Entry for the `**' pattern; set by parse_language_map_file the first
+   time such an entry is seen, and kept off the main list.  */
+struct lang_args *lang_args_default = 0;
+/* Head of the list of language-map entries, linked in the order parsed.  */
+struct lang_args *lang_args_list = 0;
+/* Obstack on which the scanners (e.g. get_token_c) reserve space for the
+   tokens they build.  */
+struct obstack tokens_obstack;
+
+/* Both of these are defined outside this file.  */
+extern void usage __P((void));
+extern char *program_name;
+
+/****************************************************************************/
+
+/* Language-map helpers defined later in this file.  */
+struct lang_args **parse_language_map_file __P((char const *file_name, struct lang_args **next_ptr));
+char *read_language_map_file __P((char const *file_name));
+
+/* Per-language entry points: a token scanner, an argument parser and a
+   help printer for each language listed in the languages_0 table.  */
+static struct token *get_token_c __P((FILE *in_FILE, void const *args, int *flags));
+static void *parse_args_c __P((char **argv, int argc));
+static void help_me_c __P((void));
+
+static struct token *get_token_asm __P((FILE *in_FILE, void const *args, int *flags));
+static void *parse_args_asm __P((char **argv, int argc));
+static void help_me_asm __P((void));
+
+static struct token *get_token_text __P((FILE *in_FILE, void const *args, int *flags));
+static void *parse_args_text __P((char **argv, int argc));
+static void help_me_text __P((void));
+
+/* Table of supported languages.  Each initializer gives, in order, the
+   language name matched by get_language, its argument parser, its token
+   scanner and its help printer; any remaining members of `struct
+   language' are left zero-initialized.  */
+struct language languages_0[] =
+{
+ { "C", parse_args_c, get_token_c, help_me_c },
+ { "asm", parse_args_asm, get_token_asm, help_me_asm },
+ { "text", parse_args_text, get_token_text, help_me_text },
+};
+/* End bound used by the loops over languages_0 (`lang < languages_N').
+   NOTE(review): presumably cardinalityof yields the element count, making
+   this a one-past-the-end pointer -- confirm against its definition.  */
+struct language const *languages_N = &languages_0[cardinalityof (languages_0)];
+
+/* Print the help text of every language in the languages_0 table to
+   standard output, each preceded by a blank line.  */
+void
+language_help_me (void)
+{
+  struct language *entry = languages_0;
+
+  while (entry < languages_N)
+    {
+      putchar ('\n');
+      entry->lg_help_me ();
+      entry++;
+    }
+}
+
+/* Remember one language-specific option for later parsing by
+   language_getopt.  ARG has the form `LANG:OPTION' and is split in place
+   with strtok: LANG selects the entry in the language table, and the
+   first blank-delimited word after the colon is appended to that
+   language's saved argument vector.  The first time a language receives
+   an option, program_name is stored as element 0 of its vector.
+
+   An unknown (or missing) language name is reported on stderr and
+   usage is called.  An ARG with nothing after the colon is ignored
+   instead of storing a null pointer in the argument vector.
+
+   NOTE(review): the capacity of lg_argv is not visible here, so no
+   bounds check is made -- confirm against `struct language'.  */
+void
+language_save_arg (char *arg)
+{
+  static char horizontal_space[] = " \t";
+  char *lang_name = strtok (arg, ":");
+  /* strtok yields a null pointer for an empty ARG; do not hand that to
+     get_language.  */
+  struct language *lang = lang_name ? get_language (lang_name) : 0;
+  char *lang_option;
+
+  if (lang == 0)
+    {
+      fprintf (stderr, _("unrecognized language: `%s'\n"),
+               lang_name ? lang_name : "");
+      usage ();
+      /* Never dereference the null LANG below, even if usage returns.  */
+      return;
+    }
+
+  lang_option = strtok (0, horizontal_space);
+  if (lang_option == 0)
+    return;
+
+  if (lang->lg_argc == 0)
+    lang->lg_argv[lang->lg_argc++] = program_name;
+  lang->lg_argv[lang->lg_argc++] = lang_option;
+}
+
+/* Give every language that has a non-empty saved argument vector the
+   chance to parse it with its own lg_parse_args function.  */
+void
+language_getopt ()
+{
+  struct language *entry = languages_0;
+
+  while (entry < languages_N)
+    {
+      if (entry->lg_argc != 0)
+        entry->lg_parse_args (entry->lg_argv, entry->lg_argc);
+      entry++;
+    }
+}
+
+/* Look LANG_NAME up in the languages_0 table.  Returns the matching
+   entry, or a null pointer when no language has that name.  */
+struct language *
+get_language (char const *lang_name)
+{
+  struct language *candidate = languages_0;
+
+  while (candidate < languages_N)
+    {
+      if (strequ (lang_name, candidate->lg_name))
+        {
+          DEBUG (("lang=%s", lang_name));
+          return candidate;
+        }
+      candidate++;
+    }
+  DEBUG (("!lang=%s", lang_name));
+  return 0;
+}
+
+/****************************************************************************/
+
+/* Next value to hand out as the la_index of a list entry; incremented by
+   parse_language_map_file for each entry it links onto the list.  */
+int lang_args_index = 0;
+
+/* Initialize the language-args obstack and load the language map from
+   FILE_NAME into lang_args_list.  A null FILE_NAME selects LANGUAGE_MAP.
+   Exits through error if the obstack cannot be set up.  */
+void
+parse_language_map (char const *file_name)
+{
+  char const *map_name = file_name ? file_name : LANGUAGE_MAP;
+
+  if (!obstack_init (&lang_args_obstack))
+    error (1, 0, _("can't allocate language args obstack: memory exhausted"));
+  parse_language_map_file (map_name, &lang_args_list);
+}
+
+/* Parse the language map file FILE_NAME, appending one `struct lang_args'
+   per entry to the list whose current tail link is *NEXT_PTR.
+
+   Each entry is a line of the form
+       PATTERN LANGUAGE [ARGS...]
+   where `#' introduces a comment.  Two patterns are special:
+     `***' [FILE]  read another map file at this point (LANGUAGE_MAP when
+                   no FILE follows on the line);
+     `**'          the default entry: stored in lang_args_default instead
+                   of on the list; only the first one seen is kept.
+
+   The la_next link of every new record is cleared when it is allocated,
+   so the list is always properly terminated.  Characters are converted
+   to `unsigned char' before being passed to isspace, as <ctype.h>
+   requires.
+
+   Returns the address of the tail link following the last entry appended
+   so that a caller (including the recursive `***' case) can continue
+   appending.  The file contents are read into a temporary buffer that is
+   freed before returning.  */
+struct lang_args **
+parse_language_map_file (char const *file_name, struct lang_args **next_ptr)
+{
+  static char white_space[] = " \t\r\n\v\f";
+  static char horizontal_space[] = " \t";
+  static char vertical_space[] = "\r\n\v\f";
+  char *lang_map_buffer;
+  char *lmp;
+
+  lmp = lang_map_buffer = read_language_map_file (file_name);
+  for (;;)
+    {
+      struct lang_args *new_args;
+      struct language const *lang;
+      int pattern_size;
+      char *lang_name;
+      int space;
+
+      /* Skip leading white space and full-line comments */
+      while (*lmp)
+        {
+          lmp += strspn (lmp, white_space);
+          if (*lmp != '#')
+            break;
+          lmp += strcspn (lmp, vertical_space);
+        }
+      if (*lmp == '\0')
+        break;
+
+      pattern_size = strcspn (lmp, white_space);
+      if (pattern_size == 3 && strnequ (lmp, "***", 3))
+        {
+          /* Include directive: nothing else on the line means the
+             default map, otherwise the next word names the file.  */
+          lmp += pattern_size;
+          lmp += strspn (lmp, horizontal_space);
+          if (isspace ((unsigned char) *lmp))
+            next_ptr = parse_language_map_file (LANGUAGE_MAP, next_ptr);
+          else
+            {
+              char *end = lmp + strcspn (lmp, white_space);
+              *end = '\0';
+              next_ptr = parse_language_map_file (lmp, next_ptr);
+              lmp = end + 1;
+            }
+          continue;
+        }
+
+      new_args = OBSTACK_ALLOC (&lang_args_obstack, struct lang_args, 1);
+      if (new_args == 0)
+        error (1, 0, _("can't allocate language args: memory exhausted"));
+      new_args->la_pattern = obstack_copy0 (&lang_args_obstack, lmp, pattern_size);
+      new_args->la_args_string = 0;
+      /* Obstack memory is not zeroed: terminate the list explicitly.  */
+      new_args->la_next = 0;
+      lmp += pattern_size;
+      lmp += strspn (lmp, horizontal_space);
+      if (isspace ((unsigned char) *lmp))
+        {
+          error (0, 0, _("language name expected following `%s' in file `%s'"),
+                 new_args->la_pattern, file_name);
+          obstack_free (&lang_args_obstack, new_args);
+          continue;
+        }
+
+      /* Isolate the language name, remembering which character ended it
+         so we can tell whether arguments may follow on the same line.  */
+      lang_name = lmp;
+      lmp += strcspn (lmp, white_space);
+      space = *lmp;
+      *lmp++ = '\0';
+      lmp += strspn (lmp, horizontal_space);
+      lang = new_args->la_language = get_language (lang_name);
+
+      if (*lmp == '#')
+        lmp += strcspn (lmp, vertical_space);
+      else if (!isspace ((unsigned char) *lmp) && (space == ' ' || space == '\t'))
+        {
+          int args_size = strcspn (lmp, vertical_space);
+          new_args->la_args_string = obstack_copy0 (&lang_args_obstack, lmp, args_size);
+          lmp += args_size;
+        }
+      new_args->la_args_digested = (lang
+                                    ? lang->lg_parse_args (&new_args->la_args_string, 0)
+                                    : 0);
+      if (pattern_size == 2 && strnequ (new_args->la_pattern, "**", 2))
+        {
+          /* Only the first default entry counts; discard later ones.  */
+          if (lang_args_default)
+            {
+              obstack_free (&lang_args_obstack, new_args);
+              continue;
+            }
+          lang_args_default = new_args;
+          DEBUG ((", <default>"));
+        }
+      else
+        {
+          new_args->la_index = lang_args_index++;
+          *next_ptr = new_args;
+          next_ptr = &new_args->la_next;
+        }
+      DEBUG ((", pat=%s\n", new_args->la_pattern));
+    }
+  free (lang_map_buffer);
+  return next_ptr;
+}
+
+/* Read the whole of FILE_NAME into a freshly allocated buffer and return
+   it.  The buffer holds the file contents followed by a newline and a
+   terminating NUL, so the parser can rely on every line being
+   newline-terminated.  The caller is responsible for freeing it.
+
+   Partial reads are continued until the size reported by fstat has been
+   read, and reads interrupted by a signal are retried.  Any failure
+   (open, fstat, allocation, read error, or the file turning out shorter
+   than fstat reported) is fatal and reported through error.  */
+char *
+read_language_map_file (char const *file_name)
+{
+  int map_fd;
+  char *lang_map_buffer;
+  struct stat st;
+  size_t total = 0;
+
+  map_fd = open (file_name, O_RDONLY);
+  if (map_fd < 0)
+    error (1, errno, _("can't open language map file `%s'"), file_name);
+  if (fstat (map_fd, &st) < 0)
+    error (1, errno, _("can't get size of map file `%s'"), file_name);
+
+  /* Two extra bytes for the trailing newline and NUL.  */
+  lang_map_buffer = MALLOC (char, st.st_size + 2);
+  if (lang_map_buffer == 0)
+    error (1, 0, _("can't allocate language args: memory exhausted"));
+
+  while (total < (size_t) st.st_size)
+    {
+      int bytes = read (map_fd, lang_map_buffer + total, st.st_size - total);
+      if (bytes < 0)
+        {
+#ifdef EINTR
+          if (errno == EINTR)
+            continue;
+#endif
+          error (1, errno, _("can't read language map file `%s'"), file_name);
+        }
+      /* Premature end of file is not an errno-style failure, so do not
+         report whatever stale value errno happens to hold.  */
+      if (bytes == 0)
+        error (1, 0, _("can't read entire language map file `%s'"), file_name);
+      total += bytes;
+    }
+  lang_map_buffer[total] = '\n';
+  lang_map_buffer[total + 1] = '\0';
+
+  close (map_fd);
+  return lang_map_buffer;
+}
+
+/****************************************************************************/
+
+/* Split ARGS_STRING in place (with strtok, on blanks and tabs) into an
+   argument vector suitable for getopt.  Element 0 of the vector is
+   program_name, followed by one pointer per word of ARGS_STRING; the
+   vector is terminated by a null pointer.  The number of elements, not
+   counting the terminator, is stored in *ARGCP and the vector itself in
+   *ARGVP.  The vector is heap-allocated and owned by the caller; its
+   elements point into ARGS_STRING and program_name.
+
+   A string of N characters contains at most (N + 1) / 2 words, so the
+   vector is sized for that many pointers plus program_name and the
+   terminator.  */
+void
+tokenize_args_string (char *args_string, int *argcp, char ***argvp)
+{
+  static char horizontal_space[] = " \t";
+  size_t max_slots = (strlen (args_string) + 1) / 2 + 2;
+  char **argv_0 = MALLOC (char *, max_slots);
+  char **argv = argv_0;
+  char *arg;
+
+  *argv++ = program_name;
+  for (arg = strtok (args_string, horizontal_space);
+       arg != 0;
+       arg = strtok (0, horizontal_space))
+    *argv++ = arg;
+  *argv = 0;
+
+  *argcp = argv - argv_0;
+  /* Trim the vector to what was used, keeping the null terminator.  */
+  *argvp = REALLOC (argv_0, char *, *argcp + 1);
+}
+
+/* OR the bit mask TYPE into the class-table entry of every character in
+   the NUL-terminated string CHARS.  CTYPE is the base of the table; the
+   entry for character code C is CTYPE[C + 1].  Each character is
+   converted to `unsigned char' before indexing, so bytes with the high
+   bit set cannot produce a negative index where plain `char' is
+   signed.  */
+static void
+set_ushort_ctype (unsigned short *ctype, char const *chars, int type)
+{
+  unsigned short *rct = &ctype[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] |= type;
+}
+
+/* Clear the bits of TYPE in the class-table entry of every character in
+   the NUL-terminated string CHARS.  CTYPE is the base of the table; the
+   entry for character code C is CTYPE[C + 1].  Each character is
+   converted to `unsigned char' before indexing, so bytes with the high
+   bit set cannot produce a negative index where plain `char' is
+   signed.  */
+static void
+clear_ushort_ctype (unsigned short *ctype, char const *chars, int type)
+{
+  unsigned short *rct = &ctype[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] &= ~type;
+}
+
+/* Same as set_ushort_ctype, for a table of `unsigned char' entries: OR
+   TYPE into CTYPE[C + 1] for every character C of the NUL-terminated
+   string CHARS.  Characters are converted to `unsigned char' before
+   indexing so that high-bit bytes never index below the table.  */
+static void
+set_uchar_ctype (unsigned char *ctype, char const *chars, int type)
+{
+  unsigned char *rct = &ctype[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] |= type;
+}
+
+/* Same as clear_ushort_ctype, for a table of `unsigned char' entries:
+   clear the bits of TYPE in CTYPE[C + 1] for every character C of the
+   NUL-terminated string CHARS.  Characters are converted to `unsigned
+   char' before indexing so that high-bit bytes never index below the
+   table.  */
+static void
+clear_uchar_ctype (unsigned char *ctype, char const *chars, int type)
+{
+  unsigned char *rct = &ctype[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] &= ~type;
+}
+
+/*************** C & C++ ****************************************************/
+
+/* Character-class bits.  Each entry of a class table (such as ctype_c)
+   is the OR of the bits that apply to that character.  */
+#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */
+#define DG 0x0002 /* decimal digit [0-9] */
+#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */
+#define C1 0x0008 /* C comment introduction char: / */
+#define C2 0x0010 /* C comment termination char: * */
+#define Q1 0x0020 /* single quote: ' */
+#define Q2 0x0040 /* double quote: " */
+#define ES 0x0080 /* escape char: \ */
+#define NL 0x0100 /* newline: \n */
+#define EF 0x0200 /* EOF */
+#define SK 0x0400 /* Make these chars valid for names within strings */
+#define VH 0x0800 /* VHIL comment introduction char: # */
+#define WS 0x1000 /* White space characters */
+
+/* character class membership macros: */
+/* All of these index a variable named `rct', which must be in scope at
+   the point of use; the scanners set it to point at element 1 of the
+   class table.  */
+
+#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */
+#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */
+#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */
+#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */
+#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */
+#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */
+#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */
+
+/* The `BORING' classes should be skipped over until something
+ interesting comes along... */
+
+#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */
+#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */
+#define ISCCBORING(c) (!((rct)[c] & (EF|NL))) /* C++ // comment fluff */
+#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */
+#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */
+
+/* Character-class table for the C scanner.  Element 0 carries the EF bit
+   (the slot reached by index -1 through a pointer to element 1, i.e. by
+   EOF assuming EOF is -1); the entry for character code C is at index
+   C + 1.  Only codes 0 through 0177 have explicit initializers; the
+   remaining elements are implicitly zero.  The row labels below are the
+   octal character codes of the first entry in each row.  */
+static unsigned short ctype_c[257] =
+{
+ EF,
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1,
+/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
+/*060*/ DG, DG, DG, DG, DG, DG, DG, DG,
+/*070*/ DG, DG, 0, 0, 0, 0, 0, 0,
+/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1,
+/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
+ /* FIXME: latin-1 */
+};
+
+/* Options of the C scanner, as produced by parse_args_c.  */
+struct args_c
+{
+ int strip_underscore; /* set to 1 by the -u / --strip-underscore option */
+ unsigned short *ctype; /* class table: ctype_c, or a private clone of it
+ once -k or -i has modified the SK bits */
+};
+
+/* Default C scanner options; also the object updated when parse_args_c is
+   given a ready-made argument vector (nonzero argc).  */
+static struct args_c args_c = { 0, ctype_c };
+
+/* Long options accepted by parse_args_c; each maps to the short option
+   letter in its last field.  The all-zero entry terminates the list.  */
+static struct option const long_options_c[] =
+{
+ { "keep", required_argument, 0, 'k' },
+ { "ignore", required_argument, 0, 'i' },
+ { "strip-underscore", no_argument, 0, 'u' },
+ { 0 }
+};
+
+/* Print the (translated) description of the C scanner's options -k, -i
+   and -u to standard output.  */
+static void
+help_me_c (void)
+{
+ printf (_("\
+C language:\n\
+ -k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\
+ -i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\
+ -u,--strip-underscore Strip a leading underscore from single-token strings\n\
+"));
+}
+
+/* Parse options for the C scanner and return a `struct args_c'
+   describing them.
+
+   With a null ARGV, or a null *ARGV, the shared defaults (args_c) are
+   returned untouched.  With a nonzero ARGC, ARGV is a ready-made argument
+   vector and the options are applied to the shared args_c.  With ARGC of
+   zero, *ARGV is a single string of options: a copy of it is tokenized
+   into a temporary vector and the options are applied to a freshly
+   allocated `struct args_c'.
+
+   The class table is cloned from ctype_c the first time -k or -i is
+   seen, so the pristine table itself is never modified.  An
+   unrecognized option calls usage.  */
+static void *
+parse_args_c (char **argv, int argc)
+{
+  struct args_c *result;
+  char *scratch = 0;
+  int optc;
+
+  if (argv == 0 || *argv == 0)
+    return &args_c;
+
+  if (argc == 0)
+    {
+      /* Options arrived as one string: build a private vector.  */
+      scratch = strdup (*argv);
+      tokenize_args_string (scratch, &argc, &argv);
+      result = MALLOC (struct args_c, 1);
+      result->strip_underscore = 0;
+      result->ctype = ctype_c;
+    }
+  else
+    result = &args_c;
+
+  /* Restart getopt from the beginning of this vector.  */
+  optind = 0;
+  while ((optc = getopt_long (argc, argv, "k:i:u",
+                              long_options_c, (int *) 0)) >= 0)
+    {
+      int edits_table = (optc == 'k' || optc == 'i');
+
+      if (edits_table && result->ctype == ctype_c)
+        result->ctype = CLONE (ctype_c, unsigned short, cardinalityof (ctype_c));
+
+      if (optc == 'k')
+        set_ushort_ctype (result->ctype, optarg, SK);
+      else if (optc == 'i')
+        clear_ushort_ctype (result->ctype, optarg, SK);
+      else if (optc == 'u')
+        result->strip_underscore = 1;
+      else
+        usage ();
+    }
+
+  /* Release the temporary vector and string built above, if any.  */
+  if (scratch)
+    {
+      free (argv);
+      free (scratch);
+    }
+  return result;
+}
+
+
+/* Grab the next identifier from the C source file. This state
+ machine is built for speed, not elegance.
+
+ IN_FILE is the stream to scan, ARGS points to a `struct args_c', and
+ *FLAGS receives the TOK_* classification of the token found. The
+ token is built on tokens_obstack: room for the fixed part of
+ `struct token' is reserved first and the name is appended after it.
+ Returns the finished object, or 0 at end of file (after releasing
+ the reserved space).
+
+ NOTE(review): nothing here checks that a token fits in id_0, which
+ holds BUFSIZ bytes. */
+
+static struct token *
+get_token_c (FILE *in_FILE, void const *args, int *flags)
+{
+#define ARGS ((struct args_c *) args)
+ /* Nonzero when the next character read is the first one on a line;
+ kept across calls. */
+ static int new_line = 1;
+ /* Skip the table's first slot so the class macros can be indexed
+ with the value returned by getc. */
+ unsigned short *rct = &ARGS->ctype[1];
+ char id_0[BUFSIZ];
+ char *id = id_0;
+ int c;
+
+ /* Reserve the part of struct token that precedes tok_name. */
+ obstack_blank (&tokens_obstack, offsetof (struct token, tok_name));
+
+top:
+ c = getc (in_FILE);
+ if (new_line)
+ {
+ /* Start of a line: look for a `#' directive. */
+ new_line = 0;
+ if (c != '#')
+ goto next;
+ c = getc (in_FILE);
+ while (ISBORING (c))
+ c = getc (in_FILE);
+ if (!ISID1ST (c))
+ goto next;
+ /* Collect the directive name. */
+ id = id_0;
+ *id++ = c;
+ while (ISIDREST (c = getc (in_FILE)))
+ *id++ = c;
+ *id = '\0';
+ if (strequ (id_0, "include"))
+ {
+ while (c == ' ' || c == '\t')
+ c = getc (in_FILE);
+ if (c == '\n')
+ {
+ new_line = 1;
+ goto top;
+ }
+ id = id_0;
+ if (c == '"')
+ {
+ /* #include "file": the file name becomes a string token. */
+ c = getc (in_FILE);
+ while (c != '\n' && c != EOF && c != '"')
+ {
+ *id++ = c;
+ c = getc (in_FILE);
+ }
+ *flags = TOK_STRING;
+ }
+ else if (c == '<')
+ {
+ /* #include <file>: likewise. */
+ c = getc (in_FILE);
+ while (c != '\n' && c != EOF && c != '>')
+ {
+ *id++ = c;
+ c = getc (in_FILE);
+ }
+ *flags = TOK_STRING;
+ }
+ else if (ISID1ST (c))
+ {
+ /* #include IDENTIFIER: record the identifier as a name. */
+ *id++ = c;
+ while (ISIDREST (c = getc (in_FILE)))
+ *id++ = c;
+ *flags = TOK_NAME;
+ }
+ else
+ {
+ /* Anything else: discard the rest of the line. */
+ while (c != '\n' && c != EOF)
+ c = getc (in_FILE);
+ new_line = 1;
+ goto top;
+ }
+ /* Discard whatever follows the operand on this line. */
+ while (c != '\n' && c != EOF)
+ c = getc (in_FILE);
+ new_line = 1;
+ obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+ return obstack_finish (&tokens_obstack);
+ }
+ /* Directives whose name starts with "if", plus define, elif and
+ undef, have the rest of their line scanned like ordinary code. */
+ if (strnequ (id_0, "if", 2)
+ || strequ (id_0, "define")
+ || strequ (id_0, "elif") /* ansi C */
+ || strequ (id_0, "undef"))
+ goto next;
+ /* Every other directive: skip the whole line. */
+ while ((c != '\n') && (c != EOF))
+ c = getc (in_FILE);
+ new_line = 1;
+ goto top;
+ }
+
+next:
+ while (ISBORING (c))
+ c = getc (in_FILE);
+
+ switch (c)
+ {
+ case '"':
+ /* String literal: copy it (closing quote included) into id_0. */
+ id = id_0;
+ *id++ = c = getc (in_FILE);
+ for (;;)
+ {
+ while (ISQ2BORING (c))
+ *id++ = c = getc (in_FILE);
+ if (c == '\\')
+ {
+ /* Take the character after a backslash unexamined. */
+ *id++ = c = getc (in_FILE);
+ continue;
+ }
+ else if (c != '"')
+ goto next;
+ break;
+ }
+ /* Overwrite the closing quote with the terminator. */
+ *--id = '\0';
+ id = id_0;
+ /* NOTE(review): *id is a plain char used as a table index here. */
+ while (ISSTRKEEP (*id))
+ id++;
+ /* Drop the string if it is empty or if some character in it fails
+ ISSTRKEEP. */
+ if (*id || id == id_0)
+ {
+ c = getc (in_FILE);
+ goto next;
+ }
+ *flags = TOK_STRING;
+ if (ARGS->strip_underscore && id_0[0] == '_' && id_0[1])
+ obstack_grow0 (&tokens_obstack, id_0 + 1, id - id_0 - 1);
+ else
+ obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+ return obstack_finish (&tokens_obstack);
+
+ case '\'':
+ /* Character constant: skip it, no token is produced. */
+ c = getc (in_FILE);
+ for (;;)
+ {
+ while (ISQ1BORING (c))
+ c = getc (in_FILE);
+ if (c == '\\')
+ {
+ c = getc (in_FILE);
+ continue;
+ }
+ else if (c == '\'')
+ c = getc (in_FILE);
+ goto next;
+ }
+
+ case '/':
+ c = getc (in_FILE);
+ if (c == '/')
+ { /* Cope with C++ comment */
+ while (ISCCBORING (c))
+ c = getc (in_FILE);
+ new_line = 1;
+ goto top;
+ }
+ else if (c != '*')
+ goto next;
+ /* C comment: advance to each C2 character (or end of file) and
+ check whether a `/' follows it. */
+ c = getc (in_FILE);
+ for (;;)
+ {
+ while (ISCBORING (c))
+ c = getc (in_FILE);
+ c = getc (in_FILE);
+ if (c == '/')
+ {
+ c = getc (in_FILE);
+ goto next;
+ }
+ else if (ISEOF (c))
+ {
+ /* Unterminated comment: give back the reserved space. */
+ new_line = 1;
+ obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+ return 0;
+ }
+ }
+
+ case '\n':
+ new_line = 1;
+ goto top;
+
+ default:
+ if (ISEOF (c))
+ {
+ /* End of input: reset for the next file and release the
+ reserved space. */
+ new_line = 1;
+ obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+ return 0;
+ }
+ id = id_0;
+ *id++ = c;
+ if (ISID1ST (c))
+ {
+ *flags = TOK_NAME;
+ while (ISIDREST (c = getc (in_FILE)))
+ *id++ = c;
+ }
+ else if (ISDIGIT (c))
+ {
+ *flags = TOK_NUMBER;
+ while (ISNUMBER (c = getc (in_FILE)))
+ *id++ = c;
+ }
+ else
+ {
+ /* NOTE(review): this branch leaves *flags unassigned before the
+ `|=' below and pushes the same character back. */
+ if (isprint (c))
+ fprintf (stderr, _("junk: `%c'"), c);
+ else
+ fprintf (stderr, _("junk: `\\%03o'"), c);
+ }
+ /* C is the first character past the token; leave it for the next
+ call. */
+ ungetc (c, in_FILE);
+ *flags |= TOK_LITERAL;
+ obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+ return obstack_finish (&tokens_obstack);
+ }
+#undef ARGS
+}
+
+/* Retire the C scanner's class bits and predicates; the assembly
+ scanner that follows defines several of the same names with
+ different values. */
+#undef I1
+#undef DG
+#undef NM
+#undef C1
+#undef C2
+#undef Q1
+#undef Q2
+#undef ES
+#undef NL
+#undef EF
+#undef SK
+#undef VH
+#undef WS
+#undef ISDIGIT
+#undef ISNUMBER
+#undef ISEOF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISSTRKEEP
+#undef ISSPACE
+#undef ISBORING
+#undef ISCBORING
+#undef ISCCBORING
+#undef ISQ1BORING
+#undef ISQ2BORING
+
+/*************** Assembly ***************************************************/
+
+/* Class bits for the assembly scanner. All eight fit in one byte,
+ matching the unsigned char element type of ctype_asm. */
+#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
+#define NM 0x02 /* digit [0-9a-fA-FxX] */
+#define NL 0x04 /* newline: \n */
+#define CM 0x08 /* assembler comment char: usually # or | */
+#define IG 0x10 /* ignore `identifiers' with these chars in them */
+#define C1 0x20 /* C comment introduction char: / */
+#define C2 0x40 /* C comment termination char: * */
+#define EF 0x80 /* EOF */
+
+/* Assembly Language character classes */
+/* Each predicate indexes `rct', which must be in scope at the point of
+ use. Note the naming: here ISCBORING runs to the end of a line (it
+ is used for assembler comments) and ISCCBORING runs to the next C2
+ character (it is used inside C comments). */
+#define ISID1ST(c) ((rct)[c] & (I1))
+#define ISIDREST(c) ((rct)[c] & (I1|NM))
+#define ISNUMBER(c) ((rct)[c] & (NM))
+#define ISEOF(c) ((rct)[c] & (EF))
+#define ISCOMMENT(c) ((rct)[c] & (CM))
+#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1)))
+#define ISCBORING(c) (!((rct)[c] & (EF|NL)))
+#define ISCCBORING(c) (!((rct)[c] & (EF|C2)))
+#define ISIGNORE(c) ((rct)[c] & (IG))
+
+/* Character-class table for the assembly scanner. The first slot is
+ the class for end of file; the rest are laid out by character code,
+ eight per row. Only codes 000..0177 are listed; the remaining
+ elements of the 257-element array are implicitly zero. No entry
+ carries CM or IG by default: those bits are only added by
+ parse_args_asm in response to -c and -i. */
+static unsigned char ctype_asm[257] =
+{
+ EF,
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
+/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
+/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
+/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
+/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
+
+};
+
+/* Digested options for the assembly scanner, as produced by
+ parse_args_asm and consumed by get_token_asm. */
+struct args_asm
+{
+ /* Greater than zero: treat `#' at the start of a line as a C
+ preprocessor directive. */
+ int handle_cpp;
+ /* Nonzero: drop a leading `_' from tokens. */
+ int strip_underscore;
+ /* Class table to scan with: the shared ctype_asm or a private
+ clone. */
+ unsigned char *ctype;
+};
+
+/* Defaults: handle cpp directives, keep underscores, shared table. */
+static struct args_asm args_asm = { 1, 0, ctype_asm };
+
+/* Long-option spellings for the assembly scanner.  Each entry returns
+   the letter of the matching short option in the "c:k:i:un" optstring
+   used by parse_args_asm.  `--no-cpp' must return 'n': that is the
+   letter the parser's switch handles and the one the help text
+   advertises; any other value ends up in the usage() branch.  */
+static struct option const long_options_asm[] =
+{
+  { "comment", required_argument, 0, 'c' },
+  { "keep", required_argument, 0, 'k' },
+  { "ignore", required_argument, 0, 'i' },
+  { "strip-underscore", no_argument, 0, 'u' },
+  { "no-cpp", no_argument, 0, 'n' },
+  { 0 }
+};
+
+/* Print the option summary for the assembly scanner on standard
+ output. The text is passed through _() before printing. */
+static void
+help_me_asm (void)
+{
+ printf (_("\
+Assembly language:\n\
+ -c,--comment=CHARS Any of CHARS starts a comment until end-of-line\n\
+ -k,--keep=CHARS Allow CHARS in tokens, and keep the result\n\
+ -i,--ignore=CHARS Allow CHARS in tokens, and toss the result\n\
+ -u,--strip-underscore Strip a leading underscore from tokens\n\
+ -n,--no-cpp Don't handle C pre-processor directives\n\
+"));
+}
+
+/* Digest the assembly scanner's options and return a
+   `struct args_asm'.
+
+   If ARGV (or its first element) is null there is nothing to parse
+   and the shared defaults are returned.  If ARGC is nonzero, ARGV is
+   a ready-made vector and the options are applied to the shared
+   defaults in place.  If ARGC is zero, *ARGV is a single string
+   holding all the options; it is copied, split with
+   tokenize_args_string, and applied to a freshly allocated structure
+   that starts out with the same values as the shared defaults.
+
+   The class table is cloned the first time -c, -k or -i is seen, so
+   the shared ctype_asm table itself is never modified.  */
+static void *
+parse_args_asm (char **argv, int argc)
+{
+  char *tmp_string = 0;
+  struct args_asm *args;
+
+  if (argv == 0 || *argv == 0)
+    return &args_asm;
+
+  if (argc)
+    args = &args_asm;
+  else
+    {
+      tmp_string = strdup (*argv);
+      tokenize_args_string (tmp_string, &argc, &argv);
+      args = MALLOC (struct args_asm, 1);
+      /* Every field must be set: get_token_asm reads handle_cpp on
+         each call, and the memory from MALLOC is not cleared here.  */
+      args->handle_cpp = 1;
+      args->strip_underscore = 0;
+      args->ctype = ctype_asm;
+    }
+
+  /* Restart getopt; it may already have been used on another vector.  */
+  optind = 0;
+  for (;;)
+    {
+      int optc = getopt_long (argc, argv, "c:k:i:un",
+                              long_options_asm, (int *) 0);
+      if (optc < 0)
+        break;
+      /* Copy-on-write: detach from the shared table before the first
+         modification.  */
+      if ((optc == 'k' || optc == 'i' || optc == 'c')
+          && args->ctype == ctype_asm)
+        args->ctype = CLONE (ctype_asm, unsigned char, cardinalityof (ctype_asm));
+      switch (optc)
+        {
+        case 'c':
+          set_uchar_ctype (args->ctype, optarg, CM);
+          break;
+
+        case 'k':
+          set_uchar_ctype (args->ctype, optarg, I1);
+          break;
+
+        case 'i':
+          /* Accepted inside tokens, but such tokens are discarded.  */
+          set_uchar_ctype (args->ctype, optarg, I1 | IG);
+          break;
+
+        case 'u':
+          args->strip_underscore = 1;
+          break;
+
+        case 'n':
+          args->handle_cpp = 0;
+          break;
+
+        default:
+          usage ();
+        }
+    }
+  if (tmp_string)
+    {
+      /* Both the vector and the string it points into are ours.  */
+      free (argv);
+      free (tmp_string);
+    }
+  return args;
+}
+
+/* Grab the next identifier from the assembly language source file.
+   This state machine is built for speed, not elegance.
+
+   IN_FILE is the stream to scan, ARGS points to a `struct args_asm',
+   and *FLAGS receives the TOK_* classification of the token found.
+   The token is built on tokens_obstack: room for the fixed part of
+   `struct token' is reserved first and the name is appended after it.
+   Returns the finished object, or 0 at end of file (after releasing
+   the reserved space).
+
+   Every loop that reads ahead also stops at end of file, so truncated
+   input (an unterminated #include, a directive on the last line with
+   no newline) ends the scan instead of spinning forever.
+
+   NOTE(review): nothing here checks that a token fits in id_0, which
+   holds BUFSIZ bytes.  */
+
+static struct token *
+get_token_asm (FILE *in_FILE, void const *args, int *flags)
+{
+#define ARGS ((struct args_asm *) args)
+  /* Nonzero when the next character read is the first one on a line;
+     kept across calls.  */
+  static int new_line = 1;
+  /* Skip the table's first slot so the class macros can be indexed
+     with the value returned by getc.  */
+  unsigned char *rct = &ARGS->ctype[1];
+  char id_0[BUFSIZ];
+  char *id = id_0;
+  int c;
+
+  /* Reserve the part of struct token that precedes tok_name.  */
+  obstack_blank (&tokens_obstack, offsetof (struct token, tok_name));
+
+top:
+  c = getc (in_FILE);
+  if (ARGS->handle_cpp > 0 && new_line)
+    {
+      /* Start of a line: look for a `#' directive.  */
+      new_line = 0;
+      if (c != '#')
+        goto next;
+      /* `#' has no class in the default table, so this loop steps
+         over it and any blanks that follow.  */
+      while (ISBORING (c))
+        c = getc (in_FILE);
+      if (!ISID1ST (c))
+        goto next;
+      /* Collect the directive name.  */
+      id = id_0;
+      *id++ = c;
+      while (ISIDREST (c = getc (in_FILE)))
+        *id++ = c;
+      *id = '\0';
+      if (strequ (id_0, "include"))
+        {
+          /* Find the opening delimiter, but never look past the end
+             of the line or of the file.  */
+          while (c != '"' && c != '<' && c != '\n' && c != EOF)
+            c = getc (in_FILE);
+          if (c == '\n')
+            {
+              new_line = 1;
+              goto top;
+            }
+          if (c == EOF)
+            goto next;
+          /* Copy the file name up to the closing delimiter; a newline
+             or end of file also terminates it.  */
+          id = id_0;
+          while ((c = getc (in_FILE)) != '"' && c != '>'
+                 && c != '\n' && c != EOF)
+            *id++ = c;
+          if (c == '\n')
+            new_line = 1;
+          *flags = TOK_STRING;
+          obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+          return obstack_finish (&tokens_obstack);
+        }
+      /* Directives whose name starts with "if", plus define and
+         undef, have the rest of their line scanned like ordinary
+         code.  */
+      if (strnequ (id_0, "if", 2)
+          || strequ (id_0, "define")
+          || strequ (id_0, "undef"))
+        goto next;
+      /* Every other directive: skip the whole line.  */
+      while (c != '\n' && c != EOF)
+        c = getc (in_FILE);
+      new_line = 1;
+      goto top;
+    }
+
+next:
+  while (ISBORING (c))
+    c = getc (in_FILE);
+
+  if (ISCOMMENT (c))
+    {
+      /* Assembler comment: runs to the end of the line.  */
+      while (ISCBORING (c))
+        c = getc (in_FILE);
+      new_line = 1;
+    }
+
+  if (ISEOF (c))
+    {
+      /* End of input: reset for the next file and release the
+         reserved space.  */
+      new_line = 1;
+      obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+      return 0;
+    }
+
+  if (c == '\n')
+    {
+      new_line = 1;
+      goto top;
+    }
+
+  if (c == '/')
+    {
+      if ((c = getc (in_FILE)) != '*')
+        goto next;
+      /* C comment: advance to each C2 character (or end of file) and
+         check whether a `/' follows it.  */
+      c = getc (in_FILE);
+      for (;;)
+        {
+          while (ISCCBORING (c))
+            c = getc (in_FILE);
+          c = getc (in_FILE);
+          if (c == '/')
+            {
+              c = getc (in_FILE);
+              break;
+            }
+          else if (ISEOF (c))
+            {
+              new_line = 1;
+              obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+              return 0;
+            }
+        }
+      goto next;
+    }
+
+  id = id_0;
+  if (ARGS->strip_underscore && c == '_' && !ISID1ST (c = getc (in_FILE)))
+    {
+      /* A lone underscore is kept as a token.  Classify it the way
+         the ordinary identifier path below would, and push back the
+         lookahead so the next call sees it.  */
+      ungetc (c, in_FILE);
+      *flags = TOK_NAME | TOK_LITERAL;
+      obstack_grow0 (&tokens_obstack, "_", 1);
+      return obstack_finish (&tokens_obstack);
+    }
+  *id++ = c;
+  if (ISID1ST (c))
+    {
+      *flags = TOK_NAME;
+      while (ISIDREST (c = getc (in_FILE)))
+        *id++ = c;
+    }
+  else if (ISNUMBER (c))
+    {
+      *flags = TOK_NUMBER;
+      while (ISNUMBER (c = getc (in_FILE)))
+        *id++ = c;
+    }
+  else
+    {
+      /* NOTE(review): with the visible tables this branch looks
+         unreachable; confirm before relying on it.  */
+      if (isprint (c))
+        fprintf (stderr, _("junk: `%c'"), c);
+      else
+        fprintf (stderr, _("junk: `\\%03o'"), c);
+      goto next;
+    }
+
+  *id = '\0';
+  /* Discard the token if any of its characters is in the IG class.
+     Index through unsigned char: plain char may be negative.  */
+  for (id = id_0; *id; id++)
+    if (ISIGNORE ((unsigned char) *id))
+      goto next;
+  /* C is the first character past the token; leave it for the next
+     call.  */
+  ungetc (c, in_FILE);
+  *flags |= TOK_LITERAL;
+  obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+  return obstack_finish (&tokens_obstack);
+#undef ARGS
+}
+
+/* Retire the assembly scanner's class bits and predicates; the text
+ scanner that follows defines several of the same names again. */
+#undef I1
+#undef NM
+#undef NL
+#undef CM
+#undef IG
+#undef C1
+#undef C2
+#undef EF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISNUMBER
+#undef ISEOF
+#undef ISCOMMENT
+#undef ISBORING
+#undef ISCBORING
+#undef ISCCBORING
+#undef ISIGNORE
+
+/*************** Text *******************************************************/
+
+/* Class bits for the text scanner. */
+#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
+#define NM 0x02 /* digit [0-9a-fA-FxX] */
+#define SQ 0x04 /* squeeze these out (.,',-) */
+#define EF 0x80 /* EOF */
+
+/* Text character classes */
+/* Each predicate indexes `rct', which must be in scope at the point of
+ use. A character in the SQ class continues a word (ISIDREST) but is
+ not copied into it (ISIDSQUEEZE). */
+#define ISID1ST(c) ((rct)[c] & (I1))
+#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ))
+#define ISNUMBER(c) ((rct)[c] & (NM))
+#define ISEOF(c) ((rct)[c] & (EF))
+#define ISBORING(c) (!((rct)[c] & (I1|NM|EF)))
+#define ISIDSQUEEZE(c) ((rct)[c] & (SQ))
+
+/* Character-class table for the text scanner. The first slot is the
+ class for end of file; the rest are laid out by character code,
+ eight per row, covering all of 000..0377. Codes 0300..0377 are
+ word characters except 0327 and 0367 (presumably the ISO 8859-1
+ letters, leaving out the multiplication and division signs --
+ confirm the intended character set). No entry carries SQ. */
+static unsigned char ctype_text[257] =
+{
+ EF,
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*050*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
+/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
+/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
+/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
+/*200*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*210*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*220*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*230*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*240*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*250*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*260*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*270*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*300*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*310*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*320*/ I1, I1, I1, I1, I1, I1, I1, 0,
+/*330*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*340*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*350*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*360*/ I1, I1, I1, I1, I1, I1, I1, 0,
+/*370*/ I1, I1, I1, I1, I1, I1, I1, I1,
+};
+
+/* Digested options for the text scanner, as produced by
+ parse_args_text and consumed by get_token_text. */
+struct args_text
+{
+ /* Class table to scan with: the shared ctype_text or a private
+ clone. */
+ unsigned char *ctype;
+};
+
+/* Default: the shared table. */
+static struct args_text args_text = { ctype_text };
+
+/* Long-option spellings for the text scanner. Each entry returns the
+ same letter as the corresponding short option in the "i:x:"
+ optstring used by parse_args_text. */
+static struct option const long_options_text[] =
+{
+ { "include", required_argument, 0, 'i' },
+ { "exclude", required_argument, 0, 'x' },
+ { 0 }
+};
+
+/* Print the option summary for the text scanner on standard output.
+ The text is passed through _() before printing. */
+static void
+help_me_text (void)
+{
+ printf (_("\
+Text language:\n\
+ -i,--include=CHAR-CLASS Include characters from CHAR-CLASS in tokens\n\
+ -x,--exclude=CHAR-CLASS Exclude characters from CHAR-CLASS from tokens\n\
+"));
+}
+
+/* Digest the text scanner's options and return a `struct args_text'.
+
+   If ARGV (or its first element) is null there is nothing to parse
+   and the shared defaults are returned.  If ARGC is nonzero, ARGV is
+   a ready-made vector and the options are applied to the shared
+   defaults in place.  If ARGC is zero, *ARGV is a single string
+   holding all the options; it is copied, split with
+   tokenize_args_string, and applied to a freshly allocated structure.
+
+   Both -i and -x modify the class table, so the table is cloned the
+   first time either one is seen; the shared ctype_text table itself
+   is never modified.  */
+static void *
+parse_args_text (char **argv, int argc)
+{
+  char *tmp_string = 0;
+  struct args_text *args;
+
+  if (argv == 0 || *argv == 0)
+    return &args_text;
+
+  if (argc)
+    args = &args_text;
+  else
+    {
+      tmp_string = strdup (*argv);
+      tokenize_args_string (tmp_string, &argc, &argv);
+      args = MALLOC (struct args_text, 1);
+      args->ctype = ctype_text;
+    }
+
+  /* Restart getopt; it may already have been used on another vector.  */
+  optind = 0;
+  for (;;)
+    {
+      int optc = getopt_long (argc, argv, "i:x:",
+                              long_options_text, (int *) 0);
+      if (optc < 0)
+        break;
+      /* Copy-on-write.  The guard has to name this scanner's own
+         table-modifying options, 'i' and 'x'; otherwise -x on its own
+         would clear bits in the shared table.  */
+      if ((optc == 'i' || optc == 'x') && args->ctype == ctype_text)
+        args->ctype = CLONE (ctype_text, unsigned char, cardinalityof (ctype_text));
+      switch (optc)
+        {
+        case 'i':
+          set_uchar_ctype (args->ctype, optarg, I1);
+          break;
+
+        case 'x':
+          clear_uchar_ctype (args->ctype, optarg, I1);
+          break;
+
+        default:
+          usage ();
+        }
+    }
+  if (tmp_string)
+    {
+      /* Both the vector and the string it points into are ours.  */
+      free (argv);
+      free (tmp_string);
+    }
+  return args;
+}
+
+/* Grab the next word or number from the text source file.
+
+   IN_FILE is the stream to scan, ARGS points to a `struct args_text',
+   and *FLAGS receives TOK_NAME or TOK_NUMBER, combined with
+   TOK_LITERAL.  The token is built on tokens_obstack after room for
+   the fixed part of `struct token' has been reserved.  Returns the
+   finished object, or 0 at end of file (after releasing the reserved
+   space).  */
+
+static struct token *
+get_token_text (FILE *in_FILE, void const *args, int *flags)
+{
+#define ARGS ((struct args_text *) args)
+  static char word[BUFSIZ];
+  /* Skip the table's first slot so the class macros can be indexed
+     with the value returned by getc.  */
+  unsigned char *rct = &ARGS->ctype[1];
+  char *end;
+  int c;
+
+  obstack_blank (&tokens_obstack, offsetof (struct token, tok_name));
+
+  for (;;)
+    {
+      /* Skip everything that cannot start a token.  */
+      do
+        c = getc (in_FILE);
+      while (ISBORING (c));
+
+      if (ISEOF (c))
+        {
+          obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+          return 0;
+        }
+
+      end = word;
+      *end++ = c;
+
+      if (ISID1ST (c))
+        {
+          /* A word: squeezable characters keep it going but are not
+             copied.  */
+          *flags = TOK_NAME;
+          for (c = getc (in_FILE); ISIDREST (c); c = getc (in_FILE))
+            if (!ISIDSQUEEZE (c))
+              *end++ = c;
+          break;
+        }
+
+      if (ISNUMBER (c))
+        {
+          *flags = TOK_NUMBER;
+          for (c = getc (in_FILE); ISNUMBER (c); c = getc (in_FILE))
+            *end++ = c;
+          break;
+        }
+
+      /* Neither a word nor a number: complain and start over with the
+         next character.  */
+      fprintf (stderr,
+               isprint (c) ? _("junk: `%c'") : _("junk: `\\%03o'"),
+               c);
+    }
+
+  /* C is the first character past the token; leave it for the next
+     call.  */
+  ungetc (c, in_FILE);
+  *flags |= TOK_LITERAL;
+  obstack_grow0 (&tokens_obstack, word, end - word);
+  return obstack_finish (&tokens_obstack);
+#undef ARGS
+}
+
+/* Retire the text scanner's class bits and predicates. */
+#undef I1
+#undef NM
+#undef SQ
+#undef EF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISNUMBER
+#undef ISEOF
+#undef ISBORING
+#undef ISIDSQUEEZE
diff --git a/lib/scanners.h b/lib/scanners.h
new file mode 100644
index 0000000..3c65a67
--- /dev/null
+++ b/lib/scanners.h
@@ -0,0 +1,67 @@
+/* scanners.h -- defs for interface to scanners.c
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _scanners_h_
+#define _scanners_h_
+
+#include <obstack.h>
+
+#define MAX_LEVELS 5 /* log_8 of the max # of files: log_8 (32768) == 5 */
+
+/* One scanned token. The scanners allocate these on tokens_obstack:
+ they reserve offsetof (struct token, tok_name) bytes and then append
+ the NUL-terminated name, so tok_name is really a variable-length
+ array that extends past the end of the declared structure. */
+struct token
+{
+ unsigned short tok_count;
+ unsigned char tok_flags;
+ unsigned char tok_hits[MAX_LEVELS];
+ char tok_name[1]; /* first byte of the name; the rest follows */
+};
+
+/* Signatures of the three entry points each language scanner provides.
+ get_token_func_t: return the next token from IN_FILE (0 at end of
+ file), storing its TOK_* classification in *FLAGS; ARGS is whatever
+ the language's parse_args function returned.
+ parse_args_func_t: digest an option vector into that ARGS object.
+ help_me_func_t: print the language's option summary. */
+typedef struct token *(*get_token_func_t) __P((FILE *in_FILE, void const *args, int *flags));
+typedef void *(*parse_args_func_t) __P((char **argv, int argc));
+typedef void (*help_me_func_t) __P((void));
+
+/* Description of one scanner: its name, its three entry points, and
+ an argument vector of at most 16 slots (presumably options saved
+ for this language before they are parsed -- confirm against
+ scanners.c). */
+struct language
+{
+ char const *lg_name;
+ parse_args_func_t lg_parse_args;
+ get_token_func_t lg_get_token;
+ help_me_func_t lg_help_me;
+ int lg_argc;
+ char *lg_argv[16];
+};
+
+/* One node of a singly linked list (via la_next) that ties a
+ file-name pattern to a language and to that language's scanner
+ arguments, kept both as text and in digested form. */
+struct lang_args
+{
+ struct language const *la_language;
+ char const *la_pattern; /* fnmatch(3) pattern */
+ char *la_args_string; /* human-readable scanner args */
+ void const *la_args_digested; /* pre-parsed scanner args */
+ int la_index;
+ struct lang_args *la_next;
+};
+
+/* Public interface of scanners.c. The definitions are not part of
+ this header; the names suggest: print help for every language, save
+ one command-line language argument, look a language up by name, and
+ load a language map file -- confirm against scanners.c. */
+extern void language_help_me __P((void));
+extern void language_save_arg __P((char *arg));
+extern struct language *get_language __P((char const *lang_name));
+extern void parse_language_map __P((char const *file_name));
+
+extern struct lang_args *lang_args_default;
+extern struct lang_args *lang_args_list;
+
+/* Obstack on which the scanners build the tokens they return. */
+extern struct obstack tokens_obstack;
+
+#endif /* not _scanners_h_ */
diff --git a/lib/strcasecmp.c b/lib/strcasecmp.c
new file mode 100644
index 0000000..cd038e3
--- /dev/null
+++ b/lib/strcasecmp.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 1987 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and that due credit is given
+ * to the University of California at Berkeley. The name of the University
+ * may not be used to endorse or promote products derived from this
+ * software without specific written prior permission. This software
+ * is provided ``as is'' without express or implied warranty.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strcasecmp.c 5.5 (Berkeley) 11/24/87";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+#include <string.h>
+
+/*
+ * This array is designed for mapping upper and lower case letter
+ * together for a case independent comparison. The mappings are
+ * based upon ascii character sequences.
+ */
+/* Indexed by character code 0..0377. Codes 0101..0132 map to
+ 0141..0172, and the same pattern is repeated 0200 higher (codes
+ 0301..0332 map to 0341..0372); every other code maps to itself. */
+static unsigned char charmap[] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+
+/*
+ * Compare S1 and S2 after folding every byte through charmap.
+ * Returns zero when the folded strings are equal; otherwise the
+ * difference between the folded values of the first pair of bytes
+ * that disagree.
+ */
+int
+strcasecmp(char const *s1, char const *s2)
+{
+	unsigned char const *left = (unsigned char const *) s1;
+	unsigned char const *right = (unsigned char const *) s2;
+
+	while (charmap[*left] == charmap[*right]) {
+		/* Equal so far; a NUL here ends both strings. */
+		if (*left == '\0')
+			return 0;
+		left++;
+		right++;
+	}
+	return charmap[*left] - charmap[*right];
+}
+
diff --git a/lib/strdup.c b/lib/strdup.c
new file mode 100644
index 0000000..2c8aff2
--- /dev/null
+++ b/lib/strdup.c
@@ -0,0 +1,38 @@
+/* Copyright (C) 1991 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <ansidecl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* Return a freshly malloc'd copy of the string S, terminator
+   included, or NULL if the allocation fails.  */
+char *
+DEFUN(strdup, (s), CONST char *s)
+{
+  size_t size = strlen (s) + 1;
+  PTR copy = malloc (size);
+
+  if (copy != NULL)
+    memcpy (copy, (PTR) s, size);
+
+  return (char *) copy;
+}
diff --git a/lib/strndup.c b/lib/strndup.c
new file mode 100644
index 0000000..556215d
--- /dev/null
+++ b/lib/strndup.c
@@ -0,0 +1,37 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <ansidecl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* Duplicate at most N bytes of S.  The copy stops at the first NUL in
+   S if there is one among the first N bytes, and the result is always
+   NUL-terminated.  Returns a malloc'd string, or NULL if the
+   allocation fails.  */
+char *
+DEFUN(strndup, (s, n), CONST char *s AND size_t n)
+{
+  /* Never read past the end of S: if it is shorter than N, copy only
+     the bytes it actually has.  */
+  CONST char *end = (CONST char *) memchr ((PTR) s, '\0', n);
+  char *new;
+
+  if (end != NULL)
+    n = end - s;
+
+  new = malloc (n + 1);
+  if (new == NULL)
+    return NULL;
+
+  memcpy (new, (PTR) s, n);
+  new[n] = '\0';
+
+  return new;
+}
diff --git a/lib/strtok.c b/lib/strtok.c
new file mode 100644
index 0000000..0b95084
--- /dev/null
+++ b/lib/strtok.c
@@ -0,0 +1,73 @@
+/* Copyright (C) 1991 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <ansidecl.h>
+#include <errno.h>
+#include <string.h>
+
+
+/* Where the next call with S == NULL resumes scanning; NULL once the
+ string has been used up. */
+static char *olds = NULL;
+
+/* Parse S into tokens separated by characters in DELIM.
+ If S is NULL, the last string strtok() was called with is
+ used. For example:
+ char s[] = "-abc-=-def";
+ x = strtok(s, "-"); // x = "abc"
+ x = strtok(NULL, "-="); // x = "def"
+ x = strtok(NULL, "="); // x = NULL
+ // s = "-abc\0=-def"
+ A call with S == NULL when no string is pending returns NULL and
+ sets errno to EINVAL; that is what the third call above does.
+*/
+char *
+DEFUN(strtok, (s, delim),
+ register char *s AND register CONST char *delim)
+{
+ char *token;
+
+ if (s == NULL)
+ {
+ if (olds == NULL)
+ {
+ errno = EINVAL;
+ return NULL;
+ }
+ else
+ s = olds;
+ }
+
+ /* Scan leading delimiters. */
+ s += strspn(s, delim);
+ if (*s == '\0')
+ {
+ olds = NULL;
+ return NULL;
+ }
+
+ /* Find the end of the token. */
+ token = s;
+ s = strpbrk(token, delim);
+ if (s == NULL)
+ /* This token finishes the string. */
+ olds = NULL;
+ else
+ {
+ /* Terminate the token and make OLDS point past it. */
+ *s = '\0';
+ olds = s + 1;
+ }
+ return token;
+}
diff --git a/lib/strxtra.h b/lib/strxtra.h
new file mode 100644
index 0000000..75efa5e
--- /dev/null
+++ b/lib/strxtra.h
@@ -0,0 +1,40 @@
+/* strxtra.h -- convenient string manipulation macros
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _strxtra_h_
+#define _strxtra_h_
+
+#if HAVE_STDLIB_H
+#include <stdlib.h>
+#else /* not HAVE_STDLIB_H */
+#if HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+#endif /* not HAVE_STDLIB_H */
+
+/* Equality predicates: nonzero when the strings (or their first N
+ characters) compare equal, with or without regard to case. */
+#define strequ(s1, s2) (strcmp ((s1), (s2)) == 0)
+#define strnequ(s1, s2, n) (strncmp ((s1), (s2), (n)) == 0)
+#define strcaseequ(s1, s2) (strcasecmp ((s1), (s2)) == 0)
+#define strncaseequ(s1, s2, n) (strncasecmp ((s1), (s2), (n)) == 0)
+/* Fallback duplicators. Both expand their arguments more than once,
+ and neither checks the result of calloc before copying into it.
+ The strndup buffer is N+1 zeroed bytes, so the copy of at most N
+ characters is always NUL-terminated. */
+#ifdef HAVE_STRDUP
+extern char *strdup ();
+#else
+#define strdup(s) (strcpy (calloc (1, strlen (s) + 1), (s)))
+#endif
+#define strndup(s, n) (strncpy (calloc (1, (n)+1), (s), (n)))
+
+#endif /* not _strxtra_h_ */
diff --git a/lib/system.h b/lib/system.h
new file mode 100644
index 0000000..11d26e8
--- /dev/null
+++ b/lib/system.h
@@ -0,0 +1,45 @@
+/* system-dependent definitions for id-utils programs.
+ Copyright (C) 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Include sys/types.h before this file. */
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+/* Number of elements in the array A. A must be a real array, not a
+ pointer: the result is computed from sizeof. */
+#define cardinalityof(a) (sizeof (a) / sizeof ((a)[0]))
+/* Fallback for systems whose headers do not provide offsetof. */
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+/* Take care of NLS matters. */
+
+#if HAVE_LOCALE_H
+# include <locale.h>
+#endif
+/* Without setlocale, calls to it expand to nothing. */
+#if !HAVE_SETLOCALE
+# define setlocale(Category, Locale) /* empty */
+#endif
+
+/* _() marks a message for translation. With NLS enabled it looks the
+ text up through gettext; otherwise it yields the text unchanged and
+ the text-domain calls expand to nothing. */
+#if ENABLE_NLS
+# include <libintl.h>
+# define _(Text) gettext (Text)
+#else
+# define bindtextdomain(Domain, Directory) /* empty */
+# define textdomain(Domain) /* empty */
+# define _(Text) Text
+#endif
diff --git a/lib/token.c b/lib/token.c
new file mode 100644
index 0000000..b89f1fa
--- /dev/null
+++ b/lib/token.c
@@ -0,0 +1,49 @@
+/* token.c -- misc. access functions for mkid database tokens
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <config.h>
+#include "token.h"
+
+/* Return the flags byte of the token record at BUF: the byte that
+   immediately follows the NUL ending the token's name.  */
+unsigned int
+tok_flags (char const *buf)
+{
+  unsigned char const *after_name
+    = (unsigned char const *) buf + strlen (buf) + 1;
+
+  return *after_name;
+}
+
+/* NOTE(review): neither macro is used by the functions in this file,
+ and both depend on TOK_FLAGS_ADDR, which is not defined anywhere in
+ the visible sources -- confirm whether they can be removed. */
+#define TOK_COUNT_ADDR(buf) ((unsigned char const *)(TOK_FLAGS_ADDR (buf) + 1))
+#define TOK_HITS_ADDR(buf) ((unsigned char const *)(TOK_COUNT_ADDR (buf) + 2))
+
+/* Return the occurrence count of the token record at BUF.  The count
+   follows the flags byte: one byte normally, or two bytes with the
+   low-order byte first when the flags carry TOK_SHORT_COUNT.  */
+unsigned short
+tok_count (char const *buf)
+{
+  unsigned char const *rec = (unsigned char const *) buf + strlen (buf) + 1;
+  unsigned short total = rec[1];
+
+  if (rec[0] & TOK_SHORT_COUNT)
+    total += (rec[2] << 8);
+  return total;
+}
+
+/* Return the address of the hits data of the token record at BUF: it
+   starts right after the count, which takes one byte, or two when the
+   flags carry TOK_SHORT_COUNT.  */
+unsigned char const *
+tok_hits_addr (char const *buf)
+{
+  unsigned char const *rec = (unsigned char const *) buf + strlen (buf) + 1;
+
+  return rec + ((*rec & TOK_SHORT_COUNT) ? 3 : 2);
+}
diff --git a/lib/token.h b/lib/token.h
new file mode 100644
index 0000000..41c8e28
--- /dev/null
+++ b/lib/token.h
@@ -0,0 +1,39 @@
+/* token.h -- defs for interface to token.c
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _token_h_
+#define _token_h_
+
+/* token flags (struct token is in mkid.c) */
+#define TOK_VECTOR 0x01 /* 1 = hits are stored as a vector
+ 0 = hits are stored as an 8-way tree of bits
+ mkid chooses whichever is more compact.
+ vector is more compact for tokens with few hits */
+#define TOK_NUMBER 0x02 /* occurs as a number */
+#define TOK_NAME 0x04 /* occurs as a name */
+#define TOK_STRING 0x08 /* occurs in a string */
+#define TOK_LITERAL 0x10 /* occurs as a literal */
+#define TOK_COMMENT 0x20 /* occurs in a comment */
+#define TOK_UNUSED_1 0x40
+#define TOK_SHORT_COUNT 0x80 /* count is two bytes */
+
+/* Accessors for a token record as token.c reads it: a NUL-terminated
+   string, then one flag byte, then a one- or two-byte count stored
+   low-order byte first, then whatever tok_hits_addr points at.  */
+
+/* The record's string is simply the start of the record.  */
+#define tok_string(buf) (buf)
+/* Return the flag byte (TOK_* bits above) that follows the string's NUL.  */
+unsigned int tok_flags __P((char const *buf));
+/* Return the count; a second byte is read only if TOK_SHORT_COUNT is set.  */
+unsigned short tok_count __P((char const *buf));
+/* Return the address of the first byte past the count field.  */
+unsigned char const *tok_hits_addr __P((char const *buf));
+
+#endif /* not _token_h_ */
diff --git a/lib/xgetcwd.c b/lib/xgetcwd.c
new file mode 100644
index 0000000..1c1a7bd
--- /dev/null
+++ b/lib/xgetcwd.c
@@ -0,0 +1,78 @@
+/* xgetcwd.c -- return current directory with unlimited length
+ Copyright (C) 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#ifndef errno
+extern int errno;
+#endif
+#include <sys/types.h>
+#include "pathmax.h"
+
+/* Without a native getcwd, substitute getwd.  The replacement macro
+   discards MAX, so getwd is called with no indication of how large
+   the buffer is.  */
+#ifndef HAVE_GETCWD
+char *getwd ();
+#define getcwd(buf, max) getwd (buf)
+#else
+char *getcwd ();
+#endif
+
+/* Amount to increase buffer size by in each try. */
+#define PATH_INCR 32
+
+/* NOTE(review): xmalloc.c defines xmalloc and xrealloc as returning
+   VOID * (void * under __STDC__), whereas they are declared here as
+   returning char * -- confirm the mismatch is intended.  */
+char *xmalloc ();
+char *xrealloc ();
+void free ();
+
+/* Fetch the current working directory into freshly allocated storage,
+   enlarging the buffer for as long as getcwd reports ERANGE.
+   On any other failure, return NULL with errno set.  */
+
+char *
+xgetcwd ()
+{
+  /* The extra two bytes are what the getcwd docs say to allow for.  */
+  unsigned size = (unsigned) PATH_MAX + 2;
+  char *buf = xmalloc (size);
+
+  for (;;)
+    {
+      int err;
+
+      errno = 0;
+      if (getcwd (buf, size) != NULL)
+        return buf;
+
+      if (errno != ERANGE)
+        {
+          /* Preserve the real failure code across free ().  */
+          err = errno;
+          free (buf);
+          errno = err;
+          return NULL;
+        }
+
+      /* Buffer too small: grow it by one increment and retry.  */
+      size += PATH_INCR;
+      buf = xrealloc (buf, size);
+    }
+}
diff --git a/lib/xmalloc.c b/lib/xmalloc.c
new file mode 100644
index 0000000..67a774b
--- /dev/null
+++ b/lib/xmalloc.c
@@ -0,0 +1,98 @@
+/* xmalloc.c -- malloc with out of memory checking
+ Copyright (C) 1990, 91, 92, 93, 94 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include "error.h"
+
+#if __STDC__
+#define VOID void
+#else
+#define VOID char
+#endif
+
+#include <sys/types.h>
+
+#if STDC_HEADERS
+#include <stdlib.h>
+#else
+VOID *malloc ();
+VOID *realloc ();
+void free ();
+#endif
+
+/* This is for other GNU distributions with internationalized messages.
+ The GNU C Library itself does not yet support such messages. */
+#if HAVE_LIBINTL_H
+# include <libintl.h>
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+
+/* Exit value when the requested amount of memory is not available.
+ The caller may set it to some other value. */
+int xmalloc_exit_failure = EXIT_FAILURE;
+
+/* Recover after malloc or realloc returned null for a request of N
+   bytes.  A zero-byte request is retried as a one-byte malloc; in every
+   other case, or if that retry also fails, "Memory exhausted" is
+   reported through error () with status xmalloc_exit_failure.  The
+   value returned is the retry's block, or null if error () returns.  */
+
+static VOID *
+fixup_null_alloc (n)
+     size_t n;
+{
+  VOID *block = (n == 0) ? malloc ((size_t) 1) : 0;
+
+  if (block == 0)
+    error (xmalloc_exit_failure, 0, gettext ("Memory exhausted"));
+  return block;
+}
+
+/* Dynamically allocate N bytes.  A null result from malloc is passed
+   to fixup_null_alloc, which either retries or reports the failure.  */
+
+VOID *
+xmalloc (n)
+     size_t n;
+{
+  VOID *block = malloc (n);
+
+  return block != 0 ? block : fixup_null_alloc (n);
+}
+
+/* Change the size of an allocated block of memory P to N bytes,
+ with error checking.
+ If P is NULL, run xmalloc. */
+
+VOID *
+xrealloc (p, n)
+ VOID *p;
+ size_t n;
+{
+ if (p == 0)
+ return xmalloc (n);
+ p = realloc (p, n);
+ if (p == 0)
+ p = fixup_null_alloc (n);
+ return p;
+}
diff --git a/lib/xmalloc.h b/lib/xmalloc.h
new file mode 100644
index 0000000..622b31b
--- /dev/null
+++ b/lib/xmalloc.h
@@ -0,0 +1,12 @@
+/* xmalloc.h -- declarations for the checked allocators in xmalloc.c.  */
+
+#ifndef _xmalloc_h_
+#define _xmalloc_h_ 1
+
+/* config.h must precede every system header so that whatever it
+   defines is already in effect when those headers are read.  */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <sys/types.h>
+
+/* NOTE(review): __P is not defined in this header; presumably config.h
+   or the including file supplies it -- confirm.  */
+
+/* Allocate N bytes of memory dynamically, with error checking.  */
+void *xmalloc __P((size_t n));
+
+/* Change the size of the allocated block P to N bytes, with error
+   checking.  If P is null, this behaves as xmalloc (N).  */
+void *xrealloc __P((void *p, size_t n));
+
+#endif /* _xmalloc_h_ */