summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorGreg McGary <greg@mcgary.org>1997-04-18 06:42:59 +0000
committerGreg McGary <greg@mcgary.org>1997-04-18 06:42:59 +0000
commit40b4b4e4990e67028efb79345fba5fa9e760b522 (patch)
tree34e9ba556c821f7c7f94b99c0cbcbf0cc321b5af /src
parent8c13e09279f361e18173f9e237c454af0ab33299 (diff)
downloadidutils-40b4b4e4990e67028efb79345fba5fa9e760b522.tar.gz
idutils-40b4b4e4990e67028efb79345fba5fa9e760b522.tar.bz2
idutils-40b4b4e4990e67028efb79345fba5fa9e760b522.zip
Initial revision
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am19
-rw-r--r--src/Makefile.in327
-rw-r--r--src/aid3
-rw-r--r--src/ansi2knr.119
-rw-r--r--src/ansi2knr.c439
-rw-r--r--src/defid6
-rw-r--r--src/eid3
-rw-r--r--src/fid.c270
-rw-r--r--src/gid3
-rw-r--r--src/idx.c167
-rw-r--r--src/lid.c1482
-rw-r--r--src/mkid.c794
-rw-r--r--src/pid3
13 files changed, 3535 insertions, 0 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..3d1b163
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,19 @@
+## Process this file with automake to create Makefile.in
+## ansi2knr option: the generated Makefile.in gains rules that run sources through $(ANSI2KNR).
+AUTOMAKE_OPTIONS = ansi2knr
+## Compiled programs and shell-script front ends; both sets are installed into $(bindir).
+bin_PROGRAMS = mkid lid fid idx
+bin_SCRIPTS = eid aid gid pid defid
+## Extra files added to the distribution file list (DISTFILES) besides the program sources.
+EXTRA_DIST = ansi2knr.1 ansi2knr.c $(bin_SCRIPTS)
+## Directory passed to the C code as the LOCALEDIR macro (see DEFS below).
+localedir = $(datadir)/locale
+## Header search path: this directory, then lib/, intl/ and the top level, each as ../ path and source-tree path.
+INCLUDES = -I. -I$(srcdir) \
+ -I../lib -I$(top_srcdir)/lib \
+ -I../intl -I$(top_srcdir)/intl \
+ -I.. -I$(top_srcdir)
+DEFS = -DLOCALEDIR=\"$(localedir)\" @DEFS@
+LDADD = @INTLLIBS@ ../lib/libidu.a
+## Every program depends on everything named in LDADD, so a rebuilt library forces a relink.
+$(PROGRAMS): $(LDADD)
diff --git a/src/Makefile.in b/src/Makefile.in
new file mode 100644
index 0000000..8ea0b5c
--- /dev/null
+++ b/src/Makefile.in
@@ -0,0 +1,327 @@
+# Makefile.in generated automatically by automake 1.0 from Makefile.am
+
+# Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+
+
+SHELL = /bin/sh
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = ..
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+transform = @program_transform_name@
+
+AUTOMAKE_OPTIONS = ansi2knr
+
+bin_PROGRAMS = mkid lid fid idx
+bin_SCRIPTS = eid aid gid pid defid
+
+EXTRA_DIST = ansi2knr.1 ansi2knr.c $(bin_SCRIPTS)
+
+localedir = $(datadir)/locale
+
+INCLUDES = -I. -I$(srcdir) \
+ -I../lib -I$(top_srcdir)/lib \
+ -I../intl -I$(top_srcdir)/intl \
+ -I.. -I$(top_srcdir)
+DEFS = -DLOCALEDIR=\"$(localedir)\" @DEFS@
+LDADD = @INTLLIBS@ ../lib/libidu.a
+mkinstalldirs = $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = ../config.h
+PROGRAMS = $(bin_PROGRAMS)
+
+
+CC = @CC@
+LEX = @LEX@
+YACC = @YACC@
+CPPFLAGS = @CPPFLAGS@
+CFLAGS = @CFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+
+COMPILE = $(CC) -c $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS)
+LINK = $(CC) $(LDFLAGS) -o $@
+
+ANSI2KNR = @ANSI2KNR@
+o = .@U@o
+mkid_SOURCES = mkid.c
+mkid_OBJECTS = mkid$o
+EXTRA_mkid_SOURCES =
+mkid_LDADD = $(LDADD)
+lid_SOURCES = lid.c
+lid_OBJECTS = lid$o
+EXTRA_lid_SOURCES =
+lid_LDADD = $(LDADD)
+fid_SOURCES = fid.c
+fid_OBJECTS = fid$o
+EXTRA_fid_SOURCES =
+fid_LDADD = $(LDADD)
+idx_SOURCES = idx.c
+idx_OBJECTS = idx$o
+EXTRA_idx_SOURCES =
+idx_LDADD = $(LDADD)
+SCRIPTS = $(bin_SCRIPTS)
+
+DIST_COMMON = Makefile.am Makefile.in
+
+
+PACKAGE = @PACKAGE@
+VERSION = @VERSION@
+
+DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \
+ $(TEXINFOS) $(INFOS) $(MANS) $(EXTRA_DIST) $(DATA)
+DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \
+ $(TEXINFOS) $(INFO_DEPS) $(MANS) $(EXTRA_DIST) $(DATA)
+
+TAR = tar
+SOURCES = mkid.c lid.c fid.c idx.c
+OBJECTS = mkid$o lid$o fid$o idx$o
+
+default: all
+
+
+$(srcdir)/Makefile.in: @MAINT@Makefile.am $(top_srcdir)/configure.in
+ cd $(top_srcdir) && automake $(subdir)/Makefile
+
+Makefile: $(top_builddir)/config.status Makefile.in
+ cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= ./config.status
+
+mostlyclean-binPROGRAMS:
+
+clean-binPROGRAMS:
+ rm -f $(bin_PROGRAMS)
+
+distclean-binPROGRAMS:
+
+maintainer-clean-binPROGRAMS:
+
+install-binPROGRAMS: $(bin_PROGRAMS)
+ $(mkinstalldirs) $(bindir)
+ list="$(bin_PROGRAMS)"; for p in $$list; do \
+ if test -f $$p; then \
+ $(INSTALL_PROGRAM) $$p $(bindir)/`echo $$p|sed '$(transform)'`; \
+ else :; fi; \
+ done
+
+uninstall-binPROGRAMS:
+ list="$(bin_PROGRAMS)"; for p in $$list; do \
+ rm -f $(bindir)/`echo $$p|sed '$(transform)'`; \
+ done
+
+.c.o:
+ $(COMPILE) $<
+
+mostlyclean-compile:
+ rm -f *.o core
+
+clean-compile:
+
+distclean-compile:
+ rm -f *.tab.c
+
+maintainer-clean-compile:
+# .c -> ._c: run $(ANSI2KNR) on the source, writing to $*.tmp first and renaming only if it succeeds.
+.c._c:
+ $(ANSI2KNR) $< > $*.tmp && mv $*.tmp $@
+# ._c -> ._o: hard-link the ._c file to _$*.c, compile that, then rename _$*.o to the target and drop the link.
+._c._o:
+ @echo $(COMPILE) $<
+ @rm -f _$*.c
+ @ln $< _$*.c && $(COMPILE) _$*.c && mv _$*.o $@ && rm _$*.c
+# .c -> ._o: both steps above in one rule (convert to $*._c, then compile via the _$*.c link).
+.c._o:
+ $(ANSI2KNR) $< > $*.tmp && mv $*.tmp $*._c
+ @echo $(COMPILE) $*._c
+ @rm -f _$*.c
+ @ln $*._c _$*.c && $(COMPILE) _$*.c && mv _$*.o $@ && rm _$*.c
+
+ansi2knr: ansi2knr.o
+ $(LINK) ansi2knr.o $(LIBS)
+
+$(OBJECTS): $(ANSI2KNR)
+ansi2knr.o: $(CONFIG_HEADER)
+
+mostlyclean-kr:
+ rm -f *._o *._c _*.c _*.o
+
+clean-kr:
+
+distclean-kr:
+ rm -f ansi2knr
+
+maintainer-clean-kr:
+$(mkid_OBJECTS): ../config.h
+
+mkid: $(mkid_OBJECTS) $(mkid_DEPENDENCIES)
+ $(LINK) $(mkid_OBJECTS) $(mkid_LDADD) $(LIBS)
+$(lid_OBJECTS): ../config.h
+
+lid: $(lid_OBJECTS) $(lid_DEPENDENCIES)
+ $(LINK) $(lid_OBJECTS) $(lid_LDADD) $(LIBS)
+$(fid_OBJECTS): ../config.h
+
+fid: $(fid_OBJECTS) $(fid_DEPENDENCIES)
+ $(LINK) $(fid_OBJECTS) $(fid_LDADD) $(LIBS)
+$(idx_OBJECTS): ../config.h
+
+idx: $(idx_OBJECTS) $(idx_DEPENDENCIES)
+ $(LINK) $(idx_OBJECTS) $(idx_LDADD) $(LIBS)
+
+install-binSCRIPTS: $(bin_SCRIPTS)
+ $(mkinstalldirs) $(bindir)
+ list="$(bin_SCRIPTS)"; for p in $$list; do \
+ if test -f $$p; then \
+ $(INSTALL_SCRIPT) $$p $(bindir)/`echo $$p|sed '$(transform)'`; \
+ else if test -f $(srcdir)/$$p; then \
+ $(INSTALL_SCRIPT) $(srcdir)/$$p \
+ $(bindir)/`echo $$p|sed '$(transform)'`; \
+ else :; fi; fi; \
+ done
+
+uninstall-binSCRIPTS:
+ list="$(bin_SCRIPTS)"; for p in $$list; do \
+ rm -f $(bindir)/`echo $$p|sed '$(transform)'`; \
+ done
+
+ID: $(HEADERS) $(SOURCES)
+ here=`pwd` && cd $(srcdir) && mkid -f$$here/ID $(SOURCES) $(HEADERS)
+
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES)
+ here=`pwd` && cd $(srcdir) && etags $(ETAGS_ARGS) $(SOURCES) $(HEADERS) -o $$here/TAGS
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ rm -f TAGS ID
+
+maintainer-clean-tags:
+# distdir: for each file in DISTFILES not already present in $(distdir), hard-link it from $(srcdir), falling back to cp -p if ln fails.
+subdir = src
+distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
+distdir: $(DEP_DISTFILES)
+ @for file in `cd $(srcdir) && echo $(DISTFILES)`; do \
+ test -f $(distdir)/$$file \
+ || ln $(srcdir)/$$file $(distdir)/$$file 2> /dev/null \
+ || cp -p $(srcdir)/$$file $(distdir)/$$file; \
+ done
+fid$o: fid.c system.h idfile.h \
+ hash.h bitops.h filenames.h \
+ misc.h strxtra.h alloc.h \
+ token.h error.h pathmax.h
+idx$o: idx.c alloc.h system.h \
+ misc.h filenames.h scanners.h \
+ idfile.h hash.h pathmax.h
+lid$o: lid.c system.h alloc.h \
+ idfile.h hash.h token.h \
+ bitops.h strxtra.h misc.h \
+ filenames.h error.h pathmax.h
+mkid$o: mkid.c system.h pathmax.h \
+ strxtra.h alloc.h idfile.h \
+ hash.h token.h bitops.h \
+ misc.h filenames.h scanners.h \
+ error.h
+
+info:
+
+dvi:
+
+check: all
+
+installcheck:
+
+install-exec: install-binPROGRAMS install-binSCRIPTS
+
+install-data:
+
+install: install-exec install-data all
+ @:
+
+uninstall: uninstall-binPROGRAMS uninstall-binSCRIPTS
+
+all: $(PROGRAMS) $(SCRIPTS) Makefile
+
+install-strip:
+ $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
+installdirs:
+ $(mkinstalldirs) $(bindir) $(bindir)
+
+
+mostlyclean-generic:
+ test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
+
+clean-generic:
+ test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ rm -f Makefile $(DISTCLEANFILES)
+ rm -f config.cache config.log $(CONFIG_HEADER) stamp-h
+
+maintainer-clean-generic:
+ test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
+ test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
+mostlyclean: mostlyclean-binPROGRAMS mostlyclean-compile mostlyclean-kr \
+ mostlyclean-tags mostlyclean-generic
+
+clean: clean-binPROGRAMS clean-compile clean-kr clean-tags \
+ clean-generic mostlyclean
+
+distclean: distclean-binPROGRAMS distclean-compile distclean-kr \
+ distclean-tags distclean-generic clean
+ rm -f config.status
+
+maintainer-clean: maintainer-clean-binPROGRAMS maintainer-clean-compile \
+ maintainer-clean-kr maintainer-clean-tags \
+ maintainer-clean-generic distclean
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+.PHONY: default mostlyclean-binPROGRAMS distclean-binPROGRAMS \
+clean-binPROGRAMS maintainer-clean-binPROGRAMS uninstall-binPROGRAMS \
+install-binPROGRAMS mostlyclean-compile distclean-compile clean-compile \
+maintainer-clean-compile mostlyclean-kr distclean-kr clean-kr \
+maintainer-clean-kr uninstall-binSCRIPTS install-binSCRIPTS tags \
+mostlyclean-tags distclean-tags clean-tags maintainer-clean-tags \
+distdir info dvi check installcheck install-exec install-data install \
+uninstall all installdirs mostlyclean-generic distclean-generic \
+clean-generic maintainer-clean-generic clean mostlyclean distclean \
+maintainer-clean
+
+
+$(PROGRAMS): $(LDADD)
+.SUFFIXES:
+.SUFFIXES: .c .o ._c ._o
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/aid b/src/aid
new file mode 100644
index 0000000..7d2c79c
--- /dev/null
+++ b/src/aid
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+lid -E "$@"
diff --git a/src/ansi2knr.1 b/src/ansi2knr.1
new file mode 100644
index 0000000..434ce8f
--- /dev/null
+++ b/src/ansi2knr.1
@@ -0,0 +1,19 @@
+.TH ANSI2KNR 1 "31 December 1990"
+.SH NAME
+ansi2knr \- convert ANSI C to Kernighan & Ritchie C
+.SH SYNOPSIS
+.I ansi2knr
+[\-\-varargs] input_file [output_file]
+.SH DESCRIPTION
+If no output_file is supplied, output goes to stdout.
+.br
+There are no error messages.
+.sp
+.I ansi2knr
+recognizes functions by seeing a non-keyword identifier at the left margin, followed by a left parenthesis, with a right parenthesis as the last character on the line. It will recognize a multi-line header if the last character on each line but the last is a left parenthesis or comma. These algorithms ignore whitespace and comments, except that the function name must be the first thing on the line.
+.sp
+The following constructs will confuse it:
+.br
+ - Any other construct that starts at the left margin and follows the above syntax (such as a macro or function call).
+.br
+ - Macros that tinker with the syntax of the function header.
diff --git a/src/ansi2knr.c b/src/ansi2knr.c
new file mode 100644
index 0000000..9bcc4ad
--- /dev/null
+++ b/src/ansi2knr.c
@@ -0,0 +1,439 @@
+/* Copyright (C) 1989, 1991, 1993, 1994 Aladdin Enterprises. All rights reserved. */
+
+/* ansi2knr.c */
+/* Convert ANSI function declarations to K&R syntax */
+
+/*
+ansi2knr is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY. No author or distributor accepts responsibility
+to anyone for the consequences of using it or for whether it serves any
+particular purpose or works at all, unless he says so in writing. Refer
+to the GNU General Public License for full details.
+
+Everyone is granted permission to copy, modify and redistribute
+ansi2knr, but only under the conditions described in the GNU
+General Public License. A copy of this license is supposed to have been
+given to you along with ansi2knr so you can know your rights and
+responsibilities. It should be in a file named COPYLEFT. Among other
+things, the copyright notice and this notice must be preserved on all
+copies.
+*/
+
+/*
+ * Usage:
+ ansi2knr [--varargs] input_file [output_file]
+ * If no output_file is supplied, output goes to stdout.
+ * There are no error messages.
+ *
+ * ansi2knr recognizes function definitions by seeing a non-keyword
+ * identifier at the left margin, followed by a left parenthesis,
+ * with a right parenthesis as the last character on the line.
+ * It will recognize a multi-line header provided that the last character
+ * of the last line of the header is a right parenthesis,
+ * and no intervening line ends with a left brace or a semicolon.
+ * These algorithms ignore whitespace and comments, except that
+ * the function name must be the first thing on the line.
+ * The following constructs will confuse it:
+ * - Any other construct that starts at the left margin and
+ * follows the above syntax (such as a macro or function call).
+ * - Macros that tinker with the syntax of the function header.
+ *
+ * If the --varargs switch is supplied, ansi2knr will attempt to
+ * convert a ... argument to va_alist and va_dcl. If this switch is not
+ * supplied, ansi2knr will simply drop any such arguments.
+ */
+
+/*
+ * The original and principal author of ansi2knr is L. Peter Deutsch
+ * <ghost@aladdin.com>. Other authors are noted in the change history
+ * that follows (in reverse chronological order):
+ lpd 94-10-10 removed CONFIG_BROKETS conditional
+ lpd 94-07-16 added some conditionals to help GNU `configure',
+ suggested by Francois Pinard <pinard@iro.umontreal.ca>;
+ properly erase prototype args in function parameters,
+ contributed by Jim Avera <jima@netcom.com>;
+ correct error in writeblanks (it shouldn't erase EOLs)
+ lpd 89-xx-xx original version
+ */
+
+/* Most of the conditionals here are to make ansi2knr work with */
+/* the GNU configure machinery. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <ctype.h>
+
+#ifdef HAVE_CONFIG_H
+
+/*
+ For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
+ This will define HAVE_CONFIG_H and so, activate the following lines.
+ */
+
+# if STDC_HEADERS || HAVE_STRING_H
+# include <string.h>
+# else
+# include <strings.h>
+# endif
+
+#else /* not HAVE_CONFIG_H */
+
+/*
+ Without AC_CONFIG_HEADER, merely use <string.h> as in the original
+ Ghostscript distribution. This loses on older BSD systems.
+ */
+
+# include <string.h>
+
+#endif /* not HAVE_CONFIG_H */
+
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+#else
+/*
+ malloc and free should be declared in stdlib.h,
+ but if you've got a K&R compiler, they probably aren't.
+ */
+char *malloc();
+void free();
+#endif
+
+/* Scanning macros */
+#define isidchar(ch) (isalnum(ch) || (ch) == '_')
+#define isidfirstchar(ch) (isalpha(ch) || (ch) == '_')
+
+/* Forward references */
+char *skipspace();
+void writeblanks();
+int test1();
+int convert1();
+
+/* The main program.
+ * Usage: ansi2knr [--varargs] input_file [output_file]
+ * Copies input_file to output_file (stdout when omitted).  Input is
+ * accumulated line by line in a fixed-size buffer; test1() classifies
+ * the accumulated text and convert1() rewrites recognized function
+ * definitions, everything else is copied through unchanged.
+ * Exits with status 1 on an unrecognized switch, a file that cannot be
+ * opened, or failure to allocate the line buffer.  With the wrong
+ * number of arguments the usage line is printed and the exit status is 0.
+ */
+int
+main(argc, argv)
+    int argc;
+    char *argv[];
+{   FILE *in, *out;
+#define bufsize 5000			/* arbitrary size */
+    char *buf;
+    char *line;
+    int convert_varargs = 0;
+    if ( argc > 1 && argv[1][0] == '-' )
+    {   if ( !strcmp(argv[1], "--varargs") )
+        {   convert_varargs = 1;
+            argc--;
+            argv++;
+        }
+        else
+        {   fprintf(stderr, "Unrecognized switch: %s\n", argv[1]);
+            exit(1);
+        }
+    }
+    switch ( argc )
+    {
+    default:
+        printf("Usage: ansi2knr [--varargs] input_file [output_file]\n");
+        exit(0);
+    case 2:
+        out = stdout;
+        break;
+    case 3:
+        out = fopen(argv[2], "w");
+        if ( out == NULL )
+        {   fprintf(stderr, "Cannot open output file %s\n", argv[2]);
+            exit(1);
+        }
+    }
+    in = fopen(argv[1], "r");
+    if ( in == NULL )
+    {   fprintf(stderr, "Cannot open input file %s\n", argv[1]);
+        exit(1);
+    }
+    fprintf(out, "#line 1 \"%s\"\n", argv[1]);
+    buf = malloc(bufsize);
+    if ( buf == NULL )
+    {   /* Without this check fgets() below would write through NULL. */
+        fprintf(stderr, "Unable to allocate line buffer!\n");
+        exit(1);
+    }
+    line = buf;
+    /* `line' is where the next input line is appended; it only moves */
+    /* past `buf' while a possible multi-line header is being collected. */
+    while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
+    {   switch ( test1(buf) )
+        {
+        case 2:			/* a function header */
+            convert1(buf, out, 1, convert_varargs);
+            break;
+        case 1:			/* a function */
+            convert1(buf, out, 0, convert_varargs);
+            break;
+        case -1:		/* maybe the start of a function */
+            line = buf + strlen(buf);
+            if ( line != buf + (bufsize - 1) ) /* overflow check */
+                continue;
+            /* falls through */
+        default:		/* not a function */
+            fputs(buf, out);
+            break;
+        }
+        line = buf;
+    }
+    /* Flush text still pending from an unfinished "maybe" header. */
+    if ( line != buf ) fputs(buf, out);
+    free(buf);
+    fclose(out);
+    fclose(in);
+    return 0;
+}
+
+/* Skip over space and comments, in either direction.
+ * p   - starting position inside a NUL-terminated buffer
+ * dir - 1 to scan forward, -1 to scan backward
+ * Returns a pointer to the first character reached that is neither
+ * whitespace nor part of a comment.  If a NUL is met while inside a
+ * comment, a pointer to that NUL is returned instead.
+ * There is no bounds check: when scanning backward the caller must
+ * ensure a non-space, non-comment character lies at or before p.
+ */
+char *
+skipspace(p, dir)
+    register char *p;
+    register int dir;			/* 1 for forward, -1 for backward */
+{   for ( ; ; )
+    {   /* isspace() is undefined for negative values, so go through */
+        /* unsigned char in case plain char is signed. */
+        while ( isspace((unsigned char)*p) ) p += dir;
+        if ( !(*p == '/' && p[dir] == '*') ) break;
+        p += dir;  p += dir;
+        while ( !(*p == '*' && p[dir] == '/') )
+        {   if ( *p == 0 ) return p;	/* multi-line comment?? */
+            p += dir;
+        }
+        p += dir;  p += dir;
+    }
+    return p;
+}
+
+/*
+ * Overwrite every character in the range [start, end) with a space,
+ * except carriage returns and newlines, which are left in place.
+ */
+void
+writeblanks(start, end)
+    char *start;
+    char *end;
+{   char *cursor = start;
+    while ( cursor < end )
+    {   if ( !(*cursor == '\r' || *cursor == '\n') )
+            *cursor = ' ';
+        cursor++;
+    }
+}
+
+/*
+ * Decide whether the text in buf is a function definition.
+ * buf may contain embedded newlines and/or end with one.
+ * Result:
+ *	0  - definitely not a function definition;
+ *	1  - definitely a function definition;
+ *	2  - definitely a function prototype (NOT USED);
+ *	-1 - possibly the start of a function definition;
+ *	     the caller should append another line and ask again.
+ * Prototypes are deliberately not converted: Ghostscript's
+ * declaration-generating macros look too much like prototypes
+ * and confuse the algorithms.
+ */
+int
+test1(buf)
+    char *buf;
+{   /* Keywords that could be followed by a left parenthesis */
+    /* (which, unfortunately, is most of them). */
+    static char *words[] =
+       {	"asm", "auto", "case", "char", "const", "double",
+		"extern", "float", "for", "if", "int", "long",
+		"register", "return", "short", "signed", "sizeof",
+		"static", "switch", "typedef", "unsigned",
+		"void", "volatile", "while", 0
+       };
+    register char *scan = buf;
+    char *last;
+    char *name_end;
+    char **key;
+    int name_len;
+    int verdict;
+
+    if ( !isidfirstchar(*scan) )
+        return 0;		/* no name at left margin */
+
+    /* The last significant character of the text decides the verdict */
+    /* that is returned if the remaining checks pass. */
+    last = skipspace(buf + strlen(buf) - 1, -1);
+    if ( *last == '{' )
+        return 0;		/* not a function */
+    if ( *last == ';' )
+        verdict = 0 /*2*/;
+    else if ( *last == ')' )
+        verdict = 1;
+    else
+        verdict = -1;
+
+    /* The leading identifier must be followed by '(' and a */
+    /* non-empty parameter list. */
+    while ( isidchar(*scan) ) scan++;
+    name_end = scan;
+    scan = skipspace(scan, 1);
+    if ( *scan != '(' )
+        return 0;		/* not a function */
+    scan = skipspace(scan + 1, 1);
+    if ( *scan == ')' )
+        return 0;		/* no parameters */
+
+    /* Check that the apparent function name isn't a keyword. */
+    name_len = name_end - buf;
+    for ( key = words; *key != 0; key++ )
+        if ( strlen(*key) == name_len && !strncmp(*key, buf, name_len) )
+            return 0;		/* name is a keyword */
+    return verdict;
+}
+
+/* Convert a recognized function definition or header to K&R syntax.
+ * buf holds the text to convert and is modified in place; the result
+ * is written to out.  If header is nonzero, only the name, "(" and ");"
+ * are written, followed by the line-break characters found in the
+ * argument text; otherwise the name, the list of parameter names and
+ * then the parameter declarations (each terminated by ';') are written.
+ * If convert_varargs is nonzero a trailing "..." becomes va_alist /
+ * va_dcl, otherwise it is blanked out.
+ * The `breaks' table records two pointers per argument (start of the
+ * argument text, start of the declared name) plus one final pointer;
+ * when it fills up it is freed, doubled in size and parsing restarts.
+ * Returns 0 on success, or -1 (after copying buf to out unchanged)
+ * if the table cannot be allocated.
+ */
+int
+convert1(buf, out, header, convert_varargs)
+ char *buf;
+ FILE *out;
+ int header; /* Boolean */
+ int convert_varargs; /* Boolean */
+{ char *endfn;
+ register char *p;
+ char **breaks;
+ unsigned num_breaks = 2; /* for testing */
+ char **btop;
+ char **bp;
+ char **ap;
+ char *vararg = 0;
+ /* Pre-ANSI implementations don't agree on whether strchr */
+ /* is called strchr or index, so we open-code it here. */
+ for ( endfn = buf; *(endfn++) != '('; ) ; /* endfn ends up just past the first '(' */
+top: p = endfn;
+ breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
+ if ( breaks == 0 )
+ { /* Couldn't allocate break table, give up */
+ fprintf(stderr, "Unable to allocate break table!\n");
+ fputs(buf, out);
+ return -1;
+ }
+ btop = breaks + num_breaks * 2 - 2; /* bp must stay below this: room for one more pair plus the final entry */
+ bp = breaks;
+ /* Parse the argument list */
+ do
+ { int level = 0;
+ char *lp = NULL;
+ char *rp;
+ char *end = NULL;
+ if ( bp >= btop )
+ { /* Filled up break table. */
+ /* Allocate a bigger one and start over. */
+ free((char *)breaks);
+ num_breaks <<= 1;
+ goto top;
+ }
+ *bp++ = p; /* even slot: start of this argument's text */
+ /* Find the end of the argument */
+ for ( ; end == NULL; p++ )
+ { switch(*p)
+ {
+ case ',':
+ if ( !level ) end = p;
+ break;
+ case '(':
+ if ( !level ) lp = p;
+ level++;
+ break;
+ case ')':
+ if ( --level < 0 ) end = p;
+ else rp = p;
+ break;
+ case '/':
+ p = skipspace(p, 1) - 1;
+ break;
+ default:
+ ;
+ }
+ }
+ /* Erase any embedded prototype parameters. */
+ if ( lp )
+ writeblanks(lp + 1, rp);
+ p--; /* back up over terminator */
+ /* Find the name being declared. */
+ /* This is complicated because of procedure and */
+ /* array modifiers. */
+ for ( ; ; )
+ { p = skipspace(p - 1, -1);
+ switch ( *p )
+ {
+ case ']': /* skip array dimension(s) */
+ case ')': /* skip procedure args OR name */
+ { int level = 1;
+ while ( level )
+ switch ( *--p )
+ {
+ case ']': case ')': level++; break;
+ case '[': case '(': level--; break;
+ case '/': p = skipspace(p, -1) + 1; break;
+ default: ;
+ }
+ }
+ if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
+ { /* We found the name being declared */
+ while ( !isidfirstchar(*p) )
+ p = skipspace(p, 1) + 1;
+ goto found;
+ }
+ break;
+ default: goto found;
+ }
+ }
+found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
+ { if ( convert_varargs )
+ { *bp++ = "va_alist";
+ vararg = p-2;
+ }
+ else
+ { p++;
+ if ( bp == breaks + 1 ) /* sole argument */
+ writeblanks(breaks[0], p);
+ else
+ writeblanks(bp[-1] - 1, p);
+ bp--;
+ }
+ }
+ else
+ { while ( isidchar(*p) ) p--;
+ *bp++ = p+1; /* odd slot: first character of the declared name */
+ }
+ p = end;
+ }
+ while ( *p++ == ',' );
+ *bp = p; /* final entry: just past the terminator that ended the list */
+ /* Make a special check for 'void' arglist */
+ if ( bp == breaks+2 )
+ { p = skipspace(breaks[0], 1);
+ if ( !strncmp(p, "void", 4) )
+ { p = skipspace(p+4, 1);
+ if ( p == breaks[2] - 1 )
+ { bp = breaks; /* yup, pretend arglist is empty */
+ writeblanks(breaks[0], p + 1);
+ }
+ }
+ }
+ /* Put out the function name and left parenthesis. */
+ p = buf;
+ while ( p != endfn ) putc(*p, out), p++;
+ /* Put out the declaration. */
+ if ( header )
+ { fputs(");", out);
+ for ( p = breaks[0]; *p; p++ )
+ if ( *p == '\r' || *p == '\n' )
+ putc(*p, out);
+ }
+ else
+ { for ( ap = breaks+1; ap < bp; ap += 2 )
+ { p = *ap;
+ while ( isidchar(*p) )
+ putc(*p, out), p++;
+ if ( ap < bp - 1 )
+ fputs(", ", out);
+ }
+ fputs(") ", out);
+ /* Put out the argument declarations */
+ for ( ap = breaks+2; ap <= bp; ap += 2 )
+ (*ap)[-1] = ';'; /* overwrite the separator that ended each argument */
+ if ( vararg != 0 )
+ { *vararg = 0;
+ fputs(breaks[0], out); /* any prior args */
+ fputs("va_dcl", out); /* the final arg */
+ fputs(bp[0], out);
+ }
+ else
+ fputs(breaks[0], out);
+ }
+ free((char *)breaks);
+ return 0;
+}
diff --git a/src/defid b/src/defid
new file mode 100644
index 0000000..5c04bc6
--- /dev/null
+++ b/src/defid
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+for sym
+do
+ gid $sym |egrep '(}[ ]*|:[ ]*#[ ]*define[ ]+)'$sym'|'typedef\>.*[ ]+'$sym
+done
diff --git a/src/eid b/src/eid
new file mode 100644
index 0000000..7d2c79c
--- /dev/null
+++ b/src/eid
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+lid -E "$@"
diff --git a/src/fid.c b/src/fid.c
new file mode 100644
index 0000000..4572ac8
--- /dev/null
+++ b/src/fid.c
@@ -0,0 +1,270 @@
+/* fid.c -- list all tokens in the given file(s)
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <getopt.h>
+
+#include <config.h>
+#include "system.h"
+#include "idfile.h"
+#include "bitops.h"
+#include "filenames.h"
+#include "misc.h"
+#include "strxtra.h"
+#include "alloc.h"
+#include "token.h"
+#include "error.h"
+#include "pathmax.h"
+
+int get_file_index __P((char *file_name));
+int is_hit __P((unsigned char const *hits, int file_number));
+int is_hit_1 __P((unsigned char const **hits, int level, int file_number));
+void skip_hits __P((unsigned char const **hits, int level));
+
+struct idhead idh;
+int tree8_levels;
+
+/* The name this program was run with. */
+
+char const *program_name;
+
+/* If nonzero, display usage information and exit. */
+
+static int show_help;
+
+/* If nonzero, print the version on standard output then exit. */
+
+static int show_version;
+
+/* The file name of the ID database. */
+
+char const *id_file_name;
+
+struct file_link *cw_dlink;
+struct file_link **members_0;
+unsigned int bits_vec_size;
+char *hits_buf;
+
+static struct option const long_options[] =
+{
+ { "file", required_argument, 0, 'f' },
+ { "help", no_argument, &show_help, 1 },
+ { "version", no_argument, &show_version, 1 },
+ { 0 }
+};
+
+void
+usage (void)
+{
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
+ program_name);
+ exit (1);
+}
+
+static void
+help_me (void)
+{
+ printf (_("\
+Usage: %s [OPTION] FILENAME [FILENAME2]\n"),
+ program_name);
+ printf (_("\
+List identifiers that occur in FILENAME, or if FILENAME2 is\n\
+also given list the identifiers that occur in both files.\n\
+\n\
+ -f, --file=FILE file name of ID database\n\
+"));
+ exit (0);
+}
+
+/* List the identifiers that occur in one source file, or in both of two.
+   Parses the options, locates and reads the ID database, maps each
+   FILENAME argument to its index among the database's member files,
+   then prints every token whose hit set includes the requested file(s).
+   Returns 0 on success and 1 when any FILENAME cannot be resolved.  */
+
+int
+main (int argc, char **argv)
+{
+  int index_1 = -1;
+  int index_2 = -1;
+  int have_second_file;
+
+  program_name = argv[0];
+  for (;;)
+    {
+      int optc = getopt_long (argc, argv, "f:",
+                              long_options, (int *) 0);
+      if (optc < 0)
+        break;
+      switch (optc)
+        {
+        case 0:
+          break;
+
+        case 'f':
+          id_file_name = optarg;
+          break;
+
+        default:
+          usage ();
+        }
+    }
+
+  if (show_version)
+    {
+      printf ("%s - %s\n", program_name, PACKAGE_VERSION);
+      exit (0);
+    }
+
+  if (show_help)
+    help_me ();
+
+  /* Look for the ID database up the tree */
+  id_file_name = look_up (id_file_name);
+  if (id_file_name == 0)
+    error (1, errno, _("can't locate `ID'"));
+
+  init_idh_obstacks (&idh);
+  init_idh_tables (&idh);
+
+  cw_dlink = get_current_dir_link ();
+
+  /* Determine absolute name of the directory name to which database
+     constituent files are relative. */
+  members_0 = read_id_file (id_file_name, &idh);
+  bits_vec_size = (idh.idh_files + 7) / 4; /* more than enough */
+  tree8_levels = tree8_count_levels (idh.idh_files);
+
+  argc -= optind;
+  argv += optind;
+  if (argc < 1)
+    {
+      error (0, 0, _("no file name arguments"));
+      usage ();
+    }
+  if (argc > 2)
+    {
+      error (0, 0, _("too many file name arguments"));
+      usage ();
+    }
+
+  /* Resolve both names before bailing out so that every bad argument
+     gets its own diagnostic from get_file_index.  */
+  have_second_file = (argc == 2);
+  index_1 = get_file_index (argv[0]);
+  if (have_second_file)
+    index_2 = get_file_index (argv[1]);
+
+  /* A negative index_2 only means "no second file" when none was given;
+     a second name that failed to resolve must not be silently dropped.  */
+  if (index_1 < 0 || (have_second_file && index_2 < 0))
+    return 1;
+
+  hits_buf = xmalloc (idh.idh_buf_size);
+  fseek (idh.idh_FILE, idh.idh_tokens_offset, SEEK_SET);
+  {
+    int i;
+    for (i = 0; i < idh.idh_tokens; i++)
+      {
+        unsigned char const *hits;
+
+        gets_past_00 (hits_buf, idh.idh_FILE);
+        hits = tok_hits_addr (hits_buf);
+        if (is_hit (hits, index_1) && (index_2 < 0 || is_hit (hits, index_2)))
+          printf ("%s\n", tok_string (hits_buf));
+      }
+  }
+
+  return 0;
+}
+
+/* Map FILE_NAME to its position in the members_0 table.
+   Matching rule, in order of precedence:
+     - if the name contains "./", it is parsed relative to cw_dlink and
+       must be the very same file_link as the member;
+     - else if it contains a slash, it must equal the tail of the
+       member's path as produced by maybe_relative_path;
+     - otherwise it must equal the member's fl_name.
+   Returns the index of the single matching member.  If no member or
+   more than one member matches, a diagnostic is issued and -1 is
+   returned.  */
+
+int
+get_file_index (char *file_name)
+{
+  struct file_link **members;
+  struct file_link **end = &members_0[idh.idh_files];
+  struct file_link *fn_flink = 0;
+  int has_slash = (strchr (file_name, '/') != 0);
+  int file_name_length = strlen (file_name);
+  int index = -1;
+
+  if (strstr (file_name, "./"))
+    fn_flink = parse_file_name (file_name, cw_dlink);
+
+  for (members = members_0; members < end; members++)
+    {
+      struct file_link *flink = *members;
+      if (fn_flink)
+        {
+          if (fn_flink != flink)
+            continue;
+        }
+      else if (has_slash)
+        {
+          char buf[PATH_MAX];
+          int member_length;
+          maybe_relative_path (buf, flink, cw_dlink);
+          member_length = strlen (buf);
+          if (file_name_length > member_length
+              || !strequ (&buf[member_length - file_name_length], file_name))
+            continue;
+        }
+      else if (!strequ (flink->fl_name, file_name))
+        continue;
+      if (index >= 0)
+        {
+          /* Second match: report it and fail with a defined value
+             (a bare `return;' here left the result indeterminate).  */
+          error (0, 0, _("`%s' is ambiguous"), file_name);
+          return -1;
+        }
+      index = members - members_0;
+    }
+  if (index < 0)
+    error (0, 0, _("`%s' not found"), file_name);
+  return index;
+}
+
+/* Return nonzero if FILE_NUMBER is recorded in the hit data starting at
+   HITS.  The walk starts at the top level (tree8_levels) and works on a
+   local cursor, so the caller's pointer is not advanced.  */
+
+int
+is_hit (unsigned char const *hits, int file_number)
+{
+  unsigned char const *cursor = hits;
+
+  return is_hit_1 (&cursor, tree8_levels, file_number);
+}
+
+/* Walk the hit data at *HITS from LEVEL downward looking for
+   FILE_NUMBER.  At each level one byte is consumed and treated as an
+   8-way bitmask; the 3-bit group of FILE_NUMBER belonging to that level
+   selects the bit to test.  If the bit is clear the answer is 0.  If it
+   is set at the lowest level the answer is 1.  Otherwise the subtrees
+   of all lower-numbered set bits are skipped and the walk continues
+   one level down.  *HITS is advanced past everything consumed.  */
+
+int
+is_hit_1 (unsigned char const **hits, int level, int file_number)
+{
+  for (;;)
+    {
+      int wanted;
+      int node;
+      int mask;
+
+      level--;
+      wanted = 1 << ((file_number >> (3 * level)) & 7);
+      node = *(*hits)++;
+
+      if ((wanted & node) == 0)
+        return 0;
+      if (level == 0)
+        return 1;
+
+      /* Step over the subtrees that precede the one we want.  */
+      mask = 1;
+      while ((mask < wanted) && (mask & 0xff))
+        {
+          if (node & mask)
+            skip_hits (hits, level);
+          mask <<= 1;
+        }
+    }
+}
+
+/* Advance *HITS past one complete subtree of LEVEL levels: consume the
+   subtree's own mask byte and, unless this is the lowest level, skip
+   recursively one child subtree for every bit set in that byte.  */
+
+void
+skip_hits (unsigned char const **hits, int level)
+{
+  int node = *(*hits)++;
+  int mask = 1;
+
+  level--;
+  if (level == 0)
+    return;
+  while (mask & 0xff)
+    {
+      if (node & mask)
+        skip_hits (hits, level);
+      mask <<= 1;
+    }
+}
diff --git a/src/gid b/src/gid
new file mode 100644
index 0000000..7d2c79c
--- /dev/null
+++ b/src/gid
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+lid -E "$@"
diff --git a/src/idx.c b/src/idx.c
new file mode 100644
index 0000000..60ca5fc
--- /dev/null
+++ b/src/idx.c
@@ -0,0 +1,167 @@
+/* idx.c -- simple interface for testing scanners
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <string.h>
+#include <getopt.h>
+
+#include <config.h>
+#include "alloc.h"
+#include "system.h"
+#include "misc.h"
+#include "filenames.h"
+#include "scanners.h"
+#include "idfile.h"
+#include "pathmax.h"
+
+/* Forward declarations for the functions defined after main.  */
+void scan_files __P((struct idhead *idhp));
+void scan_member_file __P((struct member_file const *member));
+
+/* The name this program was run with (argv[0]).  */
+char const *program_name;
+/* Argument of -m/--lang-map; handed to parse_language_map in main.  */
+char *lang_map_file_name = 0;
+/* Set to 1 through long_options by --version and --help respectively.  */
+int show_version = 0;
+int show_help = 0;
+/* Header structure handed to the init_idh_* helpers and to scan_files.  */
+struct idhead idh;
+
+void
+usage (void)
+{
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
+ program_name);
+ exit (1);
+}
+
+/* Long options accepted by idx.  `lang-map' is returned as the short
+   option 'm'; `help' and `version' only store 1 in their flag variable,
+   in which case getopt_long returns 0 (the `case 0' in main).  The
+   all-zero entry terminates the table.  */
+static struct option const long_options[] =
+{
+ { "lang-map", required_argument, 0, 'm' },
+ { "help", no_argument, &show_help, 1 },
+ { "version", no_argument, &show_version, 1 },
+ { 0 }
+};
+
+/* Print the full usage message, followed by the scanner-specific help
+   from language_help_me, on standard output and exit with status 0.  */
+
+static void
+help_me (void)
+{
+  printf (_("\
+Usage: %s [OPTION]... [FILE]...\n"),
+	  program_name);
+
+  /* The -m line used to read "use FILE to file names onto ..."; the
+     verb was missing.  */
+  printf (_("\
+Print all tokens found in a source file.\n\
+ -m, --lang-map=FILE use FILE to map file names onto source language\n\
+ --help display this help and exit\n\
+ --version output version information and exit\n\
+\n\
+The following arguments apply to the language-specific scanners:\n\
+"));
+  language_help_me ();
+  exit (0);
+}
+
+/* idx entry point: parse the command line, register every FILE
+   argument as a member file and print the tokens the language
+   scanners find in each of them.  Always returns 0 when it returns.  */
+
+int
+main (int argc, char **argv)
+{
+  program_name = argv[0];
+  for (;;)
+    {
+      int optc = getopt_long (argc, argv, "o:f:i:x:l:m:uvs",
+			      long_options, (int *) 0);
+      if (optc < 0)
+	break;
+      switch (optc)
+	{
+	case 0:
+	  /* --help / --version: the flag variable is already set.  */
+	  break;
+
+	case 'm':
+	  lang_map_file_name = optarg;
+	  break;
+
+	default:
+	  usage ();
+	}
+    }
+
+  if (show_version)
+    {
+      printf ("%s - %s\n", program_name, PACKAGE_VERSION);
+      exit (0);
+    }
+
+  if (show_help)
+    help_me ();
+
+  argc -= optind;
+  argv += optind;
+
+  init_idh_obstacks (&idh);
+  init_idh_tables (&idh);
+  parse_language_map (lang_map_file_name);
+  {
+    struct file_link *cwd_link = get_current_dir_link ();
+    /* Each remaining argument is interpreted relative to the current
+       directory and walked.  */
+    while (argc--)
+      walk_flink (parse_file_name (*argv++, cwd_link), 0);
+    mark_member_file_links (&idh);
+    obstack_init (&tokens_obstack);
+    scan_files (&idh);
+  }
+
+  return 0;
+}
+
+/* Dump the member-file table of IDHP in member_file_qsort_compare
+   order, scan every entry with scan_member_file, then release the
+   dumped vector.  */
+
+void
+scan_files (struct idhead *idhp)
+{
+  struct member_file **sorted
+    = (struct member_file **) hash_dump (&idhp->idh_member_file_table,
+					 0, member_file_qsort_compare);
+  struct member_file **limit = sorted + idhp->idh_member_file_table.ht_fill;
+  struct member_file **cursor = sorted;
+
+  while (cursor < limit)
+    {
+      scan_member_file (*cursor);
+      cursor++;
+    }
+  free (sorted);
+}
+
+/* Print, one per line on standard output, every token that the
+   scanner configured for MEMBER finds in that file.  The process
+   changes directory to the file's parent before opening it.  Nothing
+   is printed when open_source_FILE returns a null stream.  */
+
+void
+scan_member_file (struct member_file const *member)
+{
+  struct lang_args const *lang_args = member->mf_lang_args;
+  struct language const *lang = lang_args->la_language;
+  get_token_func_t get_token = lang->lg_get_token;
+  struct file_link *flink = member->mf_link;
+  FILE *source_FILE;
+
+  chdir_to_link (flink->fl_parent);
+  source_FILE = open_source_FILE (flink->fl_name);
+  if (source_FILE)
+    {
+      void const *args = lang_args->la_args_digested;
+      int flags;		/* filled in by the scanner; unused here */
+      struct token *token;
+
+      while ((token = (*get_token) (source_FILE, args, &flags)) != NULL)
+	{
+	  puts (token->tok_name);
+	  /* Hand the token's storage back to the obstack at once.  */
+	  obstack_free (&tokens_obstack, token);
+	}
+      close_source_FILE (source_FILE);
+    }
+}
diff --git a/src/lid.c b/src/lid.c
new file mode 100644
index 0000000..0768676
--- /dev/null
+++ b/src/lid.c
@@ -0,0 +1,1482 @@
+/* lid.c -- primary query interface for mkid database
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <assert.h>
+#include <limits.h>
+#if WITH_REGEX
+# include <regex.h>
+#else
+# include <rx.h>
+#endif
+
+#include <config.h>
+#include <getopt.h>
+#include "system.h"
+#include "alloc.h"
+#include "idfile.h"
+#include "token.h"
+#include "bitops.h"
+#include "strxtra.h"
+#include "misc.h"
+#include "filenames.h"
+#include "error.h"
+#include "pathmax.h"
+
+/* A report function presents one identifier NAME together with the
+   null-terminated vector FLINKV of files it occurs in.  A query
+   function looks ARG up in the database, calls the report function for
+   what it finds and returns the number of matches.  */
+typedef void (*report_func_t) __P((char const *name, struct file_link **flinkv));
+typedef int (*query_func_t) __P((char const *arg, report_func_t));
+
+unsigned char *tree8_to_bits __P((unsigned char *bits_vec, unsigned char const *hits_tree8));
+void tree8_to_bits_1 __P((unsigned char **bits_vec, unsigned char const **hits_tree8, int level));
+struct file_link **tree8_to_flinkv __P((unsigned char const *hits_tree8));
+struct file_link **bits_to_flinkv __P((unsigned char const *bits_vec));
+
+void usage __P((void));
+static void help_me __P((void));
+int common_prefix_suffix __P((struct file_link const *flink_1, struct file_link const *flink_2));
+int member_file_index_qsort_compare __P((void const *x, void const *y));
+void look_id __P((char const *name, struct file_link **flinkv));
+void grep_id __P((char const *name, struct file_link **flinkv));
+void edit_id __P((char const *name, struct file_link **flinkv));
+int vector_cardinality __P((void *vector));
+/* This prototype used to name `skip_to_argv'; the function that is
+   defined and called (from edit_id) is skip_to_flinkv.  */
+int skip_to_flinkv __P((struct file_link **flinkv));
+int query_plain __P((char const *arg, report_func_t report_function));
+int query_anchor __P((char const *arg, report_func_t report_function));
+int query_regexp __P((char const *arg, report_func_t report_function));
+int query_number __P((char const *arg, report_func_t report_function));
+int query_non_unique __P((unsigned int, report_func_t report_function));
+int query_apropos __P((char const *arg, report_func_t report_function));
+void parse_frequency_arg __P((char const *arg));
+int frequency_wanted __P((char const *tok));
+char const *strcpos __P((char const *s1, char const *s2));
+char const *file_regexp __P((char const *name0, char const *left_delimit, char const *right_delimit));
+off_t query_token __P((char const *token));
+int is_regexp __P((char *name));
+int file_name_wildcard __P((char const *re, char const *fn));
+int word_match __P((char const *name0, char const *line));
+int get_radix __P((char const *name));
+int stoi __P((char const *name));
+int otoi __P((char const *name));
+int dtoi __P((char const *name));
+int xtoi __P((char const *name));
+void savetty __P((void));
+void restoretty __P((void));
+void linetty __P((void));
+void chartty __P((void));
+
+/* Radix classes, one bit each so that several can be combined (see
+   get_radix, which returns such a combination).  */
+enum radix {
+  radix_oct = 1,
+  radix_dec = 2,
+  radix_hex = 4,
+  radix_all = radix_dec | radix_oct | radix_hex
+};
+
+/* Character-class helpers.  The argument is converted to unsigned char
+   before it reaches <ctype.h>, because passing a negative plain char
+   there is undefined.  Both macros evaluate their argument more than
+   once, so it must be free of side effects.  */
+#define TOLOWER(c) (isupper ((unsigned char) (c)) ? tolower ((unsigned char) (c)) : (unsigned char) (c))
+#define IS_ALNUM(c) (isalnum ((unsigned char) (c)) || (c) == '_')
+
+/* Initial value of brace_notation_flag; may be overridden at build time.  */
+#ifndef BRACE_NOTATION_DEFAULT
+#define BRACE_NOTATION_DEFAULT 1
+#endif
+
+/* Sorry about all the globals, but it's really cleaner this way. */
+
+/* Set for each query in main.  When nonzero, the query functions
+   accumulate the hits of all matching tokens in bits_vec and report
+   them once instead of reporting every token separately.  */
+int merging;
+/* Set to 1 by -e/--regexp in main.  */
+int file_name_regexp = 0;
+/* NOTE(review): not referenced in the part of the file reviewed here.  */
+char anchor_dir[BUFSIZ];
+/* Result of tree8_count_levels (idh.idh_files), computed in main.  */
+int tree8_levels;
+/* Size in bytes of bits_vec, computed in main from idh.idh_files.  */
+unsigned int bits_vec_size;
+/* Header of the ID database, filled in by read_id_file in main.  */
+struct idhead idh;
+/* Token buffers of idh.idh_buf_size bytes each, allocated in main.  */
+char *hits_buf_1;
+char *hits_buf_2;
+/* Bit vector of bits_vec_size bytes used to merge hits.  */
+unsigned char *bits_vec;
+
+/* The name this program was run with. */
+
+char const *program_name;
+
+/* If nonzero, display usage information and exit. */
+
+static int show_help;
+
+/* If nonzero, print the version on standard output then exit. */
+
+static int show_version;
+
+/* Which radixes do we want? */
+
+int radix_flag = radix_all;
+
+/* If nonzero, don't print the name of the matched identifier. */
+
+int no_id_flag = 0;
+
+/* If nonzero, merge multiple look_id regexp output lines into a
+ single line. */
+
+int merge_flag = 0;
+
+/* If nonzero, ignore differences in alphabetic case while matching.
+ main stores REG_ICASE here so that the value can be OR-ed directly
+ into the regcomp flags. */
+
+int ignore_case_flag = 0;
+
+/* If nonzero, print file names in abbreviated fashion using the
+ shell's brace notation. */
+
+int brace_notation_flag = BRACE_NOTATION_DEFAULT;
+
+/* If nonzero, list identifiers that are non-unique within this
+ number of leading characters. */
+
+unsigned int ambiguous_prefix_length = 0;
+
+/* The file name of the ID database. */
+
+char const *id_file_name;
+
+/* The style of report. */
+
+report_func_t report_function = look_id;
+
+/* The style of query. */
+
+query_func_t query_func;
+
+/* The style of query explicitly set by user from the command-line. */
+
+query_func_t forced_query_func;
+
+/* Lower and upper bounds on occurrence frequency. */
+
+unsigned int frequency_low = 1;
+unsigned int frequency_high = USHRT_MAX;
+
+/* Link for the current working directory (get_current_dir_link). */
+struct file_link *cw_dlink;
+/* Vector of member files returned by read_id_file; it is indexed up to
+ idh.idh_files. */
+struct file_link **members_0;
+
+/* Long options accepted by lid.  Every entry except `help' and
+   `version' is returned as the short option in its last field and must
+   therefore also appear in the option string passed to getopt_long in
+   main.  `help' and `version' only store 1 in their flag variable.  The
+   all-zero entry terminates the table.  */
+static struct option const long_options[] =
+{
+ { "file", required_argument, 0, 'f' },
+ { "frequency", required_argument, 0, 'F' },
+ { "ambiguous", required_argument, 0, 'a' },
+ { "grep", no_argument, 0, 'G' },
+ { "apropos", no_argument, 0, 'A' },
+ { "edit", no_argument, 0, 'E' },
+ { "regexp", no_argument, 0, 'e' },
+ { "braces", no_argument, 0, 'b' },
+ { "merge", no_argument, 0, 'm' },
+ { "ignore-case", no_argument, 0, 'i' },
+ { "word", no_argument, 0, 'w' },
+ { "hex", no_argument, 0, 'x' },
+ { "decimal", no_argument, 0, 'd' },
+ { "octal", no_argument, 0, 'o' },
+ { "no-id", no_argument, 0, 'n' },
+ { "help", no_argument, &show_help, 1 },
+ { "version", no_argument, &show_version, 1 },
+ { 0 }
+};
+
+void
+usage (void)
+{
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
+ program_name);
+ exit (1);
+}
+
+/* Print the full usage message on standard output and exit with
+   status 0.  */
+
+static void
+help_me (void)
+{
+  printf (_("\
+Usage: %s [OPTION]... PATTERN...\n"),
+	  program_name);
+  /* Two slips in the text are corrected here: "insinsitively", and
+     "MAX_USHRT" for the limit that the code spells USHRT_MAX.  */
+  printf (_("\
+Query ID database and report results.\n\
+By default, output consists of multiple lines, each line containing the\n\
+matched identifier followed by the list of file names in which it occurs.\n\
+\n\
+ -f, --file=FILE file name of ID database\n\
+ -G, --grep show every line where the matched identifier occurs\n\
+ -E, --edit edit every file where the matched identifier occurs\n\
+ -m, --merge output a multi-line regexp match as a single line\n\
+ -n, --no-id print file names only - omit the identifier\n\
+ -b, --braces toggle shell brace-notation for output file names\n\
+\n\
+If PATTERN contains regular expression metacharacters, it is interpreted\n\
+as a regular expression. Otherwise, PATTERN is interpreted as a literal\n\
+word.\n\
+\n\
+ -e, --regexp match PATTERN as a regular expression substring\n\
+ -w, --word match PATTERN as a word\n\
+ -i, --ignore-case match PATTERN case insensitively\n\
+ -A, --apropos match PATTERN as a case-insensitive substring\n\
+\n\
+ -F, --frequency=FREQ find identifiers that occur FREQ times, where FREQ\n\
+ is a range expressed as `N..M'. N omitted defaults\n\
+ to 1, M omitted defaults to USHRT_MAX.\n\
+ -a, --ambiguous=LEN find identifiers whose names are ambiguous for LEN chars\n\
+\n\
+ -x, --hex only find numbers expressed as hexadecimal\n\
+ -d, --decimal only find numbers expressed as decimal\n\
+ -o, --octal only find numbers expressed as octal\n\
+\n\
+ --help display this help and exit\n\
+ --version output version information and exit\n\
+"));
+  exit (0);
+}
+
+/* lid entry point: parse the options, open the ID database and run one
+   query per PATTERN argument (or the single pattern ".*" when none is
+   given), reporting through the selected report function.  Exits with
+   status 0 once all patterns have been processed.  */
+
+int
+main (int argc, char **argv)
+{
+  /* Radixes named by -x/-d/-o.  radix_flag starts out as radix_all, so
+     OR-ing the options straight into it (as this code used to do) could
+     never narrow the search; they are collected here and replace the
+     default only when at least one was given.  */
+  int radix_selected = 0;
+
+  program_name = argv[0];
+  for (;;)
+    {
+      int optc = getopt_long (argc, argv, "f:F:a:GAEebmiwxdon",
+			      long_options, (int *) 0);
+      if (optc < 0)
+	break;
+      switch (optc)
+	{
+	case 0:
+	  /* --help / --version: the flag variable is already set.  */
+	  break;
+
+	case 'f':
+	  id_file_name = optarg;
+	  break;
+
+	case 'F':
+	  parse_frequency_arg (optarg);
+	  break;
+
+	case 'a':
+	  ambiguous_prefix_length = stoi (optarg);
+	  break;
+
+	case 'G':
+	  report_function = grep_id;
+	  break;
+
+	case 'A':
+	  forced_query_func = query_apropos;
+	  report_function = look_id;
+	  break;
+
+	case 'E':
+	  report_function = edit_id;
+	  break;
+
+	case 'e':
+	  forced_query_func = query_regexp;
+	  file_name_regexp = 1;
+	  break;
+
+	case 'b':
+	  brace_notation_flag = !brace_notation_flag;
+	  break;
+
+	case 'm':
+	  merge_flag = 1;
+	  break;
+
+	case 'i':
+	  /* Stored as the regcomp flag so it can be OR-ed in directly.  */
+	  ignore_case_flag = REG_ICASE;
+	  break;
+
+	case 'w':
+	  forced_query_func = query_plain;
+	  break;
+
+	case 'x':
+	  radix_selected |= radix_hex;
+	  break;
+
+	case 'd':
+	  radix_selected |= radix_dec;
+	  break;
+
+	case 'o':
+	  radix_selected |= radix_oct;
+	  break;
+
+	case 'n':
+	  no_id_flag = 1;
+	  break;
+
+	default:
+	  usage ();
+	}
+    }
+
+  if (radix_selected)
+    radix_flag = radix_selected;
+
+  if (show_version)
+    {
+      printf ("%s - %s\n", program_name, PACKAGE_VERSION);
+      exit (0);
+    }
+
+  if (show_help)
+    help_me ();
+
+  /* Look for the ID database up the tree */
+  id_file_name = look_up (id_file_name);
+  if (id_file_name == 0)
+    error (1, errno, _("can't locate `ID'"));
+
+  init_idh_obstacks (&idh);
+  init_idh_tables (&idh);
+
+  cw_dlink = get_current_dir_link ();
+
+  /* Determine absolute name of the directory name to which database
+     constituent files are relative. */
+  members_0 = read_id_file (id_file_name, &idh);
+  bits_vec_size = (idh.idh_files + 7) / 4; /* more than enough */
+  tree8_levels = tree8_count_levels (idh.idh_files);
+
+  /* The working buffers do not depend on the pattern, so they are
+     allocated once here instead of once (and leaked) per argument.  */
+  hits_buf_1 = xmalloc (idh.idh_buf_size);
+  hits_buf_2 = xmalloc (idh.idh_buf_size);
+  bits_vec = MALLOC (unsigned char, bits_vec_size);
+
+  argc -= optind;
+  argv += optind;
+  if (argc == 0)
+    {
+      /* No pattern: reuse the argv slot just before the (empty)
+	 operand list for a match-everything regexp.  */
+      argc++;
+      *(char const **)--argv = ".*";
+    }
+
+  while (argc)
+    {
+      long val = -1;
+      char *arg = (argc--, *argv++);
+
+      if (forced_query_func)
+	query_func = forced_query_func;
+      else if (get_radix (arg) && (val = stoi (arg)) >= 0)
+	query_func = query_number;
+      else if (is_regexp (arg))
+	query_func = query_regexp;
+      else if (arg[0] == '^')
+	query_func = query_anchor;
+      else
+	query_func = query_plain;
+
+      if ((report_function == look_id && !merge_flag)
+	  || (query_func == query_number
+	      && val > 7
+	      && radix_flag != radix_dec
+	      && radix_flag != radix_oct
+	      && radix_flag != radix_hex))
+	merging = 0;
+      else
+	merging = 1;
+
+      if (ambiguous_prefix_length)
+	{
+	  if (!query_non_unique (ambiguous_prefix_length, report_function))
+	    fprintf (stderr, _("All identifiers are non-ambiguous within the first %u characters\n"),
+		     ambiguous_prefix_length);
+	  exit (0);
+	}
+      else if (!(*query_func) (arg, report_function))
+	fprintf (stderr, _("%s: not found\n"), arg);
+    }
+  fclose (idh.idh_FILE);
+  exit (0);
+}
+
+/* Return non-zero if the two files live in the same directory and
+   their names have the same suffix, i.e., they are eligible for
+   coalescing with brace notation.  */
+
+int
+common_prefix_suffix (struct file_link const *flink_1, struct file_link const *flink_2)
+{
+  if (flink_1->fl_parent != flink_2->fl_parent)
+    return 0;
+  return (strequ (suff_name (flink_1->fl_name), suff_name (flink_2->fl_name))
+	  ? 1 : 0);
+}
+
+/* Default report function: print NAME (unless no_id_flag is set)
+   followed by the files of the null-terminated vector FLINKV on one
+   line.  With brace_notation_flag set, runs of adjacent files for which
+   common_prefix_suffix holds are folded into `dir/{a,b,c}.suffix'.  */
+
+void
+look_id (char const *name, struct file_link **flinkv)
+{
+  int in_braces = 0;
+
+  if (!no_id_flag)
+    printf ("%-14s ", name);
+
+  while (*flinkv)
+    {
+      struct file_link const *cur = *flinkv++;
+      struct file_link const *next = *flinkv;
+      int joins_next = (next && brace_notation_flag
+			&& common_prefix_suffix (cur, next));
+      char buf[PATH_MAX];
+
+      if (joins_next && in_braces)
+	printf (",%s", root_name (cur->fl_name));
+      else if (joins_next)
+	{
+	  /* First member of a group: emit the directory part (if it
+	     is not the current directory), then open the brace.  */
+	  struct file_link const *dir = cur->fl_parent;
+	  if (dir && dir != cw_dlink)
+	    {
+	      maybe_relative_path (buf, dir, cw_dlink);
+	      fputs (buf, stdout);
+	      putchar ('/');
+	    }
+	  printf ("{%s", root_name (cur->fl_name));
+	}
+      else if (in_braces)
+	/* Last member of a group: close the brace, add the suffix.  */
+	printf (",%s}%s", root_name (cur->fl_name), suff_name (cur->fl_name));
+      else
+	{
+	  maybe_relative_path (buf, cur, cw_dlink);
+	  fputs (buf, stdout);
+	}
+
+      in_braces = joins_next;
+      if (!joins_next && next)
+	putchar (' ');
+    }
+  putchar ('\n');
+}
+
+/* Report function for -G: print, as `FILE:LINE: text', every line of
+   the files in the null-terminated vector FLINKV that mentions NAME.
+   When `merging' is set and file_regexp yields a pattern, lines are
+   matched with that regular expression; otherwise NAME is matched as a
+   whole word by word_match.  A file that cannot be opened is reported
+   and skipped.  Lines longer than the buffer are read, and counted, in
+   several pieces.  */
+
+void
+grep_id (char const *name, struct file_link **flinkv)
+{
+  char line[BUFSIZ];
+  char const *pattern = 0;
+  regex_t compiled;
+  int line_number;
+
+  if (merging)
+    {
+      pattern = file_regexp (name, "[^a-zA-Z0-9_À-ÿ]_*", "[^a-zA-Z0-9_À-ÿ]");
+      if (pattern)
+	{
+	  int regcomp_errno = regcomp (&compiled, pattern,
+				       ignore_case_flag | REG_EXTENDED);
+	  if (regcomp_errno)
+	    {
+	      char buf[BUFSIZ];
+	      regerror (regcomp_errno, &compiled, buf, sizeof (buf));
+	      error (1, 0, "%s", buf);
+	    }
+	}
+    }
+
+  /* line[0] is a sentry: it gives both matchers a non-identifier
+     character to the left of the first real character of a line.  */
+  line[0] = ' ';
+  while (*flinkv)
+    {
+      FILE *gid_FILE;
+      char file_name[PATH_MAX];
+
+      maybe_relative_path (file_name, *flinkv++, cw_dlink);
+      gid_FILE = fopen (file_name, "r");
+      if (gid_FILE == 0)
+	{
+	  error (0, errno, "can't open `%s'", file_name);
+	  /* There is no stream to read from; go on to the next file.  */
+	  continue;
+	}
+
+      line_number = 0;
+      /* The text is stored from line[1] on, so one byte less than the
+	 whole buffer is available to fgets.  */
+      while (fgets (&line[1], sizeof (line) - 1, gid_FILE))
+	{
+	  line_number++;
+	  if (pattern)
+	    {
+	      int regexec_errno = regexec (&compiled, line, 0, 0, 0);
+	      if (regexec_errno == REG_ESPACE)
+		error (0, 0, "can't match regular-expression: memory exhausted");
+	      else if (regexec_errno)
+		continue;
+	    }
+	  else if (!word_match (name, line))
+	    continue;
+	  printf ("%s:%d: %s", file_name, line_number, &line[1]);
+	}
+      fclose (gid_FILE);
+    }
+
+  if (pattern)
+    regfree (&compiled);
+}
+
+/* Report function for -E: list NAME's files with look_id, ask the user
+   what to do and, unless declined, run an editor on the files while
+   this process waits for it.
+
+   Answers: `y' or RETURN edits all files; a digit N skips the first N
+   files; `/' or ^S reads a string and starts at the first file whose
+   name contains it; `n' (or end of input) returns; `q' exits.
+
+   Environment: EDITOR (default "vi"); EIDARG, a printf format with one
+   %s that receives the search pattern and is passed to the editor
+   before the file names; EIDLDEL and EIDRDEL, the word delimiters put
+   around the pattern.  The vi-style defaults apply when the editor's
+   base name is "vi".  */
+
+void
+edit_id (char const *name, struct file_link **flinkv)
+{
+  static char const *editor;
+  static char const *eid_arg;
+  static char const *eid_right_del;
+  static char const *eid_left_del;
+  char re_buffer[BUFSIZ];
+  char ed_arg_buffer[BUFSIZ];
+  char const *pattern;
+  int c;
+  int skip;
+
+  if (editor == 0)
+    {
+      editor = getenv ("EDITOR");
+      if (editor == 0)
+	editor = "vi";
+    }
+
+  if (eid_arg == 0)
+    {
+      int using_vi = strequ ("vi", basename (editor));
+
+      eid_arg = getenv ("EIDARG");
+      if (eid_arg == 0)
+	eid_arg = (using_vi ? "+1;/%s/" : "");
+
+      eid_left_del = getenv ("EIDLDEL");
+      if (eid_left_del == 0)
+	eid_left_del = (using_vi ? "\\<" : "");
+
+      eid_right_del = getenv ("EIDRDEL");
+      if (eid_right_del == 0)
+	eid_right_del = (using_vi ? "\\>" : "");
+    }
+
+  look_id (name, flinkv);
+  savetty ();
+  for (;;)
+    {
+      /* FIXME: i18n */
+      printf (_("Edit? [y1-9^S/nq] "));
+      fflush (stdout);
+      chartty ();
+      c = getchar ();
+      restoretty ();
+      if (c == EOF)
+	{
+	  /* No more input: treat it as `n' instead of asking forever.  */
+	  putchar ('\n');
+	  return;
+	}
+      c &= 0177;
+      switch (TOLOWER (c))
+	{
+	case '/':
+	case ('s' & 037):
+	  putchar ('/');
+	  skip = skip_to_flinkv (flinkv);
+	  if (skip < 0)
+	    continue;
+	  flinkv += skip;
+	  goto editit;
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	  putchar (c);
+	  skip = c - '0';
+	  break;
+	case 'y':
+	  putchar (c);
+	  skip = 0;
+	  break;
+	case '\n':
+	case '\r':
+	  putchar ('y');
+	  skip = 0;
+	  break;
+	case 'q':
+	  putchar (c);
+	  putchar ('\n');
+	  exit (0);
+	case 'n':
+	  putchar (c);
+	  putchar ('\n');
+	  return;
+	default:
+	  putchar (c);
+	  putchar ('\n');
+	  continue;
+	}
+
+      putchar ('\n');
+      if (skip > 0)
+	{
+	  /* Step over the first SKIP files without ever moving past the
+	     terminating null entry.  */
+	  struct file_link **target = flinkv;
+	  while (skip > 0 && *target)
+	    {
+	      target++;
+	      skip--;
+	    }
+	  if (*target == 0)
+	    continue;		/* nothing left to edit: ask again */
+	  flinkv = target;
+	}
+      break;
+    }
+editit:
+
+  if (merging)
+    pattern = file_regexp (name, eid_left_del, eid_right_del);
+  else
+    pattern = 0;
+  if (pattern == 0)
+    {
+      pattern = re_buffer;
+      sprintf (re_buffer, "%s%s%s", eid_left_del, name, eid_right_del);
+    }
+
+  switch (fork ())
+    {
+    case -1:
+      error (1, errno, _("can't fork"));
+      break;
+
+    case 0:
+      {
+	/* Child.  The vector is laid out as
+	   [editor] [search argument] file... NULL; the file names are
+	   stored from slot 2 on and the leading slots are filled in
+	   backwards afterwards.  */
+	char **argv_0 = MALLOC (char *, 3 + vector_cardinality (flinkv));
+	char **argv = argv_0 + 2;
+	while (*flinkv)
+	  {
+	    char buf[PATH_MAX];
+	    maybe_relative_path (buf, *flinkv++, cw_dlink);
+	    *argv++ = strdup (buf);
+	  }
+	*argv = 0;
+	argv = argv_0 + 2;
+	if (eid_arg && *eid_arg)
+	  {
+	    /* NOTE(review): the format comes from the environment and
+	       the expansion is not bounded by the buffer size.  */
+	    sprintf (ed_arg_buffer, eid_arg, pattern);
+	    *--argv = ed_arg_buffer;
+	  }
+	*(char const **) --argv = editor;
+	execvp (editor, argv);
+	error (0, errno, _("can't exec `%s'"), editor);
+	/* Do not fall through into the parent's branch and carry on as
+	   a second copy of lid.  */
+	_exit (127);
+      }
+
+    default:
+      {
+	/* Parent: ignore keyboard signals while the editor runs.  */
+	void (*oldint) __P((int)) = signal (SIGINT, SIG_IGN);
+	void (*oldquit) __P((int)) = signal (SIGQUIT, SIG_IGN);
+
+	while (wait (0) == -1 && errno == EINTR)
+	  ;
+
+	signal (SIGINT, oldint);
+	signal (SIGQUIT, oldquit);
+      }
+      break;
+    }
+}
+
+/* Return the number of entries that precede the terminating null
+   pointer in VECTOR, which is treated as an array of pointers.  */
+
+int
+vector_cardinality (void *vector)
+{
+  void **slot;
+  int n = 0;
+
+  for (slot = (void **) vector; *slot != 0; slot++)
+    n++;
+  return n;
+}
+
+/* Read one line from standard input and return the index of the first
+   file in the null-terminated vector FLINKV whose name (as produced by
+   maybe_relative_path) contains that text, compared without regard to
+   case.  Return -1 at end of input, for an empty line, or when no file
+   name matches.  */
+
+int
+skip_to_flinkv (struct file_link **flinkv)
+{
+  char pattern[BUFSIZ];
+  char *newline;
+  unsigned int count;
+
+  /* fgets, unlike gets, cannot write past the end of PATTERN.  */
+  if (fgets (pattern, sizeof (pattern), stdin) == 0)
+    return -1;
+  newline = strchr (pattern, '\n');
+  if (newline)
+    *newline = '\0';
+  if (*pattern == '\0')
+    return -1;
+
+  for (count = 0; *flinkv; count++, flinkv++)
+    {
+      char buf[PATH_MAX];
+      maybe_relative_path (buf, *flinkv, cw_dlink);
+      if (strcpos (buf, pattern))
+	return count;
+    }
+  return -1;
+}
+
+/* Look ARG up as a literal token.  If query_token finds it and its
+   occurrence count passes frequency_wanted, report it through
+   REPORT_FUNCTION and return 1; otherwise return 0.  */
+
+int
+query_plain (char const *arg, report_func_t report_function)
+{
+  int found = 0;
+
+  if (query_token (arg) != 0)
+    {
+      gets_past_00 (hits_buf_1, idh.idh_FILE);
+      assert (*hits_buf_1);
+      if (frequency_wanted (hits_buf_1))
+	{
+	  struct file_link **flinkv
+	    = tree8_to_flinkv (tok_hits_addr (hits_buf_1));
+	  (*report_function) (hits_buf_1, flinkv);
+	  found = 1;
+	}
+    }
+  return found;
+}
+
+/* Handle a pattern of the form `^PREFIX': report the tokens that start
+   with PREFIX and pass frequency_wanted, reading forward from the
+   position found by query_token until a token no longer starts with
+   PREFIX.  When `merging' is set the hits are combined and reported
+   once under ARG itself.  Returns the number of tokens matched.  */
+
+int
+query_anchor (char const *arg, report_func_t report_function)
+{
+  char const *prefix = arg + 1;	/* ARG without its leading `^' */
+  unsigned int prefix_length;
+  int matches = 0;
+
+  if (query_token (prefix) == 0)
+    return 0;
+
+  prefix_length = strlen (prefix);
+  if (merging)
+    memset (bits_vec, 0, bits_vec_size);
+  while (gets_past_00 (hits_buf_1, idh.idh_FILE) > 0)
+    {
+      assert (*hits_buf_1);
+      if (!frequency_wanted (hits_buf_1))
+	continue;
+      if (!strnequ (prefix, hits_buf_1, prefix_length))
+	break;
+      if (!merging)
+	(*report_function) (hits_buf_1, tree8_to_flinkv (tok_hits_addr (hits_buf_1)));
+      else
+	tree8_to_bits (bits_vec, tok_hits_addr (hits_buf_1));
+      matches++;
+    }
+  if (merging && matches)
+    (*report_function) (arg, bits_to_flinkv (bits_vec));
+
+  return matches;
+}
+
+/* Report every token that matches the extended regular expression
+   PATTERN (case-insensitively if ignore_case_flag is set) and passes
+   frequency_wanted.  The tokens are read sequentially from
+   idh.idh_tokens_offset.  When `merging' is set the hits are combined
+   and reported once under PATTERN.  Exits with a diagnostic if PATTERN
+   does not compile.  Returns the number of tokens matched.  */
+
+int
+query_regexp (char const *pattern, report_func_t report_function)
+{
+  int count;
+  regex_t compiled;
+  int regcomp_errno;
+
+  regcomp_errno = regcomp (&compiled, pattern,
+			   ignore_case_flag | REG_EXTENDED);
+  if (regcomp_errno)
+    {
+      char buf[BUFSIZ];
+      regerror (regcomp_errno, &compiled, buf, sizeof (buf));
+      error (1, 0, "%s", buf);
+    }
+  fseek (idh.idh_FILE, idh.idh_tokens_offset, SEEK_SET);
+
+  count = 0;
+  if (merging)
+    memset (bits_vec, 0, bits_vec_size);
+  while (gets_past_00 (hits_buf_1, idh.idh_FILE) > 0)
+    {
+      int regexec_errno;
+      assert (*hits_buf_1);
+      if (!frequency_wanted (hits_buf_1))
+	continue;
+      regexec_errno = regexec (&compiled, hits_buf_1, 0, 0, 0);
+      if (regexec_errno == REG_ESPACE)
+	error (0, 0, _("can't match regular-expression: memory exhausted"));
+      else if (regexec_errno)
+	continue;
+      if (merging)
+	tree8_to_bits (bits_vec, tok_hits_addr (hits_buf_1));
+      else
+	(*report_function) (hits_buf_1, tree8_to_flinkv (tok_hits_addr (hits_buf_1)));
+      count++;
+    }
+  /* The compiled pattern is not needed for reporting; release it so
+     that one is not leaked per query.  */
+  regfree (&compiled);
+  if (merging && count)
+    (*report_function) (pattern, bits_to_flinkv (bits_vec));
+
+  return count;
+}
+
+/* Report every numeric token whose value equals that of ARG and whose
+   radix is acceptable: radix_flag when it is non-zero, otherwise every
+   radix for a non-zero value and get_radix (ARG) for zero.  Tokens are
+   read sequentially from idh.idh_tokens_offset; the scan stops at the
+   first token that does not start with a digit after one that did.
+   When `merging' is set the hits are combined and reported once under
+   ARG.  Returns the number of tokens matched.  */
+
+int
+query_number (char const *arg, report_func_t report_function)
+{
+  int matches = 0;
+  int seen_number = 0;
+  int value = stoi (arg);
+  int radix = (value != 0) ? radix_all : get_radix (arg);
+
+  fseek (idh.idh_FILE, idh.idh_tokens_offset, SEEK_SET);
+
+  if (merging)
+    memset (bits_vec, 0, bits_vec_size);
+  while (gets_past_00 (hits_buf_1, idh.idh_FILE) > 0)
+    {
+      int is_number = isdigit (*hits_buf_1);
+
+      if (seen_number && !is_number)
+	break;
+      if (is_number)
+	seen_number = 1;
+
+      if (((radix_flag ? radix_flag : radix) & get_radix (hits_buf_1)) == 0)
+	continue;
+      if (stoi (hits_buf_1) != value)
+	continue;
+
+      if (!merging)
+	(*report_function) (hits_buf_1, tree8_to_flinkv (tok_hits_addr (hits_buf_1)));
+      else
+	tree8_to_bits (bits_vec, tok_hits_addr (hits_buf_1));
+      matches++;
+    }
+  if (merging && matches)
+    (*report_function) (arg, bits_to_flinkv (bits_vec));
+
+  return matches;
+}
+
+/* Find identifiers that are non-unique within their first LIMIT
+   characters.  Only tokens flagged TOK_NAME are considered; they are
+   read sequentially from idh.idh_tokens_offset and each is compared
+   with the previous one, so equal prefixes have to be adjacent.
+
+   Without `merging', every member of a group is reported by itself.
+   With `merging', the hits of a group are combined and reported once
+   under the name `^PREFIX'.  LIMIT must be greater than 1 (otherwise
+   usage is called).  Returns the number of identifiers found.  */
+
+int
+query_non_unique (unsigned int limit, report_func_t report_function)
+{
+  char *old = hits_buf_1;
+  char *new = hits_buf_2;
+  int consecutive = 0;
+  int count = 0;
+  char name[1024];
+
+  if (limit <= 1)
+    usage ();
+  /* NAME holds `^', up to LIMIT characters and a terminating null.  */
+  assert (limit <= sizeof (name) - 2);
+
+  name[0] = '^';
+  *new = '\0';
+  /* tree8_to_bits only ever sets bits, so the vector has to start out
+     empty and be emptied again after each group is reported.  */
+  if (merging)
+    memset (bits_vec, 0, bits_vec_size);
+  fseek (idh.idh_FILE, idh.idh_tokens_offset, SEEK_SET);
+  while (gets_past_00 (old, idh.idh_FILE) > 0)
+    {
+      char *tmp;
+      if (!(tok_flags (old) & TOK_NAME))
+	continue;
+      /* Swap the buffers: NEW is the token just read, OLD the
+	 previous name.  */
+      tmp = old;
+      old = new;
+      new = tmp;
+      if (!strnequ (new, old, limit))
+	{
+	  if (consecutive && merging)
+	    {
+	      /* strncpy does not terminate the copy when OLD has LIMIT
+		 or more characters.  */
+	      strncpy (&name[1], old, limit);
+	      name[1 + limit] = '\0';
+	      (*report_function) (name, bits_to_flinkv (bits_vec));
+	      memset (bits_vec, 0, bits_vec_size);
+	    }
+	  consecutive = 0;
+	  continue;
+	}
+      if (!consecutive++)
+	{
+	  /* First repetition: the previous name belongs to the group
+	     as well.  */
+	  if (merging)
+	    tree8_to_bits (bits_vec, tok_hits_addr (old));
+	  else
+	    (*report_function) (old, tree8_to_flinkv (tok_hits_addr (old)));
+	  count++;
+	}
+      if (merging)
+	tree8_to_bits (bits_vec, tok_hits_addr (new));
+      else
+	(*report_function) (new, tree8_to_flinkv (tok_hits_addr (new)));
+      count++;
+    }
+  if (consecutive && merging)
+    {
+      strncpy (&name[1], new, limit);
+      name[1 + limit] = '\0';
+      (*report_function) (name, bits_to_flinkv (bits_vec));
+    }
+  return count;
+}
+
+/* Report every token that contains ARG as a substring, compared
+   without regard to case (strcpos), and passes frequency_wanted.  The
+   tokens are read sequentially from idh.idh_tokens_offset.  When
+   `merging' is set the hits are combined and reported once under ARG.
+   Returns the number of tokens matched.  */
+
+int
+query_apropos (char const *arg, report_func_t report_function)
+{
+  int matches = 0;
+
+  fseek (idh.idh_FILE, idh.idh_tokens_offset, SEEK_SET);
+
+  if (merging)
+    memset (bits_vec, 0, bits_vec_size);
+  while (gets_past_00 (hits_buf_1, idh.idh_FILE) > 0)
+    {
+      assert (*hits_buf_1);
+      if (frequency_wanted (hits_buf_1) && strcpos (hits_buf_1, arg) != 0)
+	{
+	  if (!merging)
+	    (*report_function) (hits_buf_1, tree8_to_flinkv (tok_hits_addr (hits_buf_1)));
+	  else
+	    tree8_to_bits (bits_vec, tok_hits_addr (hits_buf_1));
+	  matches++;
+	}
+    }
+  if (merging && matches)
+    (*report_function) (arg, bits_to_flinkv (bits_vec));
+
+  return matches;
+}
+
+/* Parse the argument of -F/--frequency into frequency_low and
+   frequency_high.  Accepted forms:
+
+     N      exactly N occurrences
+     N..M   between N and M occurrences
+     N..    at least N occurrences (upper bound USHRT_MAX)
+     ..M    at most M occurrences (lower bound 1)
+
+   A missing N defaults to 1.  If the bounds come out reversed they are
+   swapped.  */
+
+void
+parse_frequency_arg (char const *arg)
+{
+  int is_range;
+
+  if (isdigit ((unsigned char) *arg))
+    {
+      frequency_low = atoi (arg);
+      while (isdigit ((unsigned char) *arg))
+	arg++;
+    }
+  else
+    frequency_low = 1;
+
+  /* Remember whether `..' was present and step over it in every form,
+     including a leading `..'.  */
+  is_range = strnequ (arg, "..", 2);
+  if (is_range)
+    arg += 2;
+
+  if (isdigit ((unsigned char) *arg))
+    frequency_high = atoi (arg);
+  else if (is_range)
+    frequency_high = USHRT_MAX;
+  else
+    frequency_high = frequency_low;
+
+  if (frequency_low > frequency_high)
+    {
+      unsigned int tmp = frequency_low;
+      frequency_low = frequency_high;
+      frequency_high = tmp;
+    }
+}
+
+/* Return 1 if the occurrence count of token record TOK (tok_count)
+   lies within [frequency_low, frequency_high], otherwise 0.  */
+
+int
+frequency_wanted (char const *tok)
+{
+  unsigned int occurrences = tok_count (tok);
+
+  if (occurrences < frequency_low)
+    return 0;
+  if (occurrences > frequency_high)
+    return 0;
+  return 1;
+}
+
+/* If string S2 occurs in S1, return a pointer to the first match;
+   otherwise return 0.  Differences in alphabetic case are ignored.
+   An empty S2 matches at the start of S1.  */
+
+char const *
+strcpos (char const *s1, char const *s2)
+{
+  size_t length_1 = strlen (s1);
+  size_t length_2 = strlen (s2);
+  char const *s1last;
+
+  if (length_2 == 0)
+    return s1;
+  /* Checked first because the subtraction below is unsigned and would
+     wrap around.  */
+  if (length_2 > length_1)
+    return 0;
+
+  for (s1last = s1 + (length_1 - length_2); s1 <= s1last; s1++)
+    {
+      char const *s1p = s1;
+      char const *s2p = s2;
+
+      while (TOLOWER (*s1p) == TOLOWER (*s2p))
+	{
+	  s1p++;
+	  if (*++s2p == '\0')
+	    return s1;
+	}
+    }
+  return 0;
+}
+
+/* Convert the regular expression that we used to locate identifiers
+   in the id database into one suitable for locating the identifiers
+   in files.
+
+   For a merged numeric query the result matches the value of NAME0
+   written in decimal with optional `0', `x'/`X' and `l'/`L'
+   decoration.  Otherwise a leading `^' is replaced by LEFT_DELIMIT and
+   a trailing `$' by RIGHT_DELIMIT; where the anchor is absent the
+   corresponding delimiter is left out.  Returns 0 when NAME0 is
+   neither a regular expression nor anchored with `^'.
+
+   The result is built in a static buffer that the next call
+   overwrites.  The trailing `$' is no longer overwritten in the
+   caller's string (which is declared const); note that is_regexp may
+   still modify it.  */
+
+char const *
+file_regexp (char const *name0, char const *left_delimit, char const *right_delimit)
+{
+  static char pat_buf[BUFSIZ];
+  size_t length;
+
+  if (query_func == query_number && merging)
+    {
+      sprintf (pat_buf, "%s0*[Xx]*0*%d[Ll]*%s", left_delimit, stoi (name0), right_delimit);
+      return pat_buf;
+    }
+
+  if (!is_regexp ((char *) name0) && name0[0] != '^')
+    return 0;
+
+  if (name0[0] == '^')
+    name0++;
+  else
+    left_delimit = "";
+
+  length = strlen (name0);
+  if (length > 0 && name0[length - 1] == '$')
+    length--;			/* leave the `$' out of the copy */
+  else
+    right_delimit = "";
+
+  sprintf (pat_buf, "%s%.*s%s", left_delimit, (int) length, name0, right_delimit);
+  return pat_buf;
+}
+
+/* Binary search for TOKEN_0 in the token section of the ID file, i.e.
+   between idh.idh_tokens_offset and idh.idh_end_offset.  On success the
+   stream idh.idh_FILE is positioned at the start of the matching entry
+   and that offset is returned; 0 is returned when there is no match.
+   When query_func is query_anchor, an entry of which TOKEN_0 is a
+   proper prefix also counts, and the offset of the last such entry
+   probed is returned.
+   NOTE(review): skip_past_00 presumably advances the stream past the
+   end of the current entry and returns the number of bytes skipped;
+   the `- 2' / `+ 2' adjustments presumably account for a two-byte
+   terminator -- confirm against its definition.  */
+off_t
+query_token (char const *token_0)
+{
+ off_t offset = 0;
+ off_t start = idh.idh_tokens_offset - 2;
+ off_t end = idh.idh_end_offset;
+ off_t anchor_offset = 0;
+ int order = -1;
+
+ while (start < end)
+ {
+ int c;
+ int incr = 1;
+ char const *token;
+
+ /* Probe the middle of the range and move on to the start of the
+ next entry; if that lies beyond the range, fall back to the first
+ entry of the range. */
+ offset = start + (end - start) / 2;
+ fseek (idh.idh_FILE, offset, SEEK_SET);
+ offset += skip_past_00 (idh.idh_FILE);
+ if (offset >= end)
+ {
+ offset = start + 2;
+ fseek (idh.idh_FILE, offset, SEEK_SET);
+ }
+
+ /* compare the token names */
+ token = token_0;
+ while (*token == (c = getc (idh.idh_FILE)) && *token && c)
+ {
+ token++;
+ incr++;
+ }
+ /* TOKEN_0 is exhausted but the entry's name goes on: a prefix match. */
+ if (c && !*token && query_func == query_anchor)
+ anchor_offset = offset;
+ order = *token - c;
+
+ /* INCR counts the bytes read from this entry so far. */
+ if (order < 0)
+ end = offset - 2;
+ else if (order > 0)
+ start = offset + incr + skip_past_00 (idh.idh_FILE) - 2;
+ else
+ break;
+ }
+
+ /* No exact match: settle for a remembered prefix match, if any. */
+ if (order)
+ {
+ if (anchor_offset)
+ offset = anchor_offset;
+ else
+ return 0;
+ }
+ fseek (idh.idh_FILE, offset, SEEK_SET);
+ return offset;
+}
+
+/* Are there any regexp meta-characters in NAME?  A leading `^' and
+   characters quoted with a backslash do not count, except that `\<'
+   and `\>' do.  Returns 1 if so.
+
+   Otherwise returns 0 and, if NAME contained backslashes, removes them
+   in place so that NAME becomes the literal string it stands for (a
+   backslash at the very end is dropped).  */
+
+int
+is_regexp (char *name)
+{
+  char *scan = name;
+  int backslash = 0;
+
+  if (*scan == '^')
+    scan++;
+  while (*scan)
+    {
+      if (*scan == '\\')
+	{
+	  /* Compared directly: strchr would also "find" the string
+	     terminator.  */
+	  if (scan[1] == '<' || scan[1] == '>')
+	    return 1;
+	  backslash++;
+	  if (scan[1] == '\0')
+	    break;		/* nothing follows the backslash */
+	  scan++;		/* the quoted character is literal */
+	}
+      else if (strchr ("[]{}().*+^$", *scan))
+	return 1;
+      scan++;
+    }
+
+  if (backslash)
+    {
+      /* Start again from the beginning of NAME (the scan above has
+	 left SCAN at its end) and squeeze the backslashes out.  */
+      char *out = name;
+
+      for (scan = name; *scan; scan++)
+	{
+	  if (*scan == '\\' && *++scan == '\0')
+	    break;
+	  *out++ = *scan;
+	}
+      *out = '\0';
+    }
+  return 0;
+}
+
+/* file_name_wildcard implements a simple pattern matcher that
+   emulates the shell wild card capability.
+
+   * - any string of chars
+   ? - any char
+   [] - any char in set (if first char is !, any not in set)
+   \ - literal match next char
+
+   Returns 1 if the whole of FN matches PATTERN, otherwise 0.  A set
+   that is not closed by `]' matches nothing.  */
+
+int
+file_name_wildcard (char const *pattern, char const *fn)
+{
+  int c;
+
+  /* Characters are read as unsigned char throughout so that they can
+     index SET safely.  */
+  while ((c = (unsigned char) *pattern++) != '\0')
+    {
+      if (c == '*')
+	{
+	  if (*pattern == '\0')
+	    return 1;		/* match anything at end */
+	  while (*fn != '\0')
+	    {
+	      if (file_name_wildcard (pattern, fn))
+		return 1;
+	      ++fn;
+	    }
+	  return 0;
+	}
+      else if (c == '?')
+	{
+	  if (*fn++ == '\0')
+	    return 0;
+	}
+      else if (c == '[')
+	{
+	  char set[256];
+	  int revset = 0;
+	  int i;
+
+	  memset (set, 0, sizeof (set));
+	  c = (unsigned char) *pattern++;
+	  if (c == '!')
+	    {
+	      revset = 1;
+	      c = (unsigned char) *pattern++;
+	    }
+	  while (c != ']')
+	    {
+	      if (c == '\\')
+		c = (unsigned char) *pattern++;
+	      if (c == '\0')
+		return 0;	/* pattern ended inside the set */
+	      set[c] = 1;
+	      if (*pattern == '-' && pattern[1] != ']' && pattern[1] != '\0')
+		{
+		  int last = (unsigned char) pattern[1];
+		  while (++c <= last)
+		    set[c] = 1;
+		  pattern += 2;
+		}
+	      c = (unsigned char) *pattern++;
+	    }
+	  /* Slot 0 is never inverted, so the end of FN matches no set.  */
+	  if (revset)
+	    for (i = 1; i < 256; ++i)
+	      set[i] = !set[i];
+	  if (!set[(unsigned char) *fn++])
+	    return 0;
+	}
+      else
+	{
+	  if (c == '\\')
+	    c = (unsigned char) *pattern++;
+	  if (c != (unsigned char) *fn++)
+	    return 0;
+	}
+    }
+  return (*fn == '\0');
+}
+
+/* Does NAME0 occur in LINE delimited by non-alphanumerics?  Only the
+   text up to the first newline or the end of the string is searched.
+   Returns 1 if so, otherwise 0; an empty NAME0 never matches.
+
+   The character in front of a candidate is examined, so when NAME0
+   could match at the very start of LINE the caller has to provide a
+   readable character before it (grep_id stores a blank sentry in front
+   of the text it reads).  */
+
+int
+word_match (char const *name0, char const *line)
+{
+  size_t length = strlen (name0);
+
+  if (length == 0)
+    return 0;
+
+  for (; *line != '\0' && *line != '\n'; line++)
+    {
+      /* The comparison is bounded by LENGTH, so it cannot run past the
+	 terminators when NAME0 ends exactly at the end of LINE.  */
+      if (*line == *name0
+	  && !IS_ALNUM (line[-1])
+	  && strncmp (line, name0, length) == 0
+	  && !IS_ALNUM (line[length]))
+	return 1;
+    }
+  return 0;
+}
+
+/* Classify the ascii number NAME by the C lexical rules and return its
+   radix as a bit map of `enum radix' values, since more than one radix
+   may apply: a number made only of zeros is valid both as octal and as
+   decimal.  Returns 0 if NAME does not start with a digit.  */
+
+int
+get_radix (char const *name)
+{
+  char const *cursor = name;
+
+  if (!isdigit (*cursor))
+    return 0;
+  if (*cursor != '0')
+    return radix_dec;
+
+  cursor++;
+  if (*cursor == 'x' || *cursor == 'X')
+    return radix_hex;
+
+  while (*cursor == '0')
+    cursor++;
+  if (*cursor)
+    return radix_oct;
+  return radix_oct | radix_dec;
+}
+
+/* Convert the ascii number NAME to an integer, using get_radix to pick
+   the conversion.  A number made only of zeros yields 0.  Returns -1
+   if NAME is not a number (the converters also return -1 for a
+   malformed one).  */
+
+int
+stoi (char const *name)
+{
+  int radix = get_radix (name);
+
+  if (radix == radix_dec)
+    return dtoi (name);
+  if (radix == radix_oct)
+    return otoi (name + 1);	/* past the leading `0' */
+  if (radix == radix_hex)
+    return xtoi (name + 2);	/* past the leading `0x' */
+  if (radix == (radix_dec | radix_oct))
+    return 0;
+  return -1;
+}
+
+/* Convert the ascii octal number NAME to an integer.  A single
+   trailing `l' or `L' is accepted; any other trailing character
+   makes the result -1.  */
+
+int
+otoi (char const *name)
+{
+  int value = 0;
+
+  for (; *name >= '0' && *name <= '7'; name++)
+    value = value * 010 + (*name - '0');
+  if (*name == 'l' || *name == 'L')
+    name++;
+  if (*name != '\0')
+    return -1;
+  return value;
+}
+
+/* Convert the ascii decimal number NAME to an integer.  A single
+   trailing `l' or `L' is accepted; any other trailing character
+   makes the result -1.  An empty string yields 0.  */
+
+int
+dtoi (char const *name)
+{
+  int n = 0;
+
+  /* Cast before calling isdigit: a negative plain char is not a
+     valid argument for the <ctype.h> functions.  */
+  while (isdigit ((unsigned char) *name))
+    {
+      n *= 10;
+      n += *name++ - '0';
+    }
+  if (*name == 'l' || *name == 'L')
+    name++;
+  return (*name ? -1 : n);
+}
+
+/* Convert the ascii hex number NAME (without any "0x" prefix) to an
+   integer.  A single trailing `l' or `L' is accepted; any other
+   trailing character makes the result -1.  An empty string yields 0.  */
+
+int
+xtoi (char const *name)
+{
+  int n = 0;
+
+  /* The <ctype.h> functions need an unsigned char value; a plain
+     char may be negative.  */
+  while (isxdigit ((unsigned char) *name))
+    {
+      unsigned char c = (unsigned char) *name++;
+
+      n *= 0x10;
+      if (isdigit (c))
+        n += c - '0';
+      else if (islower (c))
+        n += 0xa + c - 'a';
+      else
+        n += 0xA + c - 'A';
+    }
+  if (*name == 'l' || *name == 'L')
+    name++;
+  return (*name ? -1 : n);
+}
+
+/* Expand the tree8-encoded hit set HITS_TREE8 into the bit vector
+   BV_0 by OR-ing bits in, and return BV_0.  The expansion depth is
+   taken from the global tree8_levels.  */
+
+unsigned char *
+tree8_to_bits (unsigned char *bv_0, unsigned char const *hits_tree8)
+{
+  /* The helper advances its output cursor, so give it a copy and
+     keep BV_0 for the return value.  */
+  unsigned char *cursor = bv_0;
+
+  tree8_to_bits_1 (&cursor, &hits_tree8, tree8_levels);
+  return bv_0;
+}
+
+/* Recursive worker for tree8_to_bits.  Consume one byte from
+   *HITS_TREE8.  At the bottom level (LEVEL == 1) that byte is OR-ed
+   into **BV and *BV advances by one.  Otherwise each set bit
+   descends one level, and each clear bit skips the span of output
+   bytes that the missing subtree would have covered.  */
+
+void
+tree8_to_bits_1 (unsigned char **bv, unsigned char const **hits_tree8, int level)
+{
+  int mask = *(*hits_tree8)++;
+  int below = level - 1;
+  int skip;
+  int kid;
+
+  if (below == 0)
+    {
+      *(*bv)++ |= mask;
+      return;
+    }
+
+  /* Output bytes covered by one subtree: 8^(below - 1).  */
+  skip = 1 << ((below - 1) * 3);
+  for (kid = 0; kid < 8; kid++)
+    {
+      if (mask & (1 << kid))
+        tree8_to_bits_1 (bv, hits_tree8, below);
+      else
+        *bv += skip;
+    }
+}
+
+/* Convert the bit vector BV into a NULL-terminated vector of file
+ links: bit N set selects members_0[N].  Scanning stops once
+ idh.idh_files members have been considered.
+
+ The result lives in a static buffer that is allocated on the first
+ call and reused afterwards, so each call overwrites the previous
+ result.  The returned pointer is offset by reserved_flinkv_slots
+ entries from the start of that buffer.
+ NOTE(review): presumably the reserved leading slots let callers
+ prepend entries in place -- confirm against the callers. */
+struct file_link **
+bits_to_flinkv (unsigned char const *bv)
+{
+ int const reserved_flinkv_slots = 3;
+ static struct file_link **flinkv_0;
+ struct file_link **flinkv;
+ struct file_link **members = members_0;
+ struct file_link **end = &members_0[idh.idh_files];
+
+ /* Allocate once; sized for every file plus the reserved slots. */
+ if (flinkv_0 == 0)
+ flinkv_0 = MALLOC (struct file_link *, idh.idh_files + reserved_flinkv_slots + 2);
+ flinkv = &flinkv_0[reserved_flinkv_slots];
+
+ for (;;)
+ {
+ int hits;
+ int bit;
+
+ /* Skip whole bytes with no bits set, eight members at a time. */
+ while (*bv == 0)
+ {
+ bv++;
+ members += 8;
+ if (members >= end)
+ goto out;
+ }
+ hits = *bv++;
+ /* Walk the eight bits of this byte, low bit first. */
+ for (bit = 1; bit & 0xff; bit <<= 1)
+ {
+ if (bit & hits)
+ *flinkv++ = *members;
+ if (++members >= end)
+ goto out;
+ }
+ }
+out:
+ /* Terminate the vector. */
+ *flinkv = 0;
+ return &flinkv_0[reserved_flinkv_slots];
+}
+
+/* Convert the tree8-encoded hit set HITS_TREE8 into a
+   NULL-terminated vector of file links, using the global scratch bit
+   vector bits_vec (cleared first) as the intermediate form.  */
+
+struct file_link **
+tree8_to_flinkv (unsigned char const *hits_tree8)
+{
+  unsigned char *bits;
+
+  memset (bits_vec, 0, bits_vec_size);
+  bits = tree8_to_bits (bits_vec, hits_tree8);
+  return bits_to_flinkv (bits);
+}
+
+/* Terminal mode handling.  Depending on which header configure
+ found, pick one of three tty interfaces (termios, termio or sgtty),
+ declare three saved mode structures of the matching type, and define
+ GET_TTY_MODES / SET_TTY_MODES to read and write the modes of file
+ descriptor 0.
+
+ savemode holds the modes found at startup, charmode the
+ no-echo single-character variant, and linemode the echoing
+ line-at-a-time variant (see savetty). */
+#if HAVE_TERMIOS_H
+
+#include <termios.h>
+struct termios linemode;
+struct termios charmode;
+struct termios savemode;
+#define GET_TTY_MODES(modes) tcgetattr (0, (modes))
+#define SET_TTY_MODES(modes) tcsetattr(0, TCSANOW, (modes))
+
+#else /* not HAVE_TERMIOS_H */
+
+# if HAVE_SYS_IOCTL_H
+# include <sys/ioctl.h>
+# endif
+
+# if HAVE_TERMIO_H
+
+# include <termio.h>
+struct termio linemode;
+struct termio charmode;
+struct termio savemode;
+#define GET_TTY_MODES(modes) ioctl (0, TCGETA, (modes))
+#define SET_TTY_MODES(modes) ioctl (0, TCSETA, (modes))
+
+# else /* not HAVE_TERMIO_H */
+
+# if HAVE_SGTTY_H
+
+# include <sgtty.h>
+struct sgttyb linemode;
+struct sgttyb charmode;
+struct sgttyb savemode;
+
+# ifdef TIOCGETP
+#define GET_TTY_MODES(modes) ioctl (0, TIOCGETP, (modes))
+#define SET_TTY_MODES(modes) ioctl (0, TIOCSETP, (modes))
+# else
+#define GET_TTY_MODES(modes) gtty (0, (modes))
+#define SET_TTY_MODES(modes) stty (0, (modes))
+# endif
+
+/* sgtty flavor of savetty: record the current modes of descriptor 0
+ in savemode and derive charmode (RAW, no ECHO) and linemode (ECHO,
+ not RAW) from it.  Nothing is applied to the terminal here. */
+void
+savetty (void)
+{
+ /* Same request that GET_TTY_MODES expands to above. */
+# ifdef TIOCGETP
+ ioctl(0, TIOCGETP, &savemode);
+# else
+ gtty(0, &savemode);
+# endif
+ charmode = linemode = savemode;
+
+ charmode.sg_flags &= ~ECHO;
+ charmode.sg_flags |= RAW;
+
+ linemode.sg_flags |= ECHO;
+ linemode.sg_flags &= ~RAW;
+}
+
+# endif /* HAVE_SGTTY_H */
+# endif /* not HAVE_TERMIO_H */
+#endif /* not HAVE_TERMIOS_H */
+
+#if HAVE_TERMIOS_H || HAVE_TERMIO_H
+
+/* termios/termio flavor of savetty: read the current modes of
+   descriptor 0 into savemode, then derive the two working copies.
+   linemode is canonical input with echo and signals; charmode is
+   unechoed, non-canonical input that returns after one character.
+   Nothing is applied to the terminal here.  */
+void
+savetty (void)
+{
+  GET_TTY_MODES (&savemode);
+  linemode = savemode;
+  charmode = savemode;
+
+  /* Line-at-a-time: echo on, canonical, signals on, ^D as EOF.  */
+  linemode.c_lflag |= (ECHO | ICANON | ISIG);
+  linemode.c_cc[VEOF] = 'd' & 037;
+  linemode.c_cc[VEOL] = 0377;
+
+  /* Character-at-a-time: return after one byte, no timeout.  */
+  charmode.c_lflag &= ~(ECHO | ICANON | ISIG);
+  charmode.c_cc[VMIN] = 1;
+  charmode.c_cc[VTIME] = 0;
+}
+
+#endif
+
+#if HAVE_TERMIOS_H || HAVE_TERMIO_H || HAVE_SGTTY_H
+
+/* Put descriptor 0 back into the modes recorded in savemode. */
+void
+restoretty (void)
+{
+ SET_TTY_MODES (&savemode);
+}
+
+/* Switch descriptor 0 to the modes stored in linemode. */
+void
+linetty (void)
+{
+ SET_TTY_MODES (&linemode);
+}
+
+/* Switch descriptor 0 to the modes stored in charmode. */
+void
+chartty (void)
+{
+ SET_TTY_MODES (&charmode);
+}
+
+#endif
diff --git a/src/mkid.c b/src/mkid.c
new file mode 100644
index 0000000..ebdb2fc
--- /dev/null
+++ b/src/mkid.c
@@ -0,0 +1,794 @@
+/* mkid.c -- build an identifier database
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <limits.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <config.h>
+#include "system.h"
+#include "pathmax.h"
+#include "strxtra.h"
+#include "alloc.h"
+#include "idfile.h"
+#include "token.h"
+#include "bitops.h"
+#include "misc.h"
+#include "filenames.h"
+#include "hash.h"
+#include "scanners.h"
+#include "error.h"
+
+/* One node of the 8-way summary tree built while scanning files.
+
+ sum_tokens       vector of tokens recorded at this node; grown by
+                  add_token_to_summary
+ sum_hits         per-token hit bytes built by summarize (zero
+                  terminated); write_hits advances this pointer as it
+                  consumes them; NULL until the node is summarized
+ sum_parent       enclosing node, NULL for the root
+ sum_u            up to eight children, see the comments below
+ sum_tokens_size  allocated capacity of sum_tokens, in entries
+ sum_hits_count   number of entries used in sum_tokens
+ sum_free_index   next unused slot in sum_kids
+ sum_level        0 for a leaf, one more for each level above */
+struct summary
+{
+ struct token **sum_tokens;
+ unsigned char const *sum_hits;
+ struct summary *sum_parent;
+ union {
+ struct summary *u_kids[8]; /* when sum_level > 0 */
+#define sum_kids sum_u.u_kids
+ struct member_file *u_files[8]; /* when sum_level == 0 */
+#define sum_files sum_u.u_files
+ } sum_u;
+ unsigned long sum_tokens_size;
+ unsigned long sum_hits_count;
+ int sum_free_index;
+ int sum_level;
+};
+
+void usage __P((void));
+static void help_me __P((void));
+int main __P((int argc, char **argv));
+void assert_writeable __P((char const *file_name));
+void scan_files __P((struct idhead *idhp));
+void scan_member_file __P((struct member_file const *member));
+void scan_member_file_1 __P((get_token_func_t get_token, void const *args, FILE *source_FILE));
+void report_statistics __P((void));
+void write_id_file __P((struct idhead *idhp));
+unsigned long token_hash_1 __P((void const *key));
+unsigned long token_hash_2 __P((void const *key));
+int token_hash_cmp __P((void const *x, void const *y));
+int token_qsort_cmp __P((void const *x, void const *y));
+void bump_current_hits_signature __P((void));
+void init_hits_signature __P((int i));
+void free_summary_tokens __P((void));
+void summarize __P((void));
+void init_summary __P((void));
+struct summary *make_sibling_summary __P((struct summary *summary));
+int count_vec_size __P((struct summary *summary, unsigned char const *tail_hits));
+int count_buf_size __P((struct summary *summary, unsigned char const *tail_hits));
+void assert_hits __P((struct summary* summary));
+void write_hits __P((FILE *fp, struct summary *summary, unsigned char const *tail_hits));
+void sign_token __P((struct token *token));
+void add_token_to_summary __P((struct summary *summary, struct token *token));
+
+/* Every distinct token seen so far, keyed by name (see
+ token_hash_1, token_hash_2 and token_hash_cmp). */
+struct hash_table token_table;
+
+/* Miscellaneous statistics, printed by report_statistics.  The
+ per-kind token counts and `occurrences' are accumulated in
+ write_id_file; input_chars in scan_member_file. */
+size_t input_chars;
+size_t name_tokens;
+size_t number_tokens;
+size_t string_tokens;
+size_t literal_tokens;
+size_t comment_tokens;
+size_t occurrences;
+size_t hits_length = 0;
+size_t tokens_length = 0;
+size_t output_length = 0;
+
+int verbose_flag = 0; /* set by -v / --verbose */
+int statistics_flag = 0; /* set by -s / --statistics */
+
+int file_name_count = 0; /* # of files in database */
+int levels = 0; /* summary tree levels above the leaves; incremented by
+ make_sibling_summary each time a new root is added */
+
+/* One byte per tree level; each byte has the single bit that
+ identifies the file currently being scanned at that level. */
+unsigned char current_hits_signature[MAX_LEVELS];
+/* Initial capacity of a summary's token vector at a given level. */
+#define INIT_TOKENS_SIZE(level) (1 << ((level) + 13))
+struct summary *summary_root;
+struct summary *summary_leaf;
+
+char const *program_name;
+
+/* Command-line option values, filled in by main. */
+char *include_languages = 0;
+char *exclude_languages = 0;
+char *lang_map_file_name = 0;
+int show_version = 0;
+int show_help = 0;
+/* Header of the database being built. */
+struct idhead idh;
+
+/* Print a one-line hint about `--help' on stderr and exit with
+ status 1. */
+void
+usage (void)
+{
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
+ program_name);
+ exit (1);
+}
+
+/* Long option table for getopt_long.  Each entry with a letter in
+   the last field is reported to main as that short option; `--help'
+   and `--version' instead set show_help / show_version directly.
+
+   `--exclude' must map to 'x': that is the letter main's option
+   switch handles and the one the help text documents.  */
+static struct option const long_options[] =
+{
+  { "file", required_argument, 0, 'f' },
+  { "output", required_argument, 0, 'o' },
+  { "include", required_argument, 0, 'i' },
+  { "exclude", required_argument, 0, 'x' },
+  { "lang-arg", required_argument, 0, 'l' },
+  { "lang-map", required_argument, 0, 'm' },
+  { "verbose", no_argument, 0, 'v' },
+  { "statistics", no_argument, 0, 's' },
+  { "help", no_argument, &show_help, 1 },
+  { "version", no_argument, &show_version, 1 },
+  { 0 }
+};
+
+/* Print the full usage message on stdout, followed by the
+ language-specific help from language_help_me, then exit with
+ status 0. */
+static void
+help_me (void)
+{
+ printf (_("\
+Usage: %s [OPTION]... [FILE]...\n"),
+ program_name);
+
+ printf (_("\
+Build an identifier database.\n\
+ -o, --output=OUTFILE file name of ID database output\n\
+ -f, --file=OUTFILE synonym for --output\n\
+ -i, --include=LANGS include languages in LANGS (default: \"C C++ asm\")\n\
+ -x, --exclude=LANGS exclude languages in LANGS\n\
+ -l, --lang-arg=LANG:ARG pass ARG as a default for LANG (see below)\n\
+ -m, --lang-map=MAPFILE use MAPFILE to map file names onto source language\n\
+ -v, --verbose report progress and as files are scanned\n\
+ -s, --statistics report statistics at end of run\n\
+\n\
+ --help display this help and exit\n\
+ --version output version information and exit\n\
+\n\
+FILE may be a file name, or a directory name to recursively search.\n\
+The `--include' and `--exclude' options are mutually-exclusive.\n\
+\n\
+The following arguments apply to the language-specific scanners:\n\
+"));
+ language_help_me ();
+ exit (0);
+}
+
+/* Declare sbrk ourselves when the system headers do not. */
+#if !HAVE_DECL_SBRK
+extern void *sbrk ();
+#endif
+/* Program break sampled by main at three points, so that
+ report_statistics can print how much the heap grew while walking
+ the file tree and while scanning the files. */
+char const *heap_initial;
+char const *heap_after_walk;
+char const *heap_after_scan;
+
+/* Entry point: parse options, walk the named files and directories
+ (default "."), scan every member file for tokens, write the ID
+ database, and optionally print statistics.  Exits with status 0 on
+ success. */
+int
+main (int argc, char **argv)
+{
+ program_name = argv[0];
+ idh.idh_file_name = ID_FILE_NAME;
+ for (;;)
+ {
+ /* NOTE(review): `u' is accepted by this option string but has no
+ case below, so it ends up in usage (). */
+ int optc = getopt_long (argc, argv, "o:f:i:x:l:m:uvs",
+ long_options, (int *) 0);
+ if (optc < 0)
+ break;
+ switch (optc)
+ {
+ case 0:
+ /* A long option that only set a flag variable. */
+ break;
+
+ case 'o':
+ case 'f':
+ idh.idh_file_name = optarg;
+ break;
+
+ case 'i':
+ include_languages = optarg;
+ break;
+
+ case 'x':
+ exclude_languages = optarg;
+ break;
+
+ case 'l':
+ language_save_arg (optarg);
+ break;
+
+ case 'm':
+ lang_map_file_name = optarg;
+ break;
+
+ case 'v':
+ verbose_flag = 1;
+ break;
+
+ case 's':
+ statistics_flag = 1;
+ break;
+
+ default:
+ usage ();
+ }
+ }
+
+ if (show_version)
+ {
+ printf ("%s - %s\n", program_name, PACKAGE_VERSION);
+ exit (0);
+ }
+
+ if (show_help)
+ help_me ();
+
+ /* Step past the options; what remains are the FILE arguments. */
+ argc -= optind;
+ argv += optind;
+ language_getopt ();
+
+ /* Fail early, before the scan, if the output cannot be written. */
+ assert_writeable (idh.idh_file_name);
+
+ if (argc == 0)
+ {
+ /* No FILE given: reuse the preceding argv slot to hold ".". */
+ argc++;
+ *(char const **)--argv = ".";
+ }
+ heap_initial = (char const *) sbrk (0);
+ init_idh_obstacks (&idh);
+ init_idh_tables (&idh);
+ parse_language_map (lang_map_file_name);
+
+ {
+ struct file_link *cwd_link = get_current_dir_link ();
+ while (argc--)
+ walk_flink (parse_file_name (*argv++, cwd_link), 0);
+ mark_member_file_links (&idh);
+ heap_after_walk = (char const *) sbrk (0);
+ scan_files (&idh);
+ heap_after_scan = sbrk (0);
+ free_summary_tokens ();
+ free (token_table.ht_vec);
+ /* scan_member_file changes directory per file; go back to the
+ starting directory before the output file is opened. */
+ chdir_to_link (cwd_link);
+ write_id_file (&idh);
+ }
+ if (statistics_flag)
+ report_statistics ();
+ exit (0);
+}
+
+/* Make sure the database FILE_NAME can be written, and exit with a
+   diagnostic if it cannot.  An existing file must be readable and
+   writable.  If the file does not exist, its directory must allow
+   creating an entry, which takes write and search permission (read
+   permission on the directory is not needed).  */
+void
+assert_writeable (char const *file_name)
+{
+  if (access (file_name, R_OK | W_OK) == 0)
+    return;
+
+  if (errno != ENOENT)
+    error (1, errno, _("can't modify `%s'"), file_name);
+  else
+    {
+      char const *dir_name = dirname (file_name);
+      if (access (dir_name, W_OK | X_OK) < 0)
+        error (1, errno, _("can't create `%s' in `%s'"),
+               basename (file_name), dir_name);
+    }
+}
+
+/* Scan every member file recorded in IDHP's member-file table, in
+   the order given by member_file_qsort_compare.  Sets up the token
+   table, the hits signature, the summary tree and the token obstack
+   before the first file is scanned.  */
+void
+scan_files (struct idhead *idhp)
+{
+  struct member_file **file_vec
+    = (struct member_file **) hash_dump (&idhp->idh_member_file_table,
+                                         0, member_file_qsort_compare);
+  struct member_file **limit
+    = &file_vec[idhp->idh_member_file_table.ht_fill];
+  struct member_file **cursor = file_vec;
+
+  /* Size the token table at 64 slots per member file.  */
+  hash_init (&token_table, idhp->idh_member_file_table.ht_fill * 64,
+             token_hash_1, token_hash_2, token_hash_cmp);
+  init_hits_signature (0);
+  init_summary ();
+  obstack_init (&tokens_obstack);
+
+  while (cursor < limit)
+    scan_member_file (*cursor++);
+  free (file_vec);
+}
+
+/* Scan one member file: change to its directory, open it, feed it
+ to the token scanner of its language, and then advance the hits
+ signature to the next file.  A file that cannot be opened is
+ skipped, but the signature still advances. */
+void
+scan_member_file (struct member_file const *member)
+{
+ struct lang_args const *lang_args = member->mf_lang_args;
+ struct language const *lang = lang_args->la_language;
+ get_token_func_t get_token = lang->lg_get_token;
+ struct file_link *flink = member->mf_link;
+ struct stat st;
+ FILE *source_FILE;
+ size_t bytes;
+
+ /* The file is opened by its bare name, relative to its parent. */
+ chdir_to_link (flink->fl_parent);
+ source_FILE = open_source_FILE (flink->fl_name);
+ if (source_FILE)
+ {
+ char buf[PATH_MAX];
+ if (verbose_flag)
+ {
+ printf ("%d: %s: %s", member->mf_index, lang->lg_name,
+ absolute_path (buf, flink));
+ fflush (stdout);
+ }
+ /* The size is only used for the input_chars statistic. */
+ if (fstat (fileno (source_FILE), &st) < 0)
+ error (0, errno, _("can't stat `%s'"), absolute_path (buf, flink));
+ else
+ {
+ bytes = st.st_size;
+ input_chars += bytes;
+ }
+ scan_member_file_1 (get_token, lang_args->la_args_digested, source_FILE);
+ if (verbose_flag)
+ putchar ('\n');
+ close_source_FILE (source_FILE);
+ }
+ /* The top bit means this was the eighth file of the current leaf:
+ fold the leaf (and any full ancestors) into the summary tree. */
+ if (current_hits_signature[0] & 0x80)
+ summarize ();
+ /* The guard below is disabled, so the signature always advances. */
+#if 0
+ if (member->mf_index < file_name_count)
+#endif
+ bump_current_hits_signature ();
+}
+
+/* Read every token of SOURCE_FILE with GET_TOKEN (passing it ARGS)
+   and record it in token_table.  A token not seen before is inserted
+   with a count of 1.  A token already known has its flags merged and
+   its count bumped, saturating at USHRT_MAX.  Either way the token is
+   signed for the current file the first time it is seen in it.
+   Tokens with an empty name are discarded.  With verbose_flag set,
+   print how many of this file's distinct tokens were new.  */
+void
+scan_member_file_1 (get_token_func_t get_token, void const *args, FILE *source_FILE)
+{
+  struct token **slot;
+  struct token *token;
+  int flags;
+  int new_tokens = 0;
+  int distinct_tokens = 0;
+
+  while ((token = (*get_token) (source_FILE, args, &flags)) != NULL)
+    {
+      if (*token->tok_name == '\0')
+        {
+          obstack_free (&tokens_obstack, token);
+          continue;
+        }
+      slot = (struct token **) hash_find_slot (&token_table, token);
+      if (HASH_VACANT (*slot))
+        {
+          /* First sighting anywhere: keep the freshly scanned token.  */
+          token->tok_count = 1;
+          memset (token->tok_hits, 0, sizeof (token->tok_hits));
+          token->tok_flags = flags;
+          sign_token (token);
+          distinct_tokens++;
+          new_tokens++;
+          hash_insert_at (&token_table, token, slot);
+        }
+      else
+        {
+          /* Already known: drop the duplicate and update the original.  */
+          obstack_free (&tokens_obstack, token);
+          token = *slot;
+          token->tok_flags |= flags;
+          if (token->tok_count < USHRT_MAX)
+            token->tok_count++;
+          /* Sign it only on its first occurrence in this file.  */
+          if (!(token->tok_hits[0] & current_hits_signature[0]))
+            {
+              sign_token (token);
+              distinct_tokens++;
+            }
+        }
+    }
+  if (verbose_flag)
+    {
+      printf (_(" new = %d/%d"), new_tokens, distinct_tokens);
+      if (distinct_tokens != 0)
+        printf (" = %.0f%%", 100.0 * (double) new_tokens / (double) distinct_tokens);
+    }
+}
+
+/* Print the statistics gathered during the run on stdout: token
+   counts by kind, file/token/byte totals, heap growth, output sizes
+   and hash table figures.
+
+   The counters are size_t and the heap figures are pointer
+   differences, while the format strings use %ld.  Each argument is
+   therefore cast to long so that it matches its conversion on every
+   platform.  */
+void
+report_statistics (void)
+{
+  printf (_("Name=%ld, "), (long) name_tokens);
+  printf (_("Number=%ld, "), (long) number_tokens);
+  printf (_("String=%ld, "), (long) string_tokens);
+  printf (_("Literal=%ld, "), (long) literal_tokens);
+  printf (_("Comment=%ld\n"), (long) comment_tokens);
+
+  printf (_("Files=%d, "), idh.idh_files);
+  printf (_("Tokens=%ld, "), (long) occurrences);
+  printf (_("Bytes=%ld Kb, "), (long) (input_chars / 1024));
+  printf (_("Heap=%ld+%ld Kb, "),
+          (long) ((heap_after_scan - heap_after_walk) / 1024),
+          (long) ((heap_after_walk - heap_initial) / 1024));
+  printf (_("Output=%ld (%ld tok, %ld hit)\n"),
+          (long) output_length, (long) tokens_length, (long) hits_length);
+
+  hash_print_stats (&token_table, stdout);
+  printf (_(", Freq=%ld/%ld=%.2f\n"),
+          (long) occurrences, (long) token_table.ht_fill,
+          (double) occurrences / (double) token_table.ht_fill);
+}
+
+/* As the database is written, may need to adjust the file names.  If
+   we are generating the ID file in a remote directory, then adjust
+   the file names to be relative to the location of the ID database.
+
+   (This would be a common usage if you want to make a database for a
+   directory which you have no write access to, so you cannot create
+   the ID file.)
+
+   Write the database described by IDHP to IDHP->idh_file_name: room
+   for the header, the serialized file links, then every token in
+   name order (name, flags, count, hit tree), and finally the header
+   itself.  The statistics counters are accumulated along the way.
+   Exits with a diagnostic if the file cannot be created or written.  */
+void
+write_id_file (struct idhead *idhp)
+{
+  struct token **tokens;
+  int i;
+  int buf_size;
+  int vec_size;
+  int tok_size;
+  int max_buf_size = 0;
+  int max_vec_size = 0;
+  int write_failed;
+
+  if (verbose_flag)
+    printf (_("Sorting tokens...\n"));
+  assert (summary_root->sum_hits_count == token_table.ht_fill);
+  tokens = REALLOC (summary_root->sum_tokens, struct token *, token_table.ht_fill);
+  qsort (tokens, token_table.ht_fill, sizeof (struct token *), token_qsort_cmp);
+
+  if (verbose_flag)
+    printf (_("Writing `%s'...\n"), idhp->idh_file_name);
+  idhp->idh_FILE = fopen (idhp->idh_file_name, "w+b");
+  if (idhp->idh_FILE == NULL)
+    error (1, errno, _("can't create `%s'"), idhp->idh_file_name);
+
+  idhp->idh_magic[0] = IDH_MAGIC_0;
+  idhp->idh_magic[1] = IDH_MAGIC_1;
+  idhp->idh_version = IDH_VERSION;
+  idhp->idh_flags = IDH_COUNTS;
+
+  /* write out the list of pathnames */
+
+  /* Leave room for the header, which is written last.  */
+  fseek (idhp->idh_FILE, sizeof_idhead (), SEEK_SET);
+  idhp->idh_flinks_offset = ftell (idhp->idh_FILE);
+  serialize_file_links (idhp);
+
+  /* write out the list of identifiers */
+
+  putc ('\0', idhp->idh_FILE);
+  putc ('\0', idhp->idh_FILE);
+  idhp->idh_tokens_offset = ftell (idhp->idh_FILE);
+
+  for (i = 0; i < token_table.ht_fill; i++, tokens++)
+    {
+      struct token *token = *tokens;
+      occurrences += token->tok_count;
+      if (token->tok_flags & TOK_NUMBER)
+        number_tokens++;
+      if (token->tok_flags & TOK_NAME)
+        name_tokens++;
+      if (token->tok_flags & TOK_STRING)
+        string_tokens++;
+      if (token->tok_flags & TOK_LITERAL)
+        literal_tokens++;
+      if (token->tok_flags & TOK_COMMENT)
+        comment_tokens++;
+
+      fputs (token->tok_name, idhp->idh_FILE);
+      putc ('\0', idhp->idh_FILE);
+      /* Counts above 0xff need a second byte, announced by a flag.  */
+      if (token->tok_count > 0xff)
+        token->tok_flags |= TOK_SHORT_COUNT;
+      putc (token->tok_flags, idhp->idh_FILE);
+      putc (token->tok_count & 0xff, idhp->idh_FILE);
+      if (token->tok_flags & TOK_SHORT_COUNT)
+        putc (token->tok_count >> 8, idhp->idh_FILE);
+
+      /* Track the largest record and hit vector for the header.  */
+      vec_size = count_vec_size (summary_root, token->tok_hits + levels);
+      buf_size = count_buf_size (summary_root, token->tok_hits + levels);
+      hits_length += buf_size;
+      tok_size = strlen (token->tok_name) + 1;
+      tokens_length += tok_size;
+      buf_size += tok_size + sizeof (token->tok_flags) + sizeof (token->tok_count) + 2;
+      if (buf_size > max_buf_size)
+        max_buf_size = buf_size;
+      if (vec_size > max_vec_size)
+        max_vec_size = vec_size;
+
+      write_hits (idhp->idh_FILE, summary_root, token->tok_hits + levels);
+      putc ('\0', idhp->idh_FILE);
+      putc ('\0', idhp->idh_FILE);
+    }
+  assert_hits (summary_root);
+  idhp->idh_tokens = token_table.ht_fill;
+  output_length = ftell (idhp->idh_FILE);
+  idhp->idh_end_offset = output_length - 2;
+  idhp->idh_buf_size = max_buf_size;
+  idhp->idh_vec_size = max_vec_size;
+
+  /* Write the header of the database we were asked to write, rather
+     than reaching for the global.  */
+  write_idhead (idhp);
+
+  /* A full disk or I/O error must not leave a silently truncated
+     database behind.  */
+  write_failed = ferror (idhp->idh_FILE);
+  if (fclose (idhp->idh_FILE) != 0)
+    write_failed = 1;
+  if (write_failed)
+    error (1, 0, _("error writing `%s'"), idhp->idh_file_name);
+}
+
+/* Primary hash function for token_table: hashes the token's name.
+ KEY points to a struct token.  The return statement is supplied by
+ the return_STRING_HASH_1 macro, which is defined elsewhere. */
+unsigned long
+token_hash_1 (void const *key)
+{
+ return_STRING_HASH_1 (((struct token const *) key)->tok_name);
+}
+
+/* Secondary hash function for token_table: hashes the token's name.
+ KEY points to a struct token.  The return statement is supplied by
+ the return_STRING_HASH_2 macro, which is defined elsewhere. */
+unsigned long
+token_hash_2 (void const *key)
+{
+ return_STRING_HASH_2 (((struct token const *) key)->tok_name);
+}
+
+/* Comparison function for token_table: compares two tokens by name.
+ X and Y point directly to struct token objects.  The return
+ statement is supplied by the return_STRING_COMPARE macro, which is
+ defined elsewhere. */
+int
+token_hash_cmp (void const *x, void const *y)
+{
+ return_STRING_COMPARE (((struct token const *) x)->tok_name,
+ ((struct token const *) y)->tok_name);
+}
+
+/* qsort comparison for vectors of token pointers: compares by name.
+ Unlike token_hash_cmp, X and Y point to elements of the vector,
+ i.e. to pointers to struct token, hence the extra dereference. */
+int
+token_qsort_cmp (void const *x, void const *y)
+{
+ return_STRING_COMPARE ((*(struct token const *const *) x)->tok_name,
+ (*(struct token const *const *) y)->tok_name);
+}
+
+
+/****************************************************************************/
+
+/* Advance current_hits_signature to the next file.  It behaves like
+   a base-8 counter with one walking bit per level: every level whose
+   bit has reached 0x80 wraps back to 1, and the first level that has
+   not shifts its bit left by one.  */
+void
+bump_current_hits_signature (void)
+{
+  int level = 0;
+
+  while (current_hits_signature[level] & 0x80)
+    current_hits_signature[level++] = 1;
+  current_hits_signature[level] <<= 1;
+}
+
+/* Set current_hits_signature for file number I.  Level N receives
+   the single bit selected by the N-th base-8 digit of I.  */
+void
+init_hits_signature (int i)
+{
+  int level;
+
+  for (level = 0; level < MAX_LEVELS; level++)
+    {
+      current_hits_signature[level] = 1 << (i & 7);
+      i >>= 3;
+    }
+}
+
+/* Release the token vectors of the current leaf and of each of its
+   ancestors below the root.  The root's vector is kept.  */
+void
+free_summary_tokens (void)
+{
+  struct summary *node;
+
+  for (node = summary_leaf; node != summary_root; node = node->sum_parent)
+    free (node->sum_tokens);
+}
+
+/* Close out the current leaf summary, plus each ancestor in turn
+ for as long as the next level's signature byte has also reached
+ 0x80.  For every summary closed: sort its tokens by name, copy each
+ token's hit byte for that level into a newly allocated,
+ zero-terminated sum_hits array, and clear that byte in the token.
+ A summary that has a parent then gives up its token vector.
+ Finally a fresh leaf is started with make_sibling_summary. */
+void
+summarize (void)
+{
+ unsigned char const *hits_sig = current_hits_signature;
+ struct summary *summary = summary_leaf;
+
+ do
+ {
+ unsigned long count = summary->sum_hits_count;
+ /* One byte per token plus the terminating zero. */
+ unsigned char *hits = MALLOC (unsigned char, count + 1);
+ unsigned int level = summary->sum_level;
+ struct token **tokens = summary->sum_tokens;
+ unsigned long init_size = INIT_TOKENS_SIZE (summary->sum_level);
+
+ if (verbose_flag)
+ printf (_("level %d: %ld/%ld = %.0f%%\n"),
+ summary->sum_level, count, init_size,
+ 100.0 * (double) count / (double) init_size);
+
+ /* Same order that write_id_file later uses to write the tokens. */
+ qsort (tokens, count, sizeof (struct token *), token_qsort_cmp);
+ summary->sum_hits = hits;
+ while (count--)
+ {
+ /* Move the hit byte out of the token, leaving it zeroed. */
+ unsigned char *hit = &(*tokens++)->tok_hits[level];
+ *hits++ = *hit;
+ *hit = 0;
+ }
+ *hits++ = 0;
+ if (summary->sum_parent)
+ {
+ free (summary->sum_tokens);
+ summary->sum_tokens = 0;
+ }
+ summary = summary->sum_parent;
+ }
+ while (*++hits_sig & 0x80);
+ summary_leaf = make_sibling_summary (summary_leaf);
+}
+
+/* Create the first summary node, which starts out as both the root
+   and the current leaf, with a token vector of the initial level-0
+   capacity.  */
+void
+init_summary (void)
+{
+  struct summary *first = CALLOC (struct summary, 1);
+
+  summary_root = first;
+  summary_leaf = first;
+  first->sum_tokens_size = INIT_TOKENS_SIZE (0);
+  first->sum_tokens = MALLOC (struct token *, first->sum_tokens_size);
+}
+
+/* Create and return a new, empty summary at the same level as
+ SUMMARY, attached to the next free slot of SUMMARY's parent.
+
+ If SUMMARY has no parent, a new root one level higher is created
+ first (incrementing `levels').  That root adopts SUMMARY as kid 0,
+ inherits its hit count and takes over its token vector, enlarged to
+ INIT_TOKENS_SIZE (levels) when it is smaller.  If the parent already
+ has eight kids, a sibling of the parent is made recursively and the
+ new summary is attached to that instead. */
+struct summary *
+make_sibling_summary (struct summary *summary)
+{
+ struct summary *parent = summary->sum_parent;
+ unsigned long size;
+
+ if (parent == NULL)
+ {
+ levels++;
+ summary_root = summary->sum_parent = parent = CALLOC (struct summary, 1);
+ parent->sum_level = levels;
+ parent->sum_kids[0] = summary;
+ parent->sum_hits_count = summary->sum_hits_count;
+ parent->sum_free_index = 1;
+ size = INIT_TOKENS_SIZE (levels);
+ if (summary->sum_tokens_size >= size)
+ {
+ parent->sum_tokens_size = summary->sum_tokens_size;
+ parent->sum_tokens = summary->sum_tokens;
+ }
+ else
+ {
+ parent->sum_tokens_size = size;
+ parent->sum_tokens = REALLOC (summary->sum_tokens, struct token *, size);
+ }
+ /* The vector now belongs to the new root. */
+ summary->sum_tokens = 0;
+ }
+ if (parent->sum_free_index == 8)
+ parent = make_sibling_summary (parent);
+ summary = CALLOC (struct summary, 1);
+ summary->sum_level = parent->sum_level - 1;
+ parent->sum_kids[parent->sum_free_index++] = summary;
+ summary->sum_parent = parent;
+ size = INIT_TOKENS_SIZE (summary->sum_level);
+ summary->sum_tokens_size = size;
+ summary->sum_tokens = MALLOC (struct token *, size);
+ return summary;
+}
+
+/* Count the files in which the current token occurs below SUMMARY,
+   i.e. the number of bits set in the leaf hit bytes reachable from
+   it.  The hit byte of a summarized node comes from its sum_hits
+   cursor (not advanced here); otherwise it comes from *TAIL_HITS,
+   which steps back one byte for each level descended.  */
+int
+count_vec_size (struct summary *summary, unsigned char const *tail_hits)
+{
+  unsigned int hits;
+  int total = 0;
+  int i;
+
+  if (summary->sum_hits)
+    hits = *summary->sum_hits;
+  else
+    hits = *tail_hits;
+
+  if (summary->sum_kids[0] == NULL)
+    {
+      /* Leaf: one file per bit set in the hit byte.  */
+      for (i = 0; i < 8; i++)
+        if (hits & (1 << i))
+          total++;
+      return total;
+    }
+
+  for (i = 0; i < 8; i++)
+    if (hits & (1 << i))
+      total += count_vec_size (summary->sum_kids[i], tail_hits - 1);
+  return total;
+}
+
+/* Count the bytes write_hits will emit for the current token below
+   SUMMARY: one byte for this node plus the bytes of every kid whose
+   bit is set in this node's hit byte.  The hit byte is chosen the
+   same way as in count_vec_size.  */
+int
+count_buf_size (struct summary *summary, unsigned char const *tail_hits)
+{
+  unsigned int hits;
+  int total = 1;
+  int i;
+
+  if (summary->sum_hits)
+    hits = *summary->sum_hits;
+  else
+    hits = *tail_hits;
+
+  /* A leaf contributes just its own byte.  */
+  if (summary->sum_kids[0] == NULL)
+    return 1;
+
+  for (i = 0; i < 8; i++)
+    if (hits & (1 << i))
+      total += count_buf_size (summary->sum_kids[i], tail_hits - 1);
+  return total;
+}
+
+/* Sanity check after all tokens have been written: in SUMMARY and
+   every node below it, a sum_hits array, if present, must have been
+   consumed up to its zero terminator.
+
+   Kids are filled from slot 0 upward (see make_sibling_summary), so
+   walk forward and stop at the first empty slot.  This checks every
+   existing kid and never looks outside the sum_kids array, even for
+   a node that has no kids at all.  */
+void
+assert_hits (struct summary* summary)
+{
+  int i;
+
+  assert (summary->sum_hits == NULL || *summary->sum_hits == 0);
+
+  for (i = 0; i < 8 && summary->sum_kids[i] != NULL; i++)
+    assert_hits (summary->sum_kids[i]);
+}
+
+/* Write the hit tree of the current token to FP: the hit byte of
+ SUMMARY, followed recursively by the hits of every kid whose bit is
+ set in that byte.
+
+ For a summarized node the byte is taken from sum_hits and the
+ pointer is advanced, so each call consumes one entry; the calls
+ therefore have to be made in the token order in which summarize
+ built the array.  Otherwise the byte comes from *TAIL_HITS, which
+ steps back one byte for each level descended. */
+void
+write_hits (FILE *fp, struct summary *summary, unsigned char const *tail_hits)
+{
+ struct summary **kids;
+ unsigned int hits = (summary->sum_hits ? *summary->sum_hits++ : *tail_hits);
+
+ /* A node is only visited when the token occurs somewhere below it. */
+ assert (hits);
+ putc (hits, fp);
+
+ kids = summary->sum_kids;
+ if (*kids)
+ {
+ int bit;
+ --tail_hits;
+ /* Stop at the first empty kid slot. */
+ for (bit = 1; (bit & 0xff) && *kids; bit <<= 1, ++kids)
+ if (bit & hits)
+ write_hits (fp, *kids, tail_hits);
+ }
+}
+
+/* Record that TOKEN occurs in the file currently being scanned by
+ OR-ing current_hits_signature into TOKEN's tok_hits, one byte per
+ level, starting at the leaf.
+
+ While walking the existing summary tree from summary_leaf upward, a
+ token whose hit byte at a level is still zero is first added to that
+ level's summary.  The walk stops as soon as a level already has the
+ signature bit set.  The second loop carries on through the remaining
+ signature levels, for which no summary node exists yet. */
+void
+sign_token (struct token *token)
+{
+ unsigned char *tok_hits = token->tok_hits;
+ unsigned char *hits_sig = current_hits_signature;
+ unsigned char *end = &current_hits_signature[MAX_LEVELS];
+ struct summary *summary = summary_leaf;
+
+ while (summary)
+ {
+ /* First hit at this level: register the token with the summary. */
+ if (*tok_hits == 0)
+ add_token_to_summary (summary, token);
+ if (*tok_hits & *hits_sig)
+ break;
+ *tok_hits |= *hits_sig;
+ summary = summary->sum_parent;
+ tok_hits++;
+ hits_sig++;
+ }
+ /* Levels beyond the current root: only the bits are recorded. */
+ while (hits_sig < end)
+ {
+ if (*tok_hits & *hits_sig)
+ break;
+ *tok_hits |= *hits_sig;
+ tok_hits++;
+ hits_sig++;
+ }
+}
+
+/* Append TOKEN to SUMMARY's token vector, doubling the vector's
+   capacity first when it is full.  */
+void
+add_token_to_summary (struct summary *summary, struct token *token)
+{
+  if (summary->sum_hits_count >= summary->sum_tokens_size)
+    {
+      unsigned long doubled = summary->sum_tokens_size * 2;
+
+      summary->sum_tokens = REALLOC (summary->sum_tokens, struct token *, doubled);
+      summary->sum_tokens_size = doubled;
+    }
+  summary->sum_tokens[summary->sum_hits_count] = token;
+  summary->sum_hits_count++;
+}
diff --git a/src/pid b/src/pid
new file mode 100644
index 0000000..7d2c79c
--- /dev/null
+++ b/src/pid
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+lid -E "$@"