imported from id-utils-3.1r3_1

author: Greg McGary <greg@mcgary.org> 1997-04-18 06:43:35 +0000
committer: Greg McGary <greg@mcgary.org> 1997-04-18 06:43:35 +0000
commit: 3720d4b7a1b0ce0903450271aa3d93388e9d8781 (patch)
tree: 12200295d735bf3d1bcaaf8d2065547d41cea3b2 /doc
parent: 916418ea1284e6aa64f50eba077e48ced5944acc (diff)
download: idutils-3720d4b7a1b0ce0903450271aa3d93388e9d8781.tar.gz
idutils-3720d4b7a1b0ce0903450271aa3d93388e9d8781.tar.bz2
idutils-3720d4b7a1b0ce0903450271aa3d93388e9d8781.zip
5 files changed, 1825 insertions, 2002 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am
index b7d1100..48aed71 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,3 +1,5 @@
 ## Process this file with automake to produce Makefile.in
 
 info_TEXINFOS = id-utils.texi
+
+EXTRA_DIST = texinfo.tex
diff --git a/doc/Makefile.in b/doc/Makefile.in
index 650fddf..5877513 100644
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -39,12 +39,14 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 transform = @program_transform_name@
 
 info_TEXINFOS = id-utils.texi
+
+EXTRA_DIST = texinfo.tex
 mkinstalldirs = $(top_srcdir)/mkinstalldirs
 CONFIG_HEADER = ../config.h
 
 MAKEINFO = makeinfo
 TEXI2DVI = texi2dvi
-INFOS = id-utils.info*
+INFOS = id-utils.info id-utils.info-[0-9] id-utils.info-[0-9][0-9]
 INFO_DEPS = id-utils.info
 DVIS = id-utils.dvi
 TEXINFOS = id-utils.texi
@@ -106,8 +108,8 @@ id-utils.info: id-utils.texi version.texi
 install-info: $(INFO_DEPS)
 	$(mkinstalldirs) $(infodir)
 	for file in $(INFO_DEPS); do		\
-	  for ifile in `cd $(srcdir) && echo $$file*`; do \
-	    $(INSTALL_DATA) $(srcdir)/$$ifile $(infodir)/$$ifile; \
+	  for ifile in `cd $(srcdir) && echo $$file $$file-[0-9] $$file-[0-9][0-9]`; do \
+	    test -f $$ifile && $(INSTALL_DATA) $(srcdir)/$$ifile $(infodir)/$$ifile || true; \
 	  done;					\
 	done
 
diff --git a/doc/id-utils.info b/doc/id-utils.info
index dcc76e8..abbc871 100644
--- a/doc/id-utils.info
+++ b/doc/id-utils.info
@@ -2,20 +2,17 @@ This is Info file ../../doc/id-utils.info, produced by Makeinfo-1.63
 from the input file ../../doc/id-utils.texi.
 
 START-INFO-DIR-ENTRY
-* ID database: (id).            Identifier database utilities.
-* aid: (id)aid invocation.                      Matching strings.
-* eid: (id)eid invocation.                      Invoking an editor on matches.
-* fid: (id)fid invocation.                      Listing a file's identifiers.
-* gid: (id)gid invocation.                      Listing all matching lines.
-* idx: (id)idx invocation.                      Testing mkid scanners.
-* lid: (id)lid invocation.                      Matching patterns.
-* mkid: (id)mkid invocation.                    Creating an ID database.
-* pid: (id)pid invocation.                      Looking up filenames.
+* ID database: (id-utils).              Identifier database utilities.
+* mkid: (id-utils)mkid invocation.      Creating an ID database.
+* lid: (id-utils)lid invocation.        Matching words and patterns.
+* fid: (id-utils)fid invocation.        Listing a file's tokens.
+* fnid: (id-utils)fnid invocation.      Looking up file names.
+* xtokid: (id-utils)xtokid invocation.  Testing mkid scanners.
 END-INFO-DIR-ENTRY
 
-   This file documents the `mkid' identifier database utilities.
+   This file documents the `id-utils' database utilities.
 
-   Copyright (C) 1991, 1995 Tom Horsley.
+   Copyright (C) 1996 Free Software Foundation, Inc.
 
    Permission is granted to make and distribute verbatim copies of this
 manual provided the copyright notice and this permission notice are
@@ -34,773 +31,738 @@ translation.
 
 File: id-utils.info,  Node: Top,  Next: Introduction,  Up: (dir)
 
-ID database utilities
-*********************
+ID utilities
+************
 
-   This manual documents version 3.0.9 of the ID database utilities.
+   This manual documents version 3.1 of the ID utilities.
 
 * Menu:
 
-* Introduction::                Overview of the tools, and authors.
+* Introduction::                Overview of the tools with tutorial.
+* Quick start::                 Quick start procedure.
+* Common options::              Common command-line options.
 * mkid invocation::             Creating an ID database.
-* Common query arguments::      Common lookup options and search patterns.
-* gid invocation::              Listing all matching lines.
-* Looking up identifiers::      lid, aid, eid, and fid.
-* pid invocation::              Looking up filenames.
+* lid invocation::              Querying an ID database by token.
+* fid invocation::              Listing a file's tokens.
+* fnid invocation::             Looking up file names.
+* xtokid invocation::           Testing language scanners.
+* Past and Future::             History and future directions.
 * Index::                       General index.
 
 
-File: id-utils.info,  Node: Introduction,  Next: mkid invocation,  Prev: Top,  Up: Top
+File: id-utils.info,  Node: Introduction,  Next: Quick start,  Prev: Top,  Up: Top
 
 Introduction
 ************
 
-   An "ID database" is a binary file containing a list of filenames, a
-list of identifiers, and a matrix indicating which identifiers appear in
-which files.  With this database and some tools to manipulate it
-(described in this manual), a host of tasks become simpler and faster.
-For example, you can list all files containing a particular `#include'
-throughout a huge source hierarchy, search for all the memos containing
-references to a project, or automatically invoke an editor on all files
-containing references to some function.  Anyone with a large software
-project to maintain, or a large set of text files to organize, can
-benefit from an ID database.
+   An "ID database" is a binary file containing a list of file names, a
+list of tokens, and a sparse matrix indicating which tokens appear in
+which files.
 
-   Although the ID utilities are most commonly used with identifiers,
-numeric constants are also stored in the database, and can be searched
-for in the same way (independent of radix, if desired).
+   With this database and some tools to query it (described in this
+manual), many text-searching tasks become simpler and faster.  For
+example, you can list all files that reference a particular `#include'
+file throughout a huge source hierarchy, search for all the memos
+containing references to a project, or automatically invoke an editor
+on all files containing references to some function or variable.
+Anyone with a large software project to maintain, or a large set of text
+files to organize, can benefit from the ID utilities.
 
-   There are a number of programs in the ID family:
+   Although the name `ID' is short for `identifier', the ID utilities
+handle more than just identifiers; they also treat other kinds of
+tokens, most notably numeric constants, and the contents of certain
+character strings.  Thus, this manual will use the word "token" as a
+term that is inclusive of identifiers, numbers and strings.
 
-`mkid'
-     scans files for identifiers and numeric constants and builds the ID
-     database file.
+   There are several programs in the ID utilities family:
 
-`gid'
-     lists all lines that match given patterns.
+`mkid'
+     scans files for tokens and builds the ID database file.
 
 `lid'
-     lists the filenames containing identifiers that match given
-     patterns.
+     queries the ID database for tokens, then reports matching file
+     names or matching lines.
 
-`aid'
-     lists the filenames containing identifiers that contain given
-     strings, independent of case.
+`fid'
+     lists all tokens recorded in the database for given files, or
+     tokens common to two files.
 
-`eid'
-     invokes an editor on each file containing identifiers that match
-     given patterns.
+`fnid'
+     matches the file names in the database, rather than the tokens.
 
-`fid'
-     lists all identifiers recorded in the database for given files, or
-     identifiers common to two files.
+`xtokid'
+     extracts raw tokens--helps with testing of new `mkid' scanners.
 
-`pid'
-     matches the filenames in the database, rather than the identifiers.
+   In addition, the ID utilities have historically provided several
+query programs which are specializations of `lid':
+
+`gid'
+     (alias for `lid -R grep') lists all lines containing the requested
+     pattern.
 
-`idx'
-     helps with testing of new `mkid' scanners.
+`eid'
+     (alias for `lid -R edit') invokes an editor on all files
+     containing the requested pattern, and if possible, initiates a
+     text search for that pattern.
+
+`aid'
+     (alias for `lid -ils') treats the requested pattern as a
+     case-insensitive literal substring.
 
-   Please report bugs to `gkm@magilla.cichlid.com'.  Remember to
+   Please report bugs to `bug-gnu-utils@gnu.ai.mit.edu'.  Remember to
 include the version number, machine architecture, input files, and any
 other information needed to reproduce the bug: your input, what you
 expected, what you got, and why it is wrong.  Diffs are welcome, but
 please include a description of the problem as well, since this is
 sometimes difficult to infer.  *Note Bugs: (gcc)Bugs.
 
-* Menu:
-
-* Past and future::       How the ID tools came about, and where they're going.
-
-
-File: id-utils.info,  Node: Past and future,  Up: Introduction
-
-Past and future
-===============
-
-   Greg McGary conceived of the ideas behind mkid when he began hacking
-the Unix kernel in 1984.  He needed a navigation tool to help him find
-his way around the expansive, unfamiliar landscape.  The first
-`mkid'-like tools were shell scripts, and produced an ASCII database
-that looks much like the output of `lid' with no arguments.  It took
-over an hour on a VAX 11/750 to build a database for a 4.1BSD-ish
-kernel.  Lookups were done with the system utility `look', modified to
-handle very long lines.
-
-   In 1986, Greg rewrote `mkid', `lid', `fid' and `idx' in C to improve
-performance.  Database-build times were shortened by an order of
-magnitude.  The `mkid' tools were first posted to `comp.sources.unix'
-in September 1987.
-
-   Over the next few years, several versions diverged from the original
-source.  Tom Horsley at Harris Computer Systems Division stepped forward
-to take over maintenance and integrated some of the fixes from divergent
-versions.  A first release of `mkid' version 2 was posted to
-`alt.sources' near the end of 1990.  At that time, Tom wrote this
-Texinfo manual with the encouragement the net community.  (Tom
-especially thanks Doug Scofield and Bill Leonard whom he dragooned into
-helping poorfraed and edit--they found several problems in the initial
-version.)  Karl Berry revamped the manual for Texinfo style, indexing,
-and organization in 1995.
-
-   In January 1995, Greg McGary reemerged as the primary maintaner and
-launched development of `mkid' version 3, whose primary new feature is
-an efficient algorithm for building databases that is linear in both
-time and space over the size of the input text.  (The old algorithm was
-quadratic in space and therefore choked on very large source trees.)
-The code is released under the GNU Public License, and might become a
-part of the GNU system.  `mkid' 3 is an interim release, since several
-significant enhancements are still in the works: an optional coupling
-with GNU `grep', so that `grep' can use an ID database for hints; a
-`cscope' work-alike query interface; incremental update of the ID
-database; and an automatic file-tree walker so you need not explicitly
-supply every filename argument to the `mkid' program.
-
-
-File: id-utils.info,  Node: mkid invocation,  Next: Common query arguments,  Prev: Introduction,  Up: Top
-
-`mkid': Creating ID databases
-*****************************
-
-   The `mkid' program builds an ID database.  To do this, it must scan
-each file you tell it to include in the database.  This takes some time,
-but once the work is done the query programs run very rapidly.  (You can
-run `mkid' as a `cron' job to regularly update your databases.)
-
-   The `mkid' program knows how to extract identifiers from various
-types of files.  For example, it can recognize and skip over comments
-and string constants in a C program.
-
-   Identifiers are not the only thing included in the database.  Numbers
-are also recognized and included in the database indexed by their binary
-value.  This feature allows you to find uses of constants without regard
-to the radix used to specify them, since the same number can frequently
-be written in many different ways (for instance, `47', `0x2f', `057' in
-C).
-
-   All the places in this document which mention identifiers should
-really mention both identifiers and numbers, but that gets fairly
-clumsy after a while, so you just need to keep in mind that numbers are
-included in the database as well as identifiers.
-
-   The ID files that `mkid' creates are architecture- and
-byte-order-independent; you can share them at will across systems.
-
-* Menu:
-
-* mkid options::                Command-line options to mkid.
-* Scanners::                    Built-in and defining your own.
-* mkid examples::               Examples of mkid usage.
-
 
-File: id-utils.info,  Node: mkid options,  Next: Scanners,  Up: mkid invocation
+File: id-utils.info,  Node: Quick start,  Next: Common options,  Prev: Introduction,  Up: Top
 
-`mkid' options
-==============
-
-   By default, `mkid' scans the files you specify and writes the
-database to a file named `ID' in the current directory.
-
-     mkid [-v] [-SSCANARG] [-aARGFILE] [-] [-fIDFILE] FILES...
+Quick Start Procedure
+*********************
 
-   The program accepts the following options.
+     Unpack the distribution.
 
-`-v'
-     Verbose.  `mkid' tells you as it scans each file and indicates
-     which scanner it is using.  It also summarizes some statistics
-     about the database at the end.
+     Type `./configure'
 
-`-SSCANARG'
-     Specify options regarding `mkid''s scanners.  *Note Scanner option
-     formats::.
+     Type `make'
 
-`-aARGFILE'
-     Read additional command line arguments from ARGFILE.  This is
-     typically used to specify lists of filenames longer than will fit
-     on a command line; some systems have severe limitations on the
-     total length of a command line.
+     Type `make install' as a user with the appropriate privileges
+     (e.g., `bin' or perhaps even `root').
 
-`-'
-     Read additional command line arguments from standard input.
+     Type `cd /usr/include; mkid' to build an ID database covering all
+     of the system header files.
 
-`-fIDFILE'
-     Write the database to the file IDFILE, instead of `ID'.  The
-     database stores filenames relative to the directory containing the
-     database, so if you move the database to a different directory
-     after creating it, you may have trouble finding files.
+     Type `lid FILE', then `gid strtok', then `aid stdout'.
 
-   The remaining arguments FILES are the files to be scanned and
-included in the database.  If no files are given at all (either on
-command line or via `-a' or `-'), `mkid' does nothing.
+   You have just built, installed and used the most common commands of
+the GNU ID utilities.  If you ever need help remembering which system
+header files contain a particular declaration, or reference a
+particular symbol, you'll want to keep the ID file you built in
+`/usr/include' for later use.  If your working directory is elsewhere
+at the time, simply provide the `-f /usr/include' option to `lid'
+(*note Reading options::.).
 
 
-File: id-utils.info,  Node: Scanners,  Next: mkid examples,  Prev: mkid options,  Up: mkid invocation
+File: id-utils.info,  Node: Common options,  Next: mkid invocation,  Prev: Quick start,  Up: Top
 
-Scanners
-========
+Common command-line options
+***************************
 
-   To determine which identifiers to extract from a file and store in
-the database, `mkid' calls a "scanner"; we say a scanner "recognizes" a
-particular language.  Scanners for several languages are built-in to
-`mkid'; you can add your own scanners as well, as explained in the
-sections below.
-
-   `mkid' determines which scanner to use for a particular file by
-looking at the suffix of the filename.  This "suffix" is everything
-after and including the last `.' in a filename; for example, the suffix
-of `foo.c' is `.c'.  `mkid' has a built-in list of bindings from some
-suffixes to corresponding scanners; for example, `.c' files are (not
-surprisingly) scanned by the predefined C language scanner.
-
-   If `mkid' cannot determine what scanner to use for a particular
-file, either because the file has no suffix (e.g., `foo') or because
-`mkid' has no binding for the file's suffix (e.g., `foo.bar'), it uses
-the scanner bound to the `.default' suffix.  By default, this is the
-plain text scanner (*note Plain text scanner::.), but you can change
-this with the `-S' option, as explained below.
+   Certain options, and regular expression syntax, are shared by various
+groupings of the ID utilities.  We describe these in the sections below,
+rather than repeating them for each program.
 
 * Menu:
 
-* Scanner option formats::      Overview of the -S option.
-* Predefined scanners::         The C, plain text, and assembler scanners.
-* Defining new scanners::       Either in source code or at runtime with -S.
-* idx invocation::              Testing mkid scanners.
+* Universal options::     Options common to all programs.
+* Extraction options::    Options for programs that extract tokens from source files.
+* Walker options::        Options for programs that walk file and directory trees.
+* Reading options::       Options for programs that read ID databases.
+* Writing options::       Options for programs that write ID databases.
+* File listing options::  Options for programs that list file names.
 
 
-File: id-utils.info,  Node: Scanner option formats,  Next: Predefined scanners,  Up: Scanners
-
-Scanner option formats
-----------------------
-
-   With the `-S' option, you can change which language scanner to use
-for which files, give language-specific options, and get some limited
-online help about scanner options.
-
-   Here are the different forms of the `-S' option:
+File: id-utils.info,  Node: Universal options,  Next: Extraction options,  Up: Common options
 
-`-S.SUFFIX=SCANNER'
-     Use SCANNER for a file with the given `.SUFFIX'.  For example,
-     `-S.yacc=c' tells `mkid' to use the `c' language scanner for all
-     files ending in `.yacc'.
+Options Common to All Programs
+==============================
 
-`-S.SUFFIX=?'
-     Display which scanner is used for the given `.SUFFIX'.
+`--help'
+     Print a usage message listing all available options, then exit
+     successfully.
 
-`-S?=SCANNER'
-     Display which suffixes SCANNER is used for.
-
-`-S?=?'
-     Display the scanner binding for every known suffix.
-
-`-SSCANNER+ARG'
-`-SSCANNER-ARG'
-     Each scanner accepts certain scanner-dependent arguments.  These
-     options all have one of these forms.  *Note Predefined scanners::.
-
-`-SSCANNER?'
-     Display the scanner-specific options accepted by SCANNER.
-
-`-SNEW-SCANNER/OLD-SCANNER/FILTER-COMMAND'
-     Define NEW-SCANNER in terms of OLD-SCANNER and FILTER-COMMAND.
-     *Note Defining scanners with options::.
+`--version'
+     Print the version number, then exit successfully.
 
 
-File: id-utils.info,  Node: Predefined scanners,  Next: Defining new scanners,  Prev: Scanner option formats,  Up: Scanners
+File: id-utils.info,  Node: Reading options,  Next: Writing options,  Prev: Walker options,  Up: Common options
 
-Predefined scanners
--------------------
+Options for Programs that Read ID Databases
+===========================================
 
-   `mkid' has built-in scanners for several types of languages; you can
-get the list by running `mkid -S?=?'.  The supported languages are
-documented below(1).
+`-f FILENAME'
+`--file=FILENAME'
+     FILENAME is the ID database to read when processing queries.  At
+     present, only a single `--file' option is processed, but in future
+     releases, more than one ID database may be named on the command
+     line.
 
-* Menu:
+`$IDPATH'
+     `IDPATH' is an environment variable that contains a
+     colon-separated list of ID database names.  If this variable is
+     present, and no `--file' options are presented on the command
+     line, the ID databases named in `IDPATH' are implied.(1)
 
-* C scanner::                   For the C programming language.
-* Plain text scanner::          For documents or other non-source code.
-* Assembler scanner::           For assembly language.
+   If no ID databases are specified either on the command line or via
+the `IDPATH' environment variable, then the ID utilities search for a
+file named `ID' in the current working directory, and then in
+successive parent directories.
 
    ---------- Footnotes ----------
 
-   (1)  This is not strictly true: `vhil' is a supported language, but
-it is an obsolete and arcane dialect of C and should be ignored.
+   (1)  At present, this feature is not fully implemented, since only
+the first of a list of ID database names is processed.
 
 
-File: id-utils.info,  Node: C scanner,  Next: Plain text scanner,  Up: Predefined scanners
-
-C scanner
-.........
+File: id-utils.info,  Node: Writing options,  Next: File listing options,  Prev: Reading options,  Up: Common options
 
-   The C scanner is the most commonly used.  Files with the usual `.c'
-and `.h' suffixes, and the `.y' (yacc) and `.l' (lex) suffixes, are
-processed with this scanner (by default).
+Options for Programs that Write ID Databases
+============================================
 
-   Scanner-specific options:
+`-o FILENAME'
+`--output=FILENAME'
+     The `--output' option names the file in which to write a new ID
+     database.  If no `--output' (or `--file') option is present, an
+     output file named `ID' is implied.
 
-`-Sc-sCHARACTER'
-     Allow the specified CHARACTER in identifiers. For example, if you
-     use `$' in identifiers, you'll want to use `-Sc-s$'.
-
-`-Sc+u'
-     Strip leading underscores from identifiers. You might to do this in
-     peculiar circumstances, such as trying to parse the output from
-     `nm' or some other system utility.
-
-`-Sc-u'
-     Don't strip leading underscores from identifiers; this is the
-     default.
+`-f FILENAME'
+`--file=FILENAME'
+     This is a synonym for `--output'.
 
 
-File: id-utils.info,  Node: Plain text scanner,  Next: Assembler scanner,  Prev: C scanner,  Up: Predefined scanners
-
-Plain text scanner
-..................
-
-   The plain text scanner is intended for scanning most non-source-code
-files.  This is typically the scanner used when adding custom scanners
-via `-S' (*note Defining scanners with options::.).
+File: id-utils.info,  Node: Walker options,  Next: Reading options,  Prev: Extraction options,  Up: Common options
 
-   Scanner-specific options:
+Options for Programs that Walk File and Directory Trees
+=======================================================
 
-`-Stext+aCHARACTER'
-     Include CHARACTER in identifiers.  By default, letters (a-z and
-     A-Z) and underscore are included.
+   The programs `mkid' and `xtokid' accept the names of files and
+directories on the command line.  Files are scanned if there is a
+scanner available and enabled for the file's source language.
+Directories are recursively descended, searching for files whose names
+match the rules listed in the *language map* file (*note Language
+map::.).
 
-`-Stext-aCHARACTER'
-     Exclude CHARACTER from identifiers.
+   The following option controls the file tree walker:
 
-`-Stext+sCHARACTER'
-     Squeeze CHARACTER from identifiers, i.e., do not terminate an
-     identifier when CHARACTER is seen.  By default, the characters
-     `'', `-', and `.' are squeezed out of identifiers.  For example,
-     the input `fred's' leads to the identifier `freds'.
-
-`-Stext-sCHARACTER'
-     Do not squeeze CHARACTER.
+`-p NAMES'
+`--prune=NAMES'
+     One or more file or directory names may appear in NAMES.  The file
+     tree walker will stop short at these files and directories and
+     their contents will not be scanned.
 
 
-File: id-utils.info,  Node: Assembler scanner,  Prev: Plain text scanner,  Up: Predefined scanners
-
-Assembler scanner
-.................
-
-   Since assembly languages come in several flavors, this scanner has a
-number of options:
+File: id-utils.info,  Node: File listing options,  Prev: Writing options,  Up: Common options
 
-`-Sasm-cCHARACTER'
-     Define CHARACTER as starting a comment that extends to the end of
-     the input line; no default.  In many assemblers this is `;' or `#'.
+Options for Programs that List File Names
+=========================================
 
-`-Sasm+u'
-`-Sasm-u'
-     Strip (`+u') or do not strip (`-u') leading underscores from
-     identifiers.  The default is to strip them.
+   The programs `lid' and `fnid' can print lists of file names as the
+result of queries.  The following option controls how these lists are
+formatted:
 
-`-Sasm+aCHARACTER'
-     Allow CHARACTER in identifiers.
+`-S STYLE'
+`--separator=STYLE'
+     STYLE may be one of `braces', `space' or `newline'.
 
-`-Sasm-aCHARACTER'
-     Allow CHARACTER in identifiers, but if an identifier contains
-     CHARACTER, ignore it. This is useful to ignore temporary labels,
-     which can be generated in great profusion; these often contain `.'
-     or `@'.
+     The STYLE of `braces' means that file names with common directory
+     prefix and common suffix are printed using the shell's brace
+     notation in order to compress the output.  For example,
+     `../src/foo.c ../src/bar.c' can be printed in brace notation as
+     `../src/{foo,bar}.c'.
 
-`-Sasm+p'
-`-Sasm-p'
-     Recognize (`+p') or do not recognize (`-p') C preprocessor
-     directives in assembler source. The default is to recognize them.
+     The STYLEs of `space' and `newline' mean that file names are
+     separated by spaces or by newlines, respectively.
 
-`-Sasm+C'
-`-Sasm-C'
-     Skip over (`+C') or do not skip over (`-C') C style comments in
-     assembler source.  The default is to skip them.
+     If the list of files is being printed on a terminal, brace
+     notation is the default.  If not, file names are separated by
+     spaces if the KEY is included in the output, and by newlines if
+     the KEY STYLE is `none' (*note lid invocation::.).
 
 
-File: id-utils.info,  Node: Defining new scanners,  Next: idx invocation,  Prev: Predefined scanners,  Up: Scanners
-
-Defining new scanners
----------------------
-
-   You can add new scanners to `mkid' in two ways: modify the source
-code and recompile, or at runtime via the `-S' option.  Each has their
-advantages and disadvantages, as explained below.
+File: id-utils.info,  Node: Extraction options,  Next: Walker options,  Prev: Universal options,  Up: Common options
+
+Options for Programs that Scan Source Files
+===========================================
+
+   `mkid' and `xtokid' walk file trees, select source files by name,
+and extract tokens from source files.  They accept the following
+options:
+
+`-m MAPFILE'
+`--lang-map=MAPFILE'
+     MAPFILE contains rules for determining the source languages from
+     file names.  *Note Language map::.
+
+`-i LANGUAGES'
+`--include=LANGUAGES'
+     The `--include' option names LANGUAGES whose source files should
+     be scanned and incorporated into the ID database.  By default, all
+     languages known to the ID utilities are enabled.
+
+`-x LANGUAGES'
+`--exclude=LANGUAGES'
+     The `--exclude' option names LANGUAGES whose source files should
+     NOT be scanned.  The default list of excluded languages is empty.
+     Note that only one of `--include' or `--exclude' may be specified
+     on the command line for a single run.
+
+`-l LANGUAGE:OPTIONS'
+`--lang-option=LANGUAGE:OPTIONS'
+     Language-specific scanners also accept options.  LANGUAGE denotes
+     the desired scanner, and OPTIONS are the command-line options that
+     should be passed through to it.  For example, to pass the -X
+     -COKE-BOTTLE options to the scanner for the language SWIZZLE, pass
+     this: -L SWIZZLE:"-X -COKE-BOTTLE", or this:
+     -LANG-OPTION=SWIZZLE:"-X -COKE-BOTTLE", or this: -L SWIZZLE:-X -L
+     SWIZZLE:-COKE-BOTTLE.  Use the `--help' option to see the
+     command-line option summary for each built-in scanner.
+
+   To determine which tokens to extract from a file and store in the
+database, `mkid' calls a "scanner"; we say a scanner "recognizes" a
+particular language.  Scanners for several languages are built-in to
+`mkid'; you can add your own scanners as well, as explained in *Note
+Defining scanners::.
 
-   If you create a new scanner that would be of use to others, please
-consider sending it back to the maintainer, `gkm@magilla.cichlid.com',
-for inclusion in future releases of `mkid'.
+   The ID utilities determine which scanner to use for a particular
+file by consulting the language-map file.  Scanners for several
+languages are already built-in to the ID utilities.  You can see which
+languages have built-in scanners, and examine their language-specific
+options by invoking `mkid --help' or `xtokid --help'.
 
 * Menu:
 
-* Defining scanners in source code::
-* Defining scanners with options::
+* Language map::                Mapping file names to source languages.
+* C/C++ scanner::               For the C and C++ programming languages.
+* Assembler scanner::           For assembly language.
+* Text scanner::                For documents or other non-source code.
+* Defining scanners::           Defining new scanners in the source code.
 
 
-File: id-utils.info,  Node: Defining scanners in source code,  Next: Defining scanners with options,  Up: Defining new scanners
-
-Defining scanners in source code
-................................
-
-   To add a new scanner in source code, you should add a new section to
-the file `scanners.c'.  Copy one of the existing scanners (most likely
-either C or plain text), and modify as necessary.  Also add the new
-scanner to the `languages_0' and `suffixes_0' tables near the beginning
-of the file.
-
-   This is not a terribly difficult programming task, but it requires
-recompiling and installing the new version of `mkid', which may be
-inconvenient.
-
-   This method leads to scanners which operate much more quickly than
-ones that depend on external programmers.  It is also likely the
-easiest way to define scanners for new programming languages.
+File: id-utils.info,  Node: Language map,  Next: C/C++ scanner,  Up: Extraction options
+
+Mapping file names to source languages
+--------------------------------------
+
+   The file `id-lang.map', installed by default in
+`$(prefix)/share/id-lang.map', contains rules for mapping file names to
+source languages.  Each rule comprises three parts: a shell GLOB
+pattern, a language name, and language-specific scanner options.
+
+   The special pattern `**' denotes the default source language.  This
+is the language that's assigned to file names that don't match any other
+pattern.
+
+   The special pattern `***' should be followed by a file name.  The
+named file should contain more language-map rules and is included at
+this point.
+
+   The order in which rules are presented in a language-map file is
+significant.  This order influences the order in which files are
+displayed as the result of queries.  For example, the distributed
+language-map file places all rules for C `.h' files ahead of `.c' files, so
+that in general, declarations will precede definitions in query output.
+The same thing is done for C++ and its many different source file name
+extensions.
+
+   Here is a pared-down version of the `id-lang.map' file distributed
+with the ID utilities:
+
+
+     # Default language
+     **			IGNORE	# Although this is listed first,
+     				# the default language pattern is
+     				# logically matched last.
+     
+     # Backup files
+     *~			IGNORE
+     *.bak			IGNORE
+     *.bk[0-9]		IGNORE
+     
+     # SCCS files
+     [sp].*			IGNORE
+     
+     # list header files before code files
+     *.h			C
+     *.h.in			C
+     *.H			C++
+     *.hh			C++
+     *.hpp			C++
+     *.hxx			C++
+     
+     # list C `meta' files next
+     *.l			C
+     *.lex			C
+     *.y			C
+     *.yacc			C
+     
+     # list C code files after header files
+     *.c			C
+     *.C			C++
+     *.cc			C++
+     *.cpp			C++
+     *.cxx			C++
+     
+     # list assembly language after C
+     *.[sS]			asm --comment=;
+     *.asm			asm --comment=;
+     
+     # [nt]roff
+     *.[0-9]			roff
+     *.ms			roff
+     *.me			roff
+     *.mm			roff
+     
+     # TeX and friends
+     *.tex			TeX
+     *.ltx			TeX
+     *.texi			texinfo
+     *.texinfo		texinfo
 
 
-File: id-utils.info,  Node: Defining scanners with options,  Prev: Defining scanners in source code,  Up: Defining new scanners
-
-Defining scanners with options
-..............................
-
-   You can use the `-S' option on the command line to define a new
-language scanner:
-
-     -SNEW-SCANNER/EXISTING-SCANNER/FILTER
-
-Here, NEW-SCANNER is the name of the new scanner being defined,
-EXISTING-SCANNER is the name of an existing scanner, and FILTER is a
-shell command or pipeline.
+File: id-utils.info,  Node: C/C++ scanner,  Next: Assembler scanner,  Prev: Language map,  Up: Extraction options
 
-   The new scanner works by passing the input file to FILTER, and then
-arranging for the result to be passed through EXISTING-SCANNER.
-Typically, EXISTING-SCANNER is `text'.
-
-   Somewhere within FILTER, the string`%s' should occur.  This `%s' is
-replaced by the name of the source file being scanned.
-
-   For example, `mkid' has no built-in scanner for Texinfo files (like
-this one).  In indexing a Texinfo file, you most likely would want to
-ignore the Texinfo @-commands. Here's one way to specify a new scanner
-to do this:
-
-     -S/texinfo/text/sed s,@[a-z]*,,g %s
-
-   This defines a new language scanner (`texinfo') defined in terms of
-a `sed' command to strip out Texinfo directives (an `@' character
-followed by letters).  Once the directives are stripped, the remaining
-text is run through the plain text scanner.
+C/C++ Language Scanner
+----------------------
 
-   This is a minimal example; to do a complete job, you would need to
-completely delete some lines, such as those beginning with `@end' or
-@node.
+   The C scanner is the most commonly used.  Files that match the glob
+patterns `*.h' or `*.c', as well as `yacc' files that match `*.y' or
+`*.yacc', and `lex' files that match `*.l' or `*.lex', are processed
+with this scanner.
+
+   Scanner-specific options (Note, these options are presented WITHOUT
+the required `-l' or `--lang-option=' prefix):
+
+`-k CHARACTER-CLASS'
+`--keep=CHARACTER-CLASS'
+     Consider the characters in CHARACTER-CLASS as valid constituents of
+     identifier names.  For example, if you are indexing C code that
+     contains `$' in some of its identifiers, you can include these by
+     using `--lang-option=C:--keep=$', or `-l C:"-k $"' (if you don't
+     like to type so much).
+
+`-i CHARACTER-CLASS'
+`--ignore=CHARACTER-CLASS'
+     Consider the characters in CHARACTER-CLASS as valid
+     constituents of identifier names, but discard all tokens
+     containing these characters.  For example, if some C code has
+     identifiers containing `$', but you don't want these cluttering up
+     your ID database, use `--lang-option=C:--ignore=$', or the terser
+     equivalent `-l C:"-i $"'.
+
+`-u'
+`--strip-underscore'
+     Strip one leading underscore from C identifiers encapsulated as
+     character strings.  This option is useful if you are indexing C
+     code that contains symbol-table name strings for systems that
+     prepend an underscore to external symbols.  By default, the
+     leading underscore is retained.
 
 
-File: id-utils.info,  Node: idx invocation,  Prev: Defining new scanners,  Up: Scanners
-
-`idx': Testing `mkid' scanners
-------------------------------
-
-   `idx' prints the identifiers found in the files you specify to
-standard output. This is useful in debugging new `mkid' scanners (*note
-Scanners::.). Synopsis:
+File: id-utils.info,  Node: Assembler scanner,  Next: Text scanner,  Prev: C/C++ scanner,  Up: Extraction options
+
+Assembly Language Scanner
+-------------------------
+
+   Assembly languages use a variety of commenting conventions, and
+allow a variety of special characters to *dirty up* local symbols,
+preventing name space conflicts with symbols defined by higher-level
+languages.  Also, some compilation systems prepend an underscore to
+external symbols.  The options listed below are designed to address
+these differences.
+
+`-c CHARACTER-CLASS'
+`--comment=CHARACTER-CLASS'
+     The characters in CHARACTER-CLASS are considered left delimiters
+     for comments that extend until the end of the current line.
+
+`-k CHARACTER-CLASS'
+`--keep=CHARACTER-CLASS'
+     Consider the characters of CHARACTER-CLASS as valid constituents of
+     identifier names.  For example, if you are indexing assembly code
+     that prepends `.' to assembler directives, and prepends `%' to
+     register names, you can keep these characters in the tokens by
+     specifying `--lang-option=asm:--keep=.%', or `-l asm:"-k .%"'.
+
+`-i CHARACTER-CLASS'
+`--ignore=CHARACTER-CLASS'
+     Consider the characters of CHARACTER-CLASS as valid constituents of
+     identifier names, but discard all tokens containing these
+     characters.  For example, if you don't want to clutter your ID
+     database with assembler directives that begin with a leading `.'
+     or with assembler labels that contain `@', use
+     `--lang-option=asm:--ignore=.@', or `-l asm:"-i .@"'.
+
+`-u'
+`--strip-underscore'
+     Strip one leading underscore from identifiers.  This option is
+     useful if your compilation system prepends an underscore to
+     external symbols.  By stripping the underscore, you can
+     canonicalize such names and bring them into conformance with the
+     way they are expressed in the C language.  By default, the leading
+     underscore is retained.
 
-     idx [-SSCANARG] FILES...
-
-   `idx' accepts the same `-S' options as `mkid'.  *Note Scanner option
-formats::.
-
-   The name "idx" stands for "ID eXtract".  The name may change in
-future releases, since this is such an infrequently used program.
+`-n'
+`--no-cpp'
+     Do not recognize C preprocessor directives.  By default, such
+     lines are handled in the same way as they are by the C language
+     scanner.
 
 
-File: id-utils.info,  Node: mkid examples,  Prev: Scanners,  Up: mkid invocation
-
-`mkid' examples
-===============
-
-   The simplest example of `mkid' is something like:
-
-     mkid *.[chy]
-
-   This will build an ID database indexing identifiers and numbers in
-the all the `.c', `.h', and `.y' files in the current directory.
-Because `mkid' already knows how to scan files with those suffixes, no
-additional options are needed.
+File: id-utils.info,  Node: Text scanner,  Next: Defining scanners,  Prev: Assembler scanner,  Up: Extraction options
 
-   Here's a more complex example. Suppose you want to build a database
-indexing the contents of all the `man' pages, and furthur suppose that
-your system is using `gzip' (*note Top: (gzip)Top.) to store compressed
-`cat' versions of the `man' pages in the directory `/usr/catman'.  The
-`gzip' program creates files with a `.gz' suffix, so you must tell
-`mkid' how to scan `.gz' files.  Here are the commands to do the job:
+Text Scanner
+------------
 
-     cd /usr/catman
-     find . -name \*.gz -print | mkid '-Sman/text/gzip <%s' -S.gz=man -
+   The plain text scanner is intended for human-language documents, or
+as the scanner of last resort for files that have no scanner that is
+more specific.  It is customizable to the extent that character classes
+can be designated as token constituents or as token delimiters.  The
+default token constituents are the alpha-numerics; all other characters
+are considered token delimiters.
 
-Explanation:
+`-i CHARACTER-CLASS'
+`--include=CHARACTER-CLASS'
+     Include characters belonging to CHARACTER-CLASS in tokens.
 
-  1. We first `cd' to `/usr/catman' so the ID database will store the
-     correct relative filenames.
+`-x CHARACTER-CLASS'
+`--exclude=CHARACTER-CLASS'
+     Exclude characters belonging to CHARACTER-CLASS from tokens, i.e.,
+     treat them as token delimiters.
 
-  2. The `find' command prints the names of all `.gz' files under the
-     current directory.  *Note find invocation: (sh-utils)find
-     invocation.
-
-  3. This list is piped to `mkid'; the `-' option (at the end of the
-     line) tells `mkid' to read arguments (in this case, as is typical,
-     the list of filenames) from standard input.  *Note mkid options::.
+
+File: id-utils.info,  Node: Defining scanners,  Prev: Text scanner,  Up: Extraction options
 
-  4. The `-Sman/text/gzip ...' defines a new language `man' in terms of
-     the `gzip' program and `mkid''s existing text scanner.  *Note
-     Defining scanners with options::.
+Defining New Scanners in the Source Code
+----------------------------------------
 
-  5. The `-S.gz=man' tells `mkid' to treat all `.gz' files as this new
-     language `man'.  *Note Scanner option formats::.
+   To add a new scanner in source code, you should add a new section to
+the file `scanners.c'.  It might be easiest to clone one of the
+existing scanners and modify it as necessary.  For the hypothetical
+language FOO, you must define the functions `get_token_foo',
+`parse_args_foo', `help_me_foo', as well as the tables
+`long_options_foo' and `args_foo'.  If your scanner is modelled after
+one of the existing scanners, you'll also need a character-attribute
+table `ctype_foo'.
 
+   This is not a terribly difficult programming task, but it requires
+recompiling and installing the new version of `mkid' and `xtokid'.  You
+should use `xtokid' to test the operation of the new scanner.
 
-   As a further complication, `cat' pages typically contain underlining
-and backspace sequences, which will confuse `mkid'.  To handle this,
-the `gzip' command becomes a pipeline, like this:
+   Once these functions and tables are ready, add function prototypes
+and an entry to the `languages_0' table near the beginning of the
+file.
 
-     mkid '-Sman/text/gzip <%s | col -b' -S.gz=man -
+   Be warned that the existing scanners are built for speed, not
+elegance or readability.  You might wish to create a new scanner that's
+easier to read and understand if you don't feel that speed is so
+important.
 
 
-File: id-utils.info,  Node: Common query arguments,  Next: gid invocation,  Prev: mkid invocation,  Up: Top
-
-Common query arguments
-**********************
-
-   Certain options, and regular expression syntax, are shared by the ID
-query tools.  So we describe those things in the sections below, instead
-of repeating the description for each tool.
+File: id-utils.info,  Node: mkid invocation,  Next: lid invocation,  Prev: Common options,  Up: Top
+
+`mkid': Creating an ID Database
+*******************************
+
+   `mkid' builds an ID database.  It accepts the names of files and/or
+directories on the command line, selects files that have an enabled
+scanner, then extracts and stores tokens from those files.  The
+resulting ID database is architecture- and byte-order-independent so it
+can be shared among all systems.
+
+   The primary virtues of `mkid' are speed and high capacity.  The size
+of the source trees it can index is limited only by available system
+memory.  `mkid''s indexing algorithm is very space-efficient and
+exhibits excellent locality-of-reference, and so is capable of
+operating with a working-set size that is only half the size of its
+virtual address space.  A typical UNIX-like operating system with 16
+megabytes of system memory should be able to build an ID database
+covering approximately 12,000-14,000 source files totalling
+approximately 50-100 megabytes.  A 66 MHz 486 computer can build such a
+large ID database in approximately 10-15 minutes.
+
+   In a future release, `mkid' will be able to incrementally update an
+ID database much faster than it can build one from scratch.  Until this
+feature becomes available, it might be a good idea to schedule a `cron'
+job to regularly update large ID databases during off-hours.
+
+   `mkid' writes the ID file, therefore it accepts the `--output' (and
+`--file') options as described in *Note Writing options::.  `mkid'
+extracts tokens from source files, therefore it accepts the
+`--lang-map', `--include', `--exclude', and `--lang-option' options, as
+well as the language-specific scanner options, all of which are
+described in *Note Extraction options::.  `mkid' walks file trees,
+therefore it handles file and directory names on its command line and
+the `--prune' option as described in *Note Walker options::.
+
+   In addition, `mkid' accepts the following command-line options:
+
+`-s'
+`--statistics'
+     `mkid' reports statistics about resource usage at the end of its
+     run.
 
-* Menu:
-
-* Query options::               -f -r -c -ew -kg -n -doxa -m -F -u.
-* Patterns::                    Regular expression syntax for searches.
-* Examples: Query examples.     Some common uses.
+`-v'
+`--verbose'
+     `mkid' reports statistics about each file as it is scanned, and
+     about the resource usage of its indexing algorithm at regular
+     intervals.
 
 
-File: id-utils.info,  Node: Query options,  Next: Patterns,  Up: Common query arguments
-
-Query options
-=============
-
-   The ID query tools (*not* `mkid') share certain command line
-options.  Not all of these options are recognized by all programs, but
-if an option is used by more than one program, it is described below.
-The description of each program gives the options that program uses.
-
-`-fIDFILE'
-     Read the database from IDFILE, in the current directory or in any
-     directory above the current directory.  The default database name
-     is `ID'.  Searching parent directories lets you have a single ID
-     database at the root of a large source tree and then use the query
-     tools from anywhere within that tree.
-
-`-rDIRECTORY'
-     Find files relative to DIRECTORY, instead of the directory in
-     which the ID database was found.  This is useful if the ID
-     database was moved after its creation.
-
-`-c'
-     Equivalent to `-r`pwd`', i.e., find files relative to the current
-     directory, instead of the directory in which the ID database was
-     found.
-
-`-e'
-`-w'
-     `-e' forces pattern arguments to be treated as regular expressions,
-     and `-w' forces pattern arguments to be treated as constant
-     strings.  By default, the query tools guess whether a pattern is
-     regular expressions or constant strings by looking for special
-     characters.  *Note Patterns::.
-
-`-k'
-`-g'
-     `-k' suppresses use of shell brace notation in the output.  By
-     default, the query tools that generate lists of filenames attempt
-     to compress the lists using the usual shell brace notation, e.g.,
-     `{foo,bar}.c' to mean `foo.c' and `bar.c'.  (This is useful if you
-     use `ksh' or the original (not GNU) `sh' and want to feed the list
-     of names to another command, since those shells do not support
-     this brace notation; the name of the `-k' option comes from the
-     `k' in `ksh').
-
-     `-g' turns on use of brace notation; this is only needed if the
-     query tools were compiled with `-k' as the default behavior.
+File: id-utils.info,  Node: lid invocation,  Next: fid invocation,  Prev: mkid invocation,  Up: Top
+
+`lid': Querying an ID Database by Token
+***************************************
+
+   The `lid' program accepts PATTERNS on the command line which it
+matches against the tokens stored in an ID database.  The
+interpretation of a PATTERN is determined by the makeup of the PATTERN
+string itself, or can be overridden by command-line options.  If a
+PATTERN contains regular expression meta-characters, it is used to
+perform a regular-expression substring search.  If no such
+meta-characters are present, PATTERN is used to perform a literal word
+search.  (By default, all searches are sensitive to alphabetic case.)
+If no PATTERN is supplied on the command line, `lid' lists every entry
+in the ID database.
+
+   `lid' reads the ID database, therefore it accepts the `--file'
+option, and consults the `IDPATH' environment variable, as described in
+*Note Reading options::.  `lid' lists file names, therefore it accepts
+the `--separator' option, as described in *Note File listing options::.
+
+   In addition, `lid' accepts the following command-line options:
+
+`-i'
+`--ignore-case'
+     Ignore differences in alphabetic case between the PATTERN and
+     the tokens in the ID database.
+
+`-l'
+`--literal'
+     Match PATTERN as a literal string.  Use this option if PATTERN
+     contains regular-expression meta-characters, but you don't wish to
+     perform a regular-expression search.
+
+`-r'
+`--regexp'
+     Match PATTERN as an *extended* regular expression(1).  Use this
+     option if no regular-expression meta-characters are
+     present in PATTERN, but you wish to force a regular-expression
+     search (note: in this case, a *literal substring* search might be
+     faster).
 
-`-n'
-     Suppress the matching identifier before each list of filenames
-     that the query tools output by default. This is useful if you want
-     a list of just the names to feed to another command.
+`-w'
+`--word'
+     Match PATTERN using a word-delimited (non substring) search.  This
+     is the default for literal searches.
+
+`-s'
+`--substring'
+     Match PATTERN using a substring (non word-delimited) search.  This
+     is the default for regular expression searches.
+
+`-k STYLE'
+`--key=STYLE'
+     STYLE can be one of `token', `pattern' or `none'.  This option
+     controls how the subject of the query is presented.  This is best
+     illustrated by example:
+
+          $ lid --key=token '^dest.'
+          destaddr       libsys/memcpy.c
+          destination    libsys/regex.c
+          destlst        libsys/rx.c
+          destpos        libsys/rx.c
+          destset        libsys/rx.h libsys/rx.c
+          
+          $ lid --key=pattern '^dest.'
+          ^dest.         libsys/rx.h libsys/{memcpy,regex,rx}.c
+          
+          $ lid --key=none '^dest.'
+          libsys/rx.h libsys/{memcpy,regex,rx}.c
+
+     When `--key' is either `token' or `pattern', the first column of
+     output is a TOKEN or PATTERN, respectively.  When `--key' is
+     `none', neither of these is printed, and the file name list begins
+     immediately.  The default is `token'.
+
+`-R STYLE'
+`--result=STYLE'
+     STYLE can be one of `filenames', `grep', `edit' or `none'.  This
+     option controls how the value associated with the query's KEY is
+     presented.  When STYLE is `filenames', a list of file names is
+     printed (this is the default).  When STYLE is `grep', the lines
+     that match PATTERN are printed in the same format as `egrep -n'.
+     When STYLE is `edit', the file names are passed to an editor, and
+     if possible PATTERN is passed as an initial search string (*note
+     eid invocation::.).  When STYLE is `none', the file names are not
+     processed in any way.  This can be useful if you wish to see what
+     tokens match a PATTERN, but don't care about where they reside.
 
 `-d'
 `-o'
 `-x'
-`-a'
      These options may be used in any combination to specify the radix
      of numeric matches.  `-d' allows matching on decimal numbers, `-o'
-     on octal numbers, and `-x' on hexadecimal numbers.  The `-a'
-     option is equivalent to specifying all three; this is the default.
-     Any combination of these options may be used.
-
-`-m'
-     Merge multiple lines of output into a single line.  If your query
-     matches more than one identifier, the default is to generate a
-     separate line of output for each matching identifier.
-
-`-F-'
-`-FN'
-`-F-M'
-`-FN-M'
-     Show identifiers matching at least N and at most M times.  `-F-'
-     is equivalent to `-F1', i.e., find identifiers that appear only
-     once in the database.  (This is useful to locate identifiers that
-     are defined but never used, or used once and never defined.)
-
-`-uNUMBER'
-     List identifiers that conflict in the first NUMBER characters.
-     This could be in useful porting programs to brain-dead computers
-     that refuse to support long identifiers, but your best long term
-     option is to set such computers on fire.
+     on octal numbers, and `-x' on hexadecimal numbers.  Any
+     combination of these options may be used.  The default is to match
+     all three radixes.
+
+`-F RANGE'
+`--frequency=RANGE'
+     Match tokens whose occurrence count falls in RANGE.  RANGE may be
+     expressed as a single number N, or as a range N`..'M.  Either
+     limit of the range may be omitted (e.g., `..'M, or N`..').  If
+     the lower limit N is omitted, it defaults to `1'.  If the upper
+     limit is omitted, it defaults in the present implementation to
+     `65535', the maximum value of an unsigned 16-bit integer.
+
+     Particularly useful queries are `lid -F1', which helps locate
+     identifiers that are defined but never used, or are used but never
+     defined.  Similarly, `lid -F2' can help find functions that possess
+     a prototype declaration and a definition, but are never called.
+
+`-a NUMBER'
+`--ambiguous=NUMBER'
+     List identifiers (not numbers) that are ambiguous for the first
+     NUMBER characters.  This feature might be useful when porting
+     programs to ancient pea-brained compilers that don't support long
+     identifier names.  However, the best long-term option is to set
+     such systems on fire.
 
-
-File: id-utils.info,  Node: Patterns,  Next: Query examples,  Prev: Query options,  Up: Common query arguments
-
-Patterns
-========
-
-   "Patterns", also called "regular expressions", allow you to match
-many different identifiers in a single query.
-
-   The same regular expression syntax is recognized by all the query
-tools that handle regular expressions.  The exact syntax depends on how
-the ID tools were compiled, but the following constructs should always
-be supported:
-
-`.'
-     Match any single character.
-
-`[CHARS]'
-     Match any of the characters specified within the brackets.  You can
-     match any characters *except* the ones in brackets by typing `^'
-     as the first character.  A range of characters can be specified
-     using `-'.  For example, `[abc]' and `[a-c]' both match `a', `b',
-     or `c', and `[^abc]' matches anything *except* `a', `b', or `c'.
-
-`*'
-     Match the previous construct zero or more times.
-
-`^'
-`$'
-     `^' (`$') at the beginning (end) of a pattern anchors the match to
-     the first (last) character of the identifier.
-
-   The query programs use either the `regex'/`regcmp' or
-`re_comp'/`re_exec' functions, depending on which are available in the
-library on your system.  These do not always support the exact same
-regular expression syntax, so consult your local `man' pages to find
-out.
-
-
-File: id-utils.info,  Node: Query examples,  Prev: Patterns,  Up: Common query arguments
-
-Query examples
-==============
-
-   Here are some examples of the options described in the previous
-sections.
-
-   To restrict searches to exact matches, use `^...$'. For example:
-
-     prompt$ gid '^FILE$'
-     ansi2knr.c:144: {      FILE *in, *out;
-     ansi2knr.c:315:     FILE *out;
-     fid.c:38: FILE *id_FILE;
-     filenames.c:576: FILE *
-     ...
-
-   To show identifiers not unique in the first 16 characters:
-
-     prompt$ lid -u16
-     RE_CONTEXT_INDEP_ANCHORS regex.c
-     RE_CONTEXT_INDEP_OPS regex.c
-     RE_SYNTAX_POSIX_BASIC regex.c
-     RE_SYNTAX_POSIX_EXTENDED regex.c
-     ...
-
-   Numbers are searched for numerically rather than textually. For
-example:
-
-     prompt$ lid 0xff
-     0377           {lid,regex}.c
-     0xff           {bitops,fid,lid,mkid}.c
-     255            regex.c
-
-   On the other hand, you can restrict a numeric search to a particular
-radix if you want:
+* Menu:
 
-     laurie$ lid -x 0xff
-     0xff           {bitops,fid,lid,mkid}.c
+* lid aliases::                 Aliases for specialized lid queries
+* Emacs gid interface::         GNU Emacs query interface
+* eid invocation::              Invoking an editor on query results
 
-   Filenames in the output are always adjusted to be correct for the
-correct working directory. For example:
+   ---------- Footnotes ----------
 
-     prompt$ lid bdevsw
-     bdevsw         sys/conf.h cf/conf.c io/bio.c os/{fio,main,prf,sys3}.c
-     prompt$ cd io
-     prompt$ lid bdevsw
-     bdevsw         ../sys/conf.h ../cf/conf.c bio.c ../os/{fio,main,prf,sys3}.c
+   (1)  Extended regular expressions are the same as those accepted by
+`egrep'.
 
 
-File: id-utils.info,  Node: gid invocation,  Next: Looking up identifiers,  Prev: Common query arguments,  Up: Top
-
-`gid': Listing matching lines
-*****************************
-
-   Synopsis:
-
-     gid [-fFILE] [-uN] [-rDIR] [-doxasc] [PATTERN...]
-
-   `gid' finds the identifiers in the database that match the specified
-PATTERNs, then searches for all occurrences of those identifiers, in
-only the files containing matches.  In a large source tree, this saves
-an enormous amount of time (compared to searching every source file).
-
-   With no PATTERN arguments, `gid' prints every line of every source
-file.
+File: id-utils.info,  Node: lid aliases,  Next: Emacs gid interface,  Up: lid invocation
 
-   The name "gid" stands for "grep for identifiers", `grep' being the
-standard utility to search regular files.
+Aliases for Specialized `lid' Queries
+=====================================
 
-   *Note Common query arguments::, for a description of the command-line
-options and PATTERN arguments.
+   Historically, the ID utilities have provided several query interfaces
+which are specializations of `lid' (*note lid invocation::.).
 
-   `gid' uses the standard GNU output format for identifying source
-lines:
-
-     FILENAME:LINENUM: TEXT
-
-   Here is an example:
-
-     prompt$ gid FILE
-     ansi2knr.c:144: {      FILE *in, *out;
-     ansi2knr.c:315:     FILE *out;
-     fid.c:38: FILE *id_FILE;
-     ...
+`gid'
+     (alias for `lid -R grep') lists all lines containing the requested
+     pattern.
 
-* Menu:
+`eid'
+     (alias for `lid -R edit') invokes an editor on all files
+     containing the requested pattern, and optionally initiates a text
+     search for that pattern.
 
-* GNU Emacs gid interface::     Using next-error with gid.
+`aid'
+     (alias for `lid -ils') treats the requested pattern as a
+     case-insensitive literal substring.
 
 
-File: id-utils.info,  Node: GNU Emacs gid interface,  Up: gid invocation
+File: id-utils.info,  Node: Emacs gid interface,  Next: eid invocation,  Prev: lid aliases,  Up: lid invocation
 
-GNU Emacs `gid' interface
+GNU Emacs query interface
 =========================
 
-   The `mkid' source distribution comes with a file `gid.el', which
-defines a GNU Emacs interface to `gid'.  To install it, put `gid.el'
-somewhere that Emacs will find it (i.e., in your `load-path') and put
+   The `id-utils' source distribution comes with a file `id-utils.el',
+which defines a GNU Emacs interface to `gid'.  To install it, put
+`id-utils.el' somewhere that Emacs will find it (i.e., in your
+`load-path') and put
 
      (autoload 'gid "gid" nil t)
 
@@ -817,108 +779,23 @@ the places the identifier is found (*note Compilation:
 (emacs)Compilation.).
 
 
-File: id-utils.info,  Node: Looking up identifiers,  Next: pid invocation,  Prev: gid invocation,  Up: Top
-
-Looking up identifiers
-**********************
-
-   These commands look up identifiers in the ID database and operate on
-the files containing matches.
-
-* Menu:
-
-* lid invocation::              Matching patterns.
-* aid invocation::              Matching strings.
-* eid invocation::              Invoking an editor on matches.
-* fid invocation::              Listing a file's identifiers.
-
-
-File: id-utils.info,  Node: lid invocation,  Next: aid invocation,  Up: Looking up identifiers
-
-`lid': Matching patterns
-========================
-
-   Synopsis:
-
-     lid [-fFILE] [-uN] [-rDIR] [-mewdoxaskgnc] PATTERN...
-
-   `lid' searches the database for identifiers matching the given
-PATTERN arguments and prints the names of the files that match each
-PATTERN.  With no PATTERNs, `lid' lists every entry in the database.
-
-   The name "lid" stands for "lookup identifier".
-
-   *Note Common query arguments::, for a description of the command-line
-options and PATTERN arguments.
+File: id-utils.info,  Node: eid invocation,  Prev: Emacs gid interface,  Up: lid invocation
 
-   By default, each line of output consists of an identifier and all the
-files containing that identifier.
+`eid': Invoking an Editor on Query Results
+==========================================
 
-   Here is an example showing a search for a single identifier (omitting
-some output to keep lines short):
-
-     prompt$ lid FILE
-     FILE           extern.h {fid,gets0,getsFF,idx,init,lid,mkid,...}.c
-
-   This example shows a regular expression search:
-
-     prompt$ lid 'FILE$'
-     AF_FILE        mkid.c
-     AF_IDFILE      mkid.c
-     FILE           extern.h {fid,gets0,getsFF,idx,init,lid,mkid,...}.c
-     IDFILE         id.h {fid,lid,mkid}.c
-     IdFILE         {fid,lid}.c
-     ...
-
-As you can see, when a regular expression is used, it is possible to
-get more than one line of output.  To merge multiple lines into one,
-use `-m':
-
-     prompt$ lid -m ^get
-     ^get           extern.h {bitsvec,fid,gets0,getsFF,getscan,idx,lid,...}.c
-
-
-File: id-utils.info,  Node: aid invocation,  Next: eid invocation,  Prev: lid invocation,  Up: Looking up identifiers
-
-`aid': Matching strings
-=======================
-
-   Synopsis:
-
-     aid [-fFILE] [-uN] [-rDIR] [-mewdoxaskgnc] STRING...
-
-   `aid' searches the database for identifiers containing the given
-STRING arguments.  The search is case-insensitive.
-
-   The name "aid" stands for "apropos identifier", `apropros' being a
-command that does a similar search of the `whatis' database of `man'
-descriptions.
-
-   For example, `aid get' matches the identifiers `fgets', `GETLINE',
-and `getchar'.
-
-   The default output format is the same as `lid'; see the previous
-section.
-
-   *Note Common query arguments::, for a description of the command-line
-options and PATTERN arguments.
-
-
-File: id-utils.info,  Node: eid invocation,  Next: fid invocation,  Prev: aid invocation,  Up: Looking up identifiers
-
-`eid': Invoking an editor on matches
-====================================
-
-   Synopsis:
-
-     eid [-fFILE] [-uN] [-rDIR] [-doxasc] [PATTERN]...
-
-   `eid' runs the usual search (*note lid invocation::.) on the given
-arguments, shows you the output, and then asks:
+   `lid -R edit' is an editing interface for the ID utilities that is
+most commonly used with `vi'.  Emacs users should use the interface
+defined in `id-utils.el' (*note Emacs gid interface::.).  The ID
+utilities include an alias called `eid', and for the sake of brevity,
+we'll use this alias for the remainder of this section.  `eid' performs
+a `lid'-style query, then asks if you wish to edit the files.  If your
+query yields more than one line of output, you will be prompted after
+each line.  This is the prompt you'll see:
 
      Edit? [y1-9^S/nq]
 
-You can respond with:
+You may respond with:
 
 `y'
      Edit all files listed.
@@ -926,46 +803,17 @@ You can respond with:
 `1...9'
      Edit all files starting at the N + 1'st file.
 
-`/STRING or `CTRL-S'STRING'
-     Edit all files whose name contains STRING.
+`/REGEXP or `CTRL-S'REGEXP'
+     Search into the file list, and begin editing with the first file
+     name that matches the regular expression REGEXP.
 
 `n'
-     Go on to the next PATTERN, i.e., edit no files for this one.
+     Don't edit any files.  If another line of query output is pending,
+     advance to that line, for which another `Edit?' prompt will appear.
 
 `q'
-     Quit `eid'.
-
-   `eid' invokes an editor once per PATTERN; all the specified files
-are given to the editor for you to edit simultaneously.
-
-   `eid' invokes the editor defined by the `EDITOR' environment
-variable.  If the editor can accept an initial search argument on the
-command line, `eid' moves automatically to the location of the match,
-via the environment variables below.
-
-   *Note Common query arguments::, for a description of the command-line
-options and PATTERN arguments.
-
-   Here are the environment variables relevant to `eid':
-
-`EDITOR'
-     The name of the editor program to invoke.
-
-`EIDARG'
-     The argument to pass to the editor to search for the matching
-     identifier.  For `vi', this should be `+/%s/''.
-
-`EIDLDEL'
-     A regular expression to force a match at the beginning of a word
-     ("left delimiter).  `eid' inserts this in front of the matching
-     identifier when composing the search argument.  For `vi', this
-     should be `\<'.
-
-`EIDRDEL'
-     The end-of-word regular expression.  For `vi', this should be `\>'.
-
-   For Emacs users, the interface in `gid.el' is probably preferable to
-`eid'.  *Note GNU Emacs gid interface::.
+     Quit--don't edit any files, and don't process any more lines of
+     query output.
 
    Here is an example:
 
@@ -977,270 +825,352 @@ options and PATTERN arguments.
 
 This will start editing at `getopt'.c.
 
-
-File: id-utils.info,  Node: fid invocation,  Prev: eid invocation,  Up: Looking up identifiers
-
-`fid': Listing a file's identifiers
-===================================
+   `eid' invokes the editor defined by the environment variable
+`VISUAL'.  If `VISUAL' is undefined, it uses the environment variable
+`EDITOR' instead.  If `EDITOR' is undefined, it defaults to `vi'.  It
+is possible for `eid' to pass the editor an initial search pattern so
+that your cursor will immediately alight on the token of interest.
+This feature is controlled by the following environment variables:
 
-   `fid' lists the identifiers found in a given file.  Synopsis:
-
-     fid [-fDBFILE] FILE1 [FILE2]
-
-`-fDBFILE'
-     Read the database from DBFILE instead of `ID'.
-
-`FILE1'
-     List all the identifiers contained in FILE1.
+`EIDARG'
+     A printf(3) format string for the editor argument to search for the
+     matching token.  For `vi', this should be `+/%s/'.
 
-`FILE2'
-     With a second file argument, list only the identifiers both files
-     have in common.
+`EIDLDEL'
+     The regular-expression meta-character(s) for delimiting the
+     beginning of a word (the ``eid' Left DELimiter').  `eid' inserts
+     this in front of the matching token when a word-search is desired.
+     For `vi', this should be `\<'.
 
-   The output is simply one identifier (or number) per line.
+`EIDRDEL'
+     The regular-expression meta-character(s) for delimiting the end of
+     a word (the ``eid' Right DELimiter').  `eid' inserts this at the
+     end of the matching token when a word-search is desired.  For
+     `vi', this should be `\>'.
 
 
-File: id-utils.info,  Node: pid invocation,  Next: Index,  Prev: Looking up identifiers,  Up: Top
+File: id-utils.info,  Node: fid invocation,  Next: fnid invocation,  Prev: lid invocation,  Up: Top
 
-`pid': Looking up filenames
-***************************
+`fid': Listing a file's tokens
+******************************
 
-   `pid' matches the filenames stored in the ID database, rather than
-the identifiers.  Synopsis:
+   `fid' prints the tokens found in a given file.  If two file names
+are passed on the command line, `fid' prints the tokens that are common
+to both files (i.e., the *set intersection* of the two token sets).
 
-     pid [-fDBFILE] [-rDIR] [-ebkgnc] WILDCARD...
+   `fid' reads the ID database, therefore it accepts the `--file'
+option, and consults the `IDPATH' environment variable, as described in
+*Note Reading options::.
 
-   By default, the WILDCARD patterns are treated as shell globbing
-patterns, rather than the regular expressions the other utilities
-accept.  See the section below for details.
+   If the standard output is attached to a terminal, the printed tokens
+are separated by spaces.  Otherwise, the tokens are printed one per
+line.
 
-   Besides the standard options given in the synopsis (*note Query
-options::.), `pid' accepts the following:
+
+File: id-utils.info,  Node: fnid invocation,  Next: xtokid invocation,  Prev: fid invocation,  Up: Top
+
+`fnid': Looking up filenames
+****************************
 
-`-e'
-     Do the usual regular expression matching (*note Patterns::.),
-     instead of shell wildcard matching.
+   `fnid' queries the list of file names stored in the ID database.  It
+accepts shell *wildcard* patterns on the command line.  If no pattern
+is supplied, `*' is implied.  `fnid' prints the file names that match
+the given patterns.
 
-`-b'
-     Match the basenames of the files in the database.  For example,
-     `pid -b foo' will match the stored filename `dir/foo', but not
-     `foo/file'.
+   `fnid' prints file names, and as such accepts the `--separator'
+option as described in *Note File listing options::.
 
    For example, the command:
 
-     pid \*.c
+     fnid \*.c
 
 lists all the `.c' files in the database.  (The `\' here protects the
 `*' from being expanded by the shell.)
 
-* Menu:
+
+File: id-utils.info,  Node: xtokid invocation,  Next: Past and Future,  Prev: fnid invocation,  Up: Top
+
+`xtokid': Testing Language Scanners
+***********************************
+
+   `xtokid' accepts the names of files and/or directories on the
+command line, then extracts and prints a stream of tokens from those
+files for which it has a valid, enabled scanner.  This is useful
+primarily for debugging new `mkid' scanners (*note Defining
+scanners::.).
+
+   `xtokid' extracts tokens from source files, therefore it accepts the
+`--lang-map', `--include', `--exclude', and `--lang-option' options, as
+well as the language-specific scanner options, all of which are
+described in *Note Extraction options::.  `xtokid' walks file trees,
+therefore it handles file and directory names on its command line and
+the `--prune' option as described in *Note Walker options::.
 
-* Wildcard patterns::           Shell-style globbing patterns.
+   The name `xtokid' indicates that it is the "eXtract TOKens ID
+utility".
 
 
-File: id-utils.info,  Node: Wildcard patterns,  Up: pid invocation
+File: id-utils.info,  Node: Past and Future,  Next: Index,  Prev: xtokid invocation,  Up: Top
 
-Wildcard patterns
-=================
+Past and Future
+***************
 
-   `pid' does simplified shell wildcard matching (unless the `-e'
-option is specified), rather than the regular expression matching done
-by the other utilities.  Here is a description of wildcard matching,
-also called "globbing":
+   Greg McGary conceived of the ideas behind the ID utilities when he
+began working on the Unix kernel in 1984.  He needed a navigation tool
+to help him find his way around the expansive, unfamiliar landscape.
+The first `id-utils'-like tools were shell scripts, and produced an
+ASCII database that looks much like the output of `lid ".*"'.  It took
+over an hour on a VAX 11/750 to build a database for a 4.1BSD derived
+kernel.  The first version of `lid' used the UNIX system utility
+`look', modified to handle very long lines.
 
-   * `*' matches zero or more characters.
+   In 1986, Greg rewrote the shell scripts in C to improve performance.
+Build times for the ID file were shortened by an order of magnitude.
+The ID utilities were first posted to `comp.sources.unix' in September
+1987 under the name `id'.
+
+   Over the next few years, several versions diverged from the original
+source.  Tom Horsley at Harris Computer Systems Division stepped forward
+to take over maintenance and integrated some of the fixes from divergent
+versions.  A first release of the renamed `mkid' version 2 was posted
+to `alt.sources' near the end of 1990.  At that time, Tom wrote a
+Texinfo manual with the encouragement of the net community.  (Tom
+especially thanks Doug Scofield and Bill Leonard whom he dragooned into
+helping poorfraed and edit--they found several problems in the initial
+version.)  Karl Berry revamped the manual for Texinfo style, indexing,
+and organization in 1995.
+
+   In January 1995, Greg McGary reemerged as the primary maintainer and
+launched development of `mkid' version 3, whose primary new feature is
+an efficient algorithm for building databases that is linear in both
+time and space over the size of the input text.  (The old algorithm was
+quadratic in space so it was incapable of handling very large source
+trees.)  For the first time, the code was released under the GNU
+General Public License.
 
-   * `?' matches any single character.
+   In June 1996, the package was renamed again to `id-utils' and was
+released for the first time under FSF copyright as part of the GNU
+system.  All programs had their command-line arguments completely
+revised.  The `mkid' and `xtokid' programs also gained a file-tree
+walker, so that directory names can be passed on the command line
+instead of the names of every individual file.  Greg reorganized and
+rewrote most of the Texinfo manual to reflect these changes.
 
-   * `\' forces the next character to be taken literally.
+   Future releases of `id-utils' might include:
 
-   * `[CHARS]' matches any single character listed in CHARS.
+     an optional coupling with GNU `grep', so that `grep' can use an ID
+     database for hints
 
-   * `[!CHARS]' matches any character *not* listed in CHARS.
+     a `cscope' work-alike query interface
 
-   Most shells treat `/' and leading `.' characters specially. `pid'
-does not do this.  It simply matches the filename in the database
-against the wildcard pattern.
+     incremental update of the ID database.
 
 
-File: id-utils.info,  Node: Index,  Prev: pid invocation,  Up: Top
+File: id-utils.info,  Node: Index,  Prev: Past and Future,  Up: Top
 
 Index
 *****
 
 * Menu:
 
-* $ in identifiers:                     C scanner.
-* * in globbing:                        Wildcard patterns.
-* *scratch* Emacs buffer:               GNU Emacs gid interface.
-* -:                                    mkid options.
-* -a:                                   Query options.
-* -aARGFILE:                            mkid options.
-* -b:                                   pid invocation.
-* -c:                                   Query options.
-* -d:                                   Query options.
-* -e <1>:                               pid invocation.
-* -e:                                   Query options.
-* -F:                                   Query options.
-* -fIDFILE:                             Query options.
-* -g:                                   Query options.
-* -k:                                   Query options.
-* -m:                                   Query options.
-* -n:                                   Query options.
-* -o:                                   Query options.
-* -rDIRECTORY:                          Query options.
-* -S scanner option:                    Scanner option formats.
-* -S.:                                  Scanner option formats.
-* -S?:                                  Scanner option formats.
-* -SSCANARG:                            mkid options.
-* -Sasm+a:                              Assembler scanner.
-* -Sasm+C:                              Assembler scanner.
-* -Sasm+p:                              Assembler scanner.
-* -Sasm+u:                              Assembler scanner.
-* -Sasm-c:                              Assembler scanner.
-* -Sc+u:                                C scanner.
-* -Sc-s:                                C scanner.
-* -Sc-u:                                C scanner.
-* -Stext+a:                             Plain text scanner.
-* -Stext+s:                             Plain text scanner.
-* -Stext-a:                             Plain text scanner.
-* -u:                                   Query options.
-* -v:                                   mkid options.
-* -w:                                   Query options.
-* -x:                                   Query options.
-* .[chly] files, scanning:              C scanner.
-* .default scanner:                     Scanners.
-* ? in globbing:                        Wildcard patterns.
-* [!...] in globbing:                   Wildcard patterns.
-* [...] in globbing:                    Wildcard patterns.
-* \ in globbing:                        Wildcard patterns.
-* aid:                                  aid invocation.
+* *compilation* Emacs buffer:           Emacs gid interface.
+* -ambiguous:                           lid invocation.
+* -comment:                             Assembler scanner.
+* -exclude <1>:                         Text scanner.
+* -exclude:                             Extraction options.
+* -file <1>:                            Writing options.
+* -file:                                Reading options.
+* -frequency:                           lid invocation.
+* -help:                                Universal options.
+* -ignore <1>:                          Assembler scanner.
+* -ignore:                              C/C++ scanner.
+* -ignore-case:                         lid invocation.
+* -include <1>:                         Text scanner.
+* -include:                             Extraction options.
+* -keep <1>:                            Assembler scanner.
+* -keep:                                C/C++ scanner.
+* -lang-map:                            Extraction options.
+* -lang-option:                         Extraction options.
+* -lang-option=asm:-comment:            Assembler scanner.
+* -lang-option=asm:-ignore:             Assembler scanner.
+* -lang-option=asm:-keep:               Assembler scanner.
+* -lang-option=asm:-no-cpp:             Assembler scanner.
+* -lang-option=asm:-strip-underscore:   Assembler scanner.
+* -lang-option=asm:-c:                  Assembler scanner.
+* -lang-option=asm:-i:                  Assembler scanner.
+* -lang-option=asm:-k:                  Assembler scanner.
+* -lang-option=asm:-n:                  Assembler scanner.
+* -lang-option=asm:-u:                  Assembler scanner.
+* -lang-option=C:-ignore:               C/C++ scanner.
+* -lang-option=C:-keep:                 C/C++ scanner.
+* -lang-option=C:-strip-underscore:     C/C++ scanner.
+* -lang-option=C:-i:                    C/C++ scanner.
+* -lang-option=C:-k:                    C/C++ scanner.
+* -lang-option=C:-u:                    C/C++ scanner.
+* -lang-option=text:-exclude:           Text scanner.
+* -lang-option=text:-include:           Text scanner.
+* -lang-option=text:-i:                 Text scanner.
+* -lang-option=text:-x:                 Text scanner.
+* -literal:                             lid invocation.
+* -no-cpp:                              Assembler scanner.
+* -output:                              Writing options.
+* -prune:                               Walker options.
+* -regexp:                              lid invocation.
+* -result:                              lid invocation.
+* -separator:                           File listing options.
+* -statistics:                          mkid invocation.
+* -strip-underscore <1>:                Assembler scanner.
+* -strip-underscore:                    C/C++ scanner.
+* -substring:                           lid invocation.
+* -verbose:                             mkid invocation.
+* -version:                             Universal options.
+* -word:                                lid invocation.
+* -a:                                   lid invocation.
+* -c:                                   Assembler scanner.
+* -d:                                   lid invocation.
+* -F:                                   lid invocation.
+* -f <1>:                               Writing options.
+* -f:                                   Reading options.
+* -i <1>:                               lid invocation.
+* -i <1>:                               Text scanner.
+* -i <1>:                               Assembler scanner.
+* -i <1>:                               C/C++ scanner.
+* -i:                                   Extraction options.
+* -k <1>:                               lid invocation.
+* -k <1>:                               Assembler scanner.
+* -k:                                   C/C++ scanner.
+* -l <1>:                               lid invocation.
+* -l:                                   Extraction options.
+* -l asm:-comment:                      Assembler scanner.
+* -l asm:-ignore:                       Assembler scanner.
+* -l asm:-keep:                         Assembler scanner.
+* -l asm:-no-cpp:                       Assembler scanner.
+* -l asm:-strip-underscore:             Assembler scanner.
+* -l asm:-c:                            Assembler scanner.
+* -l asm:-i:                            Assembler scanner.
+* -l asm:-k:                            Assembler scanner.
+* -l asm:-n:                            Assembler scanner.
+* -l asm:-u:                            Assembler scanner.
+* -l C:-ignore:                         C/C++ scanner.
+* -l C:-keep:                           C/C++ scanner.
+* -l C:-strip-underscore:               C/C++ scanner.
+* -l C:-i:                              C/C++ scanner.
+* -l C:-k:                              C/C++ scanner.
+* -l C:-u:                              C/C++ scanner.
+* -l text:-exclude:                     Text scanner.
+* -l text:-include:                     Text scanner.
+* -l text:-i:                           Text scanner.
+* -l text:-x:                           Text scanner.
+* -m:                                   Extraction options.
+* -n:                                   Assembler scanner.
+* -o <1>:                               lid invocation.
+* -o:                                   Writing options.
+* -p:                                   Walker options.
+* -r:                                   lid invocation.
+* -s <1>:                               lid invocation.
+* -s:                                   mkid invocation.
+* -S:                                   File listing options.
+* -u <1>:                               Assembler scanner.
+* -u:                                   C/C++ scanner.
+* -v:                                   mkid invocation.
+* -w:                                   lid invocation.
+* -x <1>:                               lid invocation.
+* -x <1>:                               Text scanner.
+* -x:                                   Extraction options.
+* mkid progress:                        mkid invocation.
+* alphabetic case, ignoring differences in: lid invocation.
+* ambiguous identifier names, finding:  lid invocation.
 * architecture-independence:            mkid invocation.
 * assembler scanner:                    Assembler scanner.
-* basename match:                       pid invocation.
+* assembly language scanner:            Assembler scanner.
 * beginning-of-word editor argument:    eid invocation.
-* Berry, Karl:                          Past and future.
-* brace notation in filename lists:     Query options.
+* Berry, Karl:                          Past and Future.
 * bugs, reporting:                      Introduction.
-* C scanner, predefined:                C scanner.
-* case-insensitive searching:           aid invocation.
-* comments in assembler:                Assembler scanner.
-* common query arguments:               Common query arguments.
-* common query options:                 Query options.
-* compressed files, building ID from:   mkid examples.
-* conflicting identifiers, finding:     Query options.
-* constant strings, forcing evaluation as: Query options.
+* C scanner, predefined:                C/C++ scanner.
+* common command-line options:          Common options.
 * creating databases:                   mkid invocation.
 * cron:                                 mkid invocation.
-* cscope:                               Past and future.
-* database name, specifying:            Query options.
+* cscope:                               Past and Future.
 * databases, creating:                  mkid invocation.
-* EDITOR:                               eid invocation.
 * eid:                                  eid invocation.
 * EIDARG:                               eid invocation.
 * EIDLDEL:                              eid invocation.
 * EIDRDEL:                              eid invocation.
-* Emacs interface to gid:               GNU Emacs gid interface.
+* Emacs interface to gid:               Emacs gid interface.
 * end-of-word editor argument:          eid invocation.
-* examples of mkid:                     mkid examples.
-* examples, queries:                    Query examples.
+* exclude languages:                    Extraction options.
 * fid:                                  fid invocation.
-* filenames, matching:                  pid invocation.
-* future:                               Past and future.
-* gid Emacs function:                   GNU Emacs gid interface.
-* gid.el interface to Emacs:            GNU Emacs gid interface.
-* globbing patterns:                    Wildcard patterns.
-* grep:                                 Past and future.
-* history:                              Past and future.
-* Horsley, Tom:                         Past and future.
+* file name separator:                  File listing options.
+* file tree pruning:                    Walker options.
+* filenames, matching:                  fnid invocation.
+* fnid:                                 fnid invocation.
+* future:                               Past and Future.
+* gid Emacs function:                   Emacs gid interface.
+* grep:                                 Past and Future.
+* help, online:                         Universal options.
+* history:                              Past and Future.
+* Horsley, Tom:                         Past and Future.
+* ID database file name <1>:            Writing options.
+* ID database file name:                Reading options.
 * ID database, definition of:           Introduction.
 * ID file format:                       mkid invocation.
-* identifiers in a file:                fid invocation.
+* id-utils.el interface to Emacs:       Emacs gid interface.
+* ignoring differences in alphabetic case: lid invocation.
+* include languages:                    Extraction options.
 * introduction:                         Introduction.
-* languages_0:                          Defining scanners in source code.
+* language map file:                    Extraction options.
+* language-specific option:             Extraction options.
+* languages_0:                          Defining scanners.
 * left delimiter editor argument:       eid invocation.
-* Leonard, Bill:                        Past and future.
-* lid:                                  lid invocation.
-* load-path:                            GNU Emacs gid interface.
-* look and mkid 1:                      Past and future.
-* man pages, compressed:                mkid examples.
-* matching filenames:                   pid invocation.
-* McGary, Greg:                         Past and future.
-* mkid:                                 mkid invocation.
-* mkid options:                         mkid options.
-* multiple lines, merging:              Query options.
-* numbers, in databases:                mkid invocation.
-* numeric matches, specifying radix of: Query options.
-* numeric searches:                     Query examples.
-* options for mkid:                     mkid options.
+* Leonard, Bill:                        Past and Future.
+* load-path:                            Emacs gid interface.
+* look and mkid 1:                      Past and Future.
+* matching filenames:                   fnid invocation.
+* McGary, Greg:                         Past and Future.
+* numeric matches, specifying radix of: lid invocation.
 * overview:                             Introduction.
-* parent directories, searched for ID:  Query options.
-* patterns:                             Patterns.
-* pid:                                  pid invocation.
-* plain text scanner:                   Plain text scanner.
-* predefined scanners:                  Predefined scanners.
-* query examples:                       Query examples.
-* query options, common:                Query options.
-* radix of numeric matches, specifying: Query options.
-* regular expression syntax:            Patterns.
-* regular expressions, forcing evaluation as: Query options.
+* radix of numeric matches, specifying: lid invocation.
 * right delimiter editor argument:      eid invocation.
-* scanner options:                      Scanner option formats.
-* scanners:                             Scanners.
-* scanners, adding new:                 Defining new scanners.
-* scanners, defining in source code:    Defining scanners in source code.
-* scanners, defining with options:      Defining scanners with options.
-* scanners, predefined:                 Predefined scanners.
-* scanners.c:                           Defining scanners in source code.
-* Scofield, Doug:                       Past and future.
-* search for identifier, initial:       eid invocation.
+* scanners:                             Extraction options.
+* scanners, defining in source code:    Defining scanners.
+* scanners.c:                           Defining scanners.
+* Scofield, Doug:                       Past and Future.
+* search for token, initial:            eid invocation.
 * sharing ID files:                     mkid invocation.
-* shell brace notation in filename lists: Query options.
-* shell wildcard patterns:              Wildcard patterns.
-* single matches, showing:              Query options.
-* squeezing characters from identifiers: Plain text scanner.
-* statistics:                           mkid options.
-* string searching:                     aid invocation.
-* strings, forcing evaluation as:       Query options.
-* suffixes of filenames:                Scanners.
-* suffixes_0:                           Defining scanners in source code.
-* suppressing matching identifier:      Query options.
-* Texinfo, scanning example of:         Defining scanners with options.
-* whatis:                               aid invocation.
-* wildcard wildcard patterns:           Wildcard patterns.
+* single matches, showing:              lid invocation.
+* statistics:                           mkid invocation.
+* text scanner:                         Text scanner.
+* tokens common to two files:           fid invocation.
+* tokens in a file:                     fid invocation.
+* version number, finding:              Universal options.
 
 
 
 Tag Table:
-Node: Top1540
-Node: Introduction2150
-Node: Past and future4367
-Node: mkid invocation6671
-Node: mkid options8241
-Node: Scanners9659
-Node: Scanner option formats11154
-Node: Predefined scanners12330
-Node: C scanner13033
-Node: Plain text scanner13788
-Node: Assembler scanner14699
-Node: Defining new scanners15828
-Node: Defining scanners in source code16451
-Node: Defining scanners with options17296
-Node: idx invocation18750
-Node: mkid examples19316
-Node: Common query arguments21295
-Node: Query options21843
-Node: Patterns25238
-Node: Query examples26578
-Node: gid invocation27965
-Node: GNU Emacs gid interface29132
-Node: Looking up identifiers29996
-Node: lid invocation30492
-Node: aid invocation31926
-Node: eid invocation32712
-Node: fid invocation34854
-Node: pid invocation35412
-Node: Wildcard patterns36510
-Node: Index37280
+Node: Top1298
+Node: Introduction2051
+Node: Quick start4580
+Node: Common options5505
+Node: Universal options6303
+Node: Reading options6628
+Node: Writing options7745
+Node: Walker options8241
+Node: File listing options9080
+Node: Extraction options10171
+Node: Language map12721
+Node: C/C++ scanner14927
+Node: Assembler scanner16542
+Node: Text scanner18638
+Node: Defining scanners19446
+Node: mkid invocation20668
+Node: lid invocation22949
+Node: lid aliases28334
+Node: Emacs gid interface29012
+Node: eid invocation29929
+Node: fid invocation32513
+Node: fnid invocation33200
+Node: xtokid invocation33875
+Node: Past and Future34814
+Node: Index37506
 
 End Tag Table
diff --git a/doc/id-utils.texi b/doc/id-utils.texi
index 9cc7dd4..bda4734 100644
--- a/doc/id-utils.texi
+++ b/doc/id-utils.texi
@@ -6,7 +6,7 @@
 
 @include version.texi
 
-@c Define new indices for filenames, commands and options.
+@c Define new indices for file names, commands and options.
 @defcodeindex fl
 @defcodeindex cm
 @defcodeindex op
@@ -22,23 +22,20 @@
 @ifinfo
 @format
 START-INFO-DIR-ENTRY
-* ID database: (id).            Identifier database utilities.
-* aid: (id)aid invocation.                      Matching strings.
-* eid: (id)eid invocation.                      Invoking an editor on matches.
-* fid: (id)fid invocation.                      Listing a file's identifiers.
-* gid: (id)gid invocation.                      Listing all matching lines.
-* idx: (id)idx invocation.                      Testing mkid scanners.
-* lid: (id)lid invocation.                      Matching patterns.
-* mkid: (id)mkid invocation.                    Creating an ID database.
-* pid: (id)pid invocation.                      Looking up filenames.
+* ID database: (id-utils).              Identifier database utilities.
+* mkid: (id-utils)mkid invocation.      Creating an ID database.
+* lid: (id-utils)lid invocation.        Matching words and patterns.
+* fid: (id-utils)fid invocation.        Listing a file's tokens.
+* fnid: (id-utils)fnid invocation.      Looking up file names.
+* xtokid: (id-utils)xtokid invocation.  Testing mkid scanners.
 END-INFO-DIR-ENTRY
 @end format
 @end ifinfo
 
 @ifinfo
-This file documents the @code{mkid} identifier database utilities.
+This file documents the @file{id-utils} database utilities.
 
-Copyright (C) 1991, 1995 Tom Horsley.
+Copyright (C) 1996 Free Software Foundation, Inc.
 
 Permission is granted to make and distribute verbatim copies of
 this manual provided the copyright notice and this permission notice
@@ -63,970 +60,955 @@ except that this permission notice may be stated in a translation.
 
 @titlepage
 @title ID database utilities
-@subtitle Programs for simple, fast, high-capacity cross-referencing 
+@subtitle Programs for simple, fast, high-capacity cross-referencing
 @subtitle for version @value{VERSION}
-@author Tom Horsley
 @author Greg McGary
-
-@page
-@vskip 0pt plus 1filll
-Copyright @copyright{} 1991, 1995 Tom Horsley.
-
-Permission is granted to make and distribute verbatim copies of
-this manual provided the copyright notice and this permission notice
-are preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided that the entire
-resulting derived work is distributed under the terms of a permission
-notice identical to this one.
-
-Permission is granted to copy and distribute translations of this manual
-into another language, under the above conditions for modified versions,
-except that this permission notice may be stated in a translation.
+@author Tom Horsley
 @end titlepage
 
-
 @ifinfo
+@c ************* gkm *********************************************************
 @node Top
-@top ID database utilities
+@top ID utilities
 
-This manual documents version @value{VERSION} of the ID database
-utilities.
+This manual documents version @value{VERSION} of the ID utilities.
 
 @menu
-* Introduction::                Overview of the tools, and authors.
+* Introduction::                Overview of the tools with tutorial.
+* Quick start::                 Quick start procedure.
+* Common options::              Common command-line options.
 * mkid invocation::             Creating an ID database.
-* Common query arguments::      Common lookup options and search patterns.
-* gid invocation::              Listing all matching lines.
-* Looking up identifiers::      lid, aid, eid, and fid.
-* pid invocation::              Looking up filenames.
+* lid invocation::              Querying an ID database by token.
+* fid invocation::              Listing a file's tokens.
+* fnid invocation::             Looking up file names.
+* xtokid invocation::           Testing language scanners.
+* Past and Future::             History and future directions.
 * Index::                       General index.
 @end menu
 @end ifinfo
 
-
+@c ************* gkm *********************************************************
 @node Introduction
 @chapter Introduction
 
 @cindex overview
 @cindex introduction
-
 @cindex ID database, definition of
-An @dfn{ID database} is a binary file containing a list of filenames, a
-list of identifiers, and a matrix indicating which identifiers appear in
-which files.  With this database and some tools to manipulate it
-(described in this manual), a host of tasks become simpler and faster.
-For example, you can list all files containing a particular
-@code{#include} throughout a huge source hierarchy, search for all the
-memos containing references to a project, or automatically invoke an
-editor on all files containing references to some function.  Anyone with
-a large software project to maintain, or a large set of text files to
-organize, can benefit from an ID database.
 
-Although the ID utilities are most commonly used with identifiers,
-numeric constants are also stored in the database, and can be searched
-for in the same way (independent of radix, if desired).
+An @dfn{ID database} is a binary file containing a list of file names, a
+list of tokens, and a sparse matrix indicating which tokens
+appear in which files.
 
-There are a number of programs in the ID family:
+With this database and some tools to query it (described in this
+manual), many text-searching tasks become simpler and faster.  For
+example, you can list all files that reference a particular
+@code{#include} file throughout a huge source hierarchy, search for all
+the memos containing references to a project, or automatically invoke an
+editor on all files containing references to some function or variable.
+Anyone with a large software project to maintain, or a large set of text
+files to organize, can benefit from the ID utilities.
 
-@table @code
+Although the name `ID' is short for `identifier', the ID utilities
+handle more than just identifiers; they also treat other kinds of
+tokens, most notably numeric constants, and the contents of certain
+character strings.  Thus, this manual will use the word @dfn{token} as a
+term that is inclusive of identifiers, numbers and strings.
 
-@item mkid
-scans files for identifiers and numeric constants and builds the ID
-database file.
+There are several programs in the ID utilities family:
 
-@item gid
-lists all lines that match given patterns.
+@table @file
+
+@item mkid
+scans files for tokens and builds the ID database file.
 
 @item lid
-lists the filenames containing identifiers that match given patterns.
+queries the ID database for tokens, then reports matching file names or
+matching lines.
 
-@item aid
-lists the filenames containing identifiers that contain given strings,
-independent of case.
+@item fid
+lists all tokens recorded in the database for given files, or
+tokens common to two files.
 
-@item eid
-invokes an editor on each file containing identifiers that match given
-patterns.
+@item fnid
+matches the file names in the database, rather than the tokens.
 
-@item fid
-lists all identifiers recorded in the database for given files, or
-identifiers common to two files.
+@item xtokid
+extracts raw tokens---helps with testing of new @file{mkid} scanners.
+
+@end table
+
+In addition, the ID utilities have historically provided several query
+programs which are specializations of @file{lid}:
+
+@table @file
+
+@item gid
+(alias for @samp{lid -R grep})
+lists all lines containing the requested pattern.
 
-@item pid
-matches the filenames in the database, rather than the identifiers.
+@item eid
+(alias for @samp{lid -R edit})
+invokes an editor on all files containing the requested pattern, and
+if possible, initiates a text search for that pattern.
 
-@item idx
-helps with testing of new @code{mkid} scanners.
+@item aid
+(alias for @samp{lid -ils}) treats the requested pattern
+as a case-insensitive literal substring.
 
 @end table
 
 @cindex bugs, reporting
-Please report bugs to @samp{gkm@@magilla.cichlid.com}.  Remember to
+Please report bugs to @samp{bug-gnu-utils@@gnu.ai.mit.edu}.  Remember to
 include the version number, machine architecture, input files, and any
 other information needed to reproduce the bug: your input, what you
 expected, what you got, and why it is wrong.  Diffs are welcome, but
 please include a description of the problem as well, since this is
 sometimes difficult to infer.  @xref{Bugs, , , gcc, GNU CC}.
 
-@menu
-* Past and future::       How the ID tools came about, and where they're going.
-@end menu
+@c ************* gkm *********************************************************
+@node Quick start
+@chapter Quick Start Procedure
 
+@table @bullet
 
-@node Past and future
-@section Past and future
+Unpack the distribution.
 
-@cindex history
+Type @file{./configure}
 
-@pindex look @r{and @code{mkid} 1}
-@cindex McGary, Greg
-Greg McGary conceived of the ideas behind mkid when he began hacking the
-Unix kernel in 1984.  He needed a navigation tool to help him find his
-way around the expansive, unfamiliar landscape.  The first @code{mkid}-like
-tools were shell scripts, and produced an ASCII database that looks much
-like the output of @code{lid} with no arguments.  It took over an hour
-on a VAX 11/750 to build a database for a 4.1BSD-ish kernel.  Lookups
-were done with the system utility @code{look}, modified to handle very
-long lines.
-
-In 1986, Greg rewrote @code{mkid}, @code{lid}, @code{fid} and @code{idx}
-in C to improve performance.  Database-build times were shortened by an
-order of magnitude.  The @code{mkid} tools were first posted to
-@samp{comp.sources.unix} in September 1987.
+Type @samp{make}
 
-@cindex Horsley, Tom
-@cindex Scofield, Doug
-@cindex Leonard, Bill
-@cindex Berry, Karl
-Over the next few years, several versions diverged from the original
-source.  Tom Horsley at Harris Computer Systems Division stepped forward
-to take over maintenance and integrated some of the fixes from divergent
-versions.  A first release of
-@code{mkid} @w{version 2} was posted to @file{alt.sources} near the end
-of 1990.  At that time, Tom wrote this Texinfo manual with the
-encouragement the net community.  (Tom especially thanks Doug Scofield
-and Bill Leonard whom he dragooned into helping poorfraed and
-edit---they found several problems in the initial version.)  Karl Berry
-revamped the manual for Texinfo style, indexing, and organization in
-1995.
+Type @samp{make install} as a user with the appropriate privileges
+(e.g., @samp{bin} or perhaps even @samp{root}).
 
-@pindex cscope
-@pindex grep
-@cindex future
-In January 1995, Greg McGary reemerged as the primary maintaner and
-launched development of @code{mkid} version 3, whose primary new feature
-is an efficient algorithm for building databases that is linear in both
-time and space over the size of the input text.  (The old algorithm was
-quadratic in space and therefore choked on very large source trees.)
-The code is released under the GNU Public License, and might become a
-part of the GNU system.  @code{mkid} 3 is an interim release, since
-several significant enhancements are still in the works: an optional
-coupling with GNU @code{grep}, so that @code{grep} can use an ID
-database for hints; a @code{cscope} work-alike query interface;
-incremental update of the ID database; and an automatic file-tree walker
-so you need not explicitly supply every filename argument to the
-@code{mkid} program.
+Type @samp{cd /usr/include; mkid} to build an ID database covering
+all of the system header files.
 
+Type @samp{lid FILE}, then @samp{gid strtok}, then @samp{aid stdout}.
 
-@node mkid invocation
-@chapter @code{mkid}: Creating ID databases
+@end table
 
-@pindex mkid
-@cindex creating databases
-@cindex databases, creating
+You have just built, installed and used the most common commands of the
+GNU ID utilities.  If you ever need help remembering which system header
+files contain a particular declaration, or reference a particular symbol,
+you'll want to keep the ID file you built in @file{/usr/include} for
+later use.  If your working directory is elsewhere at the time, simply
+provide the @samp{-f /usr/include} option to @file{lid} (@pxref{Reading
+options}).
 
-@pindex cron
-The @code{mkid} program builds an ID database.  To do this, it must scan
-each file you tell it to include in the database.  This takes some time,
-but once the work is done the query programs run very rapidly.  (You can
-run @code{mkid} as a @code{cron} job to regularly update your
-databases.)
-
-The @code{mkid} program knows how to extract identifiers from various
-types of files.  For example, it can recognize and skip over comments
-and string constants in a C program.
-
-@cindex numbers, in databases
-Identifiers are not the only thing included in the database.  Numbers
-are also recognized and included in the database indexed by their binary
-value.  This feature allows you to find uses of constants without regard
-to the radix used to specify them, since the same number can frequently
-be written in many different ways (for instance, @samp{47}, @samp{0x2f},
-@samp{057} in C).
-
-All the places in this document which mention identifiers should really
-mention both identifiers and numbers, but that gets fairly clumsy after
-a while, so you just need to keep in mind that numbers are included in
-the database as well as identifiers.
+@c ************* gkm *********************************************************
+@node Common options
+@chapter Common command-line options
 
-@cindex ID file format
-@cindex architecture-independence
-@cindex sharing ID files
-The ID files that @code{mkid} creates are architecture- and
-byte-order-independent; you can share them at will across systems.
+@cindex common command-line options
+
+Certain options, and regular expression syntax, are shared by various
+groupings of the ID utilities.  We describe these in the sections below,
+rather than repeating them for each program.
 
 @menu
-* mkid options::                Command-line options to mkid.
-* Scanners::                    Built-in and defining your own.
-* mkid examples::               Examples of mkid usage.
+* Universal options::     Options common to all programs.
+* Extraction options::    Options for programs that extract tokens from source files.
+* Walker options::        Options for programs that walk file and directory trees.
+* Reading options::       Options for programs that read ID databases.
+* Writing options::       Options for programs that write ID databases.
+* File listing options::  Options for programs that list file names.
 @end menu
 
-
-@node mkid options
-@section @code{mkid} options
-
-@cindex options for @code{mkid}
-@pindex mkid @r{options}
-
-By default, @code{mkid} scans the files you specify and writes the
-database to a file named @file{ID} in the current directory.
-
-@example
-mkid [-v] [-S@var{scanarg}] [-a@var{argfile}] [-] [-f@var{idfile}] @c
-@var{files}@dots{}
-@end example
-
-The program accepts the following options.
+@c ************* gkm *********************************************************
+@node Universal options
+@section Options Common to All Programs
 
 @table @samp
 
-@item -v
-@opindex -v
-@cindex statistics
-Verbose.  @code{mkid} tells you as it scans each file and indicates
-which scanner it is using.  It also summarizes some statistics about the
-database at the end.
-
-@item -S@var{scanarg}
-@opindex -S@var{scanarg}
-Specify options regarding @code{mkid}'s scanners.  @xref{Scanner option
-formats}.
-
-@item -a@var{argfile}
-@opindex -a@var{argfile}
-Read additional command line arguments from @var{argfile}.  This is
-typically used to specify lists of filenames longer than will fit on a
-command line; some systems have severe limitations on the total length
-of a command line.
-
-@item -
-@opindex -
-Read additional command line arguments from standard input.
-
-@item -f@var{idfile}
-Write the database to the file @var{idfile}, instead of @file{ID}.  The
-database stores filenames relative to the directory containing the
-database, so if you move the database to a different directory after
-creating it, you may have trouble finding files.
-
-@c @item -u
-@c @opindex -u
-@c The @code{-u} option updates an existing database by rescanning any
-@c files that have changed since the database was written.  Unfortunately
-@c you cannot incrementally add new files to a database.
-@c Greg is reimplementing this ...
-
-@end table
+@item --help
+@opindex --help
+@cindex help, online
+Print a usage message listing all available options, then exit successfully.
 
-The remaining arguments @var{files} are the files to be scanned and
-included in the database.  If no files are given at all (either on
-command line or via @samp{-a} or @samp{-}), @code{mkid} does nothing.
+@item --version
+@opindex --version
+@cindex version number, finding
+Print the version number, then exit successfully.
 
+@end table
 
-@node Scanners
-@section Scanners
+@c ************* gkm *********************************************************
+@node Reading options
+@section Options for Programs that Read ID Databases
 
-@cindex scanners
+@table @samp
 
-To determine which identifiers to extract from a file and store in the
-database, @code{mkid} calls a @dfn{scanner}; we say a scanner
-@dfn{recognizes} a particular language.  Scanners for several languages
-are built-in to @code{mkid}; you can add your own scanners as well, as
-explained in the sections below.
-
-@cindex suffixes of filenames
-@code{mkid} determines which scanner to use for a particular file by
-looking at the suffix of the filename.  This @dfn{suffix} is everything
-after and including the last @samp{.} in a filename; for example, the
-suffix of @file{foo.c} is @file{.c}.  @code{mkid} has a built-in list of
-bindings from some suffixes to corresponding scanners; for example,
-@file{.c} files are (not surprisingly) scanned by the predefined C
-language scanner.
-
-@findex .default @r{scanner}
-If @code{mkid} cannot determine what scanner to use for a particular
-file, either because the file has no suffix (e.g., @file{foo}) or
-because @code{mkid} has no binding for the file's suffix (e.g.,
-@file{foo.bar}), it uses the scanner bound to the @samp{.default}
-suffix.  By default, this is the plain text scanner (@pxref{Plain text
-scanner}), but you can change this with the @samp{-S} option, as
-explained below.
+@item -f @var{filename}
+@itemx --file=@var{filename}
+@opindex -f
+@opindex --file
+@cindex ID database file name
 
-@menu
-* Scanner option formats::      Overview of the -S option.
-* Predefined scanners::         The C, plain text, and assembler scanners.
-* Defining new scanners::       Either in source code or at runtime with -S.
-* idx invocation::              Testing mkid scanners.
-@end menu
+@var{Filename} is the ID database to read when processing queries.  At
+present, only a single @samp{--file} option is processed, but in future
+releases, more than one ID database may be named on the command line.
 
+@item $IDPATH
+@cindex ID database file name
 
-@node Scanner option formats
-@subsection Scanner option formats
+@samp{IDPATH} is an environment variable that contains a
+colon-separated list of ID database names.  If this variable is present,
+and no @samp{--file} options are presented on the command line, the ID
+databases named in @samp{IDPATH} are implied.@footnote{At present, this
+feature is not fully implemented, since only the first of a list of ID
+database names is processed.}
 
-@cindex scanner options
-@opindex -S @r{scanner option}
+@end table
 
-With the @samp{-S} option, you can change which language scanner to use
-for which files, give language-specific options, and get some limited
-online help about scanner options.
+If no ID databases are specified either on the command line or via the
+@samp{IDPATH} environment variable, then the ID utilities search for a
+file named @file{ID} in the current working directory, and then in
+successive parent directories.
 
-Here are the different forms of the @samp{-S} option:
+@c ************* gkm *********************************************************
+@node Writing options
+@section Options for Programs that Write ID Databases
 
 @table @samp
 
-@item -S.@var{suffix}=@var{scanner}
-@opindex -S.
-Use @var{scanner} for a file with the given @samp{.@var{suffix}}.  For
-example, @samp{-S.yacc=c} tells @code{mkid} to use the @samp{c} language
-scanner for all files ending in @samp{.yacc}.
-
-@item -S.@var{suffix}=?
-Display which scanner is used for the given @samp{.@var{suffix}}.
-
-@item -S?=@var{scanner}
-@opindex -S?
-Display which suffixes @var{scanner} is used for.
-
-@item -S?=?
-Display the scanner binding for every known suffix.
+@item -o @var{filename}
+@itemx --output=@var{filename}
+@opindex -o
+@opindex --output
+@cindex ID database file name
 
-@item -S@var{scanner}+@var{arg}
-@itemx -S@var{scanner}-@var{arg}
-Each scanner accepts certain scanner-dependent arguments.  These options
-all have one of these forms.  @xref{Predefined scanners}.
+The @samp{--output} option names the file in which to write a new ID
+database.  If no @samp{--output} (or @samp{--file}) option is present,
+an output file named @file{ID} is implied.
 
-@item -S@var{scanner}?
-Display the scanner-specific options accepted by @var{scanner}.
+@item -f @var{filename}
+@itemx --file=@var{filename}
+@opindex -f
+@opindex --file
+@cindex ID database file name
 
-@item -S@var{new-scanner}/@var{old-scanner}/@var{filter-command}
-Define @var{new-scanner} in terms of @var{old-scanner} and
-@var{filter-command}.  @xref{Defining scanners with options}.
+This is a synonym for @samp{--output}.
 
 @end table
 
+@c ************* gkm *********************************************************
+@node Walker options
+@section Options for Programs that Walk File and Directory Trees
 
-@node Predefined scanners
-@subsection Predefined scanners
+The programs @file{mkid} and @file{xtokid} accept the names of files and
+directories on the command line.  Files are scanned if there is a
+scanner available and enabled for the file's source language.
+Directories are recursively descended, searching for files whose names
+match the rules listed in the @emph{language map} file (@pxref{Language
+map}).
 
-@cindex predefined scanners
-@cindex scanners, predefined
+The following option controls the file tree walker:
 
-@code{mkid} has built-in scanners for several types of languages; you
-can get the list by running @code{mkid -S?=?}.
-The supported languages are documented
-below@footnote{This is not strictly true: @samp{vhil} is a supported
-language, but it is an obsolete and arcane dialect of C and should be
-ignored.}.
-
-@menu
-* C scanner::                   For the C programming language.
-* Plain text scanner::          For documents or other non-source code.
-* Assembler scanner::           For assembly language.
-@end menu
+@table @samp
 
+@item -p @var{names}
+@itemx --prune=@var{names}
+@opindex -p
+@opindex --prune
+@cindex file tree pruning
 
-@node C scanner
-@subsubsection C scanner
+One or more file or directory names may appear in @var{names}.  The file
+tree walker will stop short at these files and directories and their
+contents will not be scanned.
 
-@cindex C scanner, predefined
-@flindex .[chly] @r{files, scanning}
+@end table
 
-The C scanner is the most commonly used.  Files with the usual @file{.c}
-and @file{.h} suffixes, and the @file{.y} (yacc) and @file{.l} (lex)
-suffixes, are processed with this scanner (by default).
+@c ************* gkm *********************************************************
+@node File listing options
+@section Options for Programs that List File Names
 
-Scanner-specific options:
+The programs @file{lid} and @file{fnid} can print lists of file names as
+the result of queries.  The following option controls how these lists
+are formatted:
 
 @table @samp
 
-@item -Sc-s@var{character}
-@kindex $ @r{in identifiers}
-@opindex -Sc-s
-Allow the specified @var{character} in identifiers. For example, if you
-use @samp{$} in identifiers, you'll want to use @samp{-Sc-s$}.
-
-@item -Sc+u
-@opindex -Sc+u
-Strip leading underscores from identifiers. You might to do this in
-peculiar circumstances, such as trying to parse the output from
-@code{nm} or some other system utility.
+@item -S @var{style}
+@itemx --separator=@var{style}
+@opindex -S
+@opindex --separator
+@cindex file name separator
 
-@item -Sc-u
-@opindex -Sc-u
-Don't strip leading underscores from identifiers; this is the default.
-
-@end table
+@var{Style} may be one of @samp{braces}, @samp{space} or @samp{newline}.
 
+The @var{style} of @samp{braces} means that file names with common
+directory prefix and common suffix are printed using the shell's brace
+notation in order to compress the output.  For example,
+@file{../src/foo.c ../src/bar.c} can be printed in brace notation as
+@file{../src/@{foo,bar@}.c}.
 
-@node Plain text scanner
-@subsubsection Plain text scanner
+The @var{style}s of @samp{space} and @samp{newline} mean that file names
+are separated by spaces or by newlines, respectively.
 
-@cindex plain text scanner
+If the list of files is being printed on a terminal, brace notation is
+the default.  If not, file names are separated by spaces if the
+@var{key} is included in the output, and by newlines if the @var{key style}
+is @samp{none} (@pxref{lid invocation}).
 
-The plain text scanner is intended for scanning most non-source-code
-files.  This is typically the scanner used when adding custom scanners
-via @samp{-S} (@pxref{Defining scanners with options}).
+@end table
 
-@c @code{mkid} predefines a troff scanner in terms of the plain text
-@c scanner and
-@c the @code{deroff} utility. 
-@c A compressed man page
-@c scanner runs @code{pcat} piped into @code{col -b}, and a @TeX{} scanner
-@c runs @code{detex}.
+@c ************* gkm *********************************************************
+@node Extraction options
+@section Options for Programs that Scan Source Files
 
-Scanner-specific options:
+@file{mkid} and @file{xtokid} walk file trees, select source files by
+name, and extract tokens from source files.  They accept the following
+options:
 
 @table @samp
 
-@item -Stext+a@var{character}
-@opindex -Stext+a
-Include @var{character} in identifiers.  By default, letters (a--z and
-A--Z) and underscore are included.
-
-@item -Stext-a@var{character}
-@opindex -Stext-a
-Exclude @var{character} from identifiers.
+@item -m @var{mapfile}
+@itemx --lang-map=@var{mapfile}
+@opindex -m
+@opindex --lang-map
+@cindex language map file
 
-@item -Stext+s@var{character}
-@opindex -Stext+s
-@cindex squeezing characters from identifiers
-Squeeze @var{character} from identifiers, i.e., do not terminate an
-identifier when @var{character} is seen.  By default, the characters
-@samp{'}, @samp{-}, and @samp{.} are squeezed out of identifiers.  For
-example, the input @samp{fred's} leads to the identifier @samp{freds}.
+@var{mapfile} contains rules for determining the source languages from
+file names.  @xref{Language map}.
 
-@item -Stext-s@var{character}
-Do not squeeze @var{character}.
+@item -i @var{languages}
+@itemx --include=@var{languages}
+@opindex -i
+@opindex --include
+@cindex include languages
 
-@end table
+The @samp{--include} option names @var{languages} whose source files
+should be scanned and incorporated into the ID database.  By default,
+all languages known to the ID utilities are enabled.
 
+@item -x @var{languages}
+@itemx --exclude=@var{languages}
+@opindex -x
+@opindex --exclude
+@cindex exclude languages
+
+The @samp{--exclude} option names @var{languages} whose source files
+should @emph{not} be scanned.  The default list of excluded languages is
+empty.  Note that only one of @samp{--include} or @samp{--exclude} may
+be specified on the command line for a single run.
+
+@item -l @var{language}:@var{options}
+@itemx --lang-option=@var{language}:@var{options}
+@opindex -l
+@opindex --lang-option
+@cindex language-specific option
+
+Language-specific scanners also accept options.  @var{Language} denotes
+the desired scanner, and @var{options} are the command-line options that
+should be passed through to it.  For example, to pass the @samp{-x
+--coke-bottle} options to the scanner for the language @samp{swizzle},
+pass this: @samp{-l swizzle:"-x --coke-bottle"}, or this:
+@samp{--lang-option=swizzle:"-x --coke-bottle"}, or this: @samp{-l
+swizzle:-x -l swizzle:--coke-bottle}.  Use the @samp{--help} option to
+see the command-line option summary for each built-in scanner.
 
-@node Assembler scanner
-@subsubsection Assembler scanner
+@end table
 
-@cindex assembler scanner
+@cindex scanners
 
-Since assembly languages come in several flavors, this scanner has a
-number of options:
+To determine which tokens to extract from a file and store in the
+database, @file{mkid} calls a @dfn{scanner}; we say a scanner
+@dfn{recognizes} a particular language.  Scanners for several languages
+are built-in to @file{mkid}; you can add your own scanners as well, as
+explained in @ref{Defining scanners}.
 
-@table @samp
+The ID utilities determine which scanner to use for a particular file by
+consulting the language-map file.  Scanners for several languages are already
+built-in to the ID utilities.  You can see which languages have built-in
+scanners, and examine their language-specific options by invoking
+@samp{mkid --help} or @samp{xtokid --help}.
 
-@item -Sasm-c@var{character}
-@opindex -Sasm-c
-@cindex comments in assembler
-Define @var{character} as starting a comment that extends to the end of
-the input line; no default.  In many assemblers this is @samp{;} or
-@samp{#}.
-
-@item -Sasm+u
-@itemx -Sasm-u
-@opindex -Sasm+u
-Strip (@samp{+u}) or do not strip (@samp{-u}) leading underscores from
-identifiers.  The default is to strip them.
-
-@item -Sasm+a@var{character}
-@opindex -Sasm+a
-Allow @var{character} in identifiers.
-
-@item -Sasm-a@var{character}
-Allow @var{character} in identifiers, but if an identifier contains
-@var{character}, ignore it. This is useful to ignore temporary labels,
-which can be generated in great profusion; these often contain @samp{.}
-or @samp{@@}.
-
-@item -Sasm+p
-@itemx -Sasm-p
-@opindex -Sasm+p
-Recognize (@samp{+p}) or do not recognize (@samp{-p}) C preprocessor
-directives in assembler source. The default is to recognize them.
-
-@item -Sasm+C
-@itemx -Sasm-C
-@opindex -Sasm+C
-Skip over (@samp{+C}) or do not skip over (@samp{-C}) C style comments
-in assembler source.  The default is to skip them.
+@menu
+* Language map::                Mapping file names to source languages.
+* C/C++ scanner::               For the C and C++ programming languages.
+* Assembler scanner::           For assembly language.
+* Text scanner::                For documents or other non-source code.
+* Defining scanners::           Defining new scanners in the source code.
+@end menu
 
-@end table
+@c ************* gkm *********************************************************
+@node Language map
+@subsection Mapping file names to source languages
 
+The file @file{id-lang.map}, installed by default in
+@file{$(prefix)/share/id-lang.map}, contains rules for mapping file
+names to source languages.  Each rule comprises three parts: a shell
+@var{glob} pattern, a language name, and language-specific scanner
+options.
 
-@node Defining new scanners
-@subsection Defining new scanners
+The special pattern @samp{**} denotes the default source language.  This is
+the language that's assigned to file names that don't match any other
+pattern.
 
-@cindex scanners, adding new
+The special pattern @samp{***} should be followed by a file name.  The
+named file should contain more language-map rules and is included at
+this point.
 
-You can add new scanners to @code{mkid} in two ways: modify the source
-code and recompile, or at runtime via the @samp{-S} option.  Each has
-their advantages and disadvantages, as explained below.
+The order in which rules are presented in a language-map file is
+significant.  This order influences the order in which files are
+displayed as the result of queries.  For example, the distributed
+language-map file places all rules for C @file{.h} files ahead of
+@file{.c} files, so that in general, declarations will precede
+definitions in query output.  The same thing is done for C++ and its
+many different source file name extensions.
 
-If you create a new scanner that would be of use to others, please
-consider sending it back to the maintainer,
-@samp{gkm@@magilla.cichlid.com}, for inclusion in future releases of
-@code{mkid}.
+Here is a pared-down version of the @file{id-lang.map} file distributed
+with the ID utilities:
 
-@menu
-* Defining scanners in source code::
-* Defining scanners with options::
-@end menu
+@example
 
+# Default language
+**			IGNORE	# Although this is listed first,
+				# the default language pattern is
+				# logically matched last.
+
+# Backup files
+*~			IGNORE
+*.bak			IGNORE
+*.bk[0-9]		IGNORE
+
+# SCCS files
+[sp].*			IGNORE
+
+# list header files before code files
+*.h			C
+*.h.in			C
+*.H			C++
+*.hh			C++
+*.hpp			C++
+*.hxx			C++
+
+# list C `meta' files next
+*.l			C
+*.lex			C
+*.y			C
+*.yacc			C
+
+# list C code files after header files
+*.c			C
+*.C			C++
+*.cc			C++
+*.cpp			C++
+*.cxx			C++
+
+# list assembly language after C
+*.[sS]			asm --comment=;
+*.asm			asm --comment=;
+
+# [nt]roff
+*.[0-9]			roff
+*.ms			roff
+*.me			roff
+*.mm			roff
+
+# TeX and friends
+*.tex			TeX
+*.ltx			TeX
+*.texi			texinfo
+*.texinfo		texinfo
 
-@node Defining scanners in source code
-@subsubsection Defining scanners in source code
+@end example
 
-@flindex scanners.c
-@cindex scanners, defining in source code
+@c ************* gkm *********************************************************
+@node C/C++ scanner
+@subsection C/C++ Language Scanner
 
-@vindex languages_0
-@vindex suffixes_0
-To add a new scanner in source code, you should add a new section to the
-file @file{scanners.c}.  Copy one of the existing scanners (most likely
-either C or plain text), and modify as necessary.  Also add the new
-scanner to the @code{languages_0} and @code{suffixes_0} tables near the
-beginning of the file.
+@cindex C scanner, predefined
 
-This is not a terribly difficult programming task, but it requires
-recompiling and installing the new version of @code{mkid}, which may be
-inconvenient.
+The C scanner is the most commonly used.  Files that match the glob
+pattern @file{*.h}, @file{*.c}, as well as @file{yacc} files that match
+@file{*.y} or @file{*.yacc}, and @file{lex} files that match @file{*.l}
+or @file{*.lex}, are processed with this scanner.
 
-This method leads to scanners which operate much more quickly than ones
-that depend on external programmers.  It is also likely the easiest way
-to define scanners for new programming languages.
+Scanner-specific options (Note, these options are presented
+@emph{without} the required @samp{-l} or @samp{--lang-option=} prefix):
 
+@table @samp
 
-@node Defining scanners with options
-@subsubsection Defining scanners with options
+@item -k @var{character-class}
+@itemx --keep=@var{character-class}
+@opindex -k
+@opindex --keep
+@opindex -l C:-k
+@opindex -l C:--keep
+@opindex --lang-option=C:-k
+@opindex --lang-option=C:--keep
+
+Consider the characters in @var{character-class} as valid constituents of
+identifier names.  For example, if you are indexing C code that contains
+@samp{$} in some of its identifiers, you can include these by using
+@samp{--lang-option=C:--keep=$}, or @samp{-l C:"-k $"} (if you don't like
+to type so much).
+
+@item -i @var{character-class}
+@itemx --ignore=@var{character-class}
+@opindex -i
+@opindex --ignore
+@opindex -l C:-i
+@opindex -l C:--ignore
+@opindex --lang-option=C:-i
+@opindex --lang-option=C:--ignore
+
+Consider the characters in @var{character-class} as valid constituents of
+identifier names, but discard all tokens containing these characters.
+For example, if some C code has identifiers containing @samp{$}, but you
+don't want these cluttering up your ID database, use
+@samp{--lang-option=C:--ignore=$}, or the terser equivalent @samp{-l
+C:"-i $"}.
+
+@item -u
+@itemx --strip-underscore
+@opindex -u
+@opindex --strip-underscore
+@opindex -l C:-u
+@opindex -l C:--strip-underscore
+@opindex --lang-option=C:-u
+@opindex --lang-option=C:--strip-underscore
+
+Strip one leading underscore from C identifiers encapsulated as
+character strings.  This option is useful if you are indexing C code
+that contains symbol-table name strings for systems that prepend an
+underscore to external symbols.  By default, the leading underscore is
+retained.
 
-@cindex scanners, defining with options
+@end table
 
-You can use the @samp{-S} option on the command line to define a new
-language scanner:
+@c ************* gkm *********************************************************
+@node Assembler scanner
+@subsection Assembly Language Scanner
 
-@example
--S@var{new-scanner}/@var{existing-scanner}/@var{filter}
-@end example
+@cindex assembler scanner
+@cindex assembly language scanner
 
-@noindent
-Here, @var{new-scanner} is the name of the new scanner being defined,
-@var{existing-scanner} is the name of an existing scanner, and
-@var{filter} is a shell command or pipeline.
+Assembly languages use a variety of commenting conventions, and allow a
+variety of special characters to @emph{dirty up} local symbols,
+preventing name space conflicts with symbols defined by higher-level
+languages.  Also, some compilation systems prepend an underscore to
+external symbols.  The options listed below are designed to address
+these differences.
 
-The new scanner works by passing the input file to @var{filter}, and
-then arranging for the result to be passed through
-@var{existing-scanner}. Typically, @var{existing-scanner} is @samp{text}.
+@table @samp
 
-Somewhere within @var{filter}, the string@samp{%s} should occur.  This
-@samp{%s} is replaced by the name of the source file being scanned.
+@item -c @var{character-class}
+@itemx --comment=@var{character-class}
+@opindex -c
+@opindex --comment
+@opindex -l asm:-c
+@opindex -l asm:--comment
+@opindex --lang-option=asm:-c
+@opindex --lang-option=asm:--comment
 
-@cindex Texinfo, scanning example of
-For example, @code{mkid} has no built-in scanner for Texinfo files (like
-this one).  In indexing a Texinfo file, you most likely would want
-to ignore the Texinfo @@-commands. Here's one way to specify a new
-scanner to do this:
+The characters in @var{character-class} are considered left delimiters
+for comments that extend until the end of the current line.
 
-@example
--S/texinfo/text/sed s,@@[a-z]*,,g %s
-@end example
+@item -k @var{character-class}
+@itemx --keep=@var{character-class}
+@opindex -k
+@opindex --keep
+@opindex -l asm:-k
+@opindex -l asm:--keep
+@opindex --lang-option=asm:-k
+@opindex --lang-option=asm:--keep
+
+Consider the characters of @var{character-class} as valid constituents of
+identifier names.  For example, if you are indexing assembly code that
+prepends @samp{.} to assembler directives, and prepends @samp{%} to
+register names, you can keep these characters in the tokens by specifying
+@samp{--lang-option=asm:--keep=.%}, or @samp{-l asm:"-k .%"}.
+
+@item -i @var{character-class}
+@itemx --ignore=@var{character-class}
+@opindex -i
+@opindex --ignore
+@opindex -l asm:-i
+@opindex -l asm:--ignore
+@opindex --lang-option=asm:-i
+@opindex --lang-option=asm:--ignore
+
+Consider the characters of @var{character-class} as valid constituents of
+identifier names, but discard all tokens containing these characters.
+For example, if you don't want to clutter your ID database with
+assembler directives that begin with a leading @samp{.} or with
+assembler labels that contain @samp{@@}, use
+@samp{--lang-option=asm:--ignore=.@@}, or @samp{-l asm:"-i .@@"}.
+
+@item -u
+@itemx --strip-underscore
+@opindex -u
+@opindex --strip-underscore
+@opindex -l asm:-u
+@opindex -l asm:--strip-underscore
+@opindex --lang-option=asm:-u
+@opindex --lang-option=asm:--strip-underscore
+
+Strip one leading underscore from identifiers.  This option is useful if
+your compilation system prepends an underscore to external symbols.  By
+stripping the underscore, you can canonicalize such names and bring them
+into conformance with the way they are expressed in the C language.  By
+default, the leading underscore is retained.
 
-This defines a new language scanner (@samp{texinfo}) defined in terms of
-a @code{sed} command to strip out Texinfo directives (an @samp{@@}
-character followed by letters).  Once the directives are stripped, the
-remaining text is run through the plain text scanner.
+@item -n
+@itemx --no-cpp
+@opindex -n
+@opindex --no-cpp
+@opindex -l asm:-n
+@opindex -l asm:--no-cpp
+@opindex --lang-option=asm:-n
+@opindex --lang-option=asm:--no-cpp
 
-This is a minimal example; to do a complete job, you would need to
-completely delete some lines, such as those beginning with @code{@@end}
-or @@node.
+Do not recognize C preprocessor directives.  By default, such lines are
+handled in the same way as they are by the C language scanner.
 
+@end table
 
-@node idx invocation
-@subsection @code{idx}: Testing @code{mkid} scanners
+@c ************* gkm *********************************************************
+@node Text scanner
+@subsection Text Scanner
 
-@code{idx} prints the identifiers found in the files you specify to
-standard output. This is useful in debugging new @code{mkid} scanners
-(@pxref{Scanners}). Synopsis:
+@cindex text scanner
 
-@example
-idx [-S@var{scanarg}] @var{files}@dots{}
-@end example
+The plain text scanner is intended for human-language documents, or as the
+scanner of last resort for files that have no scanner that is more
+specific.  It is customizable to the extent that character classes can
+be designated as token constituents or as token delimiters.  The default
+token constituents are the alpha-numerics; all other characters are
+considered token delimiters.
 
-@code{idx} accepts the same @samp{-S} options as @code{mkid}.
-@xref{Scanner option formats}.
+@table @samp
 
-The name ``idx'' stands for ``ID eXtract''.  The name may change in
-future releases, since this is such an infrequently used program.
+@item -i @var{character-class}
+@itemx --include=@var{character-class}
+@opindex -i
+@opindex --include
+@opindex -l text:-i
+@opindex -l text:--include
+@opindex --lang-option=text:-i
+@opindex --lang-option=text:--include
 
+Include characters belonging to @var{character-class} in tokens.
 
-@node mkid examples
-@section @code{mkid} examples
+@item -x @var{character-class}
+@itemx --exclude=@var{character-class}
+@opindex -x
+@opindex --exclude
+@opindex -l text:-x
+@opindex -l text:--exclude
+@opindex --lang-option=text:-x
+@opindex --lang-option=text:--exclude
 
-@cindex examples of @code{mkid}
+Exclude characters belonging to @var{character-class} from tokens, i.e., treat
+them as token delimiters.
 
-The simplest example of @code{mkid} is something like:
+@end table
 
-@example
-mkid *.[chy]
-@end example
+@c ************* gkm *********************************************************
+@node Defining scanners
+@subsection Defining New Scanners in the Source Code
 
-This will build an ID database indexing identifiers and numbers in the
-all the @file{.c}, @file{.h}, and @file{.y} files in the current
-directory.  Because @code{mkid} already knows how to scan files with
-those suffixes, no additional options are needed.
-
-@cindex man pages, compressed
-@cindex compressed files, building ID from
-Here's a more complex example. Suppose you want to build a database
-indexing the contents of all the @code{man} pages, and furthur suppose
-that your system is using @code{gzip} (@pxref{Top, , , gzip, Gzip}) to
-store compressed @code{cat} versions of the @code{man} pages in the
-directory @file{/usr/catman}.  The @code{gzip} program creates files
-with a @code{.gz} suffix, so you must tell @code{mkid} how to scan
-@file{.gz} files.  Here are the commands to do the job:
+@flindex scanners.c
+@cindex scanners, defining in source code
 
-@example
-cd /usr/catman
-find . -name \*.gz -print | mkid '-Sman/text/gzip <%s' -S.gz=man -
-@end example
+@vindex languages_0
 
-@noindent Explanation:
+To add a new scanner in source code, you should add a new section to the
+file @file{scanners.c}.  It might be easiest to clone one of the
+existing scanners and modify it as necessary.  For the hypothetical
+language @var{foo}, you must define the functions @code{get_token_foo},
+@code{parse_args_foo}, @code{help_me_foo}, as well as the tables
+@code{long_options_foo} and @code{args_foo}.  If your scanner is
+modelled after one of the existing scanners, you'll also need a
+character-attribute table @code{ctype_foo}.
 
-@enumerate
+This is not a terribly difficult programming task, but it requires
+recompiling and installing the new version of @file{mkid} and @file{xtokid}.
+You should use @file{xtokid} to test the operation of the new scanner.
 
-@item
-We first @code{cd} to @file{/usr/catman} so the ID database
-will store the correct relative filenames.
+Once these functions and tables are ready, add function prototypes and
+an entry to the @code{languages_0} table near the beginning of the
+file.
 
-@item
-The @code{find} command prints the names of all @file{.gz} files under
-the current directory.  @xref{find invocation, , , sh-utils, GNU shell
-utilities}.
+Be warned that the existing scanners are built for speed, not elegance
+or readability.  You might wish to create a new scanner that's easier to
+read and understand if you don't feel that speed is so important.
 
-@item
-This list is piped to @code{mkid}; the @code{-} option (at the end of
-the line) tells @code{mkid} to read arguments (in this case, as is
-typical, the list of filenames) from standard input.  @xref{mkid options}.
+@c ************* gkm *********************************************************
+@node mkid invocation
+@chapter @samp{mkid}: Creating an ID Database
+@cindex creating databases
+@cindex databases, creating
+@cindex ID file format
+@cindex architecture-independence
+@cindex sharing ID files
 
-@item
-The @samp{-Sman/text/gzip @dots{}} defines a new language @samp{man} in
-terms of the @code{gzip} program and @code{mkid}'s existing text
-scanner.  @xref{Defining scanners with options}.
+@file{mkid} builds an ID database.  It accepts the names of files and/or
+directories on the command line, selects files that have an enabled
+scanner, then extracts and stores tokens from those files.  The
+resulting ID database is architecture- and byte-order-independent so it
+can be shared among all systems.
+
+The primary virtues of @file{mkid} are speed and high capacity.  The
+size of the source trees it can index is limited only by available
+system memory.  @file{mkid}'s indexing algorithm is very space-efficient
+and exhibits excellent locality-of-reference, and so is capable of
+operating with a working-set size that is only half the size of its
+virtual address space.  A typical @sc{UNIX}-like operating system with
+16 megabytes of system memory should be able to build an ID database
+covering approximately 12,000-14,000 source files totalling
+approximately 50--100 Megabytes.  A 66 MHz 486 computer can build such
+a large ID database in approximately 10-15 minutes.
 
-@item
-The @samp{-S.gz=man} tells @code{mkid} to treat all @file{.gz} files as
-this new language @code{man}.  @xref{Scanner option formats}.
+@pindex cron
+In a future release, @file{mkid} will be able to incrementally update an
+ID database much faster than it can build one from scratch.  Until this
+feature becomes available, it might be a good idea to schedule a
+@file{cron} job to regularly update large ID databases during off-hours.
+
+@file{mkid} writes the ID file, therefore it accepts the @samp{--output}
+(and @samp{--file}) options as described in @ref{Writing options}.
+@file{mkid} extracts tokens from source files, therefore it accepts the
+@samp{--lang-map}, @samp{--include}, @samp{--exclude}, and
+@samp{--lang-option} options, as well as the language-specific scanner
+options, all of which are described in @ref{Extraction options}.
+@file{mkid} walks file trees, therefore it handles file and directory
+names on its command line and the @samp{--prune} option as described in
+@ref{Walker options}.
+
+In addition, @file{mkid} accepts the following command-line options:
 
-@end enumerate
+@table @samp
 
-As a further complication, @code{cat} pages typically contain
-underlining and backspace sequences, which will confuse @code{mkid}.  To
-handle this, the @code{gzip} command becomes a pipeline, like this:
+@item -s
+@itemx --statistics
+@opindex -s
+@opindex --statistics
+@cindex statistics
 
-@example
-mkid '-Sman/text/gzip <%s | col -b' -S.gz=man -
-@end example
+@file{mkid} reports statistics about resource usage at the end of its
+run.
 
+@item -v
+@itemx --verbose
+@opindex -v
+@opindex --verbose
+@cindex @file{mkid} progress
 
-@node Common query arguments
-@chapter Common query arguments
+@file{mkid} reports statistics about each file as it is scanned, and
+about the resource usage of its indexing algorithm at regular intervals.
 
-@cindex common query arguments
+@end table
 
-Certain options, and regular expression syntax, are shared by the ID
-query tools.  So we describe those things in the sections below, instead
-of repeating the description for each tool.
+@c ************* gkm *********************************************************
+@node lid invocation
+@chapter @code{lid}: Querying an ID Database by Token
+
+The @file{lid} program accepts @var{patterns} on the command line which
+it matches against the tokens stored in an ID database.  The
+interpretation of a @var{pattern} is determined by the makeup of the
+@var{pattern} string itself, or can be overridden by command-line
+options.  If a @var{pattern} contains regular expression meta-characters,
+it is used to perform a regular-expression substring search.  If no such
+meta-characters are present, @var{pattern} is used to perform a literal
+word search.  (By default, all searches are sensitive to alphabetic
+case.)  If no @var{pattern} is supplied on the command line, @file{lid}
+lists every entry in the ID database.
+
+@file{lid} reads the ID database, therefore it accepts the @samp{--file}
+option, and consults the @samp{IDPATH} environment variable, as
+described in @ref{Reading options}.  @file{lid} lists file names,
+therefore it accepts the @samp{--separator} option, as described in
+@ref{File listing options}.
+
+In addition, @code{lid} accepts the following command-line options:
 
-@menu
-* Query options::               -f -r -c -ew -kg -n -doxa -m -F -u.
-* Patterns::                    Regular expression syntax for searches.
-* Examples: Query examples.     Some common uses.
-@end menu
+@table @samp
 
+@item -i
+@itemx --ignore-case
+@opindex -i
+@opindex --ignore-case
+@cindex alphabetic case, ignoring differences in
+@cindex ignoring differences in alphabetic case
+
+Ignore differences in alphabetic case between the @var{pattern} and
+the tokens in the ID database.
+
+@item -l
+@itemx --literal
+@opindex -l
+@opindex --literal
+
+Match @var{pattern} as a literal string.  Use this option if
+@var{pattern} contains regular-expression meta-characters, but you don't
+wish to perform a regular-expression search.
+
+@item -r
+@itemx --regexp
+@opindex -r
+@opindex --regexp
+
+Match @var{pattern} as an @emph{extended} regular expression@footnote{Extended
+regular expressions are the same as those accepted by @file{egrep}.}.
+Use this option if no regular-expression meta-characters are
+present in @var{pattern}, but you wish to force a regular-expression
+search (note: in this case, a @emph{literal substring} search might be
+faster).
+
+@item -w
+@itemx --word
+@opindex -w
+@opindex --word
 
-@node Query options
-@section Query options
+Match @var{pattern} using a word-delimited (non substring) search.  This is the default
+for literal searches.
 
-@cindex query options, common
-@cindex common query options
+@item -s
+@itemx --substring
+@opindex -s
+@opindex --substring
 
-The ID query tools (@emph{not} @code{mkid}) share certain command line
-options.  Not all of these options are recognized by all programs, but
-if an option is used by more than one program, it is described below.
-The description of each program gives the options that program uses.
+Match @var{pattern} using a substring (non word-delimited) search.  This
+is the default for regular expression searches.
 
-@table @samp
+@item -k @var{style}
+@itemx --key=@var{style}
+@opindex -k
+@opindex --key
 
-@item -f@var{idfile}
-@opindex -f@var{idfile}
-@cindex database name, specifying
-@cindex parent directories, searched for ID
-Read the database from @var{idfile}, in the current directory or in any
-directory above the current directory.  The default database name is
-@file{ID}.  Searching parent directories lets you have a single ID
-database at the root of a large source tree and then use the query tools
-from anywhere within that tree.
-
-@item -r@var{directory}
-@opindex -r@var{directory}
-Find files relative to @var{directory}, instead of the directory in
-which the ID database was found.  This is useful if the ID database was
-moved after its creation.
-
-@item -c
-@opindex -c
-Equivalent to @code{-r`pwd`}, i.e., find files relative to the current
-directory, instead of the directory in which the ID database was found.
+@var{Style} can be one of @samp{token}, @samp{pattern} or @samp{none}.
+This option controls how the subject of the query is presented.  This is
+best illustrated by example:
 
-@item -e
-@itemx -w
-@opindex -e
-@opindex -w
-@cindex regular expressions, forcing evaluation as
-@cindex strings, forcing evaluation as
-@cindex constant strings, forcing evaluation as
-@samp{-e} forces pattern arguments to be treated as regular expressions,
-and @samp{-w} forces pattern arguments to be treated as constant
-strings.  By default, the query tools guess whether a pattern is regular
-expressions or constant strings by looking for special characters.
-@xref{Patterns}.
-
-@item -k
-@itemx -g
-@opindex -k
-@opindex -g
-@cindex brace notation in filename lists
-@cindex shell brace notation in filename lists
-@samp{-k} suppresses use of shell brace notation in the output.  By
-default, the query tools that generate lists of filenames attempt to
-compress the lists using the usual shell brace notation, e.g.,
-@file{@{foo,bar@}.c} to mean @file{foo.c} and @file{bar.c}.  (This is
-useful if you use @code{ksh} or the original (not GNU) @code{sh} and
-want to feed the list of names to another command, since those shells do
-not support this brace notation; the name of the @code{-k} option comes
-from the @code{k} in @code{ksh}).
-
-@samp{-g} turns on use of brace notation; this is only needed if the
-query tools were compiled with @samp{-k} as the default behavior.
+@example
+$ lid --key=token '^dest.'
+destaddr       libsys/memcpy.c
+destination    libsys/regex.c
+destlst        libsys/rx.c
+destpos        libsys/rx.c
+destset        libsys/rx.h libsys/rx.c
+
+$ lid --key=pattern '^dest.'
+^dest.         libsys/rx.h libsys/@{memcpy,regex,rx@}.c
+
+$ lid --key=none '^dest.'
+libsys/rx.h libsys/@{memcpy,regex,rx@}.c
+@end example
 
-@item -n
-@opindex -n
-@cindex suppressing matching identifier
-Suppress the matching identifier before each list of filenames that the
-query tools output by default. This is useful if you want a list of just
-the names to feed to another command.
+When @samp{--key} is either @samp{token} or @samp{pattern}, the first
+column of output is a @var{token} or @var{pattern}, respectively.  When
+@samp{--key} is @samp{none}, neither of these is printed, and the file
+name list begins immediately.  The default is @samp{token}.
+
+@item -R @var{style}
+@itemx --result=@var{style}
+@opindex -R
+@opindex --result
+
+@var{Style} can be one of @samp{filenames}, @samp{grep}, @samp{edit} or
+@samp{none}.  This option controls how the value associated with the
+query's @var{key} is presented.  When @var{style} is @samp{filenames}, a
+list of file names is printed (this is the default).  When @var{style}
+is @samp{grep}, the lines that match @var{pattern} are printed in the
+same format as @samp{egrep -n}.  When @var{style} is @samp{edit}, the
+file names are passed to an editor, and if possible @var{pattern} is
+passed as an initial search string (@pxref{eid invocation}).  When
+@var{style} is @samp{none}, the file names are not processed in any way.
+This can be useful if you wish to see what tokens match a @var{pattern},
+but don't care about where they reside.
 
 @item -d
 @itemx -o
 @itemx -x
-@itemx -a
 @opindex -d
 @opindex -o
 @opindex -x
-@opindex -a
 @cindex radix of numeric matches, specifying
 @cindex numeric matches, specifying radix of
+
 These options may be used in any combination to specify the radix of
 numeric matches.  @samp{-d} allows matching on decimal numbers,
-@samp{-o} on octal numbers, and @samp{-x} on hexadecimal numbers.  The
-@code{-a} option is equivalent to specifying all three; this is the
-default.  Any combination of these options may be used.
+@samp{-o} on octal numbers, and @samp{-x} on hexadecimal numbers.  Any
+combination of these options may be used.  The default is to match all
+three radixes.
 
-@item -m
-@opindex -m
-@cindex multiple lines, merging
-Merge multiple lines of output into a single line.  If your query
-matches more than one identifier, the default is to generate a separate
-line of output for each matching identifier.
-
-@itemx -F-
-@itemx -F@var{n}
-@itemx -F-@var{m}
-@itemx -F@var{n}-@var{m}
+@item -F @var{range}
+@itemx --frequency=@var{range}
 @opindex -F
+@opindex --frequency
 @cindex single matches, showing
-Show identifiers matching at least @var{n} and at most @var{m} times.
-@samp{-F-} is equivalent to @samp{-F1}, i.e., find identifiers that
-appear only once in the database.  (This is useful to locate identifiers
-that are defined but never used, or used once and never defined.)
-
-@item -u@var{number}
-@opindex -u
-@cindex conflicting identifiers, finding
-List identifiers that conflict in the first @var{number} characters.
-This could be in useful porting programs to brain-dead computers that
-refuse to support long identifiers, but your best long term option is to
-set such computers on fire.
-
-@end table
-
 
-@node Patterns
-@section Patterns
-
-@cindex patterns
-@cindex regular expression syntax
-
-@dfn{Patterns}, also called @dfn{regular expressions}, allow you to
-match many different identifiers in a single query.
-
-The same regular expression syntax is recognized by all the query tools
-that handle regular expressions.  The exact syntax depends on how the ID
-tools were compiled, but the following constructs should always be
-supported:
-
-@table @samp
-
-@item .
-Match any single character.
-
-@item [@var{chars}]
-Match any of the characters specified within the brackets.  You can
-match any characters @emph{except} the ones in brackets by typing
-@samp{^} as the first character.  A range of characters can be specified
-using @samp{-}.  For example, @samp{[abc]} and @samp{[a-c]} both match
-@samp{a}, @samp{b}, or @samp{c}, and @samp{[^abc]} matches anything
-@emph{except} @samp{a}, @samp{b}, or @samp{c}.
-
-@item *
-Match the previous construct zero or more times.
+Match tokens whose occurrence count falls in @var{range}.  @var{Range}
+may be expressed as a single number @var{n}, or as a range
+@var{n@code{..}m}.  Either limit of the range may be omitted (e.g.,
+@var{@code{..}m}, or @var{n@code{..}}).  If the lower limit @var{n} is
+omitted, it defaults to @code{1}.  If the upper limit is omitted, it
+defaults in the present implementation to @code{65535}, the maximum
+value of an unsigned 16-bit integer.
+
+Particularly useful queries are @samp{lid -F1}, which helps locate
+identifiers that are defined but never used, or are used but never
+defined.  Similarly, @code{lid -F2} can help find functions that possess
+a prototype declaration and a definition, but are never called.
+
+@item -a @var{number}
+@itemx --ambiguous=@var{number}
+@opindex -a
+@opindex --ambiguous
+@cindex ambiguous identifier names, finding
 
-@item ^
-@itemx $
-@samp{^} (@samp{$}) at the beginning (end) of a pattern anchors the
-match to the first (last) character of the identifier.
+List identifiers (not numbers) that are ambiguous for the first
+@var{number} characters.  This feature might be useful when porting
+programs to ancient pea-brained compilers that don't support long
+identifier names.  However, the best long-term option is to set such
+systems on fire.
 
 @end table
 
-The query programs use either the @code{regex}/@code{regcmp} or
-@code{re_comp}/@code{re_exec} functions, depending on which are
-available in the library on your system.  These do not always support
-the exact same regular expression syntax, so consult your local
-@code{man} pages to find out.
-
-
-@node Query examples
-@section Query examples
-
-@cindex examples, queries
-@cindex query examples
-Here are some examples of the options described in the previous
-sections.
-
-To restrict searches to exact matches, use @samp{^@dots{}$}. For example:
-
-@example
-prompt$ gid '^FILE$'
-ansi2knr.c:144: @{      FILE *in, *out;
-ansi2knr.c:315:     FILE *out;
-fid.c:38: FILE *id_FILE;
-filenames.c:576: FILE *
-@dots{}
-@end example
-
-To show identifiers not unique in the first 16 characters:
-
-@example
-prompt$ lid -u16
-RE_CONTEXT_INDEP_ANCHORS regex.c
-RE_CONTEXT_INDEP_OPS regex.c
-RE_SYNTAX_POSIX_BASIC regex.c
-RE_SYNTAX_POSIX_EXTENDED regex.c
-@dots{}
-@end example
-
-@cindex numeric searches
-Numbers are searched for numerically rather than textually. For example:
-
-@example
-prompt$ lid 0xff
-0377           @{lid,regex@}.c
-0xff           @{bitops,fid,lid,mkid@}.c
-255            regex.c
-@end example
-
-On the other hand, you can restrict a numeric search to a particular
-radix if you want:
-
-@example
-laurie$ lid -x 0xff
-0xff           @{bitops,fid,lid,mkid@}.c
-@end example
-
-Filenames in the output are always adjusted to be correct for the
-correct working directory. For example:
-
-@example
-prompt$ lid bdevsw
-bdevsw         sys/conf.h cf/conf.c io/bio.c os/@{fio,main,prf,sys3@}.c
-prompt$ cd io
-prompt$ lid bdevsw
-bdevsw         ../sys/conf.h ../cf/conf.c bio.c ../os/@{fio,main,prf,sys3@}.c
-@end example
-
-
-@node gid invocation
-@chapter @code{gid}: Listing matching lines
-
-Synopsis:
-
-@example
-gid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-doxasc] [@var{pattern}@dots{}]
-@end example
-
-@code{gid} finds the identifiers in the database that match the
-specified @var{pattern}s, then searches for all occurrences of those
-identifiers, in only the files containing matches.  In a large source
-tree, this saves an enormous amount of time (compared to searching every
-source file).
-
-With no @var{pattern} arguments, @code{gid} prints every line of every
-source file.
-
-The name ``gid'' stands for ``grep for identifiers'', @code{grep} being
-the standard utility to search regular files.
+@menu
+* lid aliases::                 Aliases for specialized lid queries
+* Emacs gid interface::         GNU Emacs query interface
+* eid invocation::              Invoking an editor on query results
+@end menu
 
-@xref{Common query arguments}, for a description of the command-line
-options and @var{pattern} arguments.
+@c ************* gkm *********************************************************
+@node lid aliases
+@section Aliases for Specialized @file{lid} Queries
 
-@code{gid} uses the standard GNU output format for identifying source lines:
+Historically, the ID utilities have provided several query interfaces
+which are specializations of @code{lid} (@pxref{lid invocation}).
 
-@example
-@var{filename}:@var{linenum}: @var{text}
-@end example
+@table @file
 
-Here is an example:
+@item gid
+(alias for @samp{lid -R grep})
+lists all lines containing the requested pattern.
 
-@example
-prompt$ gid FILE
-ansi2knr.c:144: @{      FILE *in, *out;
-ansi2knr.c:315:     FILE *out;
-fid.c:38: FILE *id_FILE;
-@dots{}
-@end example
+@item eid
+(alias for @samp{lid -R edit})
+invokes an editor on all files containing the requested pattern, and
+optionally initiates a text search for that pattern.
 
-@menu
-* GNU Emacs gid interface::     Using next-error with gid.
-@end menu
+@item aid
+(alias for @samp{lid -ils}) treats the requested pattern
+as a case-insensitive literal substring.
 
+@end table
 
-@node GNU Emacs gid interface
-@section GNU Emacs @code{gid} interface
+@c ***************************************************************************
+@node Emacs gid interface
+@section GNU Emacs query interface
 
 @cindex Emacs interface to @code{gid}
-@flindex gid.el @r{interface to Emacs}
+@flindex id-utils.el @r{interface to Emacs}
 
 @vindex load-path
-The @code{mkid} source distribution comes with a file @file{gid.el},
+The @code{id-utils} source distribution comes with a file @file{id-utils.el},
 which defines a GNU Emacs interface to @code{gid}.  To install it, put
-@file{gid.el} somewhere that Emacs will find it (i.e., in your
+@file{id-utils.el} somewhere that Emacs will find it (i.e., in your
 @code{load-path}) and put
 
 @example
@@ -1041,334 +1023,241 @@ The @code{gid} function prompts you with the word around point.  If you
 want to search for something else, simply delete the line and type the
 pattern of interest.
 
-@flindex *scratch* @r{Emacs buffer}
+@flindex *compilation* @r{Emacs buffer}
 The function then runs the @code{gid} program in a @samp{*compilation*}
 buffer, so the normal @code{next-error} function can be used to visit
 all the places the identifier is found (@pxref{Compilation,,, emacs, The
 GNU Emacs Manual}).
 
-
-@node Looking up identifiers
-@chapter Looking up identifiers
-
-These commands look up identifiers in the ID database and operate on the
-files containing matches.
-
-@menu
-* lid invocation::              Matching patterns.
-* aid invocation::              Matching strings.
-* eid invocation::              Invoking an editor on matches.
-* fid invocation::              Listing a file's identifiers.
-@end menu
-
-
-@node lid invocation
-@section @code{lid}: Matching patterns
-
-@pindex lid
-
-Synopsis:
-
-@example
-lid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-mewdoxaskgnc] @c
-@var{pattern}@dots{}
-@end example
-
-@code{lid} searches the database for identifiers matching the given
-@var{pattern} arguments and prints the names of the files that match
-each @var{pattern}.  With no @var{pattern}s, @code{lid} lists every
-entry in the database.
-
-The name ``lid'' stands for ``lookup identifier''.
-
-@xref{Common query arguments}, for a description of the command-line
-options and @var{pattern} arguments.
-
-By default, each line of output consists of an identifier and all the
-files containing that identifier.
-
-Here is an example showing a search for a single identifier (omitting
-some output to keep lines short):
-
-@example
-prompt$ lid FILE
-FILE           extern.h @{fid,gets0,getsFF,idx,init,lid,mkid,@dots{}@}.c
-@end example
-
-This example shows a regular expression search:
-
-@example
-prompt$ lid 'FILE$'
-AF_FILE        mkid.c
-AF_IDFILE      mkid.c
-FILE           extern.h @{fid,gets0,getsFF,idx,init,lid,mkid,@dots{}@}.c
-IDFILE         id.h @{fid,lid,mkid@}.c
-IdFILE         @{fid,lid@}.c
-@dots{}
-@end example
-
-@noindent As you can see, when a regular expression is used, it is
-possible to get more than one line of output.  To merge multiple lines
-into one, use @samp{-m}:
-
-@example
-prompt$ lid -m ^get
-^get           extern.h @{bitsvec,fid,gets0,getsFF,getscan,idx,lid,@dots{}@}.c
-@end example
-
-
-@node aid invocation
-@section @code{aid}: Matching strings
-
-@pindex aid
-
-Synopsis:
-
-@example
-aid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-mewdoxaskgnc] @c
-@var{string}@dots{}
-@end example
-
-@cindex case-insensitive searching
-@cindex string searching
-@code{aid} searches the database for identifiers containing the given
-@var{string} arguments.  The search is case-insensitive.
-
-@flindex whatis
-The name ``aid'' stands for ``apropos identifier'', @code{apropros}
-being a command that does a similar search of the @code{whatis} database
-of @code{man} descriptions.
-
-For example, @samp{aid get} matches the identifiers @code{fgets},
-@code{GETLINE}, and @code{getchar}.
-
-The default output format is the same as @code{lid}; see the previous
-section.
-
-@xref{Common query arguments}, for a description of the command-line
-options and @var{pattern} arguments.
-
-
+@c ************* gkm *********************************************************
 @node eid invocation
-@section @code{eid}: Invoking an editor on matches
+@section @code{eid}: Invoking an Editor on Query Results
 
 @pindex eid
 
-Synopsis:
-
-@example
-eid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-doxasc] [@var{pattern}]@dots{}
-@end example
-
-@code{eid} runs the usual search (@pxref{lid invocation}) on the given
-arguments, shows you the output, and then asks:
+@samp{lid -R edit} is an editing interface for the ID utilities that is
+most commonly used with @file{vi}.  Emacs users should use the interface
+defined in @code{id-utils.el} (@pxref{Emacs gid interface}).  The ID
+utilities include an alias called @file{eid}, and for the sake of
+brevity, we'll use this alias for the remainder of this section.
+@file{eid} performs a @file{lid}-style query, then asks if you wish to edit
+the files.  If your query yields more than one line of output, you will
+be prompted after each line.  This is the prompt you'll see:
 
 @example
-Edit? [y1-9^S/nq] 
+Edit? [y1-9^S/nq]
 @end example
 
 @noindent
-You can respond with:
+You may respond with:
 
 @table @samp
+
 @item y
 Edit all files listed.
 
 @item 1@dots{}9
 Edit all files starting at the @math{@var{n} + 1}'st file.
 
-@item /@var{string} @r{or} @kbd{CTRL-S}@var{string}
-Edit all files whose name contains @var{string}.
+@item /@var{regexp} @r{or} @kbd{CTRL-S}@var{regexp}
+Search into the file list, and begin editing with the first file name
+that matches the regular expression @var{regexp}.
 
 @item n
-Go on to the next @var{pattern}, i.e., edit no files for this one.
+Don't edit any files.  If another line of query output is pending,
+advance to that line, for which another @samp{Edit?} prompt will appear.
 
 @item q
-Quit @code{eid}.
+Quit---don't edit any files, and don't process any more lines of query
+output.
 
 @end table
 
-@code{eid} invokes an editor once per @var{pattern}; all the specified
-files are given to the editor for you to edit simultaneously.
+Here is an example:
 
-@code{eid} invokes the editor defined by the @samp{EDITOR} environment
-variable.  If the editor can accept an initial search argument on the
-command line, @code{eid} moves automatically to the location of the
-match, via the environment variables below.
+@example
+prompt$ eid FILE \^print
+FILE           @{ansi2knr,fid,filenames,idfile,idx,lid,misc,@dots{}@}.c
+Edit? [y1-9^S/nq] n
+^print         @{ansi2knr,fid,getopt,getopt1,lid,mkid,regex,scanners@}.c
+Edit? [y1-9^S/nq] 2
+@end example
 
-@xref{Common query arguments}, for a description of the command-line
-options and @var{pattern} arguments.
+@noindent This will start editing at @file{getopt.c}.
 
-Here are the environment variables relevant to @code{eid}:
+@code{eid} invokes the editor defined by the environment variable
+@samp{VISUAL}.  If @samp{VISUAL} is undefined, it uses the environment
+variable @samp{EDITOR} instead.  If @samp{EDITOR} is undefined, it
+defaults to @file{vi}.  It is possible for @file{eid} to pass the editor
+an initial search pattern so that your cursor will immediately alight on
+the token of interest.  This feature is controlled by the following
+environment variables:
 
 @table @samp
 
-@item EDITOR
-@vindex EDITOR
-The name of the editor program to invoke.
-
 @item EIDARG
 @vindex EIDARG
-@cindex search for identifier, initial
-The argument to pass to the editor to search for the matching
-identifier.  For @code{vi}, this should be @samp{+/%s/'}.
+@cindex search for token, initial
+A printf(3) format string for the editor argument to search for the
+matching token.  For @code{vi}, this should be @samp{+/%s/}.
 
 @item EIDLDEL
 @vindex EIDLDEL
 @cindex left delimiter editor argument
 @cindex beginning-of-word editor argument
-A regular expression to force a match at the beginning of a word (``left
-delimiter).  @code{eid} inserts this in front of the matching identifier
-when composing the search argument.  For @code{vi}, this should be
-@samp{\<}.
+The regular-expression meta-character(s) for delimiting the beginning of
+a word (the `@file{eid} Left DELimiter').  @code{eid} inserts this in
+front of the matching token when a word-search is desired.  For
+@file{vi}, this should be @samp{\<}.
 
 @item EIDRDEL
 @vindex EIDRDEL
 @cindex right delimiter editor argument
 @cindex end-of-word editor argument
-The end-of-word regular expression.  For @code{vi}, this should be
-@samp{\>}.
+The regular-expression meta-character(s) for delimiting the end of
+a word (the `@file{eid} Right DELimiter').  @code{eid} inserts this at
+the end of the matching token when a word-search is desired.  For
+@file{vi}, this should be @samp{\>}.
 
 @end table
 
-For Emacs users, the interface in @code{gid.el} is probably preferable
-to @code{eid}.  @xref{GNU Emacs gid interface}.
-
-
-Here is an example:
-
-@example
-prompt$ eid FILE \^print
-FILE           @{ansi2knr,fid,filenames,idfile,idx,lid,misc,@dots{}@}.c
-Edit? [y1-9^S/nq] n
-^print         @{ansi2knr,fid,getopt,getopt1,lid,mkid,regex,scanners@}.c
-Edit? [y1-9^S/nq] 2
-@end example
-
-@noindent This will start editing at @file{getopt}.c.
-
-
+@c ************* gkm *********************************************************
 @node fid invocation
-@section @code{fid}: Listing a file's identifiers
+@chapter @code{fid}: Listing a file's tokens
 
 @pindex fid
-@cindex identifiers in a file
-
-@code{fid} lists the identifiers found in a given file.  Synopsis:
-
-@example
-fid [-f@var{dbfile}] @var{file1} [@var{file2}]
-@end example
-
-@table @samp
-
-@item -f@var{dbfile}
-Read the database from @var{dbfile} instead of @file{ID}.
-
-@item @var{file1}
-List all the identifiers contained in @var{file1}.
+@cindex tokens in a file
+@cindex tokens common to two files
 
-@item @var{file2}
-With a second file argument, list only the identifiers both files have
-in common.
+@file{fid} prints the tokens found in a given file.  If two file names
+are passed on the command line, @file{fid} prints the tokens that are
+common to both files (i.e., the @emph{set intersection} of the two token
+sets).
 
-@end table
-
-The output is simply one identifier (or number) per line.
+@file{fid} reads the ID database, therefore it accepts the @samp{--file}
+option, and consults the @samp{IDPATH} environment variable, as
+described in @ref{Reading options}.
 
+If the standard output is attached to a terminal, the printed tokens are
+separated by spaces.  Otherwise, the tokens are printed one per line.
 
-@node pid invocation
-@chapter @code{pid}: Looking up filenames
+@c ************* gkm *********************************************************
+@node fnid invocation
+@chapter @code{fnid}: Looking up filenames
 
-@pindex pid
+@pindex fnid
 @cindex filenames, matching
 @cindex matching filenames
 
-@code{pid} matches the filenames stored in the ID database, rather than
-the identifiers.  Synopsis:
+@code{fnid} queries the list of file names stored in the ID database.
+It accepts shell @emph{wildcard} patterns on the command line.  If no
+pattern is supplied, @file{*} is implied.  @file{fnid} prints the
+file names that match the given patterns.
 
-@example
-pid [-f@var{dbfile}] [-r@var{dir}] [-ebkgnc] @var{wildcard}@dots{}
-@end example
-
-By default, the @var{wildcard} patterns are treated as shell globbing
-patterns, rather than the regular expressions the other utilities
-accept.  See the section below for details.
-
-Besides the standard options given in the synopsis (@pxref{Query
-options}), @code{pid} accepts the following:
-
-@table @samp
-
-@item -e
-@opindex -e
-Do the usual regular expression matching (@pxref{Patterns}), instead
-of shell wildcard matching.
-
-@item -b
-@opindex -b
-@cindex basename match
-Match the basenames of the files in the database.  For example,
-@samp{pid -b foo} will match the stored filename @file{dir/foo}, but not
-@file{foo/file}.
-
-@end table
+@code{fnid} prints file names, and as such accepts the
+@samp{--separator} option as described in @ref{File listing options}.
 
 For example, the command:
 
 @example
-pid \*.c
+fnid \*.c
 @end example
 
 @noindent lists all the @file{.c} files in the database.  (The @samp{\}
 here protects the @samp{*} from being expanded by the shell.)
 
-@menu
-* Wildcard patterns::           Shell-style globbing patterns.
-@end menu
+@c ************* gkm *********************************************************
+@node xtokid invocation
+@chapter @file{xtokid}: Testing Language Scanners
 
+@file{xtokid} accepts the names of files and/or directories on the
+command line, then extracts and prints a stream of tokens from those
+files for which it has a valid, enabled scanner.  This is useful
+primarily for debugging new @file{mkid} scanners (@pxref{Defining
+scanners}).
 
-@node Wildcard patterns
-@section Wildcard patterns
+@file{xtokid} extracts tokens from source files, therefore it accepts
+the @samp{--lang-map}, @samp{--include}, @samp{--exclude}, and
+@samp{--lang-option} options, as well as the language-specific scanner
+options, all of which are described in @ref{Extraction options}.
+@file{xtokid} walks file trees, therefore it handles file and directory
+names on its command line and the @samp{--prune} option as described in
+@ref{Walker options}.
 
-@cindex globbing patterns
-@cindex shell wildcard patterns
-@cindex wildcard wildcard patterns
+The name @samp{xtokid} indicates that it is the ``eXtract TOKens ID
+utility''.
 
-@code{pid} does simplified shell wildcard matching (unless the @samp{-e}
-option is specified), rather than the regular expression matching done
-by the other utilities.  Here is a description of wildcard matching,
-also called @dfn{globbing}:
+@c ************* gkm *********************************************************
+@node Past and Future
+@chapter Past and Future
 
-@itemize @bullet
+@cindex history
 
-@item
-@kindex * @r{in globbing}
-@samp{*} matches zero or more characters.
+@pindex look @r{and @file{mkid} 1}
+@cindex McGary, Greg
+Greg McGary conceived of the ideas behind the ID utilities when he
+began working on the Unix kernel in 1984.  He needed a navigation tool
+to help him find his way around the expansive, unfamiliar landscape.
+The first @code{id-utils}-like tools were shell scripts, and produced an
+ASCII database that looks much like the output of @samp{lid ".*"}.  It
+took over an hour on a @sc{vax 11/750} to build a database for a
+@sc{4.1bsd} derived kernel.  The first version of @file{lid} used the
+@sc{unix} system utility @code{look}, modified to handle very long
+lines.
+
+In 1986, Greg rewrote the shell scripts in C to improve performance.
+Build times for the ID file were shortened by an order of magnitude.
+The ID utilities were first posted to @samp{comp.sources.unix} in
+September 1987 under the name @code{id}.
 
-@item
-@kindex ? @r{in globbing}
-@samp{?} matches any single character.
+@cindex Horsley, Tom
+@cindex Scofield, Doug
+@cindex Leonard, Bill
+@cindex Berry, Karl
+Over the next few years, several versions diverged from the original
+source.  Tom Horsley at Harris Computer Systems Division stepped forward
+to take over maintenance and integrated some of the fixes from divergent
+versions.  A first release of the renamed @file{mkid} @w{version 2} was
+posted to @file{alt.sources} near the end of 1990.  At that time, Tom
+wrote a Texinfo manual with the encouragement of the net community.
+(Tom especially thanks Doug Scofield and Bill Leonard whom he dragooned
+into helping poorfraed and edit---they found several problems in the
+initial version.)  Karl Berry revamped the manual for Texinfo style,
+indexing, and organization in 1995.
+
+In January 1995, Greg McGary reemerged as the primary maintainer and
+launched development of @file{mkid} version 3, whose primary new feature
+is an efficient algorithm for building databases that is linear in both
+time and space over the size of the input text.  (The old algorithm was
+quadratic in space so it was incapable of handling very large source
+trees.)  For the first time, the code was released under the GNU General
+Public License.
+
+In June 1996, the package was renamed again to @code{id-utils} and was
+released for the first time under FSF copyright as part of the GNU
+system.  All programs had their command-line arguments completely
+revised.  The @file{mkid} and @file{xtokid} programs also gained a
+file-tree walker, so that directory names can be passed on the command
+line instead of the names of every individual file.  Greg reorganized
+and rewrote most of the Texinfo manual to reflect these changes.
 
-@item
-@kindex \ @r{in globbing}
-@samp{\} forces the next character to be taken literally.
+@pindex cscope
+@pindex grep
+@cindex future
+Future releases of @code{id-utils} might include:
 
-@item
-@kindex [@dots{}] @r{in globbing}
-@samp{[@var{chars}]} matches any single character listed in @var{chars}.
+@itemize @bullet
 
-@item
-@kindex [!@dots{}] @r{in globbing}
-@samp{[!@var{chars}]} matches any character @emph{not} listed in @var{chars}.
+@item an optional coupling with GNU @code{grep}, so that @code{grep} can use
+an ID database for hints
 
-@end itemize
+@item a @code{cscope} work-alike query interface
 
-Most shells treat @samp{/} and leading @samp{.} characters
-specially. @code{pid} does not do this.  It simply matches the filename
-in the database against the wildcard pattern.
+@item incremental update of the ID database.
 
+@end itemize
 
+@c ***************************************************************************
 @node Index
 @unnumbered Index
 
diff --git a/doc/version.texi b/doc/version.texi
index 4893f0a..2ec9dd9 100644
--- a/doc/version.texi
+++ b/doc/version.texi
@@ -1,3 +1,3 @@
-@set UPDATED 16 March 1996
-@set EDITION 3.0.9
-@set VERSION 3.0.9
+@set UPDATED 4 July 1996
+@set EDITION 3.1
+@set VERSION 3.1
author	Greg McGary <greg@mcgary.org>	1997-04-18 06:43:35 +0000
committer	Greg McGary <greg@mcgary.org>	1997-04-18 06:43:35 +0000
commit	3720d4b7a1b0ce0903450271aa3d93388e9d8781 (patch)
tree	12200295d735bf3d1bcaaf8d2065547d41cea3b2 /doc
parent	916418ea1284e6aa64f50eba077e48ced5944acc (diff)
download	idutils-3720d4b7a1b0ce0903450271aa3d93388e9d8781.tar.gz idutils-3720d4b7a1b0ce0903450271aa3d93388e9d8781.tar.bz2 idutils-3720d4b7a1b0ce0903450271aa3d93388e9d8781.zip