diff options
Diffstat (limited to 'doc')
-rw-r--r-- | doc/Makefile.am | 3 | ||||
-rw-r--r-- | doc/Makefile.in | 204 | ||||
-rw-r--r-- | doc/id-utils.info | 1246 | ||||
-rw-r--r-- | doc/id-utils.texi | 1378 | ||||
-rwxr-xr-x | doc/mdate-sh | 91 | ||||
-rw-r--r-- | doc/stamp-vti | 1 | ||||
-rw-r--r-- | doc/version.texi | 3 |
7 files changed, 2926 insertions, 0 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000..b7d1100 --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,3 @@ +## Process this file with automake to produce Makefile.in + +info_TEXINFOS = id-utils.texi diff --git a/doc/Makefile.in b/doc/Makefile.in new file mode 100644 index 0000000..650fddf --- /dev/null +++ b/doc/Makefile.in @@ -0,0 +1,204 @@ +# Makefile.in generated automatically by automake 1.0 from Makefile.am + +# Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. + + +SHELL = /bin/sh + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = .. 
+ +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +transform = @program_transform_name@ + +info_TEXINFOS = id-utils.texi +mkinstalldirs = $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = ../config.h + +MAKEINFO = makeinfo +TEXI2DVI = texi2dvi +INFOS = id-utils.info* +INFO_DEPS = id-utils.info +DVIS = id-utils.dvi +TEXINFOS = id-utils.texi + +DIST_COMMON = Makefile.am Makefile.in mdate-sh stamp-vti version.texi + + +PACKAGE = @PACKAGE@ +VERSION = @VERSION@ + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \ + $(TEXINFOS) $(INFOS) $(MANS) $(EXTRA_DIST) $(DATA) +DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \ + $(TEXINFOS) $(INFO_DEPS) $(MANS) $(EXTRA_DIST) $(DATA) + +TAR = tar +default: all + + +$(srcdir)/Makefile.in: @MAINT@Makefile.am $(top_srcdir)/configure.in + cd $(top_srcdir) && automake $(subdir)/Makefile + +Makefile: $(top_builddir)/config.status Makefile.in + cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= ./config.status + +version.texi: @MAINT@stamp-vti + +stamp-vti: id-utils.texi $(top_srcdir)/configure.in + echo "@set UPDATED `cd $(srcdir) \ + && $(SHELL) ./mdate-sh id-utils.texi`" > vti.tmp + echo "@set EDITION $(VERSION)" >> vti.tmp + echo "@set VERSION $(VERSION)" >> vti.tmp + if cmp -s vti.tmp $(srcdir)/version.texi; then \ + rm vti.tmp; \ + else \ + mv vti.tmp $(srcdir)/version.texi; \ + fi + echo timestamp > $(srcdir)/stamp-vti + +mostlyclean-vti: + rm -f vti.tmp + +clean-vti: + +distclean-vti: + +maintainer-clean-vti: + @MAINT@rm -f stamp-vti version.texi + +id-utils.info: id-utils.texi version.texi + + +.texi.info: + $(MAKEINFO) -I$(srcdir) $< -o $@ + +.texi.dvi: + TEXINPUTS=$(srcdir):$$TEXINPUTS $(TEXI2DVI) $< + +install-info: $(INFO_DEPS) + $(mkinstalldirs) $(infodir) + for file in $(INFO_DEPS); do \ + for ifile in `cd $(srcdir) && echo $$file*`; do \ + $(INSTALL_DATA) $(srcdir)/$$ifile $(infodir)/$$ifile; \ + done; 
\ + done + +uninstall-info: + cd $(srcdir) && for file in *.info*; do \ + rm -f $(infodir)/$$file; \ + done + +mostlyclean-info: + rm -f id-utils.aux id-utils.cp id-utils.cps id-utils.dvi id-utils.fn \ + id-utils.fns id-utils.ky id-utils.log id-utils.pg \ + id-utils.toc id-utils.tp id-utils.vr id-utils.op + +clean-info: + +distclean-info: + +maintainer-clean-info: + rm -f $(INFOS) +tags: TAGS +TAGS: + + +subdir = doc +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) +distdir: $(DEP_DISTFILES) + @for file in `cd $(srcdir) && echo $(DISTFILES)`; do \ + test -f $(distdir)/$$file \ + || ln $(srcdir)/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $(srcdir)/$$file $(distdir)/$$file; \ + done +info: $(INFO_DEPS) + +dvi: $(DVIS) + +check: all + +installcheck: + +install-exec: + +install-data: install-info + +install: install-exec install-data all + @: + +uninstall: uninstall-info + +all: $(INFO_DEPS) Makefile + +install-strip: + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install +installdirs: + $(mkinstalldirs) $(infodir) + + +mostlyclean-generic: + test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES) + +clean-generic: + test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + rm -f Makefile $(DISTCLEANFILES) + rm -f config.cache config.log $(CONFIG_HEADER) stamp-h + +maintainer-clean-generic: + test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) + test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +mostlyclean: mostlyclean-vti mostlyclean-info mostlyclean-generic + +clean: clean-vti clean-info clean-generic mostlyclean + +distclean: distclean-vti distclean-info distclean-generic clean + rm -f config.status + +maintainer-clean: maintainer-clean-vti maintainer-clean-info \ + maintainer-clean-generic distclean + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." 
+ +.PHONY: default mostlyclean-vti distclean-vti clean-vti \ +maintainer-clean-vti install-info uninstall-info mostlyclean-info \ +distclean-info clean-info maintainer-clean-info tags distdir info dvi \ +check installcheck install-exec install-data install uninstall all \ +installdirs mostlyclean-generic distclean-generic clean-generic \ +maintainer-clean-generic clean mostlyclean distclean maintainer-clean + +.SUFFIXES: +.SUFFIXES: .texi .info .dvi + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/doc/id-utils.info b/doc/id-utils.info new file mode 100644 index 0000000..dcc76e8 --- /dev/null +++ b/doc/id-utils.info @@ -0,0 +1,1246 @@ +This is Info file ../../doc/id-utils.info, produced by Makeinfo-1.63 +from the input file ../../doc/id-utils.texi. + +START-INFO-DIR-ENTRY +* ID database: (id). Identifier database utilities. +* aid: (id)aid invocation. Matching strings. +* eid: (id)eid invocation. Invoking an editor on matches. +* fid: (id)fid invocation. Listing a file's identifiers. +* gid: (id)gid invocation. Listing all matching lines. +* idx: (id)idx invocation. Testing mkid scanners. +* lid: (id)lid invocation. Matching patterns. +* mkid: (id)mkid invocation. Creating an ID database. +* pid: (id)pid invocation. Looking up filenames. +END-INFO-DIR-ENTRY + + This file documents the `mkid' identifier database utilities. + + Copyright (C) 1991, 1995 Tom Horsley. + + Permission is granted to make and distribute verbatim copies of this +manual provided the copyright notice and this permission notice are +preserved on all copies. + + Permission is granted to copy and distribute modified versions of +this manual under the conditions for verbatim copying, provided that +the entire resulting derived work is distributed under the terms of a +permission notice identical to this one. 
+ + Permission is granted to copy and distribute translations of this +manual into another language, under the above conditions for modified +versions, except that this permission notice may be stated in a +translation. + + +File: id-utils.info, Node: Top, Next: Introduction, Up: (dir) + +ID database utilities +********************* + + This manual documents version 3.0.9 of the ID database utilities. + +* Menu: + +* Introduction:: Overview of the tools, and authors. +* mkid invocation:: Creating an ID database. +* Common query arguments:: Common lookup options and search patterns. +* gid invocation:: Listing all matching lines. +* Looking up identifiers:: lid, aid, eid, and fid. +* pid invocation:: Looking up filenames. +* Index:: General index. + + +File: id-utils.info, Node: Introduction, Next: mkid invocation, Prev: Top, Up: Top + +Introduction +************ + + An "ID database" is a binary file containing a list of filenames, a +list of identifiers, and a matrix indicating which identifiers appear in +which files. With this database and some tools to manipulate it +(described in this manual), a host of tasks become simpler and faster. +For example, you can list all files containing a particular `#include' +throughout a huge source hierarchy, search for all the memos containing +references to a project, or automatically invoke an editor on all files +containing references to some function. Anyone with a large software +project to maintain, or a large set of text files to organize, can +benefit from an ID database. + + Although the ID utilities are most commonly used with identifiers, +numeric constants are also stored in the database, and can be searched +for in the same way (independent of radix, if desired). + + There are a number of programs in the ID family: + +`mkid' + scans files for identifiers and numeric constants and builds the ID + database file. + +`gid' + lists all lines that match given patterns. 
+ +`lid' + lists the filenames containing identifiers that match given + patterns. + +`aid' + lists the filenames containing identifiers that contain given + strings, independent of case. + +`eid' + invokes an editor on each file containing identifiers that match + given patterns. + +`fid' + lists all identifiers recorded in the database for given files, or + identifiers common to two files. + +`pid' + matches the filenames in the database, rather than the identifiers. + +`idx' + helps with testing of new `mkid' scanners. + + Please report bugs to `gkm@magilla.cichlid.com'. Remember to +include the version number, machine architecture, input files, and any +other information needed to reproduce the bug: your input, what you +expected, what you got, and why it is wrong. Diffs are welcome, but +please include a description of the problem as well, since this is +sometimes difficult to infer. *Note Bugs: (gcc)Bugs. + +* Menu: + +* Past and future:: How the ID tools came about, and where they're going. + + +File: id-utils.info, Node: Past and future, Up: Introduction + +Past and future +=============== + + Greg McGary conceived of the ideas behind mkid when he began hacking +the Unix kernel in 1984. He needed a navigation tool to help him find +his way around the expansive, unfamiliar landscape. The first +`mkid'-like tools were shell scripts, and produced an ASCII database +that looks much like the output of `lid' with no arguments. It took +over an hour on a VAX 11/750 to build a database for a 4.1BSD-ish +kernel. Lookups were done with the system utility `look', modified to +handle very long lines. + + In 1986, Greg rewrote `mkid', `lid', `fid' and `idx' in C to improve +performance. Database-build times were shortened by an order of +magnitude. The `mkid' tools were first posted to `comp.sources.unix' +in September 1987. + + Over the next few years, several versions diverged from the original +source. 
Tom Horsley at Harris Computer Systems Division stepped forward +to take over maintenance and integrated some of the fixes from divergent +versions. A first release of `mkid' version 2 was posted to +`alt.sources' near the end of 1990. At that time, Tom wrote this +Texinfo manual with the encouragement of the net community. (Tom +especially thanks Doug Scofield and Bill Leonard whom he dragooned into +helping poorfraed and edit--they found several problems in the initial +version.) Karl Berry revamped the manual for Texinfo style, indexing, +and organization in 1995. + + In January 1995, Greg McGary reemerged as the primary maintainer and +launched development of `mkid' version 3, whose primary new feature is +an efficient algorithm for building databases that is linear in both +time and space over the size of the input text. (The old algorithm was +quadratic in space and therefore choked on very large source trees.) +The code is released under the GNU Public License, and might become a +part of the GNU system. `mkid' 3 is an interim release, since several +significant enhancements are still in the works: an optional coupling +with GNU `grep', so that `grep' can use an ID database for hints; a +`cscope' work-alike query interface; incremental update of the ID +database; and an automatic file-tree walker so you need not explicitly +supply every filename argument to the `mkid' program. + + +File: id-utils.info, Node: mkid invocation, Next: Common query arguments, Prev: Introduction, Up: Top + +`mkid': Creating ID databases +***************************** + + The `mkid' program builds an ID database. To do this, it must scan +each file you tell it to include in the database. This takes some time, +but once the work is done the query programs run very rapidly. (You can +run `mkid' as a `cron' job to regularly update your databases.) + + The `mkid' program knows how to extract identifiers from various +types of files. 
For example, it can recognize and skip over comments +and string constants in a C program. + + Identifiers are not the only thing included in the database. Numbers +are also recognized and included in the database indexed by their binary +value. This feature allows you to find uses of constants without regard +to the radix used to specify them, since the same number can frequently +be written in many different ways (for instance, `47', `0x2f', `057' in +C). + + All the places in this document which mention identifiers should +really mention both identifiers and numbers, but that gets fairly +clumsy after a while, so you just need to keep in mind that numbers are +included in the database as well as identifiers. + + The ID files that `mkid' creates are architecture- and +byte-order-independent; you can share them at will across systems. + +* Menu: + +* mkid options:: Command-line options to mkid. +* Scanners:: Built-in and defining your own. +* mkid examples:: Examples of mkid usage. + + +File: id-utils.info, Node: mkid options, Next: Scanners, Up: mkid invocation + +`mkid' options +============== + + By default, `mkid' scans the files you specify and writes the +database to a file named `ID' in the current directory. + + mkid [-v] [-SSCANARG] [-aARGFILE] [-] [-fIDFILE] FILES... + + The program accepts the following options. + +`-v' + Verbose. `mkid' tells you as it scans each file and indicates + which scanner it is using. It also summarizes some statistics + about the database at the end. + +`-SSCANARG' + Specify options regarding `mkid''s scanners. *Note Scanner option + formats::. + +`-aARGFILE' + Read additional command line arguments from ARGFILE. This is + typically used to specify lists of filenames longer than will fit + on a command line; some systems have severe limitations on the + total length of a command line. + +`-' + Read additional command line arguments from standard input. + +`-fIDFILE' + Write the database to the file IDFILE, instead of `ID'. 
The + database stores filenames relative to the directory containing the + database, so if you move the database to a different directory + after creating it, you may have trouble finding files. + + The remaining arguments FILES are the files to be scanned and +included in the database. If no files are given at all (either on +command line or via `-a' or `-'), `mkid' does nothing. + + +File: id-utils.info, Node: Scanners, Next: mkid examples, Prev: mkid options, Up: mkid invocation + +Scanners +======== + + To determine which identifiers to extract from a file and store in +the database, `mkid' calls a "scanner"; we say a scanner "recognizes" a +particular language. Scanners for several languages are built-in to +`mkid'; you can add your own scanners as well, as explained in the +sections below. + + `mkid' determines which scanner to use for a particular file by +looking at the suffix of the filename. This "suffix" is everything +after and including the last `.' in a filename; for example, the suffix +of `foo.c' is `.c'. `mkid' has a built-in list of bindings from some +suffixes to corresponding scanners; for example, `.c' files are (not +surprisingly) scanned by the predefined C language scanner. + + If `mkid' cannot determine what scanner to use for a particular +file, either because the file has no suffix (e.g., `foo') or because +`mkid' has no binding for the file's suffix (e.g., `foo.bar'), it uses +the scanner bound to the `.default' suffix. By default, this is the +plain text scanner (*note Plain text scanner::.), but you can change +this with the `-S' option, as explained below. + +* Menu: + +* Scanner option formats:: Overview of the -S option. +* Predefined scanners:: The C, plain text, and assembler scanners. +* Defining new scanners:: Either in source code or at runtime with -S. +* idx invocation:: Testing mkid scanners. 
+ + +File: id-utils.info, Node: Scanner option formats, Next: Predefined scanners, Up: Scanners + +Scanner option formats +---------------------- + + With the `-S' option, you can change which language scanner to use +for which files, give language-specific options, and get some limited +online help about scanner options. + + Here are the different forms of the `-S' option: + +`-S.SUFFIX=SCANNER' + Use SCANNER for a file with the given `.SUFFIX'. For example, + `-S.yacc=c' tells `mkid' to use the `c' language scanner for all + files ending in `.yacc'. + +`-S.SUFFIX=?' + Display which scanner is used for the given `.SUFFIX'. + +`-S?=SCANNER' + Display which suffixes SCANNER is used for. + +`-S?=?' + Display the scanner binding for every known suffix. + +`-SSCANNER+ARG' +`-SSCANNER-ARG' + Each scanner accepts certain scanner-dependent arguments. These + options all have one of these forms. *Note Predefined scanners::. + +`-SSCANNER?' + Display the scanner-specific options accepted by SCANNER. + +`-SNEW-SCANNER/OLD-SCANNER/FILTER-COMMAND' + Define NEW-SCANNER in terms of OLD-SCANNER and FILTER-COMMAND. + *Note Defining scanners with options::. + + +File: id-utils.info, Node: Predefined scanners, Next: Defining new scanners, Prev: Scanner option formats, Up: Scanners + +Predefined scanners +------------------- + + `mkid' has built-in scanners for several types of languages; you can +get the list by running `mkid -S?=?'. The supported languages are +documented below(1). + +* Menu: + +* C scanner:: For the C programming language. +* Plain text scanner:: For documents or other non-source code. +* Assembler scanner:: For assembly language. + + ---------- Footnotes ---------- + + (1) This is not strictly true: `vhil' is a supported language, but +it is an obsolete and arcane dialect of C and should be ignored. + + +File: id-utils.info, Node: C scanner, Next: Plain text scanner, Up: Predefined scanners + +C scanner +......... + + The C scanner is the most commonly used. 
Files with the usual `.c' +and `.h' suffixes, and the `.y' (yacc) and `.l' (lex) suffixes, are +processed with this scanner (by default). + + Scanner-specific options: + +`-Sc-sCHARACTER' + Allow the specified CHARACTER in identifiers. For example, if you + use `$' in identifiers, you'll want to use `-Sc-s$'. + +`-Sc+u' + Strip leading underscores from identifiers. You might want to do this in + peculiar circumstances, such as trying to parse the output from + `nm' or some other system utility. + +`-Sc-u' + Don't strip leading underscores from identifiers; this is the + default. + + +File: id-utils.info, Node: Plain text scanner, Next: Assembler scanner, Prev: C scanner, Up: Predefined scanners + +Plain text scanner +.................. + + The plain text scanner is intended for scanning most non-source-code +files. This is typically the scanner used when adding custom scanners +via `-S' (*note Defining scanners with options::.). + + Scanner-specific options: + +`-Stext+aCHARACTER' + Include CHARACTER in identifiers. By default, letters (a-z and + A-Z) and underscore are included. + +`-Stext-aCHARACTER' + Exclude CHARACTER from identifiers. + +`-Stext+sCHARACTER' + Squeeze CHARACTER from identifiers, i.e., do not terminate an + identifier when CHARACTER is seen. By default, the characters + `'', `-', and `.' are squeezed out of identifiers. For example, + the input `fred's' leads to the identifier `freds'. + +`-Stext-sCHARACTER' + Do not squeeze CHARACTER. + + +File: id-utils.info, Node: Assembler scanner, Prev: Plain text scanner, Up: Predefined scanners + +Assembler scanner +................. + + Since assembly languages come in several flavors, this scanner has a +number of options: + +`-Sasm-cCHARACTER' + Define CHARACTER as starting a comment that extends to the end of + the input line; no default. In many assemblers this is `;' or `#'. + +`-Sasm+u' +`-Sasm-u' + Strip (`+u') or do not strip (`-u') leading underscores from + identifiers. The default is to strip them. 
+ +`-Sasm+aCHARACTER' + Allow CHARACTER in identifiers. + +`-Sasm-aCHARACTER' + Allow CHARACTER in identifiers, but if an identifier contains + CHARACTER, ignore it. This is useful to ignore temporary labels, + which can be generated in great profusion; these often contain `.' + or `@'. + +`-Sasm+p' +`-Sasm-p' + Recognize (`+p') or do not recognize (`-p') C preprocessor + directives in assembler source. The default is to recognize them. + +`-Sasm+C' +`-Sasm-C' + Skip over (`+C') or do not skip over (`-C') C style comments in + assembler source. The default is to skip them. + + +File: id-utils.info, Node: Defining new scanners, Next: idx invocation, Prev: Predefined scanners, Up: Scanners + +Defining new scanners +--------------------- + + You can add new scanners to `mkid' in two ways: modify the source +code and recompile, or at runtime via the `-S' option. Each has its +advantages and disadvantages, as explained below. + + If you create a new scanner that would be of use to others, please +consider sending it back to the maintainer, `gkm@magilla.cichlid.com', +for inclusion in future releases of `mkid'. + +* Menu: + +* Defining scanners in source code:: +* Defining scanners with options:: + + +File: id-utils.info, Node: Defining scanners in source code, Next: Defining scanners with options, Up: Defining new scanners + +Defining scanners in source code +................................ + + To add a new scanner in source code, you should add a new section to +the file `scanners.c'. Copy one of the existing scanners (most likely +either C or plain text), and modify as necessary. Also add the new +scanner to the `languages_0' and `suffixes_0' tables near the beginning +of the file. + + This is not a terribly difficult programming task, but it requires +recompiling and installing the new version of `mkid', which may be +inconvenient. + + This method leads to scanners which operate much more quickly than +ones that depend on external programs. 
It is also likely the +easiest way to define scanners for new programming languages. + + +File: id-utils.info, Node: Defining scanners with options, Prev: Defining scanners in source code, Up: Defining new scanners + +Defining scanners with options +.............................. + + You can use the `-S' option on the command line to define a new +language scanner: + + -SNEW-SCANNER/EXISTING-SCANNER/FILTER + +Here, NEW-SCANNER is the name of the new scanner being defined, +EXISTING-SCANNER is the name of an existing scanner, and FILTER is a +shell command or pipeline. + + The new scanner works by passing the input file to FILTER, and then +arranging for the result to be passed through EXISTING-SCANNER. +Typically, EXISTING-SCANNER is `text'. + + Somewhere within FILTER, the string `%s' should occur. This `%s' is +replaced by the name of the source file being scanned. + + For example, `mkid' has no built-in scanner for Texinfo files (like +this one). In indexing a Texinfo file, you most likely would want to +ignore the Texinfo @-commands. Here's one way to specify a new scanner +to do this: + + -Stexinfo/text/sed s,@[a-z]*,,g %s + + This defines a new language scanner (`texinfo') defined in terms of +a `sed' command to strip out Texinfo directives (an `@' character +followed by letters). Once the directives are stripped, the remaining +text is run through the plain text scanner. + + This is a minimal example; to do a complete job, you would need to +completely delete some lines, such as those beginning with `@end' or +`@node'. + + +File: id-utils.info, Node: idx invocation, Prev: Defining new scanners, Up: Scanners + +`idx': Testing `mkid' scanners +------------------------------ + + `idx' prints the identifiers found in the files you specify to +standard output. This is useful in debugging new `mkid' scanners (*note +Scanners::.). Synopsis: + + idx [-SSCANARG] FILES... + + `idx' accepts the same `-S' options as `mkid'. *Note Scanner option +formats::. 
+ + The name "idx" stands for "ID eXtract". The name may change in +future releases, since this is such an infrequently used program. + + +File: id-utils.info, Node: mkid examples, Prev: Scanners, Up: mkid invocation + +`mkid' examples +=============== + + The simplest example of `mkid' is something like: + + mkid *.[chy] + + This will build an ID database indexing identifiers and numbers in +all the `.c', `.h', and `.y' files in the current directory. +Because `mkid' already knows how to scan files with those suffixes, no +additional options are needed. + + Here's a more complex example. Suppose you want to build a database +indexing the contents of all the `man' pages, and further suppose that +your system is using `gzip' (*note Top: (gzip)Top.) to store compressed +`cat' versions of the `man' pages in the directory `/usr/catman'. The +`gzip' program creates files with a `.gz' suffix, so you must tell +`mkid' how to scan `.gz' files. Here are the commands to do the job: + + cd /usr/catman + find . -name \*.gz -print | mkid '-Sman/text/gzip <%s' -S.gz=man - + +Explanation: + + 1. We first `cd' to `/usr/catman' so the ID database will store the + correct relative filenames. + + 2. The `find' command prints the names of all `.gz' files under the + current directory. *Note find invocation: (sh-utils)find + invocation. + + 3. This list is piped to `mkid'; the `-' option (at the end of the + line) tells `mkid' to read arguments (in this case, as is typical, + the list of filenames) from standard input. *Note mkid options::. + + 4. The `-Sman/text/gzip ...' defines a new language `man' in terms of + the `gzip' program and `mkid''s existing text scanner. *Note + Defining scanners with options::. + + 5. The `-S.gz=man' tells `mkid' to treat all `.gz' files as this new + language `man'. *Note Scanner option formats::. + + + As a further complication, `cat' pages typically contain underlining +and backspace sequences, which will confuse `mkid'. 
To handle this, +the `gzip' command becomes a pipeline, like this: + + mkid '-Sman/text/gzip <%s | col -b' -S.gz=man - + + +File: id-utils.info, Node: Common query arguments, Next: gid invocation, Prev: mkid invocation, Up: Top + +Common query arguments +********************** + + Certain options, and regular expression syntax, are shared by the ID +query tools. So we describe those things in the sections below, instead +of repeating the description for each tool. + +* Menu: + +* Query options:: -f -r -c -ew -kg -n -doxa -m -F -u. +* Patterns:: Regular expression syntax for searches. +* Examples: Query examples. Some common uses. + + +File: id-utils.info, Node: Query options, Next: Patterns, Up: Common query arguments + +Query options +============= + + The ID query tools (*not* `mkid') share certain command line +options. Not all of these options are recognized by all programs, but +if an option is used by more than one program, it is described below. +The description of each program gives the options that program uses. + +`-fIDFILE' + Read the database from IDFILE, in the current directory or in any + directory above the current directory. The default database name + is `ID'. Searching parent directories lets you have a single ID + database at the root of a large source tree and then use the query + tools from anywhere within that tree. + +`-rDIRECTORY' + Find files relative to DIRECTORY, instead of the directory in + which the ID database was found. This is useful if the ID + database was moved after its creation. + +`-c' + Equivalent to `-r`pwd`', i.e., find files relative to the current + directory, instead of the directory in which the ID database was + found. + +`-e' +`-w' + `-e' forces pattern arguments to be treated as regular expressions, + and `-w' forces pattern arguments to be treated as constant + strings. By default, the query tools guess whether a pattern is + regular expressions or constant strings by looking for special + characters. 
*Note Patterns::. + +`-k' +`-g' + `-k' suppresses use of shell brace notation in the output. By + default, the query tools that generate lists of filenames attempt + to compress the lists using the usual shell brace notation, e.g., + `{foo,bar}.c' to mean `foo.c' and `bar.c'. (This is useful if you + use `ksh' or the original (not GNU) `sh' and want to feed the list + of names to another command, since those shells do not support + this brace notation; the name of the `-k' option comes from the + `k' in `ksh'). + + `-g' turns on use of brace notation; this is only needed if the + query tools were compiled with `-k' as the default behavior. + +`-n' + Suppress the matching identifier before each list of filenames + that the query tools output by default. This is useful if you want + a list of just the names to feed to another command. + +`-d' +`-o' +`-x' +`-a' + These options may be used in any combination to specify the radix + of numeric matches. `-d' allows matching on decimal numbers, `-o' + on octal numbers, and `-x' on hexadecimal numbers. The `-a' + option is equivalent to specifying all three; this is the default. + Any combination of these options may be used. + +`-m' + Merge multiple lines of output into a single line. If your query + matches more than one identifier, the default is to generate a + separate line of output for each matching identifier. + +`-F-' +`-FN' +`-F-M' +`-FN-M' + Show identifiers matching at least N and at most M times. `-F-' + is equivalent to `-F1', i.e., find identifiers that appear only + once in the database. (This is useful to locate identifiers that + are defined but never used, or used once and never defined.) + +`-uNUMBER' + List identifiers that conflict in the first NUMBER characters. + This could be useful in porting programs to brain-dead computers + that refuse to support long identifiers, but your best long term + option is to set such computers on fire. 
+ + +File: id-utils.info, Node: Patterns, Next: Query examples, Prev: Query options, Up: Common query arguments + +Patterns +======== + + "Patterns", also called "regular expressions", allow you to match +many different identifiers in a single query. + + The same regular expression syntax is recognized by all the query +tools that handle regular expressions. The exact syntax depends on how +the ID tools were compiled, but the following constructs should always +be supported: + +`.' + Match any single character. + +`[CHARS]' + Match any of the characters specified within the brackets. You can + match any characters *except* the ones in brackets by typing `^' + as the first character. A range of characters can be specified + using `-'. For example, `[abc]' and `[a-c]' both match `a', `b', + or `c', and `[^abc]' matches anything *except* `a', `b', or `c'. + +`*' + Match the previous construct zero or more times. + +`^' +`$' + `^' (`$') at the beginning (end) of a pattern anchors the match to + the first (last) character of the identifier. + + The query programs use either the `regex'/`regcmp' or +`re_comp'/`re_exec' functions, depending on which are available in the +library on your system. These do not always support the exact same +regular expression syntax, so consult your local `man' pages to find +out. + + +File: id-utils.info, Node: Query examples, Prev: Patterns, Up: Common query arguments + +Query examples +============== + + Here are some examples of the options described in the previous +sections. + + To restrict searches to exact matches, use `^...$'. For example: + + prompt$ gid '^FILE$' + ansi2knr.c:144: { FILE *in, *out; + ansi2knr.c:315: FILE *out; + fid.c:38: FILE *id_FILE; + filenames.c:576: FILE * + ... + + To show identifiers not unique in the first 16 characters: + + prompt$ lid -u16 + RE_CONTEXT_INDEP_ANCHORS regex.c + RE_CONTEXT_INDEP_OPS regex.c + RE_SYNTAX_POSIX_BASIC regex.c + RE_SYNTAX_POSIX_EXTENDED regex.c + ... 
+ + Numbers are searched for numerically rather than textually. For +example: + + prompt$ lid 0xff + 0377 {lid,regex}.c + 0xff {bitops,fid,lid,mkid}.c + 255 regex.c + + On the other hand, you can restrict a numeric search to a particular +radix if you want: + + prompt$ lid -x 0xff + 0xff {bitops,fid,lid,mkid}.c + + Filenames in the output are always adjusted to be correct for the +current working directory. For example: + + prompt$ lid bdevsw + bdevsw sys/conf.h cf/conf.c io/bio.c os/{fio,main,prf,sys3}.c + prompt$ cd io + prompt$ lid bdevsw + bdevsw ../sys/conf.h ../cf/conf.c bio.c ../os/{fio,main,prf,sys3}.c + + +File: id-utils.info, Node: gid invocation, Next: Looking up identifiers, Prev: Common query arguments, Up: Top + +`gid': Listing matching lines +***************************** + + Synopsis: + + gid [-fFILE] [-uN] [-rDIR] [-doxasc] [PATTERN...] + + `gid' finds the identifiers in the database that match the specified +PATTERNs, then searches for all occurrences of those identifiers, in +only the files containing matches. In a large source tree, this saves +an enormous amount of time (compared to searching every source file). + + With no PATTERN arguments, `gid' prints every line of every source +file. + + The name "gid" stands for "grep for identifiers", `grep' being the +standard utility to search regular files. + + *Note Common query arguments::, for a description of the command-line +options and PATTERN arguments. + + `gid' uses the standard GNU output format for identifying source +lines: + + FILENAME:LINENUM: TEXT + + Here is an example: + + prompt$ gid FILE + ansi2knr.c:144: { FILE *in, *out; + ansi2knr.c:315: FILE *out; + fid.c:38: FILE *id_FILE; + ... + +* Menu: + +* GNU Emacs gid interface:: Using next-error with gid. 
+ + +File: id-utils.info, Node: GNU Emacs gid interface, Up: gid invocation + +GNU Emacs `gid' interface +========================= + + The `mkid' source distribution comes with a file `gid.el', which +defines a GNU Emacs interface to `gid'. To install it, put `gid.el' +somewhere that Emacs will find it (i.e., in your `load-path') and put + + (autoload 'gid "gid" nil t) + +in one of Emacs' initialization files, e.g., `~/.emacs'. You will then +be able to use `M-x gid' to run the command. + + The `gid' function prompts you with the word around point. If you +want to search for something else, simply delete the line and type the +pattern of interest. + + The function then runs the `gid' program in a `*compilation*' +buffer, so the normal `next-error' function can be used to visit all +the places the identifier is found (*note Compilation: +(emacs)Compilation.). + + +File: id-utils.info, Node: Looking up identifiers, Next: pid invocation, Prev: gid invocation, Up: Top + +Looking up identifiers +********************** + + These commands look up identifiers in the ID database and operate on +the files containing matches. + +* Menu: + +* lid invocation:: Matching patterns. +* aid invocation:: Matching strings. +* eid invocation:: Invoking an editor on matches. +* fid invocation:: Listing a file's identifiers. + + +File: id-utils.info, Node: lid invocation, Next: aid invocation, Up: Looking up identifiers + +`lid': Matching patterns +======================== + + Synopsis: + + lid [-fFILE] [-uN] [-rDIR] [-mewdoxaskgnc] PATTERN... + + `lid' searches the database for identifiers matching the given +PATTERN arguments and prints the names of the files that match each +PATTERN. With no PATTERNs, `lid' lists every entry in the database. + + The name "lid" stands for "lookup identifier". + + *Note Common query arguments::, for a description of the command-line +options and PATTERN arguments. 
+ + By default, each line of output consists of an identifier and all the +files containing that identifier. + + Here is an example showing a search for a single identifier (omitting +some output to keep lines short): + + prompt$ lid FILE + FILE extern.h {fid,gets0,getsFF,idx,init,lid,mkid,...}.c + + This example shows a regular expression search: + + prompt$ lid 'FILE$' + AF_FILE mkid.c + AF_IDFILE mkid.c + FILE extern.h {fid,gets0,getsFF,idx,init,lid,mkid,...}.c + IDFILE id.h {fid,lid,mkid}.c + IdFILE {fid,lid}.c + ... + +As you can see, when a regular expression is used, it is possible to +get more than one line of output. To merge multiple lines into one, +use `-m': + + prompt$ lid -m ^get + ^get extern.h {bitsvec,fid,gets0,getsFF,getscan,idx,lid,...}.c + + +File: id-utils.info, Node: aid invocation, Next: eid invocation, Prev: lid invocation, Up: Looking up identifiers + +`aid': Matching strings +======================= + + Synopsis: + + aid [-fFILE] [-uN] [-rDIR] [-mewdoxaskgnc] STRING... + + `aid' searches the database for identifiers containing the given +STRING arguments. The search is case-insensitive. + + The name "aid" stands for "apropos identifier", `apropos' being a +command that does a similar search of the `whatis' database of `man' +descriptions. + + For example, `aid get' matches the identifiers `fgets', `GETLINE', +and `getchar'. + + The default output format is the same as `lid'; see the previous +section. + + *Note Common query arguments::, for a description of the command-line +options and PATTERN arguments. + + +File: id-utils.info, Node: eid invocation, Next: fid invocation, Prev: aid invocation, Up: Looking up identifiers + +`eid': Invoking an editor on matches +==================================== + + Synopsis: + + eid [-fFILE] [-uN] [-rDIR] [-doxasc] [PATTERN]... + + `eid' runs the usual search (*note lid invocation::.) on the given +arguments, shows you the output, and then asks: + + Edit? 
[y1-9^S/nq] + +You can respond with: + +`y' + Edit all files listed. + +`1...9' + Edit all files starting at the N + 1'st file. + +`/STRING or `CTRL-S'STRING' + Edit all files whose name contains STRING. + +`n' + Go on to the next PATTERN, i.e., edit no files for this one. + +`q' + Quit `eid'. + + `eid' invokes an editor once per PATTERN; all the specified files +are given to the editor for you to edit simultaneously. + + `eid' invokes the editor defined by the `EDITOR' environment +variable. If the editor can accept an initial search argument on the +command line, `eid' moves automatically to the location of the match, +via the environment variables below. + + *Note Common query arguments::, for a description of the command-line +options and PATTERN arguments. + + Here are the environment variables relevant to `eid': + +`EDITOR' + The name of the editor program to invoke. + +`EIDARG' + The argument to pass to the editor to search for the matching + identifier. For `vi', this should be `+/%s/''. + +`EIDLDEL' + A regular expression to force a match at the beginning of a word + ("left delimiter"). `eid' inserts this in front of the matching + identifier when composing the search argument. For `vi', this + should be `\<'. + +`EIDRDEL' + The end-of-word regular expression. For `vi', this should be `\>'. + + For Emacs users, the interface in `gid.el' is probably preferable to +`eid'. *Note GNU Emacs gid interface::. + + Here is an example: + + prompt$ eid FILE \^print + FILE {ansi2knr,fid,filenames,idfile,idx,lid,misc,...}.c + Edit? [y1-9^S/nq] n + ^print {ansi2knr,fid,getopt,getopt1,lid,mkid,regex,scanners}.c + Edit? [y1-9^S/nq] 2 + +This will start editing at `getopt.c'. + + +File: id-utils.info, Node: fid invocation, Prev: eid invocation, Up: Looking up identifiers + +`fid': Listing a file's identifiers +=================================== + + `fid' lists the identifiers found in a given file. 
Synopsis: + + fid [-fDBFILE] FILE1 [FILE2] + +`-fDBFILE' + Read the database from DBFILE instead of `ID'. + +`FILE1' + List all the identifiers contained in FILE1. + +`FILE2' + With a second file argument, list only the identifiers both files + have in common. + + The output is simply one identifier (or number) per line. + + +File: id-utils.info, Node: pid invocation, Next: Index, Prev: Looking up identifiers, Up: Top + +`pid': Looking up filenames +*************************** + + `pid' matches the filenames stored in the ID database, rather than +the identifiers. Synopsis: + + pid [-fDBFILE] [-rDIR] [-ebkgnc] WILDCARD... + + By default, the WILDCARD patterns are treated as shell globbing +patterns, rather than the regular expressions the other utilities +accept. See the section below for details. + + Besides the standard options given in the synopsis (*note Query +options::.), `pid' accepts the following: + +`-e' + Do the usual regular expression matching (*note Patterns::.), + instead of shell wildcard matching. + +`-b' + Match the basenames of the files in the database. For example, + `pid -b foo' will match the stored filename `dir/foo', but not + `foo/file'. + + For example, the command: + + pid \*.c + +lists all the `.c' files in the database. (The `\' here protects the +`*' from being expanded by the shell.) + +* Menu: + +* Wildcard patterns:: Shell-style globbing patterns. + + +File: id-utils.info, Node: Wildcard patterns, Up: pid invocation + +Wildcard patterns +================= + + `pid' does simplified shell wildcard matching (unless the `-e' +option is specified), rather than the regular expression matching done +by the other utilities. Here is a description of wildcard matching, +also called "globbing": + + * `*' matches zero or more characters. + + * `?' matches any single character. + + * `\' forces the next character to be taken literally. + + * `[CHARS]' matches any single character listed in CHARS. 
+ + * `[!CHARS]' matches any character *not* listed in CHARS. + + Most shells treat `/' and leading `.' characters specially. `pid' +does not do this. It simply matches the filename in the database +against the wildcard pattern. + + +File: id-utils.info, Node: Index, Prev: pid invocation, Up: Top + +Index +***** + +* Menu: + +* $ in identifiers: C scanner. +* * in globbing: Wildcard patterns. +* *scratch* Emacs buffer: GNU Emacs gid interface. +* -: mkid options. +* -a: Query options. +* -aARGFILE: mkid options. +* -b: pid invocation. +* -c: Query options. +* -d: Query options. +* -e <1>: pid invocation. +* -e: Query options. +* -F: Query options. +* -fIDFILE: Query options. +* -g: Query options. +* -k: Query options. +* -m: Query options. +* -n: Query options. +* -o: Query options. +* -rDIRECTORY: Query options. +* -S scanner option: Scanner option formats. +* -S.: Scanner option formats. +* -S?: Scanner option formats. +* -SSCANARG: mkid options. +* -Sasm+a: Assembler scanner. +* -Sasm+C: Assembler scanner. +* -Sasm+p: Assembler scanner. +* -Sasm+u: Assembler scanner. +* -Sasm-c: Assembler scanner. +* -Sc+u: C scanner. +* -Sc-s: C scanner. +* -Sc-u: C scanner. +* -Stext+a: Plain text scanner. +* -Stext+s: Plain text scanner. +* -Stext-a: Plain text scanner. +* -u: Query options. +* -v: mkid options. +* -w: Query options. +* -x: Query options. +* .[chly] files, scanning: C scanner. +* .default scanner: Scanners. +* ? in globbing: Wildcard patterns. +* [!...] in globbing: Wildcard patterns. +* [...] in globbing: Wildcard patterns. +* \ in globbing: Wildcard patterns. +* aid: aid invocation. +* architecture-independence: mkid invocation. +* assembler scanner: Assembler scanner. +* basename match: pid invocation. +* beginning-of-word editor argument: eid invocation. +* Berry, Karl: Past and future. +* brace notation in filename lists: Query options. +* bugs, reporting: Introduction. +* C scanner, predefined: C scanner. +* case-insensitive searching: aid invocation. 
+* comments in assembler: Assembler scanner. +* common query arguments: Common query arguments. +* common query options: Query options. +* compressed files, building ID from: mkid examples. +* conflicting identifiers, finding: Query options. +* constant strings, forcing evaluation as: Query options. +* creating databases: mkid invocation. +* cron: mkid invocation. +* cscope: Past and future. +* database name, specifying: Query options. +* databases, creating: mkid invocation. +* EDITOR: eid invocation. +* eid: eid invocation. +* EIDARG: eid invocation. +* EIDLDEL: eid invocation. +* EIDRDEL: eid invocation. +* Emacs interface to gid: GNU Emacs gid interface. +* end-of-word editor argument: eid invocation. +* examples of mkid: mkid examples. +* examples, queries: Query examples. +* fid: fid invocation. +* filenames, matching: pid invocation. +* future: Past and future. +* gid Emacs function: GNU Emacs gid interface. +* gid.el interface to Emacs: GNU Emacs gid interface. +* globbing patterns: Wildcard patterns. +* grep: Past and future. +* history: Past and future. +* Horsley, Tom: Past and future. +* ID database, definition of: Introduction. +* ID file format: mkid invocation. +* identifiers in a file: fid invocation. +* introduction: Introduction. +* languages_0: Defining scanners in source code. +* left delimiter editor argument: eid invocation. +* Leonard, Bill: Past and future. +* lid: lid invocation. +* load-path: GNU Emacs gid interface. +* look and mkid 1: Past and future. +* man pages, compressed: mkid examples. +* matching filenames: pid invocation. +* McGary, Greg: Past and future. +* mkid: mkid invocation. +* mkid options: mkid options. +* multiple lines, merging: Query options. +* numbers, in databases: mkid invocation. +* numeric matches, specifying radix of: Query options. +* numeric searches: Query examples. +* options for mkid: mkid options. +* overview: Introduction. +* parent directories, searched for ID: Query options. +* patterns: Patterns. 
+* pid: pid invocation. +* plain text scanner: Plain text scanner. +* predefined scanners: Predefined scanners. +* query examples: Query examples. +* query options, common: Query options. +* radix of numeric matches, specifying: Query options. +* regular expression syntax: Patterns. +* regular expressions, forcing evaluation as: Query options. +* right delimiter editor argument: eid invocation. +* scanner options: Scanner option formats. +* scanners: Scanners. +* scanners, adding new: Defining new scanners. +* scanners, defining in source code: Defining scanners in source code. +* scanners, defining with options: Defining scanners with options. +* scanners, predefined: Predefined scanners. +* scanners.c: Defining scanners in source code. +* Scofield, Doug: Past and future. +* search for identifier, initial: eid invocation. +* sharing ID files: mkid invocation. +* shell brace notation in filename lists: Query options. +* shell wildcard patterns: Wildcard patterns. +* single matches, showing: Query options. +* squeezing characters from identifiers: Plain text scanner. +* statistics: mkid options. +* string searching: aid invocation. +* strings, forcing evaluation as: Query options. +* suffixes of filenames: Scanners. +* suffixes_0: Defining scanners in source code. +* suppressing matching identifier: Query options. +* Texinfo, scanning example of: Defining scanners with options. +* whatis: aid invocation. +* wildcard wildcard patterns: Wildcard patterns. 
+ + + +Tag Table: +Node: Top1540 +Node: Introduction2150 +Node: Past and future4367 +Node: mkid invocation6671 +Node: mkid options8241 +Node: Scanners9659 +Node: Scanner option formats11154 +Node: Predefined scanners12330 +Node: C scanner13033 +Node: Plain text scanner13788 +Node: Assembler scanner14699 +Node: Defining new scanners15828 +Node: Defining scanners in source code16451 +Node: Defining scanners with options17296 +Node: idx invocation18750 +Node: mkid examples19316 +Node: Common query arguments21295 +Node: Query options21843 +Node: Patterns25238 +Node: Query examples26578 +Node: gid invocation27965 +Node: GNU Emacs gid interface29132 +Node: Looking up identifiers29996 +Node: lid invocation30492 +Node: aid invocation31926 +Node: eid invocation32712 +Node: fid invocation34854 +Node: pid invocation35412 +Node: Wildcard patterns36510 +Node: Index37280 + +End Tag Table diff --git a/doc/id-utils.texi b/doc/id-utils.texi new file mode 100644 index 0000000..9cc7dd4 --- /dev/null +++ b/doc/id-utils.texi @@ -0,0 +1,1378 @@ +\input texinfo +@comment %**start of header +@setfilename id-utils.info +@settitle ID database utilities +@comment %**end of header + +@include version.texi + +@c Define new indices for filenames, commands and options. +@defcodeindex fl +@defcodeindex cm +@defcodeindex op + +@c Put everything in one index (arbitrarily chosen to be the concept index). +@syncodeindex fl cp +@syncodeindex fn cp +@syncodeindex ky cp +@syncodeindex op cp +@syncodeindex pg cp +@syncodeindex vr cp + +@ifinfo +@format +START-INFO-DIR-ENTRY +* ID database: (id). Identifier database utilities. +* aid: (id)aid invocation. Matching strings. +* eid: (id)eid invocation. Invoking an editor on matches. +* fid: (id)fid invocation. Listing a file's identifiers. +* gid: (id)gid invocation. Listing all matching lines. +* idx: (id)idx invocation. Testing mkid scanners. +* lid: (id)lid invocation. Matching patterns. +* mkid: (id)mkid invocation. Creating an ID database. 
+* pid: (id)pid invocation. Looking up filenames. +END-INFO-DIR-ENTRY +@end format +@end ifinfo + +@ifinfo +This file documents the @code{mkid} identifier database utilities. + +Copyright (C) 1991, 1995 Tom Horsley. + +Permission is granted to make and distribute verbatim copies of +this manual provided the copyright notice and this permission notice +are preserved on all copies. + +@ignore +Permission is granted to process this file through TeX and print the +results, provided the printed document carries copying permission +notice identical to this one except for the removal of this paragraph +(this paragraph not being relevant to the printed manual). + +@end ignore +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided that the entire +resulting derived work is distributed under the terms of a permission +notice identical to this one. + +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for modified versions, +except that this permission notice may be stated in a translation. +@end ifinfo + +@titlepage +@title ID database utilities +@subtitle Programs for simple, fast, high-capacity cross-referencing +@subtitle for version @value{VERSION} +@author Tom Horsley +@author Greg McGary + +@page +@vskip 0pt plus 1filll +Copyright @copyright{} 1991, 1995 Tom Horsley. + +Permission is granted to make and distribute verbatim copies of +this manual provided the copyright notice and this permission notice +are preserved on all copies. + +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided that the entire +resulting derived work is distributed under the terms of a permission +notice identical to this one. 
+ +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for modified versions, +except that this permission notice may be stated in a translation. +@end titlepage + + +@ifinfo +@node Top +@top ID database utilities + +This manual documents version @value{VERSION} of the ID database +utilities. + +@menu +* Introduction:: Overview of the tools, and authors. +* mkid invocation:: Creating an ID database. +* Common query arguments:: Common lookup options and search patterns. +* gid invocation:: Listing all matching lines. +* Looking up identifiers:: lid, aid, eid, and fid. +* pid invocation:: Looking up filenames. +* Index:: General index. +@end menu +@end ifinfo + + +@node Introduction +@chapter Introduction + +@cindex overview +@cindex introduction + +@cindex ID database, definition of +An @dfn{ID database} is a binary file containing a list of filenames, a +list of identifiers, and a matrix indicating which identifiers appear in +which files. With this database and some tools to manipulate it +(described in this manual), a host of tasks become simpler and faster. +For example, you can list all files containing a particular +@code{#include} throughout a huge source hierarchy, search for all the +memos containing references to a project, or automatically invoke an +editor on all files containing references to some function. Anyone with +a large software project to maintain, or a large set of text files to +organize, can benefit from an ID database. + +Although the ID utilities are most commonly used with identifiers, +numeric constants are also stored in the database, and can be searched +for in the same way (independent of radix, if desired). + +There are a number of programs in the ID family: + +@table @code + +@item mkid +scans files for identifiers and numeric constants and builds the ID +database file. + +@item gid +lists all lines that match given patterns. 
+ +@item lid +lists the filenames containing identifiers that match given patterns. + +@item aid +lists the filenames containing identifiers that contain given strings, +independent of case. + +@item eid +invokes an editor on each file containing identifiers that match given +patterns. + +@item fid +lists all identifiers recorded in the database for given files, or +identifiers common to two files. + +@item pid +matches the filenames in the database, rather than the identifiers. + +@item idx +helps with testing of new @code{mkid} scanners. + +@end table + +@cindex bugs, reporting +Please report bugs to @samp{gkm@@magilla.cichlid.com}. Remember to +include the version number, machine architecture, input files, and any +other information needed to reproduce the bug: your input, what you +expected, what you got, and why it is wrong. Diffs are welcome, but +please include a description of the problem as well, since this is +sometimes difficult to infer. @xref{Bugs, , , gcc, GNU CC}. + +@menu +* Past and future:: How the ID tools came about, and where they're going. +@end menu + + +@node Past and future +@section Past and future + +@cindex history + +@pindex look @r{and @code{mkid} 1} +@cindex McGary, Greg +Greg McGary conceived of the ideas behind mkid when he began hacking the +Unix kernel in 1984. He needed a navigation tool to help him find his +way around the expansive, unfamiliar landscape. The first @code{mkid}-like +tools were shell scripts, and produced an ASCII database that looks much +like the output of @code{lid} with no arguments. It took over an hour +on a VAX 11/750 to build a database for a 4.1BSD-ish kernel. Lookups +were done with the system utility @code{look}, modified to handle very +long lines. + +In 1986, Greg rewrote @code{mkid}, @code{lid}, @code{fid} and @code{idx} +in C to improve performance. Database-build times were shortened by an +order of magnitude. The @code{mkid} tools were first posted to +@samp{comp.sources.unix} in September 1987. 
+ +@cindex Horsley, Tom +@cindex Scofield, Doug +@cindex Leonard, Bill +@cindex Berry, Karl +Over the next few years, several versions diverged from the original +source. Tom Horsley at Harris Computer Systems Division stepped forward +to take over maintenance and integrated some of the fixes from divergent +versions. A first release of +@code{mkid} @w{version 2} was posted to @file{alt.sources} near the end +of 1990. At that time, Tom wrote this Texinfo manual with the +encouragement of the net community. (Tom especially thanks Doug Scofield +and Bill Leonard whom he dragooned into helping poorfraed and +edit---they found several problems in the initial version.) Karl Berry +revamped the manual for Texinfo style, indexing, and organization in +1995. + +@pindex cscope +@pindex grep +@cindex future +In January 1995, Greg McGary reemerged as the primary maintainer and +launched development of @code{mkid} version 3, whose primary new feature +is an efficient algorithm for building databases that is linear in both +time and space over the size of the input text. (The old algorithm was +quadratic in space and therefore choked on very large source trees.) +The code is released under the GNU General Public License, and might become a +part of the GNU system. @code{mkid} 3 is an interim release, since +several significant enhancements are still in the works: an optional +coupling with GNU @code{grep}, so that @code{grep} can use an ID +database for hints; a @code{cscope} work-alike query interface; +incremental update of the ID database; and an automatic file-tree walker +so you need not explicitly supply every filename argument to the +@code{mkid} program. + + +@node mkid invocation +@chapter @code{mkid}: Creating ID databases + +@pindex mkid +@cindex creating databases +@cindex databases, creating + +@pindex cron +The @code{mkid} program builds an ID database. To do this, it must scan +each file you tell it to include in the database. 
This takes some time, +but once the work is done the query programs run very rapidly. (You can +run @code{mkid} as a @code{cron} job to regularly update your +databases.) + +The @code{mkid} program knows how to extract identifiers from various +types of files. For example, it can recognize and skip over comments +and string constants in a C program. + +@cindex numbers, in databases +Identifiers are not the only thing included in the database. Numbers +are also recognized and included in the database indexed by their binary +value. This feature allows you to find uses of constants without regard +to the radix used to specify them, since the same number can frequently +be written in many different ways (for instance, @samp{47}, @samp{0x2f}, +@samp{057} in C). + +All the places in this document which mention identifiers should really +mention both identifiers and numbers, but that gets fairly clumsy after +a while, so you just need to keep in mind that numbers are included in +the database as well as identifiers. + +@cindex ID file format +@cindex architecture-independence +@cindex sharing ID files +The ID files that @code{mkid} creates are architecture- and +byte-order-independent; you can share them at will across systems. + +@menu +* mkid options:: Command-line options to mkid. +* Scanners:: Built-in and defining your own. +* mkid examples:: Examples of mkid usage. +@end menu + + +@node mkid options +@section @code{mkid} options + +@cindex options for @code{mkid} +@pindex mkid @r{options} + +By default, @code{mkid} scans the files you specify and writes the +database to a file named @file{ID} in the current directory. + +@example +mkid [-v] [-S@var{scanarg}] [-a@var{argfile}] [-] [-f@var{idfile}] @c +@var{files}@dots{} +@end example + +The program accepts the following options. + +@table @samp + +@item -v +@opindex -v +@cindex statistics +Verbose. @code{mkid} tells you as it scans each file and indicates +which scanner it is using. 
It also summarizes some statistics about the +database at the end. + +@item -S@var{scanarg} +@opindex -S@var{scanarg} +Specify options regarding @code{mkid}'s scanners. @xref{Scanner option +formats}. + +@item -a@var{argfile} +@opindex -a@var{argfile} +Read additional command line arguments from @var{argfile}. This is +typically used to specify lists of filenames longer than will fit on a +command line; some systems have severe limitations on the total length +of a command line. + +@item - +@opindex - +Read additional command line arguments from standard input. + +@item -f@var{idfile} +Write the database to the file @var{idfile}, instead of @file{ID}. The +database stores filenames relative to the directory containing the +database, so if you move the database to a different directory after +creating it, you may have trouble finding files. + +@c @item -u +@c @opindex -u +@c The @code{-u} option updates an existing database by rescanning any +@c files that have changed since the database was written. Unfortunately +@c you cannot incrementally add new files to a database. +@c Greg is reimplementing this ... + +@end table + +The remaining arguments @var{files} are the files to be scanned and +included in the database. If no files are given at all (either on +command line or via @samp{-a} or @samp{-}), @code{mkid} does nothing. + + +@node Scanners +@section Scanners + +@cindex scanners + +To determine which identifiers to extract from a file and store in the +database, @code{mkid} calls a @dfn{scanner}; we say a scanner +@dfn{recognizes} a particular language. Scanners for several languages +are built-in to @code{mkid}; you can add your own scanners as well, as +explained in the sections below. + +@cindex suffixes of filenames +@code{mkid} determines which scanner to use for a particular file by +looking at the suffix of the filename. 
This @dfn{suffix} is everything +after and including the last @samp{.} in a filename; for example, the +suffix of @file{foo.c} is @file{.c}. @code{mkid} has a built-in list of +bindings from some suffixes to corresponding scanners; for example, +@file{.c} files are (not surprisingly) scanned by the predefined C +language scanner. + +@findex .default @r{scanner} +If @code{mkid} cannot determine what scanner to use for a particular +file, either because the file has no suffix (e.g., @file{foo}) or +because @code{mkid} has no binding for the file's suffix (e.g., +@file{foo.bar}), it uses the scanner bound to the @samp{.default} +suffix. By default, this is the plain text scanner (@pxref{Plain text +scanner}), but you can change this with the @samp{-S} option, as +explained below. + +@menu +* Scanner option formats:: Overview of the -S option. +* Predefined scanners:: The C, plain text, and assembler scanners. +* Defining new scanners:: Either in source code or at runtime with -S. +* idx invocation:: Testing mkid scanners. +@end menu + + +@node Scanner option formats +@subsection Scanner option formats + +@cindex scanner options +@opindex -S @r{scanner option} + +With the @samp{-S} option, you can change which language scanner to use +for which files, give language-specific options, and get some limited +online help about scanner options. + +Here are the different forms of the @samp{-S} option: + +@table @samp + +@item -S.@var{suffix}=@var{scanner} +@opindex -S. +Use @var{scanner} for a file with the given @samp{.@var{suffix}}. For +example, @samp{-S.yacc=c} tells @code{mkid} to use the @samp{c} language +scanner for all files ending in @samp{.yacc}. + +@item -S.@var{suffix}=? +Display which scanner is used for the given @samp{.@var{suffix}}. + +@item -S?=@var{scanner} +@opindex -S? +Display which suffixes @var{scanner} is used for. + +@item -S?=? +Display the scanner binding for every known suffix. 
+ +@item -S@var{scanner}+@var{arg} +@itemx -S@var{scanner}-@var{arg} +Each scanner accepts certain scanner-dependent arguments. These options +all have one of these forms. @xref{Predefined scanners}. + +@item -S@var{scanner}? +Display the scanner-specific options accepted by @var{scanner}. + +@item -S@var{new-scanner}/@var{old-scanner}/@var{filter-command} +Define @var{new-scanner} in terms of @var{old-scanner} and +@var{filter-command}. @xref{Defining scanners with options}. + +@end table + + +@node Predefined scanners +@subsection Predefined scanners + +@cindex predefined scanners +@cindex scanners, predefined + +@code{mkid} has built-in scanners for several types of languages; you +can get the list by running @code{mkid -S?=?}. +The supported languages are documented +below@footnote{This is not strictly true: @samp{vhil} is a supported +language, but it is an obsolete and arcane dialect of C and should be +ignored.}. + +@menu +* C scanner:: For the C programming language. +* Plain text scanner:: For documents or other non-source code. +* Assembler scanner:: For assembly language. +@end menu + + +@node C scanner +@subsubsection C scanner + +@cindex C scanner, predefined +@flindex .[chly] @r{files, scanning} + +The C scanner is the most commonly used. Files with the usual @file{.c} +and @file{.h} suffixes, and the @file{.y} (yacc) and @file{.l} (lex) +suffixes, are processed with this scanner (by default). + +Scanner-specific options: + +@table @samp + +@item -Sc-s@var{character} +@kindex $ @r{in identifiers} +@opindex -Sc-s +Allow the specified @var{character} in identifiers. For example, if you +use @samp{$} in identifiers, you'll want to use @samp{-Sc-s$}. + +@item -Sc+u +@opindex -Sc+u +Strip leading underscores from identifiers. You might want to do this in +peculiar circumstances, such as trying to parse the output from +@code{nm} or some other system utility. + +@item -Sc-u +@opindex -Sc-u +Don't strip leading underscores from identifiers; this is the default. 
+ +@end table + + +@node Plain text scanner +@subsubsection Plain text scanner + +@cindex plain text scanner + +The plain text scanner is intended for scanning most non-source-code +files. This is typically the scanner used when adding custom scanners +via @samp{-S} (@pxref{Defining scanners with options}). + +@c @code{mkid} predefines a troff scanner in terms of the plain text +@c scanner and +@c the @code{deroff} utility. +@c A compressed man page +@c scanner runs @code{pcat} piped into @code{col -b}, and a @TeX{} scanner +@c runs @code{detex}. + +Scanner-specific options: + +@table @samp + +@item -Stext+a@var{character} +@opindex -Stext+a +Include @var{character} in identifiers. By default, letters (a--z and +A--Z) and underscore are included. + +@item -Stext-a@var{character} +@opindex -Stext-a +Exclude @var{character} from identifiers. + +@item -Stext+s@var{character} +@opindex -Stext+s +@cindex squeezing characters from identifiers +Squeeze @var{character} from identifiers, i.e., do not terminate an +identifier when @var{character} is seen. By default, the characters +@samp{'}, @samp{-}, and @samp{.} are squeezed out of identifiers. For +example, the input @samp{fred's} leads to the identifier @samp{freds}. + +@item -Stext-s@var{character} +Do not squeeze @var{character}. + +@end table + + +@node Assembler scanner +@subsubsection Assembler scanner + +@cindex assembler scanner + +Since assembly languages come in several flavors, this scanner has a +number of options: + +@table @samp + +@item -Sasm-c@var{character} +@opindex -Sasm-c +@cindex comments in assembler +Define @var{character} as starting a comment that extends to the end of +the input line; no default. In many assemblers this is @samp{;} or +@samp{#}. + +@item -Sasm+u +@itemx -Sasm-u +@opindex -Sasm+u +Strip (@samp{+u}) or do not strip (@samp{-u}) leading underscores from +identifiers. The default is to strip them. 
+ +@item -Sasm+a@var{character} +@opindex -Sasm+a +Allow @var{character} in identifiers. + +@item -Sasm-a@var{character} +Allow @var{character} in identifiers, but if an identifier contains +@var{character}, ignore that identifier. This is useful to ignore temporary labels, +which can be generated in great profusion; these often contain @samp{.} +or @samp{@@}. + +@item -Sasm+p +@itemx -Sasm-p +@opindex -Sasm+p +Recognize (@samp{+p}) or do not recognize (@samp{-p}) C preprocessor +directives in assembler source. The default is to recognize them. + +@item -Sasm+C +@itemx -Sasm-C +@opindex -Sasm+C +Skip over (@samp{+C}) or do not skip over (@samp{-C}) C style comments +in assembler source. The default is to skip them. + +@end table + + +@node Defining new scanners +@subsection Defining new scanners + +@cindex scanners, adding new + +You can add new scanners to @code{mkid} in two ways: modify the source +code and recompile, or at runtime via the @samp{-S} option. Each has +its advantages and disadvantages, as explained below. + +If you create a new scanner that would be of use to others, please +consider sending it back to the maintainer, +@samp{gkm@@magilla.cichlid.com}, for inclusion in future releases of +@code{mkid}. + +@menu +* Defining scanners in source code:: +* Defining scanners with options:: +@end menu + + +@node Defining scanners in source code +@subsubsection Defining scanners in source code + +@flindex scanners.c +@cindex scanners, defining in source code + +@vindex languages_0 +@vindex suffixes_0 +To add a new scanner in source code, you should add a new section to the +file @file{scanners.c}. Copy one of the existing scanners (most likely +either C or plain text), and modify as necessary. Also add the new +scanner to the @code{languages_0} and @code{suffixes_0} tables near the +beginning of the file. + +This is not a terribly difficult programming task, but it requires +recompiling and installing the new version of @code{mkid}, which may be +inconvenient.
+ +This method leads to scanners which operate much more quickly than ones +that depend on external programs. It is also likely the easiest way +to define scanners for new programming languages. + + +@node Defining scanners with options +@subsubsection Defining scanners with options + +@cindex scanners, defining with options + +You can use the @samp{-S} option on the command line to define a new +language scanner: + +@example +-S@var{new-scanner}/@var{existing-scanner}/@var{filter} +@end example + +@noindent +Here, @var{new-scanner} is the name of the new scanner being defined, +@var{existing-scanner} is the name of an existing scanner, and +@var{filter} is a shell command or pipeline. + +The new scanner works by passing the input file to @var{filter}, and +then arranging for the result to be passed through +@var{existing-scanner}. Typically, @var{existing-scanner} is @samp{text}. + +Somewhere within @var{filter}, the string @samp{%s} should occur. This +@samp{%s} is replaced by the name of the source file being scanned. + +@cindex Texinfo, scanning example of +For example, @code{mkid} has no built-in scanner for Texinfo files (like +this one). In indexing a Texinfo file, you most likely would want +to ignore the Texinfo @@-commands. Here's one way to specify a new +scanner to do this: + +@example +-Stexinfo/text/sed s,@@[a-z]*,,g %s +@end example + +This defines a new language scanner (@samp{texinfo}) defined in terms of +a @code{sed} command to strip out Texinfo directives (an @samp{@@} +character followed by letters). Once the directives are stripped, the +remaining text is run through the plain text scanner. + +This is a minimal example; to do a complete job, you would need to +completely delete some lines, such as those beginning with @code{@@end} +or @code{@@node}. + + +@node idx invocation +@subsection @code{idx}: Testing @code{mkid} scanners + +@code{idx} prints the identifiers found in the files you specify to +standard output.
This is useful in debugging new @code{mkid} scanners +(@pxref{Scanners}). Synopsis: + +@example +idx [-S@var{scanarg}] @var{files}@dots{} +@end example + +@code{idx} accepts the same @samp{-S} options as @code{mkid}. +@xref{Scanner option formats}. + +The name ``idx'' stands for ``ID eXtract''. The name may change in +future releases, since this is such an infrequently used program. + + +@node mkid examples +@section @code{mkid} examples + +@cindex examples of @code{mkid} + +The simplest example of @code{mkid} is something like: + +@example +mkid *.[chy] +@end example + +This will build an ID database indexing identifiers and numbers in +all the @file{.c}, @file{.h}, and @file{.y} files in the current +directory. Because @code{mkid} already knows how to scan files with +those suffixes, no additional options are needed. + +@cindex man pages, compressed +@cindex compressed files, building ID from +Here's a more complex example. Suppose you want to build a database +indexing the contents of all the @code{man} pages, and further suppose +that your system is using @code{gzip} (@pxref{Top, , , gzip, Gzip}) to +store compressed @code{cat} versions of the @code{man} pages in the +directory @file{/usr/catman}. The @code{gzip} program creates files +with a @code{.gz} suffix, so you must tell @code{mkid} how to scan +@file{.gz} files. Here are the commands to do the job: + +@example +cd /usr/catman +find . -name \*.gz -print | mkid '-Sman/text/gzip <%s' -S.gz=man - +@end example + +@noindent Explanation: + +@enumerate + +@item +We first @code{cd} to @file{/usr/catman} so the ID database +will store the correct relative filenames. + +@item +The @code{find} command prints the names of all @file{.gz} files under +the current directory. @xref{find invocation, , , sh-utils, GNU shell +utilities}.
+ +@item +This list is piped to @code{mkid}; the @code{-} option (at the end of +the line) tells @code{mkid} to read arguments (in this case, as is +typical, the list of filenames) from standard input. @xref{mkid options}. + +@item +The @samp{-Sman/text/gzip @dots{}} defines a new language @samp{man} in +terms of the @code{gzip} program and @code{mkid}'s existing text +scanner. @xref{Defining scanners with options}. + +@item +The @samp{-S.gz=man} tells @code{mkid} to treat all @file{.gz} files as +this new language @code{man}. @xref{Scanner option formats}. + +@end enumerate + +As a further complication, @code{cat} pages typically contain +underlining and backspace sequences, which will confuse @code{mkid}. To +handle this, the @code{gzip} command becomes a pipeline, like this: + +@example +mkid '-Sman/text/gzip <%s | col -b' -S.gz=man - +@end example + + +@node Common query arguments +@chapter Common query arguments + +@cindex common query arguments + +Certain options, and regular expression syntax, are shared by the ID +query tools. So we describe those things in the sections below, instead +of repeating the description for each tool. + +@menu +* Query options:: -f -r -c -ew -kg -n -doxa -m -F -u. +* Patterns:: Regular expression syntax for searches. +* Examples: Query examples. Some common uses. +@end menu + + +@node Query options +@section Query options + +@cindex query options, common +@cindex common query options + +The ID query tools (@emph{not} @code{mkid}) share certain command line +options. Not all of these options are recognized by all programs, but +if an option is used by more than one program, it is described below. +The description of each program gives the options that program uses. + +@table @samp + +@item -f@var{idfile} +@opindex -f@var{idfile} +@cindex database name, specifying +@cindex parent directories, searched for ID +Read the database from @var{idfile}, in the current directory or in any +directory above the current directory. 
The default database name is +@file{ID}. Searching parent directories lets you have a single ID +database at the root of a large source tree and then use the query tools +from anywhere within that tree. + +@item -r@var{directory} +@opindex -r@var{directory} +Find files relative to @var{directory}, instead of the directory in +which the ID database was found. This is useful if the ID database was +moved after its creation. + +@item -c +@opindex -c +Equivalent to @code{-r`pwd`}, i.e., find files relative to the current +directory, instead of the directory in which the ID database was found. + +@item -e +@itemx -w +@opindex -e +@opindex -w +@cindex regular expressions, forcing evaluation as +@cindex strings, forcing evaluation as +@cindex constant strings, forcing evaluation as +@samp{-e} forces pattern arguments to be treated as regular expressions, +and @samp{-w} forces pattern arguments to be treated as constant +strings. By default, the query tools guess whether a pattern is regular +expressions or constant strings by looking for special characters. +@xref{Patterns}. + +@item -k +@itemx -g +@opindex -k +@opindex -g +@cindex brace notation in filename lists +@cindex shell brace notation in filename lists +@samp{-k} suppresses use of shell brace notation in the output. By +default, the query tools that generate lists of filenames attempt to +compress the lists using the usual shell brace notation, e.g., +@file{@{foo,bar@}.c} to mean @file{foo.c} and @file{bar.c}. (This is +useful if you use @code{ksh} or the original (not GNU) @code{sh} and +want to feed the list of names to another command, since those shells do +not support this brace notation; the name of the @code{-k} option comes +from the @code{k} in @code{ksh}). + +@samp{-g} turns on use of brace notation; this is only needed if the +query tools were compiled with @samp{-k} as the default behavior. 
+ +@item -n +@opindex -n +@cindex suppressing matching identifier +Suppress the matching identifier before each list of filenames that the +query tools output by default. This is useful if you want a list of just +the names to feed to another command. + +@item -d +@itemx -o +@itemx -x +@itemx -a +@opindex -d +@opindex -o +@opindex -x +@opindex -a +@cindex radix of numeric matches, specifying +@cindex numeric matches, specifying radix of +These options may be used in any combination to specify the radix of +numeric matches. @samp{-d} allows matching on decimal numbers, +@samp{-o} on octal numbers, and @samp{-x} on hexadecimal numbers. The +@code{-a} option is equivalent to specifying all three; this is the +default. Any combination of these options may be used. + +@item -m +@opindex -m +@cindex multiple lines, merging +Merge multiple lines of output into a single line. If your query +matches more than one identifier, the default is to generate a separate +line of output for each matching identifier. + +@item -F- +@itemx -F@var{n} +@itemx -F-@var{m} +@itemx -F@var{n}-@var{m} +@opindex -F +@cindex single matches, showing +Show identifiers matching at least @var{n} and at most @var{m} times. +@samp{-F-} is equivalent to @samp{-F1}, i.e., find identifiers that +appear only once in the database. (This is useful to locate identifiers +that are defined but never used, or used once and never defined.) + +@item -u@var{number} +@opindex -u +@cindex conflicting identifiers, finding +List identifiers that conflict in the first @var{number} characters. +This could be useful in porting programs to brain-dead computers that +refuse to support long identifiers, but your best long term option is to +set such computers on fire. + +@end table + + +@node Patterns +@section Patterns + +@cindex patterns +@cindex regular expression syntax + +@dfn{Patterns}, also called @dfn{regular expressions}, allow you to +match many different identifiers in a single query.
+ +The same regular expression syntax is recognized by all the query tools +that handle regular expressions. The exact syntax depends on how the ID +tools were compiled, but the following constructs should always be +supported: + +@table @samp + +@item . +Match any single character. + +@item [@var{chars}] +Match any of the characters specified within the brackets. You can +match any characters @emph{except} the ones in brackets by typing +@samp{^} as the first character. A range of characters can be specified +using @samp{-}. For example, @samp{[abc]} and @samp{[a-c]} both match +@samp{a}, @samp{b}, or @samp{c}, and @samp{[^abc]} matches anything +@emph{except} @samp{a}, @samp{b}, or @samp{c}. + +@item * +Match the previous construct zero or more times. + +@item ^ +@itemx $ +@samp{^} (@samp{$}) at the beginning (end) of a pattern anchors the +match to the first (last) character of the identifier. + +@end table + +The query programs use either the @code{regex}/@code{regcmp} or +@code{re_comp}/@code{re_exec} functions, depending on which are +available in the library on your system. These do not always support +the exact same regular expression syntax, so consult your local +@code{man} pages to find out. + + +@node Query examples +@section Query examples + +@cindex examples, queries +@cindex query examples +Here are some examples of the options described in the previous +sections. + +To restrict searches to exact matches, use @samp{^@dots{}$}. For example: + +@example +prompt$ gid '^FILE$' +ansi2knr.c:144: @{ FILE *in, *out; +ansi2knr.c:315: FILE *out; +fid.c:38: FILE *id_FILE; +filenames.c:576: FILE * +@dots{} +@end example + +To show identifiers not unique in the first 16 characters: + +@example +prompt$ lid -u16 +RE_CONTEXT_INDEP_ANCHORS regex.c +RE_CONTEXT_INDEP_OPS regex.c +RE_SYNTAX_POSIX_BASIC regex.c +RE_SYNTAX_POSIX_EXTENDED regex.c +@dots{} +@end example + +@cindex numeric searches +Numbers are searched for numerically rather than textually. 
For example: + +@example +prompt$ lid 0xff +0377 @{lid,regex@}.c +0xff @{bitops,fid,lid,mkid@}.c +255 regex.c +@end example + +On the other hand, you can restrict a numeric search to a particular +radix if you want: + +@example +prompt$ lid -x 0xff +0xff @{bitops,fid,lid,mkid@}.c +@end example + +Filenames in the output are always adjusted to be correct for the +current working directory. For example: + +@example +prompt$ lid bdevsw +bdevsw sys/conf.h cf/conf.c io/bio.c os/@{fio,main,prf,sys3@}.c +prompt$ cd io +prompt$ lid bdevsw +bdevsw ../sys/conf.h ../cf/conf.c bio.c ../os/@{fio,main,prf,sys3@}.c +@end example + + +@node gid invocation +@chapter @code{gid}: Listing matching lines + +Synopsis: + +@example +gid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-doxasc] [@var{pattern}@dots{}] +@end example + +@code{gid} finds the identifiers in the database that match the +specified @var{pattern}s, then searches for all occurrences of those +identifiers, in only the files containing matches. In a large source +tree, this saves an enormous amount of time (compared to searching every +source file). + +With no @var{pattern} arguments, @code{gid} prints every line of every +source file. + +The name ``gid'' stands for ``grep for identifiers'', @code{grep} being +the standard utility to search regular files. + +@xref{Common query arguments}, for a description of the command-line +options and @var{pattern} arguments. + +@code{gid} uses the standard GNU output format for identifying source lines: + +@example +@var{filename}:@var{linenum}: @var{text} +@end example + +Here is an example: + +@example +prompt$ gid FILE +ansi2knr.c:144: @{ FILE *in, *out; +ansi2knr.c:315: FILE *out; +fid.c:38: FILE *id_FILE; +@dots{} +@end example + +@menu +* GNU Emacs gid interface:: Using next-error with gid.
+@end menu + + +@node GNU Emacs gid interface +@section GNU Emacs @code{gid} interface + +@cindex Emacs interface to @code{gid} +@flindex gid.el @r{interface to Emacs} + +@vindex load-path +The @code{mkid} source distribution comes with a file @file{gid.el}, +which defines a GNU Emacs interface to @code{gid}. To install it, put +@file{gid.el} somewhere that Emacs will find it (i.e., in your +@code{load-path}) and put + +@example +(autoload 'gid "gid" nil t) +@end example + +@noindent in one of Emacs' initialization files, e.g., @file{~/.emacs}. +You will then be able to use @kbd{M-x gid} to run the command. + +@findex gid @r{Emacs function} +The @code{gid} function prompts you with the word around point. If you +want to search for something else, simply delete the line and type the +pattern of interest. + +@flindex *compilation* @r{Emacs buffer} +The function then runs the @code{gid} program in a @samp{*compilation*} +buffer, so the normal @code{next-error} function can be used to visit +all the places the identifier is found (@pxref{Compilation,,, emacs, The +GNU Emacs Manual}). + + +@node Looking up identifiers +@chapter Looking up identifiers + +These commands look up identifiers in the ID database and operate on the +files containing matches. + +@menu +* lid invocation:: Matching patterns. +* aid invocation:: Matching strings. +* eid invocation:: Invoking an editor on matches. +* fid invocation:: Listing a file's identifiers. +@end menu + + +@node lid invocation +@section @code{lid}: Matching patterns + +@pindex lid + +Synopsis: + +@example +lid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-mewdoxaskgnc] @c +@var{pattern}@dots{} +@end example + +@code{lid} searches the database for identifiers matching the given +@var{pattern} arguments and prints the names of the files that match +each @var{pattern}. With no @var{pattern}s, @code{lid} lists every +entry in the database. + +The name ``lid'' stands for ``lookup identifier''.
+ +@xref{Common query arguments}, for a description of the command-line +options and @var{pattern} arguments. + +By default, each line of output consists of an identifier and all the +files containing that identifier. + +Here is an example showing a search for a single identifier (omitting +some output to keep lines short): + +@example +prompt$ lid FILE +FILE extern.h @{fid,gets0,getsFF,idx,init,lid,mkid,@dots{}@}.c +@end example + +This example shows a regular expression search: + +@example +prompt$ lid 'FILE$' +AF_FILE mkid.c +AF_IDFILE mkid.c +FILE extern.h @{fid,gets0,getsFF,idx,init,lid,mkid,@dots{}@}.c +IDFILE id.h @{fid,lid,mkid@}.c +IdFILE @{fid,lid@}.c +@dots{} +@end example + +@noindent As you can see, when a regular expression is used, it is +possible to get more than one line of output. To merge multiple lines +into one, use @samp{-m}: + +@example +prompt$ lid -m ^get +^get extern.h @{bitsvec,fid,gets0,getsFF,getscan,idx,lid,@dots{}@}.c +@end example + + +@node aid invocation +@section @code{aid}: Matching strings + +@pindex aid + +Synopsis: + +@example +aid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-mewdoxaskgnc] @c +@var{string}@dots{} +@end example + +@cindex case-insensitive searching +@cindex string searching +@code{aid} searches the database for identifiers containing the given +@var{string} arguments. The search is case-insensitive. + +@flindex whatis +The name ``aid'' stands for ``apropos identifier'', @code{apropos} +being a command that does a similar search of the @code{whatis} database +of @code{man} descriptions. + +For example, @samp{aid get} matches the identifiers @code{fgets}, +@code{GETLINE}, and @code{getchar}. + +The default output format is the same as @code{lid}; see the previous +section. + +@xref{Common query arguments}, for a description of the command-line +options and @var{pattern} arguments.
+ + +@node eid invocation +@section @code{eid}: Invoking an editor on matches + +@pindex eid + +Synopsis: + +@example +eid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-doxasc] [@var{pattern}]@dots{} +@end example + +@code{eid} runs the usual search (@pxref{lid invocation}) on the given +arguments, shows you the output, and then asks: + +@example +Edit? [y1-9^S/nq] +@end example + +@noindent +You can respond with: + +@table @samp +@item y +Edit all files listed. + +@item 1@dots{}9 +Edit all files starting at the @math{@var{n} + 1}'st file. + +@item /@var{string} @r{or} @kbd{CTRL-S}@var{string} +Edit all files whose name contains @var{string}. + +@item n +Go on to the next @var{pattern}, i.e., edit no files for this one. + +@item q +Quit @code{eid}. + +@end table + +@code{eid} invokes an editor once per @var{pattern}; all the specified +files are given to the editor for you to edit simultaneously. + +@code{eid} invokes the editor defined by the @samp{EDITOR} environment +variable. If the editor can accept an initial search argument on the +command line, @code{eid} moves automatically to the location of the +match, via the environment variables below. + +@xref{Common query arguments}, for a description of the command-line +options and @var{pattern} arguments. + +Here are the environment variables relevant to @code{eid}: + +@table @samp + +@item EDITOR +@vindex EDITOR +The name of the editor program to invoke. + +@item EIDARG +@vindex EIDARG +@cindex search for identifier, initial +The argument to pass to the editor to search for the matching +identifier. For @code{vi}, this should be @samp{+/%s/'}. + +@item EIDLDEL +@vindex EIDLDEL +@cindex left delimiter editor argument +@cindex beginning-of-word editor argument +A regular expression to force a match at the beginning of a word (``left +delimiter''). @code{eid} inserts this in front of the matching identifier +when composing the search argument. For @code{vi}, this should be +@samp{\<}.
+ +@item EIDRDEL +@vindex EIDRDEL +@cindex right delimiter editor argument +@cindex end-of-word editor argument +The end-of-word regular expression. For @code{vi}, this should be +@samp{\>}. + +@end table + +For Emacs users, the interface in @code{gid.el} is probably preferable +to @code{eid}. @xref{GNU Emacs gid interface}. + + +Here is an example: + +@example +prompt$ eid FILE \^print +FILE @{ansi2knr,fid,filenames,idfile,idx,lid,misc,@dots{}@}.c +Edit? [y1-9^S/nq] n +^print @{ansi2knr,fid,getopt,getopt1,lid,mkid,regex,scanners@}.c +Edit? [y1-9^S/nq] 2 +@end example + +@noindent This will start editing at @file{getopt.c}. + + +@node fid invocation +@section @code{fid}: Listing a file's identifiers + +@pindex fid +@cindex identifiers in a file + +@code{fid} lists the identifiers found in a given file. Synopsis: + +@example +fid [-f@var{dbfile}] @var{file1} [@var{file2}] +@end example + +@table @samp + +@item -f@var{dbfile} +Read the database from @var{dbfile} instead of @file{ID}. + +@item @var{file1} +List all the identifiers contained in @var{file1}. + +@item @var{file2} +With a second file argument, list only the identifiers both files have +in common. + +@end table + +The output is simply one identifier (or number) per line. + + +@node pid invocation +@chapter @code{pid}: Looking up filenames + +@pindex pid +@cindex filenames, matching +@cindex matching filenames + +@code{pid} matches the filenames stored in the ID database, rather than +the identifiers. Synopsis: + +@example +pid [-f@var{dbfile}] [-r@var{dir}] [-ebkgnc] @var{wildcard}@dots{} +@end example + +By default, the @var{wildcard} patterns are treated as shell globbing +patterns, rather than the regular expressions the other utilities +accept. See the section below for details.
+ +Besides the standard options given in the synopsis (@pxref{Query +options}), @code{pid} accepts the following: + +@table @samp + +@item -e +@opindex -e +Do the usual regular expression matching (@pxref{Patterns}), instead +of shell wildcard matching. + +@item -b +@opindex -b +@cindex basename match +Match the basenames of the files in the database. For example, +@samp{pid -b foo} will match the stored filename @file{dir/foo}, but not +@file{foo/file}. + +@end table + +For example, the command: + +@example +pid \*.c +@end example + +@noindent lists all the @file{.c} files in the database. (The @samp{\} +here protects the @samp{*} from being expanded by the shell.) + +@menu +* Wildcard patterns:: Shell-style globbing patterns. +@end menu + + +@node Wildcard patterns +@section Wildcard patterns + +@cindex globbing patterns +@cindex shell wildcard patterns +@cindex wildcard wildcard patterns + +@code{pid} does simplified shell wildcard matching (unless the @samp{-e} +option is specified), rather than the regular expression matching done +by the other utilities. Here is a description of wildcard matching, +also called @dfn{globbing}: + +@itemize @bullet + +@item +@kindex * @r{in globbing} +@samp{*} matches zero or more characters. + +@item +@kindex ? @r{in globbing} +@samp{?} matches any single character. + +@item +@kindex \ @r{in globbing} +@samp{\} forces the next character to be taken literally. + +@item +@kindex [@dots{}] @r{in globbing} +@samp{[@var{chars}]} matches any single character listed in @var{chars}. + +@item +@kindex [!@dots{}] @r{in globbing} +@samp{[!@var{chars}]} matches any character @emph{not} listed in @var{chars}. + +@end itemize + +Most shells treat @samp{/} and leading @samp{.} characters +specially. @code{pid} does not do this. It simply matches the filename +in the database against the wildcard pattern. 
+ + +@node Index +@unnumbered Index + +@printindex cp + +@contents +@bye diff --git a/doc/mdate-sh b/doc/mdate-sh new file mode 100755 index 0000000..60293a1 --- /dev/null +++ b/doc/mdate-sh @@ -0,0 +1,91 @@ +#!/bin/sh +# mdate-sh - get modification time of a file and pretty-print it +# Copyright (C) 1995 Software Foundation, Inc. +# Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, June 1995 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# Prevent date giving response in another language. +LANG=C +export LANG +LC_ALL=C +export LC_ALL +LC_TIME=C +export LC_TIME + +# Get the extended ls output of the file. +if ls -L /dev/null 1>/dev/null 2>&1; then + set - `ls -L -l $1` +else + set - `ls -l $1` +fi +# The month is at least the fourth argument. +# (3 shifts here, the next inside the loop) +shift +shift +shift + +# Find the month. Next argument is day, followed by the year or time. 
+month= +until test $month +do + shift + case $1 in + Jan) month=January; nummonth=1;; + Feb) month=February; nummonth=2;; + Mar) month=March; nummonth=3;; + Apr) month=April; nummonth=4;; + May) month=May; nummonth=5;; + Jun) month=June; nummonth=6;; + Jul) month=July; nummonth=7;; + Aug) month=August; nummonth=8;; + Sep) month=September; nummonth=9;; + Oct) month=October; nummonth=10;; + Nov) month=November; nummonth=11;; + Dec) month=December; nummonth=12;; + esac +done + +day=$2 + +# Here we have to deal with the problem that the ls output gives either +# the time of day or the year. +case $3 in + *:*) set `date`; year=$6 + case $2 in + Jan) nummonthtod=1;; + Feb) nummonthtod=2;; + Mar) nummonthtod=3;; + Apr) nummonthtod=4;; + May) nummonthtod=5;; + Jun) nummonthtod=6;; + Jul) nummonthtod=7;; + Aug) nummonthtod=8;; + Sep) nummonthtod=9;; + Oct) nummonthtod=10;; + Nov) nummonthtod=11;; + Dec) nummonthtod=12;; + esac + # For the first six month of the year the time notation can also + # be used for files modified in the last year. + if (expr $nummonth \> $nummonthtod) > /dev/null; + then + year=`expr $year - 1` + fi;; + *) year=$3;; +esac + +# The result. +echo $day $month $year diff --git a/doc/stamp-vti b/doc/stamp-vti new file mode 100644 index 0000000..9788f70 --- /dev/null +++ b/doc/stamp-vti @@ -0,0 +1 @@ +timestamp diff --git a/doc/version.texi b/doc/version.texi new file mode 100644 index 0000000..4893f0a --- /dev/null +++ b/doc/version.texi @@ -0,0 +1,3 @@ +@set UPDATED 16 March 1996 +@set EDITION 3.0.9 +@set VERSION 3.0.9 |