diff options
author | Greg McGary <greg@mcgary.org> | 1999-01-26 17:56:25 +0000 |
---|---|---|
committer | Greg McGary <greg@mcgary.org> | 1999-01-26 17:56:25 +0000 |
commit | 2f9b216275fea538dcb208c11f778ae48702c722 (patch) | |
tree | 9a75b99a08f923be68fa2f4f6f4426a0aafdbce3 | |
parent | 263a1118150c1e73fdf03eb49fad574f73f31eda (diff) | |
download | idutils-2f9b216275fea538dcb208c11f778ae48702c722.tar.gz idutils-2f9b216275fea538dcb208c11f778ae48702c722.tar.bz2 idutils-2f9b216275fea538dcb208c11f778ae48702c722.zip |
Remove obsolete files
-rw-r--r-- | alloc.h | 34 | ||||
-rw-r--r-- | alloca.c | 492 | ||||
-rw-r--r-- | ansi2knr.1 | 19 | ||||
-rw-r--r-- | ansi2knr.c | 474 | ||||
-rw-r--r-- | bitops.c | 116 | ||||
-rw-r--r-- | bitops.h | 31 | ||||
-rw-r--r-- | fid.1 | 26 | ||||
-rw-r--r-- | fid.c | 186 | ||||
-rw-r--r-- | filenames.c | 530 | ||||
-rw-r--r-- | filenames.h | 36 | ||||
-rw-r--r-- | getopt.c | 748 | ||||
-rw-r--r-- | getopt.h | 129 | ||||
-rw-r--r-- | getopt1.c | 180 | ||||
-rw-r--r-- | hash.h | 124 | ||||
-rw-r--r-- | id.info | 1433 | ||||
-rw-r--r-- | id.texinfo | 1615 | ||||
-rw-r--r-- | idarg.h | 33 | ||||
-rw-r--r-- | idfile.c | 246 | ||||
-rw-r--r-- | idfile.h | 102 | ||||
-rw-r--r-- | idx.c | 95 | ||||
-rw-r--r-- | iid.1 | 235 | ||||
-rw-r--r-- | iid.c | 2329 | ||||
-rw-r--r-- | iid.help | 92 | ||||
-rw-r--r-- | iid.y | 1359 | ||||
-rw-r--r-- | lid.1 | 211 | ||||
-rw-r--r-- | lid.c | 1365 | ||||
-rw-r--r-- | misc.c | 126 | ||||
-rw-r--r-- | misc.h | 38 | ||||
-rw-r--r-- | mkid.1 | 187 | ||||
-rw-r--r-- | mkid.c | 999 | ||||
-rw-r--r-- | mkid.info | 1097 | ||||
-rw-r--r-- | mkid.texinfo | 957 | ||||
-rw-r--r-- | regex.c | 5244 | ||||
-rw-r--r-- | regex.h | 489 | ||||
-rw-r--r-- | scanners.c | 1216 | ||||
-rw-r--r-- | scanners.h | 30 | ||||
-rw-r--r-- | stamp-vti | 1 | ||||
-rw-r--r-- | strcasecmp.c | 76 | ||||
-rw-r--r-- | strxtra.h | 41 | ||||
-rw-r--r-- | texinfo.tex | 4421 | ||||
-rw-r--r-- | token.c | 50 | ||||
-rw-r--r-- | token.h | 40 | ||||
-rw-r--r-- | version.texi | 1 |
43 files changed, 0 insertions, 27253 deletions
diff --git a/alloc.h b/alloc.h deleted file mode 100644 index c430091..0000000 --- a/alloc.h +++ /dev/null @@ -1,34 +0,0 @@ -/* alloc.h -- convenient interface macros for malloc(3) & friends - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _alloc_h_ -#define _alloc_h_ - -#if HAVE_STDLIB_H -#include <stdlib.h> -#else /* not HAVE_STDLIB_H */ -#if HAVE_MALLOC_H -#include <malloc.h> -#endif /* HAVE_MALLOC_H */ -#endif /* not HAVE_STDLIB_H */ - -#define CALLOC(type, n) ((type *) calloc (sizeof (type), (n))) -#define MALLOC(type, n) ((type *) malloc (sizeof (type) * (n))) -#define REALLOC(old, type, n) ((type *) realloc ((old), sizeof (type) * (n))) - -#endif /* not _alloc_h_ */ diff --git a/alloca.c b/alloca.c deleted file mode 100644 index 7020f32..0000000 --- a/alloca.c +++ /dev/null @@ -1,492 +0,0 @@ -/* alloca.c -- allocate automatically reclaimed memory - (Mostly) portable public-domain implementation -- D A Gwyn - - This implementation of the PWB library alloca function, - which is used to allocate space off the run-time stack so - that it is automatically reclaimed upon procedure exit, - was inspired by discussions with J. Q. Johnson of Cornell. - J.Otto Tennant <jot@cray.com> contributed the Cray support. 
- - There are some preprocessor constants that can - be defined when compiling for your specific system, for - improved efficiency; however, the defaults should be okay. - - The general concept of this implementation is to keep - track of all alloca-allocated blocks, and reclaim any - that are found to be deeper in the stack than the current - invocation. This heuristic does not reclaim storage as - soon as it becomes invalid, but it will do so eventually. - - As a special case, alloca(0) reclaims storage without - allocating any. It is a good idea to use alloca(0) in - your main control loop, etc. to force garbage collection. */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#ifdef emacs -#include "blockinput.h" -#endif - -/* If compiling with GCC 2, this file's not needed. */ -#if !defined (__GNUC__) || __GNUC__ < 2 - -/* If someone has defined alloca as a macro, - there must be some other way alloca is supposed to work. */ -#ifndef alloca - -#ifdef emacs -#ifdef static -/* actually, only want this if static is defined as "" - -- this is for usg, in which emacs must undefine static - in order to make unexec workable - */ -#ifndef STACK_DIRECTION -you -lose --- must know STACK_DIRECTION at compile-time -#endif /* STACK_DIRECTION undefined */ -#endif /* static */ -#endif /* emacs */ - -/* If your stack is a linked list of frames, you have to - provide an "address metric" ADDRESS_FUNCTION macro. */ - -#if defined (CRAY) && defined (CRAY_STACKSEG_END) -long i00afunc (); -#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg)) -#else -#define ADDRESS_FUNCTION(arg) &(arg) -#endif - -#if __STDC__ -typedef void *pointer; -#else -typedef char *pointer; -#endif - -#define NULL 0 - -/* Different portions of Emacs need to call different versions of - malloc. The Emacs executable needs alloca to call xmalloc, because - ordinary malloc isn't protected from input signals. 
On the other - hand, the utilities in lib-src need alloca to call malloc; some of - them are very simple, and don't have an xmalloc routine. - - Non-Emacs programs expect this to call use xmalloc. - - Callers below should use malloc. */ - -#ifndef emacs -#define malloc xmalloc -#endif -extern pointer malloc (); - -/* Define STACK_DIRECTION if you know the direction of stack - growth for your system; otherwise it will be automatically - deduced at run-time. - - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ - -#ifndef STACK_DIRECTION -#define STACK_DIRECTION 0 /* Direction unknown. */ -#endif - -#if STACK_DIRECTION != 0 - -#define STACK_DIR STACK_DIRECTION /* Known at compile-time. */ - -#else /* STACK_DIRECTION == 0; need run-time code. */ - -static int stack_dir; /* 1 or -1 once known. */ -#define STACK_DIR stack_dir - -static void -find_stack_direction () -{ - static char *addr = NULL; /* Address of first `dummy', once known. */ - auto char dummy; /* To get stack address. */ - - if (addr == NULL) - { /* Initial entry. */ - addr = ADDRESS_FUNCTION (dummy); - - find_stack_direction (); /* Recurse once. */ - } - else - { - /* Second entry. */ - if (ADDRESS_FUNCTION (dummy) > addr) - stack_dir = 1; /* Stack grew upward. */ - else - stack_dir = -1; /* Stack grew downward. */ - } -} - -#endif /* STACK_DIRECTION == 0 */ - -/* An "alloca header" is used to: - (a) chain together all alloca'ed blocks; - (b) keep track of stack depth. - - It is very important that sizeof(header) agree with malloc - alignment chunk size. The following default should work okay. */ - -#ifndef ALIGN_SIZE -#define ALIGN_SIZE sizeof(double) -#endif - -typedef union hdr -{ - char align[ALIGN_SIZE]; /* To force sizeof(header). */ - struct - { - union hdr *next; /* For chaining headers. */ - char *deep; /* For stack depth measure. 
*/ - } h; -} header; - -static header *last_alloca_header = NULL; /* -> last alloca header. */ - -/* Return a pointer to at least SIZE bytes of storage, - which will be automatically reclaimed upon exit from - the procedure that called alloca. Originally, this space - was supposed to be taken from the current stack frame of the - caller, but that method cannot be made to work for some - implementations of C, for example under Gould's UTX/32. */ - -pointer -alloca (size) - unsigned size; -{ - auto char probe; /* Probes stack depth: */ - register char *depth = ADDRESS_FUNCTION (probe); - -#if STACK_DIRECTION == 0 - if (STACK_DIR == 0) /* Unknown growth direction. */ - find_stack_direction (); -#endif - - /* Reclaim garbage, defined as all alloca'd storage that - was allocated from deeper in the stack than currently. */ - - { - register header *hp; /* Traverses linked list. */ - -#ifdef emacs - BLOCK_INPUT; -#endif - - for (hp = last_alloca_header; hp != NULL;) - if ((STACK_DIR > 0 && hp->h.deep > depth) - || (STACK_DIR < 0 && hp->h.deep < depth)) - { - register header *np = hp->h.next; - - free ((pointer) hp); /* Collect garbage. */ - - hp = np; /* -> next header. */ - } - else - break; /* Rest are not deeper. */ - - last_alloca_header = hp; /* -> last valid storage. */ - -#ifdef emacs - UNBLOCK_INPUT; -#endif - } - - if (size == 0) - return NULL; /* No allocation required. */ - - /* Allocate combined header + user data storage. */ - - { - register pointer new = malloc (sizeof (header) + size); - /* Address of header. */ - - ((header *) new)->h.next = last_alloca_header; - ((header *) new)->h.deep = depth; - - last_alloca_header = (header *) new; - - /* User storage begins just after header. 
*/ - - return (pointer) ((char *) new + sizeof (header)); - } -} - -#if defined (CRAY) && defined (CRAY_STACKSEG_END) - -#ifdef DEBUG_I00AFUNC -#include <stdio.h> -#endif - -#ifndef CRAY_STACK -#define CRAY_STACK -#ifndef CRAY2 -/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */ -struct stack_control_header - { - long shgrow:32; /* Number of times stack has grown. */ - long shaseg:32; /* Size of increments to stack. */ - long shhwm:32; /* High water mark of stack. */ - long shsize:32; /* Current size of stack (all segments). */ - }; - -/* The stack segment linkage control information occurs at - the high-address end of a stack segment. (The stack - grows from low addresses to high addresses.) The initial - part of the stack segment linkage control information is - 0200 (octal) words. This provides for register storage - for the routine which overflows the stack. */ - -struct stack_segment_linkage - { - long ss[0200]; /* 0200 overflow words. */ - long sssize:32; /* Number of words in this segment. */ - long ssbase:32; /* Offset to stack base. */ - long:32; - long sspseg:32; /* Offset to linkage control of previous - segment of stack. */ - long:32; - long sstcpt:32; /* Pointer to task common address block. */ - long sscsnm; /* Private control structure number for - microtasking. */ - long ssusr1; /* Reserved for user. */ - long ssusr2; /* Reserved for user. */ - long sstpid; /* Process ID for pid based multi-tasking. */ - long ssgvup; /* Pointer to multitasking thread giveup. */ - long sscray[7]; /* Reserved for Cray Research. */ - long ssa0; - long ssa1; - long ssa2; - long ssa3; - long ssa4; - long ssa5; - long ssa6; - long ssa7; - long sss0; - long sss1; - long sss2; - long sss3; - long sss4; - long sss5; - long sss6; - long sss7; - }; - -#else /* CRAY2 */ -/* The following structure defines the vector of words - returned by the STKSTAT library routine. */ -struct stk_stat - { - long now; /* Current total stack size. 
*/ - long maxc; /* Amount of contiguous space which would - be required to satisfy the maximum - stack demand to date. */ - long high_water; /* Stack high-water mark. */ - long overflows; /* Number of stack overflow ($STKOFEN) calls. */ - long hits; /* Number of internal buffer hits. */ - long extends; /* Number of block extensions. */ - long stko_mallocs; /* Block allocations by $STKOFEN. */ - long underflows; /* Number of stack underflow calls ($STKRETN). */ - long stko_free; /* Number of deallocations by $STKRETN. */ - long stkm_free; /* Number of deallocations by $STKMRET. */ - long segments; /* Current number of stack segments. */ - long maxs; /* Maximum number of stack segments so far. */ - long pad_size; /* Stack pad size. */ - long current_address; /* Current stack segment address. */ - long current_size; /* Current stack segment size. This - number is actually corrupted by STKSTAT to - include the fifteen word trailer area. */ - long initial_address; /* Address of initial segment. */ - long initial_size; /* Size of initial segment. */ - }; - -/* The following structure describes the data structure which trails - any stack segment. I think that the description in 'asdef' is - out of date. I only describe the parts that I am sure about. */ - -struct stk_trailer - { - long this_address; /* Address of this block. */ - long this_size; /* Size of this block (does not include - this trailer). */ - long unknown2; - long unknown3; - long link; /* Address of trailer block of previous - segment. */ - long unknown5; - long unknown6; - long unknown7; - long unknown8; - long unknown9; - long unknown10; - long unknown11; - long unknown12; - long unknown13; - long unknown14; - }; - -#endif /* CRAY2 */ -#endif /* not CRAY_STACK */ - -#ifdef CRAY2 -/* Determine a "stack measure" for an arbitrary ADDRESS. - I doubt that "lint" will like this much. 
*/ - -static long -i00afunc (long *address) -{ - struct stk_stat status; - struct stk_trailer *trailer; - long *block, size; - long result = 0; - - /* We want to iterate through all of the segments. The first - step is to get the stack status structure. We could do this - more quickly and more directly, perhaps, by referencing the - $LM00 common block, but I know that this works. */ - - STKSTAT (&status); - - /* Set up the iteration. */ - - trailer = (struct stk_trailer *) (status.current_address - + status.current_size - - 15); - - /* There must be at least one stack segment. Therefore it is - a fatal error if "trailer" is null. */ - - if (trailer == 0) - abort (); - - /* Discard segments that do not contain our argument address. */ - - while (trailer != 0) - { - block = (long *) trailer->this_address; - size = trailer->this_size; - if (block == 0 || size == 0) - abort (); - trailer = (struct stk_trailer *) trailer->link; - if ((block <= address) && (address < (block + size))) - break; - } - - /* Set the result to the offset in this segment and add the sizes - of all predecessor segments. */ - - result = address - block; - - if (trailer == 0) - { - return result; - } - - do - { - if (trailer->this_size <= 0) - abort (); - result += trailer->this_size; - trailer = (struct stk_trailer *) trailer->link; - } - while (trailer != 0); - - /* We are done. Note that if you present a bogus address (one - not in any segment), you will get a different number back, formed - from subtracting the address of the first block. This is probably - not what you want. */ - - return (result); -} - -#else /* not CRAY2 */ -/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP. - Determine the number of the cell within the stack, - given the address of the cell. The purpose of this - routine is to linearize, in some sense, stack addresses - for alloca. 
*/ - -static long -i00afunc (long address) -{ - long stkl = 0; - - long size, pseg, this_segment, stack; - long result = 0; - - struct stack_segment_linkage *ssptr; - - /* Register B67 contains the address of the end of the - current stack segment. If you (as a subprogram) store - your registers on the stack and find that you are past - the contents of B67, you have overflowed the segment. - - B67 also points to the stack segment linkage control - area, which is what we are really interested in. */ - - stkl = CRAY_STACKSEG_END (); - ssptr = (struct stack_segment_linkage *) stkl; - - /* If one subtracts 'size' from the end of the segment, - one has the address of the first word of the segment. - - If this is not the first segment, 'pseg' will be - nonzero. */ - - pseg = ssptr->sspseg; - size = ssptr->sssize; - - this_segment = stkl - size; - - /* It is possible that calling this routine itself caused - a stack overflow. Discard stack segments which do not - contain the target address. */ - - while (!(this_segment <= address && address <= stkl)) - { -#ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl); -#endif - if (pseg == 0) - break; - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - this_segment = stkl - size; - } - - result = address - this_segment; - - /* If you subtract pseg from the current end of the stack, - you get the address of the previous stack segment's end. - This seems a little convoluted to me, but I'll bet you save - a cycle somewhere. 
*/ - - while (pseg != 0) - { -#ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o\n", pseg, size); -#endif - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - result += size; - } - return (result); -} - -#endif /* not CRAY2 */ -#endif /* CRAY */ - -#endif /* no alloca */ -#endif /* not GCC version 2 */ diff --git a/ansi2knr.1 b/ansi2knr.1 deleted file mode 100644 index 434ce8f..0000000 --- a/ansi2knr.1 +++ /dev/null @@ -1,19 +0,0 @@ -.TH ANSI2KNR 1 "31 December 1990" -.SH NAME -ansi2knr \- convert ANSI C to Kernighan & Ritchie C -.SH SYNOPSIS -.I ansi2knr -input_file output_file -.SH DESCRIPTION -If no output_file is supplied, output goes to stdout. -.br -There are no error messages. -.sp -.I ansi2knr -recognizes functions by seeing a non-keyword identifier at the left margin, followed by a left parenthesis, with a right parenthesis as the last character on the line. It will recognize a multi-line header if the last character on each line but the last is a left parenthesis or comma. These algorithms ignore whitespace and comments, except that the function name must be the first thing on the line. -.sp -The following constructs will confuse it: -.br - - Any other construct that starts at the left margin and follows the above syntax (such as a macro or function call). -.br - - Macros that tinker with the syntax of the function header. diff --git a/ansi2knr.c b/ansi2knr.c deleted file mode 100644 index 8a7d4b5..0000000 --- a/ansi2knr.c +++ /dev/null @@ -1,474 +0,0 @@ -/* Copyright (C) 1989, 1991, 1993, 1994 Aladdin Enterprises. All rights reserved. */ - -/* ansi2knr.c */ -/* Convert ANSI C function definitions to K&R ("traditional C") syntax */ - -/* -ansi2knr is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY. 
No author or distributor accepts responsibility -to anyone for the consequences of using it or for whether it serves any -particular purpose or works at all, unless he says so in writing. Refer -to the GNU General Public License for full details. - -Everyone is granted permission to copy, modify and redistribute -ansi2knr, but only under the conditions described in the GNU -General Public License. A copy of this license is supposed to have been -given to you along with ansi2knr so you can know your rights and -responsibilities. It should be in a file named COPYLEFT. Among other -things, the copyright notice and this notice must be preserved on all -copies. -*/ - -/* - * Usage: - ansi2knr input_file [output_file] - * If no output_file is supplied, output goes to stdout. - * There are no error messages. - * - * ansi2knr recognizes function definitions by seeing a non-keyword - * identifier at the left margin, followed by a left parenthesis, - * with a right parenthesis as the last character on the line. - * It will recognize a multi-line header provided that the last character - * of the last line of the header is a right parenthesis, - * and no intervening line ends with a left or right brace or a semicolon. - * These algorithms ignore whitespace and comments, except that - * the function name must be the first thing on the line. - * The following constructs will confuse it: - * - Any other construct that starts at the left margin and - * follows the above syntax (such as a macro or function call). - * - Macros that tinker with the syntax of the function header. - */ - -/* - * The original and principal author of ansi2knr is L. Peter Deutsch - * <ghost@aladdin.com>. 
Other authors are noted in the change history - * that follows (in reverse chronological order): - lpd 94-12-18 added conditionals for systems where ctype macros - don't handle 8-bit characters properly, suggested by - Francois Pinard <pinard@iro.umontreal.ca>; - removed --varargs switch (this is now the default) - lpd 94-10-10 removed CONFIG_BROKETS conditional - lpd 94-07-16 added some conditionals to help GNU `configure', - suggested by Francois Pinard <pinard@iro.umontreal.ca>; - properly erase prototype args in function parameters, - contributed by Jim Avera <jima@netcom.com>; - correct error in writeblanks (it shouldn't erase EOLs) - lpd 89-xx-xx original version - */ - -/* Most of the conditionals here are to make ansi2knr work with */ -/* the GNU configure machinery. */ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -#include <stdio.h> -#include <ctype.h> - -#ifdef HAVE_CONFIG_H - -/* - For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h). - This will define HAVE_CONFIG_H and so, activate the following lines. - */ - -# if STDC_HEADERS || HAVE_STRING_H -# include <string.h> -# else -# include <strings.h> -# endif - -#else /* not HAVE_CONFIG_H */ - -/* - Without AC_CONFIG_HEADER, merely use <string.h> as in the original - Ghostscript distribution. This loses on older BSD systems. - */ - -# include <string.h> - -#endif /* not HAVE_CONFIG_H */ - -#ifdef STDC_HEADERS -# include <stdlib.h> -#else -/* - malloc and free should be declared in stdlib.h, - but if you've got a K&R compiler, they probably aren't. - */ -char *malloc(); -void free(); -#endif - -/* - * The ctype macros don't always handle 8-bit characters correctly. - * Compensate for this here. 
- */ -#ifndef STDC_HEADERS -# define STDC_HEADERS 0 -#endif -#ifdef isascii -# undef HAVE_ISASCII /* just in case */ -# define HAVE_ISASCII 1 -#else -# ifndef HAVE_ISASCII -# define HAVE_ISASCII 0 -# endif -#endif -#if STDC_HEADERS || !HAVE_ISASCII -# define is_ascii(c) 1 -#else -# define is_ascii(c) isascii(c) -#endif - -#define is_space(c) (is_ascii(c) && isspace(c)) -#define is_alpha(c) (is_ascii(c) && isalpha(c)) -#define is_alnum(c) (is_ascii(c) && isalnum(c)) - -/* Scanning macros */ -#define isidchar(ch) (is_alnum(ch) || (ch) == '_') -#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_') - -/* Forward references */ -char *skipspace(); -void writeblanks(); -int test1(); -int convert1(); - -/* The main program */ -int -main(argc, argv) - int argc; - char *argv[]; -{ FILE *in, *out; -#define bufsize 5000 /* arbitrary size */ - char *buf; - char *line; - /* - * In previous versions, ansi2knr recognized a --varargs switch. - * If this switch was supplied, ansi2knr would attempt to convert - * a ... argument to va_alist and va_dcl; if this switch was not - * supplied, ansi2knr would simply drop any such arguments. - * Now, ansi2knr always does this conversion, and we only - * check for this switch for backward compatibility. 
- */ - int convert_varargs = 1; - - if ( argc > 1 && argv[1][0] == '-' ) - { if ( !strcmp(argv[1], "--varargs") ) - { convert_varargs = 1; - argc--; - argv++; - } - else - { fprintf(stderr, "Unrecognized switch: %s\n", argv[1]); - exit(1); - } - } - switch ( argc ) - { - default: - printf("Usage: ansi2knr input_file [output_file]\n"); - exit(0); - case 2: - out = stdout; - break; - case 3: - out = fopen(argv[2], "w"); - if ( out == NULL ) - { fprintf(stderr, "Cannot open output file %s\n", argv[2]); - exit(1); - } - } - in = fopen(argv[1], "r"); - if ( in == NULL ) - { fprintf(stderr, "Cannot open input file %s\n", argv[1]); - exit(1); - } - fprintf(out, "#line 1 \"%s\"\n", argv[1]); - buf = malloc(bufsize); - line = buf; - while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL ) - { switch ( test1(buf) ) - { - case 2: /* a function header */ - convert1(buf, out, 1, convert_varargs); - break; - case 1: /* a function */ - convert1(buf, out, 0, convert_varargs); - break; - case -1: /* maybe the start of a function */ - line = buf + strlen(buf); - if ( line != buf + (bufsize - 1) ) /* overflow check */ - continue; - /* falls through */ - default: /* not a function */ - fputs(buf, out); - break; - } - line = buf; - } - if ( line != buf ) fputs(buf, out); - free(buf); - fclose(out); - fclose(in); - return 0; -} - -/* Skip over space and comments, in either direction. */ -char * -skipspace(p, dir) - register char *p; - register int dir; /* 1 for forward, -1 for backward */ -{ for ( ; ; ) - { while ( is_space(*p) ) p += dir; - if ( !(*p == '/' && p[dir] == '*') ) break; - p += dir; p += dir; - while ( !(*p == '*' && p[dir] == '/') ) - { if ( *p == 0 ) return p; /* multi-line comment?? */ - p += dir; - } - p += dir; p += dir; - } - return p; -} - -/* - * Write blanks over part of a string. - * Don't overwrite end-of-line characters. 
- */ -void -writeblanks(start, end) - char *start; - char *end; -{ char *p; - for ( p = start; p < end; p++ ) - if ( *p != '\r' && *p != '\n' ) *p = ' '; -} - -/* - * Test whether the string in buf is a function definition. - * The string may contain and/or end with a newline. - * Return as follows: - * 0 - definitely not a function definition; - * 1 - definitely a function definition; - * 2 - definitely a function prototype (NOT USED); - * -1 - may be the beginning of a function definition, - * append another line and look again. - * The reason we don't attempt to convert function prototypes is that - * Ghostscript's declaration-generating macros look too much like - * prototypes, and confuse the algorithms. - */ -int -test1(buf) - char *buf; -{ register char *p = buf; - char *bend; - char *endfn; - int contin; - if ( !isidfirstchar(*p) ) - return 0; /* no name at left margin */ - bend = skipspace(buf + strlen(buf) - 1, -1); - switch ( *bend ) - { - case ';': contin = 0 /*2*/; break; - case ')': contin = 1; break; - case '{': return 0; /* not a function */ - case '}': return 0; /* not a function */ - default: contin = -1; - } - while ( isidchar(*p) ) p++; - endfn = p; - p = skipspace(p, 1); - if ( *p++ != '(' ) - return 0; /* not a function */ - p = skipspace(p, 1); - if ( *p == ')' ) - return 0; /* no parameters */ - /* Check that the apparent function name isn't a keyword. */ - /* We only need to check for keywords that could be followed */ - /* by a left parenthesis (which, unfortunately, is most of them). 
*/ - { static char *words[] = - { "asm", "auto", "case", "char", "const", "double", - "extern", "float", "for", "if", "int", "long", - "register", "return", "short", "signed", "sizeof", - "static", "switch", "typedef", "unsigned", - "void", "volatile", "while", 0 - }; - char **key = words; - char *kp; - int len = endfn - buf; - while ( (kp = *key) != 0 ) - { if ( strlen(kp) == len && !strncmp(kp, buf, len) ) - return 0; /* name is a keyword */ - key++; - } - } - return contin; -} - -/* Convert a recognized function definition or header to K&R syntax. */ -int -convert1(buf, out, header, convert_varargs) - char *buf; - FILE *out; - int header; /* Boolean */ - int convert_varargs; /* Boolean */ -{ char *endfn; - register char *p; - char **breaks; - unsigned num_breaks = 2; /* for testing */ - char **btop; - char **bp; - char **ap; - char *vararg = 0; - /* Pre-ANSI implementations don't agree on whether strchr */ - /* is called strchr or index, so we open-code it here. */ - for ( endfn = buf; *(endfn++) != '('; ) ; -top: p = endfn; - breaks = (char **)malloc(sizeof(char *) * num_breaks * 2); - if ( breaks == 0 ) - { /* Couldn't allocate break table, give up */ - fprintf(stderr, "Unable to allocate break table!\n"); - fputs(buf, out); - return -1; - } - btop = breaks + num_breaks * 2 - 2; - bp = breaks; - /* Parse the argument list */ - do - { int level = 0; - char *lp = NULL; - char *rp; - char *end = NULL; - if ( bp >= btop ) - { /* Filled up break table. */ - /* Allocate a bigger one and start over. */ - free((char *)breaks); - num_breaks <<= 1; - goto top; - } - *bp++ = p; - /* Find the end of the argument */ - for ( ; end == NULL; p++ ) - { switch(*p) - { - case ',': - if ( !level ) end = p; - break; - case '(': - if ( !level ) lp = p; - level++; - break; - case ')': - if ( --level < 0 ) end = p; - else rp = p; - break; - case '/': - p = skipspace(p, 1) - 1; - break; - default: - ; - } - } - /* Erase any embedded prototype parameters. 
*/ - if ( lp ) - writeblanks(lp + 1, rp); - p--; /* back up over terminator */ - /* Find the name being declared. */ - /* This is complicated because of procedure and */ - /* array modifiers. */ - for ( ; ; ) - { p = skipspace(p - 1, -1); - switch ( *p ) - { - case ']': /* skip array dimension(s) */ - case ')': /* skip procedure args OR name */ - { int level = 1; - while ( level ) - switch ( *--p ) - { - case ']': case ')': level++; break; - case '[': case '(': level--; break; - case '/': p = skipspace(p, -1) + 1; break; - default: ; - } - } - if ( *p == '(' && *skipspace(p + 1, 1) == '*' ) - { /* We found the name being declared */ - while ( !isidfirstchar(*p) ) - p = skipspace(p, 1) + 1; - goto found; - } - break; - default: goto found; - } - } -found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' ) - { if ( convert_varargs ) - { *bp++ = "va_alist"; - vararg = p-2; - } - else - { p++; - if ( bp == breaks + 1 ) /* sole argument */ - writeblanks(breaks[0], p); - else - writeblanks(bp[-1] - 1, p); - bp--; - } - } - else - { while ( isidchar(*p) ) p--; - *bp++ = p+1; - } - p = end; - } - while ( *p++ == ',' ); - *bp = p; - /* Make a special check for 'void' arglist */ - if ( bp == breaks+2 ) - { p = skipspace(breaks[0], 1); - if ( !strncmp(p, "void", 4) ) - { p = skipspace(p+4, 1); - if ( p == breaks[2] - 1 ) - { bp = breaks; /* yup, pretend arglist is empty */ - writeblanks(breaks[0], p + 1); - } - } - } - /* Put out the function name and left parenthesis. */ - p = buf; - while ( p != endfn ) putc(*p, out), p++; - /* Put out the declaration. 
*/ - if ( header ) - { fputs(");", out); - for ( p = breaks[0]; *p; p++ ) - if ( *p == '\r' || *p == '\n' ) - putc(*p, out); - } - else - { for ( ap = breaks+1; ap < bp; ap += 2 ) - { p = *ap; - while ( isidchar(*p) ) - putc(*p, out), p++; - if ( ap < bp - 1 ) - fputs(", ", out); - } - fputs(") ", out); - /* Put out the argument declarations */ - for ( ap = breaks+2; ap <= bp; ap += 2 ) - (*ap)[-1] = ';'; - if ( vararg != 0 ) - { *vararg = 0; - fputs(breaks[0], out); /* any prior args */ - fputs("va_dcl", out); /* the final arg */ - fputs(bp[0], out); - } - else - fputs(breaks[0], out); - } - free((char *)breaks); - return 0; -} diff --git a/bitops.c b/bitops.c deleted file mode 100644 index 357aec6..0000000 --- a/bitops.c +++ /dev/null @@ -1,116 +0,0 @@ -/* bitops.c -- Bit-vector manipulation for mkid - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include <config.h> -#include "bitops.h" - -static int str_to_int __P((char *bufp, int size)); -static char *int_to_str __P((int i, int size)); - -int -vec_to_bits (char *bit_array, char *vec, int size) -{ - int i; - int count; - - for (count = 0; (*vec & 0xff) != 0xff; count++) - { - i = str_to_int (vec, size); - BITSET (bit_array, i); - vec += size; - } - return count; -} - -int -bits_to_vec (char *vec, char *bit_array, int bit_count, int size) -{ - char *element; - int i; - int count; - - for (count = i = 0; i < bit_count; i++) - { - if (!BITTST (bit_array, i)) - continue; - element = int_to_str (i, size); - switch (size) - { - case 4: - *vec++ = *element++; - case 3: - *vec++ = *element++; - case 2: - *vec++ = *element++; - case 1: - *vec++ = *element++; - } - count++; - } - *vec++ = 0xff; - - return count; -} - -/* NEEDSWORK: ENDIAN */ - -static char * -int_to_str (int i, int size) -{ - static char buf0[4]; - char *bufp = &buf0[size]; - - switch (size) - { - case 4: - *--bufp = (i & 0xff); - i >>= 8; - case 3: - *--bufp = (i & 0xff); - i >>= 8; - case 2: - *--bufp = (i & 0xff); - i >>= 8; - case 1: - *--bufp = (i & 0xff); - } - return buf0; -} - -static int -str_to_int (char *bufp, int size) -{ - int i = 0; - - bufp--; - switch (size) - { - case 4: - i |= (*++bufp & 0xff); - i <<= 8; - case 3: - i |= (*++bufp & 0xff); - i <<= 8; - case 2: - i |= (*++bufp & 0xff); - i <<= 8; - case 1: - i |= (*++bufp & 0xff); - } - return i; -} diff --git a/bitops.h b/bitops.h deleted file mode 100644 index 7b9f15c..0000000 --- a/bitops.h +++ /dev/null @@ -1,31 +0,0 @@ -/* bitops.h -- defs for interface to bitops.c, plus bit-vector macros - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _bitops_h_ -#define _bitops_h_ - -#define BITTST(ba, bn) ((ba)[(bn) >> 3] & (1 << ((bn) & 0x07))) -#define BITSET(ba, bn) ((ba)[(bn) >> 3] |= (1 << ((bn) & 0x07))) -#define BITCLR(ba, bn) ((ba)[(bn) >> 3] &=~(1 << ((bn) & 0x07))) -#define BITAND(ba, bn) ((ba)[(bn) >> 3] &= (1 << ((bn) & 0x07))) -#define BITXOR(ba, bn) ((ba)[(bn) >> 3] ^= (1 << ((bn) & 0x07))) - -int vec_to_bits __P((char *bit_array, char *vec, int size)); -int bits_to_vec __P((char *vec, char *bit_array, int bit_count, int size)); - -#endif /* not _bitops_h_ */ @@ -1,26 +0,0 @@ -.TH FID 1 -.SH NAME -fid \- query id database for specific files -.SH SYNOPSIS -.B fid -.RB [ \-f \^file] -file1 [ file2 ] -.SH DESCRIPTION -.I Fid -is a query tool for the id database. If you specify a single file -name as an argument, it prints a list of all the identifiers that -occur in that file. -.PP -When you give it two file names it takes the intersection. It prints -only the list of identifiers that occur in both files. -.PP -The following options are recognized: -.TP 10 -.BR \-f file\^ -Use -.I file\^ -as the database instead of the default -.BR ID . -.SH SEE ALSO -mkid(1), -lid(1). @@ -1,186 +0,0 @@ -/* fid.c -- list all tokens in the given file(s) - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include <stdio.h> -#include <string.h> -#include <ctype.h> - -#include <config.h> -#include "idfile.h" -#include "bitops.h" -#include "filenames.h" -#include "misc.h" -#include "strxtra.h" -#include "alloc.h" -#include "token.h" - -int get_idarg_index __P((char const *file_name)); -int is_hit __P((unsigned char const *hits, int file_number)); -int is_hit_1 __P((unsigned char const **hits, int level, int file_number)); -void skip_hits __P((unsigned char const **hits, int level)); - -FILE *id_FILE; -struct idhead idh; -struct idarg *idarg_0; -int tree8_levels; -char const *program_name; - -static void -usage (void) -{ - fprintf (stderr, "Usage: %s [-f<file>] file1 file2\n", program_name); - exit (1); -} - -int -main (int argc, char **argv) -{ - char const *id_file_name = IDFILE; - char *buf; - int op; - int i; - int index_1 = -1; - int index_2 = -1; - - program_name = basename ((argc--, *argv++)); - - while (argc) - { - char const *arg = (argc--, *argv++); - switch (op = *arg++) - { - case '-': - case '+': - break; - default: - (argc++, --argv); - goto argsdone; - } - while (*arg) - switch (*arg++) - { - case 'f': - id_file_name = arg; - goto nextarg; - default: - usage (); - } - nextarg:; - } -argsdone: - - id_file_name = find_id_file (id_file_name); - if (id_file_name == NULL) - { - filerr ("open", id_file_name); - return 1; - } - id_FILE = init_id_file (id_file_name, &idh, &idarg_0); - switch (argc) - { - case 2: - index_2 = get_idarg_index (argv[1]); - /* fall through */ - case 1: - index_1 = 
get_idarg_index (argv[0]); - break; - default: - usage (); - } - - if (index_1 < 0) - return 1; - - buf = MALLOC (char, idh.idh_buf_size); - fseek (id_FILE, idh.idh_tokens_offset, 0); - tree8_levels = tree8_count_levels (idh.idh_files); - - for (i = 0; i < idh.idh_tokens; i++) - { - unsigned char const *hits; - - gets_past_00 (buf, id_FILE); - hits = tok_hits_addr (buf); - if (is_hit (hits, index_1) && (index_2 < 0 || is_hit (hits, index_2))) - printf ("%s\n", tok_string (buf)); - } - - return 0; -} - -int -get_idarg_index (char const *file_name) -{ - struct idarg *idarg; - int file_name_length = strlen (file_name); - struct idarg *end = &idarg_0[idh.idh_files]; - - for (idarg = idarg_0; idarg < end; ++idarg) - { - int arg_length = strlen (idarg->ida_arg); - int prefix_length = arg_length - file_name_length; - if (prefix_length < 0 - || (prefix_length > 0 && idarg->ida_arg[prefix_length - 1] != '/')) - continue; - if (strequ (&idarg->ida_arg[prefix_length], file_name)) - return idarg->ida_index; - } - fprintf (stderr, "%s: not found\n", file_name); - return -1; -} - -int -is_hit (unsigned char const *hits, int file_number) -{ - return is_hit_1 (&hits, tree8_levels, file_number); -} - -int -is_hit_1 (unsigned char const **hits, int level, int file_number) -{ - int file_hit = 1 << ((file_number >> (3 * --level)) & 7); - int hit = *(*hits)++; - int bit; - - if (!(file_hit & hit)) - return 0; - if (level == 0) - return 1; - - for (bit = 1; (bit < file_hit) && (bit & 0xff); bit <<= 1) - { - if (hit & bit) - skip_hits (hits, level); - } - return is_hit_1 (hits, level, file_number); -} - -void -skip_hits (unsigned char const **hits, int level) -{ - int hit = *(*hits)++; - int bit; - - if (--level == 0) - return; - for (bit = 1; bit & 0xff; bit <<= 1) - { - if (hit & bit) - skip_hits (hits, level); - } -} diff --git a/filenames.c b/filenames.c deleted file mode 100644 index ad6a23d..0000000 --- a/filenames.c +++ /dev/null @@ -1,530 +0,0 @@ -/* filenames.c -- file & 
directory name manipulations - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include <stdlib.h> -#include <unistd.h> -#include <string.h> -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> - -#include <config.h> -#include "strxtra.h" -#include "filenames.h" -#include "misc.h" -#include "error.h" - -#ifdef S_IFLNK -static char const *unsymlink __P((char *n)); -#endif -static void canonical_name __P((char *n)); -static char const *lex_name __P((void)); -static int same_link __P((struct stat *x, struct stat *y)); - -FILE *popen (); - -/* relative_file_name takes two arguments: - 1) an absolute path name for a directory. (*must* have a trailing "/"). - 2) an absolute path name for a file. - - It looks for a common directory prefix and generates a name for the - given file that is relative to the given directory. The result - might begin with a long sequence of "../"s, if the given names are - long but have a short common prefix. - - (Note: If the the result of relative_file_name is appended to its - directory argument and passed to span_file_name, span_file_name's - result should match relative_file_name's file name argument.) 
- - Examples: - dir arg return value - /x/y/z/ /x/y/q/file ../q/file - /x/y/z/ /q/t/p/file ../../../q/t/p/file - /x/y/z/ /x/y/z/file file */ - -char const * -relative_file_name (char const *dir_name, char const *file_name) -{ - static char file_name_buffer[MAXPATHLEN]; - char *bp = file_name_buffer; - - while (*file_name && *file_name++ == *dir_name++) - ; - while (*--dir_name != '/') - ; - dir_name++; - while (*--file_name != '/') - ; - file_name++; - /* file_name and dir_name now point past their common directory prefix */ - - /* copy "../" into the buffer for each component of the directory - that remains. */ - - while (*dir_name) - { - if (*dir_name++ == '/') - { - strcpy (bp, "../"); - bp += 3; - } - } - - strcpy (bp, file_name); - return file_name_buffer; -} - -/* span_file_name accepts a canonical directory name and a file name - and returns a canonical path to the file name relative to the - directory. If the file name is absolute, then the directory is - ignored. */ - -char const * -span_file_name (char const *dir_name, char const *file_name) -{ - char *fnp; - static char file_name_buffer[MAXPATHLEN]; - - strcpy (file_name_buffer, dir_name); - fnp = file_name_buffer + strlen (file_name_buffer); - *fnp++ = '/'; - strcpy (fnp, file_name); - canonical_name (fnp); - /* If it is an absolute name, just return it */ - if (*fnp == '/') - return fnp; - /* otherwise, combine the names to canonical form */ - canonical_name (file_name_buffer); - return file_name_buffer; -} - -/* root_name strips off the directory prefix and one suffix. If there - is neither prefix nor suffix, (i.e., "/"), it returns the empty - string. 
*/ - -char const * -root_name (char const *path) -{ - static char file_name_buffer[MAXPATHLEN]; - char const *root; - char const *dot; - - root = strrchr (path, '/'); - if (root == NULL) - root = path; - else - root++; - - dot = strrchr (root, '.'); - if (dot == NULL) - strcpy (file_name_buffer, root); - else - { - strncpy (file_name_buffer, root, dot - root); - file_name_buffer[dot - root] = '\0'; - } - return file_name_buffer; -} - -/* suff_name returns the suffix (including the dot), or the - empty-string if there is none. */ - -char const * -suff_name (char const *path) -{ - char const *dot; - - dot = strrchr (path, '.'); - if (dot == NULL) - return ""; - return dot; -} - -/* Return non-zero if the two stat bufs refer to the same file or - directory */ - -static int -same_link (struct stat *x, struct stat *y) -{ - return ((x->st_ino == y->st_ino) && (x->st_dev == y->st_dev)); -} - -/* find_id_file adds "../"s to the beginning of a file name until it - finds the one that really exists. If the file name starts with - "/", just return it as is. If we fail for any reason, report the - error and exit. 
*/ - -char const * -find_id_file (char const *arg) -{ - static char file_name_buffer[MAXPATHLEN]; - char *name; - char *dir_end; - struct stat root_buf; - struct stat stat_buf; - - if (arg[0] == '/') - return arg; - if (stat (arg, &stat_buf) == 0) - return arg; - - name = &file_name_buffer[sizeof (file_name_buffer) - strlen (arg) - 1]; - strcpy (name, arg); - dir_end = name - 1; - - if (stat ("/", &root_buf) < 0) - { - error (1, errno, "Can't stat `/'"); - return NULL; - } - do - { - *--name = '/'; - *--name = '.'; - *--name = '.'; - if (stat (name, &stat_buf) == 0) - return name; - *dir_end = '\0'; - if (stat (name, &stat_buf) < 0) - return NULL; - *dir_end = '/'; - } - while (name >= &file_name_buffer[3] && !same_link(&stat_buf, &root_buf)); - error (1, errno, "Can't stat `%s' anywhere between here and `/'", arg); - return NULL; -} - -/* define special name components */ - -static char slash[] = "/"; -static char dot[] = "."; -static char dotdot[] = ".."; - -/* nextc points to the next character to look at in the string or is - * null if the end of string was reached. - * - * namep points to buffer that holds the components. - */ -static char const *nextc = NULL; -static char *namep; - -/* lex_name - Return next name component. Uses global variables initialized - * by canonical_name to figure out what it is scanning. - */ -static char const * -lex_name (void) -{ - char c; - char const *d; - - if (nextc == NULL) - return NULL; - - c = *nextc++; - if (c == '\0') - { - nextc = NULL; - return NULL; - } - if (c == '/') - return slash; - if (c == '.') - { - if ((*nextc == '/') || (*nextc == '\0')) - return dot; - if (*nextc == '.' && (*(nextc + 1) == '/' || *(nextc + 1) == '\0')) - { - ++nextc; - return dotdot; - } - } - d = namep; - *namep++ = c; - while ((c = *nextc) != '/') - { - *namep++ = c; - if (c == '\0') - { - nextc = NULL; - return d; - } - ++nextc; - } - *namep++ = '\0'; - return d; -} - -/* canonical_name puts a file name in canonical form. 
It looks for all - the whacky wonderful things a demented *ni* programmer might put in - a file name and reduces the name to canonical form. */ - -static void -canonical_name (char *file_name) -{ - char const *components[1024]; - char const **cap = components; - char const **cad; - char const *cp; - char name_buf[2048]; - char const *s; - - /* initialize scanner */ - nextc = file_name; - namep = name_buf; - - while ((cp = lex_name ())) - *cap++ = cp; - if (cap == components) - return; - *cap = NULL; - - /* remove all trailing slashes and dots */ - while ((--cap != components) && - ((*cap == slash) || (*cap == dot))) - *cap = NULL; - - /* squeeze out all "./" sequences */ - cad = cap = components; - while (*cap) - { - if ((*cap == dot) && (*(cap + 1) == slash)) - cap += 2; - else - *cad++ = *cap++; - } - *cad++ = NULL; - - /* find multiple // and use last slash as root, except on apollo which - apparently actually uses // in real file names (don't ask me why). */ -#ifndef apollo - s = NULL; - cad = cap = components; - while (*cap) - { - if ((s == slash) && (*cap == slash)) - cad = components; - s = *cap++; - *cad++ = s; - } - *cad = NULL; -#endif - - /* if this is absolute name get rid of any /.. at beginning */ - if ((components[0] == slash) && (components[1] == dotdot)) - { - cad = cap = &components[1]; - while (*cap == dotdot) - { - ++cap; - if (*cap == NULL) - break; - if (*cap == slash) - ++cap; - } - while (*cap) - *cad++ = *cap++; - *cad = NULL; - } - - /* squeeze out any name/.. sequences (but leave leading ../..) */ - cap = components; - cad = cap; - while (*cap) - { - if ((*cap == dotdot) && ((cad - 2) >= components) && (*(cad - 2) != dotdot)) - { - cad -= 2; - ++cap; - if (*cap) - ++cap; - } - else - *cad++ = *cap++; - } - /* squeezing out a trailing /.. can leave unsightly trailing /s */ - if ((cad >= &components[2]) && ((*(cad - 1)) == slash)) - --cad; - *cad = NULL; - /* if it was just name/.. it now becomes . 
*/ - if (components[0] == NULL) - { - components[0] = dot; - components[1] = NULL; - } - - /* re-assemble components */ - cap = components; - while ((s = *cap++)) - { - while (*s) - *file_name++ = *s++; - } - *file_name++ = '\0'; -} - -/* get_PWD is an optimized getwd(3) or getcwd(3) that takes advantage - of the shell's $PWD environment-variable, if present. This is - particularly worth doing on NFS mounted filesystems. */ - -char const * -get_PWD (char *pwd_buf) -{ - struct stat pwd_stat; - struct stat dot_stat; - char *pwd = getenv ("PWD"); - - if (pwd) - { - pwd = strcpy (pwd_buf, pwd); - if (pwd[0] != '/' - || stat (".", &dot_stat) < 0 - || stat (pwd, &pwd_stat) < 0 - || !same_link(&pwd_stat, &dot_stat) -#ifdef S_IFLNK - || !unsymlink (pwd) - || pwd[0] != '/' - || stat (pwd, &pwd_stat) < 0 - || !same_link(&pwd_stat, &dot_stat) -#endif - ) - pwd = 0; - } - - if (pwd == 0) - { - /* Oh well, something did not work out right, so do it the hard way... */ -#if HAVE_GETCWD - pwd = getcwd (pwd_buf, MAXPATHLEN); -#else -#if HAVE_GETWD - pwd = getwd (pwd_buf); -#endif -#endif - } - if (pwd) - strcat (pwd, "/"); - else - error (1, errno, "Can't determine current working directory!"); - - return pwd; -} - -#ifdef S_IFLNK - -/* unsymlink resolves all symbolic links in a file name into hard - links. If successful, it returns its argument and transforms - the file name in situ. If unsuccessful, it returns NULL, and leaves - the argument untouched. */ - -static char const * -unsymlink (char *file_name_buf) -{ - char new_buf[MAXPATHLEN]; - char part_buf[MAXPATHLEN]; - char link_buf[MAXPATHLEN]; - char const *s; - char *d; - char *lastcomp; - struct stat stat_buf; - - strcpy (new_buf, file_name_buf); - - /* Now loop, lstating each component to see if it is a symbolic - link. For symbolic link components, use readlink() to get the - real name, put the read link name in place of the last component, - and start again. 
*/ - - canonical_name (new_buf); - s = new_buf; - d = part_buf; - if (*s == '/') - *d++ = *s++; - lastcomp = d; - for (;;) - { - if ((*s == '/') || (*s == '\0')) - { - /* we have a complete component name in partname, check it out */ - *d = '\0'; - if (lstat (part_buf, &stat_buf) < 0) - return NULL; - if ((stat_buf.st_mode & S_IFMT) == S_IFLNK) - { - /* This much of name is a symbolic link, do a readlink - and tack the bits and pieces together */ - int link_size = readlink (part_buf, link_buf, MAXPATHLEN); - if (link_size < 0) - return NULL; - link_buf[link_size] = '\0'; - strcpy (lastcomp, link_buf); - lastcomp += link_size; - strcpy (lastcomp, s); - strcpy (new_buf, part_buf); - canonical_name (new_buf); - s = new_buf; - d = part_buf; - if (*s == '/') - *d++ = *s++; - lastcomp = d; - } - else - { - /* Not a symlink, just keep scanning to next component */ - if (*s == '\0') - break; - *d++ = *s++; - lastcomp = d; - } - } - else - { - *d++ = *s++; - } - } - strcpy (file_name_buf, new_buf); - return file_name_buf; -} - -#endif - -FILE * -open_source_FILE (char *file_name, char const *filter) -{ - FILE *source_FILE; - - if (filter) - { - char command[1024]; - sprintf (command, filter, file_name); - source_FILE = popen (command, "r"); - } - else - source_FILE = fopen (file_name, "r"); - if (source_FILE == NULL) - filerr ("open", file_name); - return source_FILE; -} - -void -close_source_FILE (FILE *fp, char const *filter) -{ - if (filter) - pclose (fp); - else - fclose (fp); -} diff --git a/filenames.h b/filenames.h deleted file mode 100644 index 6d5b9f2..0000000 --- a/filenames.h +++ /dev/null @@ -1,36 +0,0 @@ -/* filenames.h -- defs for interface to filenames.c - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _filenames_h_ -#define _filenames_h_ - -#include <sys/param.h> -#ifndef MAXPATHLEN -#define MAXPATHLEN 1024 -#endif - -char const *relative_file_name __P((char const *dir_name, char const *file_name)); -char const *span_file_name __P((char const *dir, char const *arg)); -char const *root_name __P((char const *path)); -char const *suff_name __P((char const *path)); -char const *find_id_file __P((char const *arg)); -char const *get_PWD __P((char *pathname)); -FILE *open_source_FILE __P((char *file_name, char const *filter)); -void close_source_FILE __P((FILE *fp, char const *filter)); - -#endif /* not _filenames_h_ */ diff --git a/getopt.c b/getopt.c deleted file mode 100644 index 43c0a6a..0000000 --- a/getopt.c +++ /dev/null @@ -1,748 +0,0 @@ -/* Getopt for GNU. - NOTE: getopt is now part of the C library, so if you don't know what - "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu - before changing it! - - Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>. - Ditto for AIX 3.2 and <stdlib.h>. */ -#ifndef _NO_PROTO -#define _NO_PROTO -#endif - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#if !defined (__STDC__) || !__STDC__ -/* This is a separate conditional since some stdc systems - reject `defined (const)'. */ -#ifndef const -#define const -#endif -#endif - -#include <stdio.h> - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#if defined (_LIBC) || !defined (__GNU_LIBRARY__) - - -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. */ -#ifdef __GNU_LIBRARY__ -/* Don't include stdlib.h for non-GNU C libraries because some of them - contain conflicting prototypes for getopt. */ -#include <stdlib.h> -#endif /* GNU C library. */ - -/* This version of `getopt' appears to the caller like standard Unix `getopt' - but it behaves differently for the user, since it allows the user - to intersperse the options with the other arguments. - - As `getopt' works, it permutes the elements of ARGV so that, - when it is done, all the options precede everything else. Thus - all application programs are extended to handle flexible argument order. - - Setting the environment variable POSIXLY_CORRECT disables permutation. - Then the behavior is completely standard. 
- - GNU application programs can use a third alternative mode in which - they can distinguish the relative order of options and other arguments. */ - -#include "getopt.h" - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -char *optarg = NULL; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns EOF, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -/* XXX 1003.2 says this must be 1 before any call. */ -int optind = 0; - -/* The next char to be scanned in the option-element - in which the last option character we returned was found. - This allows us to pick up the scan where we left off. - - If this is zero, or a null string, it means resume the scan - by advancing to the next ARGV-element. */ - -static char *nextchar; - -/* Callers store zero here to inhibit the error message - for unrecognized options. */ - -int opterr = 1; - -/* Set to an option character which was unrecognized. - This must be initialized on some systems to avoid linking in the - system's own getopt implementation. */ - -int optopt = '?'; - -/* Describe how to deal with options that follow non-option ARGV-elements. - - If the caller did not specify anything, - the default is REQUIRE_ORDER if the environment variable - POSIXLY_CORRECT is defined, PERMUTE otherwise. - - REQUIRE_ORDER means don't recognize them as options; - stop option processing when the first non-option is seen. - This is what Unix does. 
- This mode of operation is selected by either setting the environment - variable POSIXLY_CORRECT, or using `+' as the first character - of the list of option characters. - - PERMUTE is the default. We permute the contents of ARGV as we scan, - so that eventually all the non-options are at the end. This allows options - to be given in any order, even with programs that were not written to - expect this. - - RETURN_IN_ORDER is an option available to programs that were written - to expect options and other ARGV-elements in any order and that care about - the ordering of the two. We describe each non-option ARGV-element - as if it were the argument of an option with character code 1. - Using `-' as the first character of the list of option characters - selects this mode of operation. - - The special argument `--' forces an end of option-scanning regardless - of the value of `ordering'. In the case of RETURN_IN_ORDER, only - `--' can cause `getopt' to return EOF with `optind' != ARGC. */ - -static enum -{ - REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER -} ordering; - -/* Value of POSIXLY_CORRECT environment variable. */ -static char *posixly_correct; - -#ifdef __GNU_LIBRARY__ -/* We want to avoid inclusion of string.h with non-GNU libraries - because there are many ways it can cause trouble. - On some systems, it contains special magic macros that don't work - in GCC. */ -#include <string.h> -#define my_index strchr -#else - -/* Avoid depending on library functions or files - whose names are inconsistent. */ - -char *getenv (); - -static char * -my_index (str, chr) - const char *str; - int chr; -{ - while (*str) - { - if (*str == chr) - return (char *) str; - str++; - } - return 0; -} - -/* If using GCC, we can safely declare strlen this way. - If not using GCC, it is ok not to declare it. */ -#ifdef __GNUC__ -/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. - That was relevant to code that was here before. 
*/ -#if !defined (__STDC__) || !__STDC__ -/* gcc with -traditional declares the built-in strlen to return int, - and has done so at least since version 2.4.5. -- rms. */ -extern int strlen (const char *); -#endif /* not __STDC__ */ -#endif /* __GNUC__ */ - -#endif /* not __GNU_LIBRARY__ */ - -/* Handle permutation of arguments. */ - -/* Describe the part of ARGV that contains non-options that have - been skipped. `first_nonopt' is the index in ARGV of the first of them; - `last_nonopt' is the index after the last of them. */ - -static int first_nonopt; -static int last_nonopt; - -/* Exchange two adjacent subsequences of ARGV. - One subsequence is elements [first_nonopt,last_nonopt) - which contains all the non-options that have been skipped so far. - The other is elements [last_nonopt,optind), which contains all - the options processed since those non-options were skipped. - - `first_nonopt' and `last_nonopt' are relocated so that they describe - the new indices of the non-options in ARGV after they are moved. */ - -static void -exchange (argv) - char **argv; -{ - int bottom = first_nonopt; - int middle = last_nonopt; - int top = optind; - char *tem; - - /* Exchange the shorter segment with the far end of the longer segment. - That puts the shorter segment into the right place. - It leaves the longer segment in the right place overall, - but it consists of two parts that need to be swapped next. */ - - while (top > middle && middle > bottom) - { - if (top - middle > middle - bottom) - { - /* Bottom segment is the short one. */ - int len = middle - bottom; - register int i; - - /* Swap it with the top part of the top segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[top - (middle - bottom) + i]; - argv[top - (middle - bottom) + i] = tem; - } - /* Exclude the moved bottom segment from further swapping. */ - top -= len; - } - else - { - /* Top segment is the short one. 
*/ - int len = top - middle; - register int i; - - /* Swap it with the bottom part of the bottom segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[middle + i]; - argv[middle + i] = tem; - } - /* Exclude the moved top segment from further swapping. */ - bottom += len; - } - } - - /* Update records for the slots the non-options now occupy. */ - - first_nonopt += (optind - last_nonopt); - last_nonopt = optind; -} - -/* Initialize the internal data when the first call is made. */ - -static const char * -_getopt_initialize (optstring) - const char *optstring; -{ - /* Start processing options with ARGV-element 1 (since ARGV-element 0 - is the program name); the sequence of previously skipped - non-option ARGV-elements is empty. */ - - first_nonopt = last_nonopt = optind = 1; - - nextchar = NULL; - - posixly_correct = getenv ("POSIXLY_CORRECT"); - - /* Determine how to handle the ordering of options and nonoptions. */ - - if (optstring[0] == '-') - { - ordering = RETURN_IN_ORDER; - ++optstring; - } - else if (optstring[0] == '+') - { - ordering = REQUIRE_ORDER; - ++optstring; - } - else if (posixly_correct != NULL) - ordering = REQUIRE_ORDER; - else - ordering = PERMUTE; - - return optstring; -} - -/* Scan elements of ARGV (whose length is ARGC) for option characters - given in OPTSTRING. - - If an element of ARGV starts with '-', and is not exactly "-" or "--", - then it is an option element. The characters of this element - (aside from the initial '-') are option characters. If `getopt' - is called repeatedly, it returns successively each of the option characters - from each of the option elements. - - If `getopt' finds another option character, it returns that character, - updating `optind' and `nextchar' so that the next call to `getopt' can - resume the scan with the following option character or ARGV-element. - - If there are no more option characters, `getopt' returns `EOF'. 
- Then `optind' is the index in ARGV of the first ARGV-element - that is not an option. (The ARGV-elements have been permuted - so that those that are not options now come last.) - - OPTSTRING is a string containing the legitimate option characters. - If an option character is seen that is not listed in OPTSTRING, - return '?' after printing an error message. If you set `opterr' to - zero, the error message is suppressed but we still return '?'. - - If a char in OPTSTRING is followed by a colon, that means it wants an arg, - so the following text in the same ARGV-element, or the text of the following - ARGV-element, is returned in `optarg'. Two colons mean an option that - wants an optional arg; if there is text in the current ARGV-element, - it is returned in `optarg', otherwise `optarg' is set to zero. - - If OPTSTRING starts with `-' or `+', it requests different methods of - handling the non-option ARGV-elements. - See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. - - Long-named options begin with `--' instead of `-'. - Their names may be abbreviated as long as the abbreviation is unique - or is an exact match for some defined option. If they have an - argument, it follows the option name in the same ARGV-element, separated - from the option name by a `=', or else the in next ARGV-element. - When `getopt' finds a long-named option, it returns 0 if that option's - `flag' field is nonzero, the value of the option's `val' field - if the `flag' field is zero. - - The elements of ARGV aren't really const, because we permute them. - But we pretend they're const in the prototype to be compatible - with other systems. - - LONGOPTS is a vector of `struct option' terminated by an - element containing a name which is zero. - - LONGIND returns the index in LONGOPT of the long-named option found. - It is only valid when a long-named option has been found by the most - recent call. 
- - If LONG_ONLY is nonzero, '-' as well as '--' can introduce - long-named options. */ - -int -_getopt_internal (argc, argv, optstring, longopts, longind, long_only) - int argc; - char *const *argv; - const char *optstring; - const struct option *longopts; - int *longind; - int long_only; -{ - optarg = NULL; - - if (optind == 0) - optstring = _getopt_initialize (optstring); - - if (nextchar == NULL || *nextchar == '\0') - { - /* Advance to the next ARGV-element. */ - - if (ordering == PERMUTE) - { - /* If we have just processed some options following some non-options, - exchange them so that the options come first. */ - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (last_nonopt != optind) - first_nonopt = optind; - - /* Skip any additional non-options - and extend the range of non-options previously skipped. */ - - while (optind < argc - && (argv[optind][0] != '-' || argv[optind][1] == '\0')) - optind++; - last_nonopt = optind; - } - - /* The special ARGV-element `--' means premature end of options. - Skip it like a null option, - then exchange with previous non-options as if it were an option, - then skip everything else like a non-option. */ - - if (optind != argc && !strcmp (argv[optind], "--")) - { - optind++; - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (first_nonopt == last_nonopt) - first_nonopt = optind; - last_nonopt = argc; - - optind = argc; - } - - /* If we have done all the ARGV-elements, stop the scan - and back over any non-options that we skipped and permuted. */ - - if (optind == argc) - { - /* Set the next-arg-index to point at the non-options - that we previously skipped, so the caller will digest them. */ - if (first_nonopt != last_nonopt) - optind = first_nonopt; - return EOF; - } - - /* If we have come to a non-option and did not permute it, - either stop the scan or describe it to the caller and pass it by. 
*/ - - if ((argv[optind][0] != '-' || argv[optind][1] == '\0')) - { - if (ordering == REQUIRE_ORDER) - return EOF; - optarg = argv[optind++]; - return 1; - } - - /* We have found another option-ARGV-element. - Skip the initial punctuation. */ - - nextchar = (argv[optind] + 1 - + (longopts != NULL && argv[optind][1] == '-')); - } - - /* Decode the current option-ARGV-element. */ - - /* Check whether the ARGV-element is a long option. - - If long_only and the ARGV-element has the form "-f", where f is - a valid short option, don't consider it an abbreviated form of - a long option that starts with f. Otherwise there would be no - way to give the -f short option. - - On the other hand, if there's a long option "fubar" and - the ARGV-element is "-fu", do consider that an abbreviation of - the long option, just like "--fu", and not "-f" with arg "u". - - This distinction seems to be the most useful approach. */ - - if (longopts != NULL - && (argv[optind][1] == '-' - || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound; - int option_index; - - for (nameend = nextchar; *nameend && *nameend != '='; nameend++) - /* Do nothing. */ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, nextchar, nameend - nextchar)) - { - if (nameend - nextchar == strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else - /* Second or later nonexact match found. 
*/ - ambig = 1; - } - - if (ambig && !exact) - { - if (opterr) - fprintf (stderr, "%s: option `%s' is ambiguous\n", - argv[0], argv[optind]); - nextchar += strlen (nextchar); - optind++; - return '?'; - } - - if (pfound != NULL) - { - option_index = indfound; - optind++; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. */ - if (pfound->has_arg) - optarg = nameend + 1; - else - { - if (opterr) - { - if (argv[optind - 1][1] == '-') - /* --option */ - fprintf (stderr, - "%s: option `--%s' doesn't allow an argument\n", - argv[0], pfound->name); - else - /* +option or -option */ - fprintf (stderr, - "%s: option `%c%s' doesn't allow an argument\n", - argv[0], argv[optind - 1][0], pfound->name); - } - nextchar += strlen (nextchar); - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (optind < argc) - optarg = argv[optind++]; - else - { - if (opterr) - fprintf (stderr, "%s: option `%s' requires an argument\n", - argv[0], argv[optind - 1]); - nextchar += strlen (nextchar); - return optstring[0] == ':' ? ':' : '?'; - } - } - nextchar += strlen (nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - - /* Can't find it as a long option. If this is not getopt_long_only, - or the option starts with '--' or is not a valid short - option, then it's an error. - Otherwise interpret it as a short option. */ - if (!long_only || argv[optind][1] == '-' - || my_index (optstring, *nextchar) == NULL) - { - if (opterr) - { - if (argv[optind][1] == '-') - /* --option */ - fprintf (stderr, "%s: unrecognized option `--%s'\n", - argv[0], nextchar); - else - /* +option or -option */ - fprintf (stderr, "%s: unrecognized option `%c%s'\n", - argv[0], argv[optind][0], nextchar); - } - nextchar = (char *) ""; - optind++; - return '?'; - } - } - - /* Look at and handle the next short option-character. 
*/ - - { - char c = *nextchar++; - char *temp = my_index (optstring, c); - - /* Increment `optind' when we start to process its last character. */ - if (*nextchar == '\0') - ++optind; - - if (temp == NULL || c == ':') - { - if (opterr) - { - if (posixly_correct) - /* 1003.2 specifies the format of this message. */ - fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c); - else - fprintf (stderr, "%s: invalid option -- %c\n", argv[0], c); - } - optopt = c; - return '?'; - } - if (temp[1] == ':') - { - if (temp[2] == ':') - { - /* This is an option that accepts an argument optionally. */ - if (*nextchar != '\0') - { - optarg = nextchar; - optind++; - } - else - optarg = NULL; - nextchar = NULL; - } - else - { - /* This is an option that requires an argument. */ - if (*nextchar != '\0') - { - optarg = nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - optind++; - } - else if (optind == argc) - { - if (opterr) - { - /* 1003.2 specifies the format of this message. */ - fprintf (stderr, "%s: option requires an argument -- %c\n", - argv[0], c); - } - optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. */ - optarg = argv[optind++]; - nextchar = NULL; - } - } - return c; - } -} - -int -getopt (argc, argv, optstring) - int argc; - char *const *argv; - const char *optstring; -{ - return _getopt_internal (argc, argv, optstring, - (const struct option *) 0, - (int *) 0, - 0); -} - -#endif /* _LIBC or not __GNU_LIBRARY__. */ - -#ifdef TEST - -/* Compile with -DTEST to make an executable for use in testing - the above definition of `getopt'. */ - -int -main (argc, argv) - int argc; - char **argv; -{ - int c; - int digit_optind = 0; - - while (1) - { - int this_option_optind = optind ? 
optind : 1; - - c = getopt (argc, argv, "abc:d:0123456789"); - if (c == EOF) - break; - - switch (c) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value `%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } - } - - if (optind < argc) - { - printf ("non-option ARGV-elements: "); - while (optind < argc) - printf ("%s ", argv[optind++]); - printf ("\n"); - } - - exit (0); -} - -#endif /* TEST */ diff --git a/getopt.h b/getopt.h deleted file mode 100644 index 4ac33b7..0000000 --- a/getopt.h +++ /dev/null @@ -1,129 +0,0 @@ -/* Declarations for getopt. - Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#ifndef _GETOPT_H -#define _GETOPT_H 1 - -#ifdef __cplusplus -extern "C" { -#endif - -/* For communication from `getopt' to the caller. 
- When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -extern char *optarg; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns EOF, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -extern int optind; - -/* Callers store zero here to inhibit the error message `getopt' prints - for unrecognized options. */ - -extern int opterr; - -/* Set to an option character which was unrecognized. */ - -extern int optopt; - -/* Describe the long-named options requested by the application. - The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector - of `struct option' terminated by an element containing a name which is - zero. - - The field `has_arg' is: - no_argument (or 0) if the option does not take an argument, - required_argument (or 1) if the option requires an argument, - optional_argument (or 2) if the option takes an optional argument. - - If the field `flag' is not NULL, it points to a variable that is set - to the value given in the field `val' when the option is found, but - left unchanged if the option is not found. - - To have a long-named option do something other than set an `int' to - a compiled-in constant, such as set a value from `optarg', set the - option's `flag' field to zero and its `val' field to a nonzero - value (the equivalent single-letter option character, if there is - one). For long options that have a zero `flag' field, `getopt' - returns the contents of the `val' field. 
*/ - -struct option -{ -#if defined (__STDC__) && __STDC__ - const char *name; -#else - char *name; -#endif - /* has_arg can't be an enum because some compilers complain about - type mismatches in all the code that assumes it is an int. */ - int has_arg; - int *flag; - int val; -}; - -/* Names for the values of the `has_arg' field of `struct option'. */ - -#define no_argument 0 -#define required_argument 1 -#define optional_argument 2 - -#if defined (__STDC__) && __STDC__ -#ifdef __GNU_LIBRARY__ -/* Many other libraries have conflicting prototypes for getopt, with - differences in the consts, in stdlib.h. To avoid compilation - errors, only prototype getopt for the GNU C library. */ -extern int getopt (int argc, char *const *argv, const char *shortopts); -#else /* not __GNU_LIBRARY__ */ -extern int getopt (); -#endif /* __GNU_LIBRARY__ */ -extern int getopt_long (int argc, char *const *argv, const char *shortopts, - const struct option *longopts, int *longind); -extern int getopt_long_only (int argc, char *const *argv, - const char *shortopts, - const struct option *longopts, int *longind); - -/* Internal only. Users should not call this directly. */ -extern int _getopt_internal (int argc, char *const *argv, - const char *shortopts, - const struct option *longopts, int *longind, - int long_only); -#else /* not __STDC__ */ -extern int getopt (); -extern int getopt_long (); -extern int getopt_long_only (); - -extern int _getopt_internal (); -#endif /* __STDC__ */ - -#ifdef __cplusplus -} -#endif - -#endif /* _GETOPT_H */ diff --git a/getopt1.c b/getopt1.c deleted file mode 100644 index 4580211..0000000 --- a/getopt1.c +++ /dev/null @@ -1,180 +0,0 @@ -/* getopt_long and getopt_long_only entry points for GNU getopt. - Copyright (C) 1987, 88, 89, 90, 91, 92, 1993, 1994 - Free Software Foundation, Inc. 
- - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include "getopt.h" - -#if !defined (__STDC__) || !__STDC__ -/* This is a separate conditional since some stdc systems - reject `defined (const)'. */ -#ifndef const -#define const -#endif -#endif - -#include <stdio.h> - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#if defined (_LIBC) || !defined (__GNU_LIBRARY__) - - -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. 
*/ -#ifdef __GNU_LIBRARY__ -#include <stdlib.h> -#else -char *getenv (); -#endif - -#ifndef NULL -#define NULL 0 -#endif - -int -getopt_long (argc, argv, options, long_options, opt_index) - int argc; - char *const *argv; - const char *options; - const struct option *long_options; - int *opt_index; -{ - return _getopt_internal (argc, argv, options, long_options, opt_index, 0); -} - -/* Like getopt_long, but '-' as well as '--' can indicate a long option. - If an option that starts with '-' (not '--') doesn't match a long option, - but does match a short option, it is parsed as a short option - instead. */ - -int -getopt_long_only (argc, argv, options, long_options, opt_index) - int argc; - char *const *argv; - const char *options; - const struct option *long_options; - int *opt_index; -{ - return _getopt_internal (argc, argv, options, long_options, opt_index, 1); -} - - -#endif /* _LIBC or not __GNU_LIBRARY__. */ - -#ifdef TEST - -#include <stdio.h> - -int -main (argc, argv) - int argc; - char **argv; -{ - int c; - int digit_optind = 0; - - while (1) - { - int this_option_optind = optind ? 
optind : 1; - int option_index = 0; - static struct option long_options[] = - { - {"add", 1, 0, 0}, - {"append", 0, 0, 0}, - {"delete", 1, 0, 0}, - {"verbose", 0, 0, 0}, - {"create", 0, 0, 0}, - {"file", 1, 0, 0}, - {0, 0, 0, 0} - }; - - c = getopt_long (argc, argv, "abc:d:0123456789", - long_options, &option_index); - if (c == EOF) - break; - - switch (c) - { - case 0: - printf ("option %s", long_options[option_index].name); - if (optarg) - printf (" with arg %s", optarg); - printf ("\n"); - break; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value `%s'\n", optarg); - break; - - case 'd': - printf ("option d with value `%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } - } - - if (optind < argc) - { - printf ("non-option ARGV-elements: "); - while (optind < argc) - printf ("%s ", argv[optind++]); - printf ("\n"); - } - - exit (0); -} - -#endif /* TEST */ @@ -1,124 +0,0 @@ -/* hash.h -- decls for hash table - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _hash_h_ -#define _hash_h_ - -typedef unsigned long (*hash_t) __P((void const *key)); -typedef int (*hash_cmp_t) __P((void const *x, void const *y)); - -struct hash_table -{ - void **ht_vec; - unsigned long ht_size; /* total number of slots (power of 2) */ - unsigned long ht_capacity; /* usable slots, limited by loading-factor */ - unsigned long ht_fill; /* items in table */ - unsigned long ht_probes; /* number of comparisons */ - unsigned long ht_lookups; /* number of queries */ - unsigned int ht_rehashes; /* number of times we've expanded table */ - hash_t ht_hash_1; /* primary hash function */ - hash_t ht_hash_2; /* secondary hash function */ - hash_cmp_t ht_compare; /* comparison function */ -}; - -void hash_init __P((struct hash_table* ht, long size, - hash_t hash_1, hash_t hash_2, hash_cmp_t hash_cmp)); -void rehash __P((struct hash_table* ht)); -void **hash_lookup __P((struct hash_table* ht, void const *key)); - - -/* hash and comparison macros for string keys. 
*/ - -#define STRING_HASH_1(_key_, _result_) { \ - unsigned char const *kk = (unsigned char const *) (_key_) - 1; \ - while (*++kk) \ - (_result_) += (*kk << (kk[1] & 0xf)); \ -} while (0) -#define return_STRING_HASH_1(_key_) { \ - unsigned long result = 0; \ - STRING_HASH_1 ((_key_), result); \ - return result; \ -} while (0) - -#define STRING_HASH_2(_key_, _result_) { \ - unsigned char const *kk = (unsigned char const *) (_key_) - 1; \ - while (*++kk) \ - (_result_) += (*kk << (kk[1] & 0x7)); \ -} while (0) -#define return_STRING_HASH_2(_key_) { \ - unsigned long result = 0; \ - STRING_HASH_2 ((_key_), result); \ - return result; \ -} while (0) - -#define STRING_COMPARE(_x_, _y_, _result_) { \ - unsigned char const *xx = (unsigned char const *) (_x_) - 1; \ - unsigned char const *yy = (unsigned char const *) (_y_) - 1; \ - do { \ - if (*++xx == '\0') { \ - yy++; \ - break; \ - } \ - } while (*xx == *++yy); \ - (_result_) = *xx - *yy; \ -} while (0) -#define return_STRING_COMPARE(_x_, _y_) { \ - int result; \ - STRING_COMPARE (_x_, _y_, result); \ - return result; \ -} while (0) - -/* hash and comparison macros for integer keys. */ - -#define INTEGER_HASH_1(_key_, _result_) { \ - (_result_) = ((unsigned long)(_key_)); \ -} while (0) -#define return_INTEGER_HASH_1(_key_) { \ - unsigned long result = 0; \ - INTEGER_HASH_1 ((_key_), result); \ - return result; \ -} while (0) - -#define INTEGER_HASH_2(_key_, _result_) { \ - (_result_) = ~((unsigned long)(_key_)); \ -} while (0) -#define return_INTEGER_HASH_2(_key_) { \ - unsigned long result = 0; \ - INTEGER_HASH_2 ((_key_), result); \ - return result; \ -} while (0) - -#define INTEGER_COMPARE(_x_, _y_, _result_) { \ - (_result_) = _x_ - _y_; \ -} while (0) -#define return_INTEGER_COMPARE(_x_, _y_) { \ - int result; \ - INTEGER_COMPARE (_x_, _y_, result); \ - return result; \ -} while (0) - -/* hash and comparison macros for address keys. 
*/ - -#define ADDRESS_HASH_1(_key_, _result_) INTEGER_HASH_1 (((unsigned long)(_key_)) >> 3, (_result_)) -#define ADDRESS_HASH_2(_key_, _result_) INTEGER_HASH_2 (((unsigned long)(_key_)) >> 3, (_result_)) -#define ADDRESS_COMPARE(_x_, _y_, _result_) INTEGER_COMPARE ((_x_), (_y_), (_result_)) -#define return_ADDRESS_HASH_1(_key_) return_INTEGER_HASH_1 (((unsigned long)(_key_)) >> 3) -#define return_ADDRESS_HASH_2(_key_) return_INTEGER_HASH_2 (((unsigned long)(_key_)) >> 3) -#define return_ADDRESS_COMPARE(_x_, _y_) return_INTEGER_COMPARE ((_x_), (_y_)) - -#endif /* not _hash_h_ */ diff --git a/id.info b/id.info deleted file mode 100644 index 08834b6..0000000 --- a/id.info +++ /dev/null @@ -1,1433 +0,0 @@ -This is Info file id.info, produced by Makeinfo-1.55 from the input -file id.texinfo. - -START-INFO-DIR-ENTRY -* ID database: (id). Identifier database utilities. -* aid: (id)aid invocation:: Matching strings. -* eid: (id)eid invocation:: Invoking an editor on matches. -* fid: (id)fid invocation:: Listing a file's identifiers. -* gid: (id)gid invocation:: Listing all matching lines. -* idx: (id)idx invocation:: Testing mkid scanners. -* iid: (id)iid invocation:: Interactive complex queries. -* lid: (id)lid invocation:: Matching patterns. -* mkid: (id)mkid invocation:: Creating an ID database. -* pid: (id)pid invocation:: Looking up filenames. -END-INFO-DIR-ENTRY - - This file documents the `mkid' identifier database utilities. - - Copyright (C) 1991, 1995 Tom Horsley. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - - Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. 
- - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation. - - -File: id.info, Node: Top, Next: Introduction, Prev: (DIR), Up: (DIR) - -ID database utilities -********************* - - This manual documents version 3.0.9 of the ID database utilities. - -* Menu: - -* Introduction:: Overview of the tools, and authors. -* mkid invocation:: Creating an ID database. -* Common query arguments:: Common lookup options and search patterns. -* gid invocation:: Listing all matching lines. -* Looking up identifiers:: lid, aid, eid, and fid. -* pid invocation:: Looking up filenames. -* iid invocation:: Interactive and complex queries. -* Index:: General index. - - -File: id.info, Node: Introduction, Next: mkid invocation, Prev: Top, Up: Top - -Introduction -************ - - An "ID database" is a binary file containing a list of filenames, a -list of identifiers, and a matrix indicating which identifiers appear in -which files. With this database and some tools to manipulate it -(described in this manual), a host of tasks become simpler and faster. -For example, you can list all files containing a particular `#include' -throughout a huge source hierarchy, search for all the memos containing -references to a project, or automatically invoke an editor on all files -containing references to some function. Anyone with a large software -project to maintain, or a large set of text files to organize, can -benefit from an ID database. - - Although the ID utilities are most commonly used with identifiers, -numeric constants are also stored in the database, and can be searched -for in the same way (independent of radix, if desired). - - There are a number of programs in the ID family: - -`mkid' - scans files for identifiers and numeric constants and builds the ID - database file. 
- -`gid' - lists all lines that match given patterns. - -`lid' - lists the filenames containing identifiers that match given - patterns. - -`aid' - lists the filenames containing identifiers that contain given - strings, independent of case. - -`eid' - invokes an editor on each file containing identifiers that match - given patterns. - -`fid' - lists all identifiers recorded in the database for given files, or - identifiers common to two files. - -`pid' - matches the filenames in the database, rather than the identifiers. - -`iid' - interactively supports more complex queries, such as intersection - and union. - -`idx' - helps with testing of new `mkid' scanners. - - Please report bugs to `gkm@magilla.cichlid.com'. Remember to -include the version number, machine architecture, input files, and any -other information needed to reproduce the bug: your input, what you -expected, what you got, and why it is wrong. Diffs are welcome, but -please include a description of the problem as well, since this is -sometimes difficult to infer. *Note Bugs: (gcc)Bugs. - -* Menu: - -* Past and future:: How the ID tools came about, and where they're going. - - -File: id.info, Node: Past and future, Up: Introduction - -Past and future -=============== - - Greg McGary conceived of the ideas behind mkid when he began hacking -the Unix kernel in 1984. He needed a navigation tool to help him find -his way around the expansive, unfamiliar landscape. The first `mkid'-like -tools were shell scripts, and produced an ASCII database that looks much -like the output of `lid' with no arguments. It took over an hour on a -VAX 11/750 to build a database for a 4.1BSD-ish kernel. Lookups were -done with the system utility `look', modified to handle very long lines. - - In 1986, Greg rewrote `mkid', `lid', `fid' and `idx' in C to improve -performance. Database-build times were shortened by an order of -magnitude. The `mkid' tools were first posted to `comp.sources.unix' -in September 1987. 
- - Over the next few years, several versions diverged from the original -source. Tom Horsley at Harris Computer Systems Division stepped forward -to take over maintenance and integrated some of the fixes from divergent -versions. He also wrote the `iid' program. A first release of `mkid' -version 2 was posted to `alt.sources' near the end of 1990. At that -time, Tom wrote this Texinfo manual with the encouragement of the net -community. (Tom especially thanks Doug Scofield and Bill Leonard whom -he dragooned into helping poorfraed and edit--they found several -problems in the initial version.) Karl Berry revamped the manual for -Texinfo style, indexing, and organization in 1995. - - In January 1995, Greg McGary reemerged as the primary maintainer and -launched development of `mkid' version 3, whose primary new feature is -an efficient algorithm for building databases that is linear in both -time and space over the size of the input text. (The old algorithm was -quadratic in space and therefore choked on very large source trees.) -The code is released under the GNU General Public License, and might become a -part of the GNU system. `mkid' 3 is an interim release, since several -significant enhancements are still in the works: an optional coupling -with GNU `grep', so that `grep' can use an ID database for hints; a -`cscope' work-alike query interface; incremental update of the ID -database; and an automatic file-tree walker so you need not explicitly -supply every filename argument to the `mkid' program. - - -File: id.info, Node: mkid invocation, Next: Common query arguments, Prev: Introduction, Up: Top - -`mkid': Creating ID databases -***************************** - - The `mkid' program builds an ID database. To do this, it must scan -each file you tell it to include in the database. This takes some time, -but once the work is done the query programs run very rapidly. (You can -run `mkid' as a `cron' job to regularly update your databases.) 
- - The `mkid' program knows how to extract identifiers from various -types of files. For example, it can recognize and skip over comments -and string constants in a C program. - - Identifiers are not the only thing included in the database. Numbers -are also recognized and included in the database indexed by their binary -value. This feature allows you to find uses of constants without regard -to the radix used to specify them, since the same number can frequently -be written in many different ways (for instance, `47', `0x2f', `057' in -C). - - All the places in this document which mention identifiers should -really mention both identifiers and numbers, but that gets fairly -clumsy after a while, so you just need to keep in mind that numbers are -included in the database as well as identifiers. - - The ID files that `mkid' creates are architecture- and -byte-order-independent; you can share them at will across systems. - -* Menu: - -* mkid options:: Command-line options to mkid. -* Scanners:: Built-in and defining your own. -* mkid examples:: Examples of mkid usage. - - -File: id.info, Node: mkid options, Next: Scanners, Up: mkid invocation - -`mkid' options -============== - - By default, `mkid' scans the files you specify and writes the -database to a file named `ID' in the current directory. - - mkid [-v] [-SSCANARG] [-aARGFILE] [-] [-fIDFILE] FILES... - - The program accepts the following options. - -`-v' - Verbose. `mkid' tells you as it scans each file and indicates - which scanner it is using. It also summarizes some statistics - about the database at the end. - -`-SSCANARG' - Specify options regarding `mkid''s scanners. *Note Scanner option - formats::. - -`-aARGFILE' - Read additional command line arguments from ARGFILE. This is - typically used to specify lists of filenames longer than will fit - on a command line; some systems have severe limitations on the - total length of a command line. 
- -`-' - Read additional command line arguments from standard input. - -`-fIDFILE' - Write the database to the file IDFILE, instead of `ID'. The - database stores filenames relative to the directory containing the - database, so if you move the database to a different directory - after creating it, you may have trouble finding files. - - The remaining arguments FILES are the files to be scanned and -included in the database. If no files are given at all (either on -command line or via `-a' or `-'), `mkid' does nothing. - - -File: id.info, Node: Scanners, Next: mkid examples, Prev: mkid options, Up: mkid invocation - -Scanners -======== - - To determine which identifiers to extract from a file and store in -the database, `mkid' calls a "scanner"; we say a scanner "recognizes" a -particular language. Scanners for several languages are built-in to -`mkid'; you can add your own scanners as well, as explained in the -sections below. - - `mkid' determines which scanner to use for a particular file by -looking at the suffix of the filename. This "suffix" is everything -after and including the last `.' in a filename; for example, the suffix -of `foo.c' is `.c'. `mkid' has a built-in list of bindings from some -suffixes to corresponding scanners; for example, `.c' files are (not -surprisingly) scanned by the predefined C language scanner. - - If `mkid' cannot determine what scanner to use for a particular -file, either because the file has no suffix (e.g., `foo') or because -`mkid' has no binding for the file's suffix (e.g., `foo.bar'), it uses -the scanner bound to the `.default' suffix. By default, this is the -plain text scanner (*note Plain text scanner::.), but you can change -this with the `-S' option, as explained below. - -* Menu: - -* Scanner option formats:: Overview of the -S option. -* Predefined scanners:: The C, plain text, and assembler scanners. -* Defining new scanners:: Either in source code or at runtime with -S. -* idx invocation:: Testing mkid scanners. 
- - -File: id.info, Node: Scanner option formats, Next: Predefined scanners, Up: Scanners - -Scanner option formats ----------------------- - - With the `-S' option, you can change which language scanner to use -for which files, give language-specific options, and get some limited -online help about scanner options. - - Here are the different forms of the `-S' option: - -`-S.SUFFIX=SCANNER' - Use SCANNER for a file with the given `.SUFFIX'. For example, - `-S.yacc=c' tells `mkid' to use the `c' language scanner for all - files ending in `.yacc'. - -`-S.SUFFIX=?' - Display which scanner is used for the given `.SUFFIX'. - -`-S?=SCANNER' - Display which suffixes SCANNER is used for. - -`-S?=?' - Display the scanner binding for every known suffix. - -`-SSCANNER+ARG' -`-SSCANNER-ARG' - Each scanner accepts certain scanner-dependent arguments. These - options all have one of these forms. *Note Predefined scanners::. - -`-SSCANNER?' - Display the scanner-specific options accepted by SCANNER. - -`-SNEW-SCANNER/OLD-SCANNER/FILTER-COMMAND' - Define NEW-SCANNER in terms of OLD-SCANNER and FILTER-COMMAND. - *Note Defining scanners with options::. - - -File: id.info, Node: Predefined scanners, Next: Defining new scanners, Prev: Scanner option formats, Up: Scanners - -Predefined scanners -------------------- - - `mkid' has built-in scanners for several types of languages; you can -get the list by running `mkid -S?=?'. The supported languages are -documented below(1). - -* Menu: - -* C scanner:: For the C programming language. -* Plain text scanner:: For documents or other non-source code. -* Assembler scanner:: For assembly language. - - ---------- Footnotes ---------- - - (1) This is not strictly true: `vhil' is a supported language, but -it is an obsolete and arcane dialect of C and should be ignored. - - -File: id.info, Node: C scanner, Next: Plain text scanner, Up: Predefined scanners - -C scanner -......... - - The C scanner is the most commonly used. 
Files with the usual `.c' -and `.h' suffixes, and the `.y' (yacc) and `.l' (lex) suffixes, are -processed with this scanner (by default). - - Scanner-specific options: - -`-Sc-sCHARACTER' - Allow the specified CHARACTER in identifiers. For example, if you - use `$' in identifiers, you'll want to use `-Sc-s$'. - -`-Sc+u' - Strip leading underscores from identifiers. You might want to do this in - peculiar circumstances, such as trying to parse the output from - `nm' or some other system utility. - -`-Sc-u' - Don't strip leading underscores from identifiers; this is the - default. - - -File: id.info, Node: Plain text scanner, Next: Assembler scanner, Prev: C scanner, Up: Predefined scanners - -Plain text scanner -.................. - - The plain text scanner is intended for scanning most non-source-code -files. This is typically the scanner used when adding custom scanners -via `-S' (*note Defining scanners with options::.). - - Scanner-specific options: - -`-Stext+aCHARACTER' - Include CHARACTER in identifiers. By default, letters (a-z and - A-Z) and underscore are included. - -`-Stext-aCHARACTER' - Exclude CHARACTER from identifiers. - -`-Stext+sCHARACTER' - Squeeze CHARACTER from identifiers, i.e., do not terminate an - identifier when CHARACTER is seen. By default, the characters - `'', `-', and `.' are squeezed out of identifiers. For example, - the input `fred's' leads to the identifier `freds'. - -`-Stext-sCHARACTER' - Do not squeeze CHARACTER. - - -File: id.info, Node: Assembler scanner, Prev: Plain text scanner, Up: Predefined scanners - -Assembler scanner -................. - - Since assembly languages come in several flavors, this scanner has a -number of options: - -`-Sasm-cCHARACTER' - Define CHARACTER as starting a comment that extends to the end of - the input line; no default. In many assemblers this is `;' or `#'. - -`-Sasm+u' -`-Sasm-u' - Strip (`+u') or do not strip (`-u') leading underscores from - identifiers. The default is to strip them. 
- -`-Sasm+aCHARACTER' - Allow CHARACTER in identifiers. - -`-Sasm-aCHARACTER' - Allow CHARACTER in identifiers, but if an identifier contains - CHARACTER, ignore it. This is useful to ignore temporary labels, - which can be generated in great profusion; these often contain `.' - or `@'. - -`-Sasm+p' -`-Sasm-p' - Recognize (`+p') or do not recognize (`-p') C preprocessor - directives in assembler source. The default is to recognize them. - -`-Sasm+C' -`-Sasm-C' - Skip over (`+C') or do not skip over (`-C') C style comments in - assembler source. The default is to skip them. - - -File: id.info, Node: Defining new scanners, Next: idx invocation, Prev: Predefined scanners, Up: Scanners - -Defining new scanners ---------------------- - - You can add new scanners to `mkid' in two ways: modify the source -code and recompile, or at runtime via the `-S' option. Each has its -advantages and disadvantages, as explained below. - - If you create a new scanner that would be of use to others, please -consider sending it back to the maintainer, `gkm@magilla.cichlid.com', -for inclusion in future releases of `mkid'. - -* Menu: - -* Defining scanners in source code:: -* Defining scanners with options:: - - -File: id.info, Node: Defining scanners in source code, Next: Defining scanners with options, Up: Defining new scanners - -Defining scanners in source code -................................ - - To add a new scanner in source code, you should add a new section to -the file `scanners.c'. Copy one of the existing scanners (most likely -either C or plain text), and modify as necessary. Also add the new -scanner to the `languages_0' and `suffixes_0' tables near the beginning -of the file. - - This is not a terribly difficult programming task, but it requires -recompiling and installing the new version of `mkid', which may be -inconvenient. - - This method leads to scanners which operate much more quickly than -ones that depend on external programs. 
It is also likely the -easiest way to define scanners for new programming languages. - - -File: id.info, Node: Defining scanners with options, Prev: Defining scanners in source code, Up: Defining new scanners - -Defining scanners with options -.............................. - - You can use the `-S' option on the command line to define a new -language scanner: - - -SNEW-SCANNER/EXISTING-SCANNER/FILTER - -Here, NEW-SCANNER is the name of the new scanner being defined, -EXISTING-SCANNER is the name of an existing scanner, and FILTER is a -shell command or pipeline. - - The new scanner works by passing the input file to FILTER, and then -arranging for the result to be passed through EXISTING-SCANNER. -Typically, EXISTING-SCANNER is `text'. - - Somewhere within FILTER, the string `%s' should occur. This `%s' is -replaced by the name of the source file being scanned. - - For example, `mkid' has no built-in scanner for Texinfo files (like -this one). In indexing a Texinfo file, you most likely would want to -ignore the Texinfo @-commands. Here's one way to specify a new scanner -to do this: - - -Stexinfo/text/sed s,@[a-z]*,,g %s - - This defines a new language scanner (`texinfo') defined in terms of -a `sed' command to strip out Texinfo directives (an `@' character -followed by letters). Once the directives are stripped, the remaining -text is run through the plain text scanner. - - This is a minimal example; to do a complete job, you would need to -completely delete some lines, such as those beginning with `@end' or -`@node'. - - -File: id.info, Node: idx invocation, Prev: Defining new scanners, Up: Scanners - -`idx': Testing `mkid' scanners ------------------------------- - - `idx' prints the identifiers found in the files you specify to -standard output. This is useful in debugging new `mkid' scanners (*note -Scanners::.). Synopsis: - - idx [-SSCANARG] FILES... - - `idx' accepts the same `-S' options as `mkid'. *Note Scanner option -formats::. 
- - The name "idx" stands for "ID eXtract". The name may change in -future releases, since this is such an infrequently used program. - - -File: id.info, Node: mkid examples, Prev: Scanners, Up: mkid invocation - -`mkid' examples -=============== - - The simplest example of `mkid' is something like: - - mkid *.[chy] - - This will build an ID database indexing identifiers and numbers in -all the `.c', `.h', and `.y' files in the current directory. -Because `mkid' already knows how to scan files with those suffixes, no -additional options are needed. - - Here's a more complex example. Suppose you want to build a database -indexing the contents of all the `man' pages, and further suppose that -your system is using `gzip' (*note Top: (gzip)Top.) to store compressed -`cat' versions of the `man' pages in the directory `/usr/catman'. The -`gzip' program creates files with a `.gz' suffix, so you must tell -`mkid' how to scan `.gz' files. Here are the commands to do the job: - - cd /usr/catman - find . -name \*.gz -print | mkid '-Sman/text/gzip <%s' -S.gz=man - - -Explanation: - - 1. We first `cd' to `/usr/catman' so the ID database will store the - correct relative filenames. - - 2. The `find' command prints the names of all `.gz' files under the - current directory. *Note find invocation: (sh-utils)find - invocation. - - 3. This list is piped to `mkid'; the `-' option (at the end of the - line) tells `mkid' to read arguments (in this case, as is typical, - the list of filenames) from standard input. *Note mkid options::. - - 4. The `-Sman/text/gzip ...' defines a new language `man' in terms of - the `gzip' program and `mkid''s existing text scanner. *Note - Defining scanners with options::. - - 5. The `-S.gz=man' tells `mkid' to treat all `.gz' files as this new - language `man'. *Note Scanner option formats::. - - - As a further complication, `cat' pages typically contain underlining -and backspace sequences, which will confuse `mkid'. 
To handle this, -the `gzip' command becomes a pipeline, like this: - - mkid '-Sman/text/gzip <%s | col -b' -S.gz=man - - - -File: id.info, Node: Common query arguments, Next: gid invocation, Prev: mkid invocation, Up: Top - -Common query arguments -********************** - - Certain options, and regular expression syntax, are shared by the ID -query tools. So we describe those things in the sections below, instead -of repeating the description for each tool. - -* Menu: - -* Query options:: -f -r -c -ew -kg -n -doxa -m -F -u. -* Patterns:: Regular expression syntax for searches. -* Examples: Query examples. Some common uses. - - -File: id.info, Node: Query options, Next: Patterns, Up: Common query arguments - -Query options -============= - - The ID query tools (*not* `mkid') share certain command line -options. Not all of these options are recognized by all programs, but -if an option is used by more than one program, it is described below. -The description of each program gives the options that program uses. - -`-fIDFILE' - Read the database from IDFILE, in the current directory or in any - directory above the current directory. The default database name - is `ID'. Searching parent directories lets you have a single ID - database at the root of a large source tree and then use the query - tools from anywhere within that tree. - -`-rDIRECTORY' - Find files relative to DIRECTORY, instead of the directory in - which the ID database was found. This is useful if the ID - database was moved after its creation. - -`-c' - Equivalent to `-r`pwd`', i.e., find files relative to the current - directory, instead of the directory in which the ID database was - found. - -`-e' -`-w' - `-e' forces pattern arguments to be treated as regular expressions, - and `-w' forces pattern arguments to be treated as constant - strings. By default, the query tools guess whether a pattern is - regular expressions or constant strings by looking for special - characters. *Note Patterns::. 
- -`-k' -`-g' - `-k' suppresses use of shell brace notation in the output. By - default, the query tools that generate lists of filenames attempt - to compress the lists using the usual shell brace notation, e.g., - `{foo,bar}.c' to mean `foo.c' and `bar.c'. (This is useful if you - use `ksh' or the original (not GNU) `sh' and want to feed the list - of names to another command, since those shells do not support - this brace notation; the name of the `-k' option comes from the - `k' in `ksh'). - - `-g' turns on use of brace notation; this is only needed if the - query tools were compiled with `-k' as the default behavior. - -`-n' - Suppress the matching identifier before each list of filenames - that the query tools output by default. This is useful if you want - a list of just the names to feed to another command. - -`-d' -`-o' -`-x' -`-a' - These options may be used in any combination to specify the radix - of numeric matches. `-d' allows matching on decimal numbers, `-o' - on octal numbers, and `-x' on hexadecimal numbers. The `-a' - option is equivalent to specifying all three; this is the default. - Any combination of these options may be used. - -`-m' - Merge multiple lines of output into a single line. If your query - matches more than one identifier, the default is to generate a - separate line of output for each matching identifier. - -`-F-' -`-FN' -`-F-M' -`-FN-M' - Show identifiers matching at least N and at most M times. `-F-' - is equivalent to `-F1', i.e., find identifiers that appear only - once in the database. (This is useful to locate identifiers that - are defined but never used, or used once and never defined.) - -`-uNUMBER' - List identifiers that conflict in the first NUMBER characters. - This could be useful in porting programs to brain-dead computers - that refuse to support long identifiers, but your best long term - option is to set such computers on fire. 
- - -File: id.info, Node: Patterns, Next: Query examples, Prev: Query options, Up: Common query arguments - -Patterns -======== - - "Patterns", also called "regular expressions", allow you to match -many different identifiers in a single query. - - The same regular expression syntax is recognized by all the query -tools that handle regular expressions. The exact syntax depends on how -the ID tools were compiled, but the following constructs should always -be supported: - -`.' - Match any single character. - -`[CHARS]' - Match any of the characters specified within the brackets. You can - match any characters *except* the ones in brackets by typing `^' - as the first character. A range of characters can be specified - using `-'. For example, `[abc]' and `[a-c]' both match `a', `b', - or `c', and `[^abc]' matches anything *except* `a', `b', or `c'. - -`*' - Match the previous construct zero or more times. - -`^' -`$' - `^' (`$') at the beginning (end) of a pattern anchors the match to - the first (last) character of the identifier. - - The query programs use either the `regex'/`regcmp' or -`re_comp'/`re_exec' functions, depending on which are available in the -library on your system. These do not always support the exact same -regular expression syntax, so consult your local `man' pages to find -out. - - -File: id.info, Node: Query examples, Prev: Patterns, Up: Common query arguments - -Query examples -============== - - Here are some examples of the options described in the previous -sections. - - To restrict searches to exact matches, use `^...$'. For example: - - prompt$ gid '^FILE$' - ansi2knr.c:144: { FILE *in, *out; - ansi2knr.c:315: FILE *out; - fid.c:38: FILE *id_FILE; - filenames.c:576: FILE * - ... - - To show identifiers not unique in the first 16 characters: - - prompt$ lid -u16 - RE_CONTEXT_INDEP_ANCHORS regex.c - RE_CONTEXT_INDEP_OPS regex.c - RE_SYNTAX_POSIX_BASIC regex.c - RE_SYNTAX_POSIX_EXTENDED regex.c - ... 
- - Numbers are searched for numerically rather than textually. For -example: - - prompt$ lid 0xff - 0377 {lid,regex}.c - 0xff {bitops,fid,lid,mkid}.c - 255 regex.c - - On the other hand, you can restrict a numeric search to a particular -radix if you want: - - prompt$ lid -x 0xff - 0xff {bitops,fid,lid,mkid}.c - - Filenames in the output are always adjusted to be correct for the -current working directory. For example: - - prompt$ lid bdevsw - bdevsw sys/conf.h cf/conf.c io/bio.c os/{fio,main,prf,sys3}.c - prompt$ cd io - prompt$ lid bdevsw - bdevsw ../sys/conf.h ../cf/conf.c bio.c ../os/{fio,main,prf,sys3}.c - - -File: id.info, Node: gid invocation, Next: Looking up identifiers, Prev: Common query arguments, Up: Top - -`gid': Listing matching lines -***************************** - - Synopsis: - - gid [-fFILE] [-uN] [-rDIR] [-doxasc] [PATTERN...] - - `gid' finds the identifiers in the database that match the specified -PATTERNs, then searches for all occurrences of those identifiers, in -only the files containing matches. In a large source tree, this saves -an enormous amount of time (compared to searching every source file). - - With no PATTERN arguments, `gid' prints every line of every source -file. - - The name "gid" stands for "grep for identifiers", `grep' being the -standard utility to search regular files. - - *Note Common query arguments::, for a description of the command-line -options and PATTERN arguments. - - `gid' uses the standard GNU output format for identifying source -lines: - - FILENAME:LINENUM: TEXT - - Here is an example: - - prompt$ gid FILE - ansi2knr.c:144: { FILE *in, *out; - ansi2knr.c:315: FILE *out; - fid.c:38: FILE *id_FILE; - ... - -* Menu: - -* GNU Emacs gid interface:: Using next-error with gid. - - -File: id.info, Node: GNU Emacs gid interface, Up: gid invocation - -GNU Emacs `gid' interface -========================= - - The `mkid' source distribution comes with a file `gid.el', which -defines a GNU Emacs interface to `gid'. 
To install it, put `gid.el' -somewhere that Emacs will find it (i.e., in your `load-path') and put - - (autoload 'gid "gid" nil t) - -in one of Emacs' initialization files, e.g., `~/.emacs'. You will then -be able to use `M-x gid' to run the command. - - The `gid' function prompts you with the word around point. If you -want to search for something else, simply delete the line and type the -pattern of interest. - - The function then runs the `gid' program in a `*compilation*' -buffer, so the normal `next-error' function can be used to visit all -the places the identifier is found (*note Compilation: -(emacs)Compilation.). - - -File: id.info, Node: Looking up identifiers, Next: pid invocation, Prev: gid invocation, Up: Top - -Looking up identifiers -********************** - - These commands look up identifiers in the ID database and operate on -the files containing matches. - -* Menu: - -* lid invocation:: Matching patterns. -* aid invocation:: Matching strings. -* eid invocation:: Invoking an editor on matches. -* fid invocation:: Listing a file's identifiers. - - -File: id.info, Node: lid invocation, Next: aid invocation, Up: Looking up identifiers - -`lid': Matching patterns -======================== - - Synopsis: - - lid [-fFILE] [-uN] [-rDIR] [-mewdoxaskgnc] PATTERN... - - `lid' searches the database for identifiers matching the given -PATTERN arguments and prints the names of the files that match each -PATTERN. With no PATTERNs, `lid' lists every entry in the database. - - The name "lid" stands for "lookup identifier". - - *Note Common query arguments::, for a description of the command-line -options and PATTERN arguments. - - By default, each line of output consists of an identifier and all the -files containing that identifier. 
- - Here is an example showing a search for a single identifier (omitting -some output to keep lines short): - - prompt$ lid FILE - FILE extern.h {fid,gets0,getsFF,idx,init,lid,mkid,...}.c - - This example shows a regular expression search: - - prompt$ lid 'FILE$' - AF_FILE mkid.c - AF_IDFILE mkid.c - FILE extern.h {fid,gets0,getsFF,idx,init,lid,mkid,...}.c - IDFILE id.h {fid,lid,mkid}.c - IdFILE {fid,lid}.c - ... - -As you can see, when a regular expression is used, it is possible to -get more than one line of output. To merge multiple lines into one, -use `-m': - - prompt$ lid -m ^get - ^get extern.h {bitsvec,fid,gets0,getsFF,getscan,idx,lid,...}.c - - -File: id.info, Node: aid invocation, Next: eid invocation, Prev: lid invocation, Up: Looking up identifiers - -`aid': Matching strings -======================= - - Synopsis: - - aid [-fFILE] [-uN] [-rDIR] [-mewdoxaskgnc] STRING... - - `aid' searches the database for identifiers containing the given -STRING arguments. The search is case-insensitive. - - The name "aid" stands for "apropos identifier", `apropos' being a -command that does a similar search of the `whatis' database of `man' -descriptions. - - For example, `aid get' matches the identifiers `fgets', `GETLINE', -and `getchar'. - - The default output format is the same as `lid'; see the previous -section. - - *Note Common query arguments::, for a description of the command-line -options and PATTERN arguments. - - -File: id.info, Node: eid invocation, Next: fid invocation, Prev: aid invocation, Up: Looking up identifiers - -`eid': Invoking an editor on matches -==================================== - - Synopsis: - - eid [-fFILE] [-uN] [-rDIR] [-doxasc] [PATTERN]... - - `eid' runs the usual search (*note lid invocation::.) on the given -arguments, shows you the output, and then asks: - - Edit? [y1-9^S/nq] - -You can respond with: - -`y' - Edit all files listed. - -`1...9' - Start editing at the N + 1'st file. 
- -`/STRING or `CTRL-S'STRING' - Start editing at the first filename containing STRING. - -`n' - Go on to the next PATTERN, i.e., edit nothing for this one. - -`q' - Quit `eid'. - - `eid' invokes the editor defined by the `EDITOR' environment -variable to edit a file. If this editor can accept an initial search -argument on the command line, `eid' can move automatically to the -location of the match, via the environment variables below. - - *Note Common query arguments::, for a description of the command-line -options and PATTERN arguments. - - Here are the environment variables relevant to `eid': - -`EDITOR' - The name of the editor program to invoke. - -`EIDARG' - The argument to pass to the editor to search for the matching - identifier. For `vi', this should be `+/%s/''. - -`EIDLDEL' - A regular expression to force a match at the beginning of a word - ("left delimiter"). `eid' inserts this in front of the matching - identifier when composing the search argument. For `vi', this - should be `\<'. - -`EIDRDEL' - The end-of-word regular expression. For `vi', this should be `\>'. - - For Emacs users, the interface in `gid.el' is probably preferable to -`eid'. *Note GNU Emacs gid interface::. - - Here is an example: - - prompt$ eid FILE \^print - FILE {ansi2knr,fid,filenames,idfile,idx,iid,lid,misc,...}.c - Edit? [y1-9^S/nq] n - ^print {ansi2knr,fid,getopt,getopt1,iid,lid,mkid,regex,scanners}.c - Edit? [y1-9^S/nq] 2 - -This will start editing at `getopt.c'. - - -File: id.info, Node: fid invocation, Prev: eid invocation, Up: Looking up identifiers - -`fid': Listing a file's identifiers -=================================== - - `fid' lists the identifiers found in a given file. Synopsis: - - fid [-fDBFILE] FILE1 [FILE2] - -`-fDBFILE' - Read the database from DBFILE instead of `ID'. - -`FILE1' - List all the identifiers contained in FILE1. - -`FILE2' - With a second file argument, list only the identifiers both files - have in common. 
- - The output is simply one identifier (or number) per line. - - -File: id.info, Node: pid invocation, Next: iid invocation, Prev: Looking up identifiers, Up: Top - -`pid': Looking up filenames -*************************** - - `pid' matches the filenames stored in the ID database, rather than -the identifiers. Synopsis: - - pid [-fDBFILE] [-rDIR] [-ebkgnc] WILDCARD... - - By default, the WILDCARD patterns are treated as shell globbing -patterns, rather than the regular expressions the other utilities -accept. See the section below for details. - - Besides the standard options given in the synopsis (*note Query -options::.), `pid' accepts the following: - -`-e' - Do the usual regular expression matching (*note Patterns::.), - instead of shell wildcard matching. - -`-b' - Match the basenames of the files in the database. For example, - `pid -b foo' will match the stored filename `dir/foo', but not - `foo/file'. - - For example, the command: - - pid \*.c - -lists all the `.c' files in the database. (The `\' here protects the -`*' from being expanded by the shell.) - -* Menu: - -* Wildcard patterns:: Shell-style globbing patterns. - - -File: id.info, Node: Wildcard patterns, Up: pid invocation - -Wildcard patterns -================= - - `pid' does simplified shell wildcard matching (unless the `-e' -option is specified), rather than the regular expression matching done -by the other utilities. Here is a description of wildcard matching, -also called "globbing": - - * `*' matches zero or more characters. - - * `?' matches any single character. - - * `\' forces the next character to be taken literally. - - * `[CHARS]' matches any single character listed in CHARS. - - * `[!CHARS]' matches any character *not* listed in CHARS. - - Most shells treat `/' and leading `.' characters specially. `pid' -does not do this. It simply matches the filename in the database -against the wildcard pattern. 
- - -File: id.info, Node: iid invocation, Next: Index, Prev: pid invocation, Up: Top - -`iid': Complex interactive queries -********************************** - - `iid' is an interactive query utility for ID databases. It operates -by running another query program (`lid' by default, `aid' if `-a' is -specified) and manipulating the sets of filenames returned by these -queries. - -* Menu: - -* iid command line options:: Command-line options. -* iid query expressions:: Operands to the commands. -* iid commands:: Printing matching filenames, etc. - - -File: id.info, Node: iid command line options, Next: iid query expressions, Up: iid invocation - -`iid' command line options -========================== - - `iid' recognizes the following options (the standard query options -described in *Note Query options:: are inapplicable): - -`-a' - Use `aid' for searches, instead of `lid'. - -`-cCOMMAND' - Execute COMMAND and exit, instead of prompting for interactive - commands. - -`-H' - Print a usage message and exit successfully. The `help' command - inside `iid' gives more information. *Note iid commands::. - - -File: id.info, Node: iid query expressions, Next: iid commands, Prev: iid command line options, Up: iid invocation - -`iid' query expressions -======================= - - An `iid' "query expression" generates a set of filenames or -manipulates existing sets. These expressions are operands to some of -the `iid' commands (see the next section), not commands themselves. - - Here are the possible constructs, highest precedence first: - -`sSET-NUMBER' - Refer to a set previously created by a query operation. During - each `iid' session, every query generates a different set number, - so any previously generated set may be used as part of any new - query by reference to its set number. - -`PATTERN' - `iid' treats any non-keyword input (i.e., anything not in this - table) as an identifier to be searched for in the database. 
It is - passed to the search program (`lid' by default, `aid' if the `-a' - option was specified). The result of this operation is a set of - filenames, and it is assigned a unique set number. - -`lid IDENTIFIER-LIST' - Invoke the `lid' program on IDENTIFIER-LIST and construct a new - set from the result. - -`aid IDENTIFIER-LIST' - Like `lid', but use the `aid' program. - -`match WILDCARDS' - Invoke the `pid' program on WILDCARDS, therefore matching on the - filenames in the database instead of the identifiers. The - resulting set contains the filenames that match the specified - patterns. *Note pid invocation::. - -`not EXPR' - The result is those filenames in the database that are not in EXPR. - -`EXPR1 and EXPR2' - The result is the intersection of the sets EXPR1 and EXPR2, i.e., - only those filenames contained in both. - -`EXPR1 or EXPR2' - The result is the union of the sets EXPR1 and EXPR2, i.e., all the - filenames contained in either or both. - - Operator names are recognized independent of case, so `AND', `and', -and `aNd' are all the same as far as `iid' is concerned. - - To pass a keyword as an operand, you must enclose it in double -quotes: the command `lid "lid"' generates the set of all filenames -matching the string `lid'. - - Patterns containing shell metacharacters (such as `*' or `?') must -also be properly quoted, since the query commands are run by invoking -them with the shell. - - -File: id.info, Node: iid commands, Prev: iid query expressions, Up: iid invocation - -`iid' commands -============== - - This section describes the interactive commands that `iid' -recognizes. The database query expressions you can pass to the `ss' -and `files' commands are described in the previous section. - - Some commands output a "summary line" for sets. These lines show the -set number, the number of filenames in the set, and the command that -generated it. - -`ss QUERY' - Build the set(s) of filenames resulting from the query expression - QUERY. 
The output is a summary line for each set. - -`files QUERY' -`f QUERY' - Evaluate the query expression QUERY as in `ss', but output the - full list of matching filenames instead of a summary. - -`sets' - Output a summary line for each extant set. - -`show SET' -`p SET' - Pass the filename in the set number SET to the program named in - the `PAGER' environment variable. Typically, this is a - page-at-a-time display program like `less' or `more'. If you use - Emacs, you might want to set `PAGER' to `emacsclient' (*note Emacs - Server: (emacs)Emacs Server.). - -`anything else' - When `iid' does not recognize the first word on an input line as a - builtin `iid' command, it assumes the input is a shell command - which will write a list of filenames to standard output, which it - gathers into a set as usual. - - Any set numbers that appear in the input are expanded into the - lists of filenames they represent prior to running the command. - -`!SHELL-COMMAND' - Expand set numbers that appear in SHELL-COMMAND into the filenames they - represent, and pass the result to `/bin/sh'. The output is not - interpreted. - -`begin DIRECTORY' -`b DIRECTORY' - Begin a new `iid' session in a different directory (which - presumably contains a different database). It deletes all the sets - created so far and switches to the specified directory. It is - equivalent to exiting `iid', changing directories in the shell, and - running `iid' again. - -`help' -`h' -`?' - Display a short help file using the program named in `PAGER'. - -`quit' -`q' -`off' - Quit `iid'. An end-of-file character (usually `CTRL-D') also exits. - - -File: id.info, Node: Index, Prev: iid invocation, Up: Top - -Index -***** - -* Menu: - -* $ in identifiers: C scanner. -* * in globbing: Wildcard patterns. -* *scratch* Emacs buffer: GNU Emacs gid interface. -* -: mkid options. -* -a: iid command line options. -* -a: Query options. -* -aARGFILE: mkid options. -* -b: pid invocation. -* -c: iid command line options. 
-* -c: Query options. -* -d: Query options. -* -e: pid invocation. -* -e: Query options. -* -F: Query options. -* -fIDFILE: Query options. -* -g: Query options. -* -H: iid command line options. -* -k: Query options. -* -m: Query options. -* -n: Query options. -* -o: Query options. -* -rDIRECTORY: Query options. -* -S scanner option: Scanner option formats. -* -S.: Scanner option formats. -* -S?: Scanner option formats. -* -SSCANARG: mkid options. -* -Sasm+a: Assembler scanner. -* -Sasm+C: Assembler scanner. -* -Sasm+p: Assembler scanner. -* -Sasm+u: Assembler scanner. -* -Sasm-c: Assembler scanner. -* -Sc+u: C scanner. -* -Sc-s: C scanner. -* -Sc-u: C scanner. -* -Stext+a: Plain text scanner. -* -Stext+s: Plain text scanner. -* -Stext-a: Plain text scanner. -* -u: Query options. -* -v: mkid options. -* -w: Query options. -* -x: Query options. -* .default scanner: Scanners. -* .[chly] files, scanning: C scanner. -* ? in globbing: Wildcard patterns. -* aid: aid invocation. -* aid used for iid searches: iid command line options. -* architecture-independence: mkid invocation. -* assembler scanner: Assembler scanner. -* basename match: pid invocation. -* beginning-of-word editor argument: eid invocation. -* Berry, Karl: Past and future. -* brace notation in filename lists: Query options. -* bugs, reporting: Introduction. -* C scanner, predefined: C scanner. -* case-insensitive searching: aid invocation. -* commands for iid: iid commands. -* comments in assembler: Assembler scanner. -* common query arguments: Common query arguments. -* common query options: Query options. -* complex queries: iid invocation. -* compressed files, building ID from: mkid examples. -* conflicting identifiers, finding: Query options. -* constant strings, forcing evaluation as: Query options. -* creating databases: mkid invocation. -* cron: mkid invocation. -* cscope: Past and future. -* database name, specifying: Query options. -* databases, creating: mkid invocation. 
-* EDITOR: eid invocation. -* eid: eid invocation. -* EIDARG: eid invocation. -* EIDLDEL: eid invocation. -* EIDRDEL: eid invocation. -* Emacs interface to gid: GNU Emacs gid interface. -* emacsclient: iid commands. -* end-of-word editor argument: eid invocation. -* examples of mkid: mkid examples. -* examples, queries: Query examples. -* fid: fid invocation. -* filenames, matching: pid invocation. -* future: Past and future. -* gid Emacs function: GNU Emacs gid interface. -* gid.el interface to Emacs: GNU Emacs gid interface. -* globbing patterns: Wildcard patterns. -* grep: Past and future. -* help for iid: iid command line options. -* history: Past and future. -* Horsley, Tom: Past and future. -* ID database, definition of: Introduction. -* ID file format: mkid invocation. -* identifiers in a file: fid invocation. -* iid: iid invocation. -* iid commands: iid commands. -* iid options: iid command line options. -* iid query expressions: iid query expressions. -* interactive queries: iid invocation. -* introduction: Introduction. -* languages_0: Defining scanners in source code. -* left delimiter editor argument: eid invocation. -* Leonard, Bill: Past and future. -* lid: lid invocation. -* load-path: GNU Emacs gid interface. -* look and mkid 1: Past and future. -* man pages, compressed: mkid examples. -* matching filenames: pid invocation. -* McGary, Greg: Past and future. -* mkid: mkid invocation. -* mkid options: mkid options. -* multiple lines, merging: Query options. -* numbers, in databases: mkid invocation. -* numeric matches, specifying radix of: Query options. -* numeric searches: Query examples. -* options for iid: iid command line options. -* options for mkid: mkid options. -* overview: Introduction. -* PAGER: iid commands. -* parent directories, searched for ID: Query options. -* patterns: Patterns. -* pid: pid invocation. -* plain text scanner: Plain text scanner. -* predefined scanners: Predefined scanners. -* queries for iid: iid query expressions. 
-* query examples: Query examples. -* query options, common: Query options. -* radix of numeric matches, specifying: Query options. -* regular expression syntax: Patterns. -* regular expressions, forcing evaluation as: Query options. -* right delimiter editor argument: eid invocation. -* scanner options: Scanner option formats. -* scanners: Scanners. -* scanners, adding new: Defining new scanners. -* scanners, defining in source code: Defining scanners in source code. -* scanners, defining with options: Defining scanners with options. -* scanners, predefined: Predefined scanners. -* scanners.c: Defining scanners in source code. -* Scofield, Doug: Past and future. -* search for identifier, initial: eid invocation. -* sharing ID files: mkid invocation. -* shell brace notation in filename lists: Query options. -* shell commands in iid: iid commands. -* shell escape: iid commands. -* shell wildcard patterns: Wildcard patterns. -* single matches, showing: Query options. -* squeezing characters from identifiers: Plain text scanner. -* statistics: mkid options. -* string searching: aid invocation. -* strings, forcing evaluation as: Query options. -* suffixes of filenames: Scanners. -* suffixes_0: Defining scanners in source code. -* suppressing matching identifier: Query options. -* Texinfo, scanning example of: Defining scanners with options. -* whatis: aid invocation. -* wildcard wildcard patterns: Wildcard patterns. -* [!...] in globbing: Wildcard patterns. -* [...] in globbing: Wildcard patterns. -* \ in globbing: Wildcard patterns. 
- - - -Tag Table: -Node: Top1418 -Node: Introduction2101 -Node: Past and future4406 -Node: mkid invocation6731 -Node: mkid options8295 -Node: Scanners9707 -Node: Scanner option formats11196 -Node: Predefined scanners12366 -Node: C scanner13063 -Node: Plain text scanner13812 -Node: Assembler scanner14717 -Node: Defining new scanners15840 -Node: Defining scanners in source code16457 -Node: Defining scanners with options17296 -Node: idx invocation18744 -Node: mkid examples19304 -Node: Common query arguments21277 -Node: Query options21819 -Node: Patterns25208 -Node: Query examples26542 -Node: gid invocation27924 -Node: GNU Emacs gid interface29080 -Node: Looking up identifiers29938 -Node: lid invocation30428 -Node: aid invocation31856 -Node: eid invocation32636 -Node: fid invocation34674 -Node: pid invocation35226 -Node: Wildcard patterns36327 -Node: iid invocation37091 -Node: iid command line options37642 -Node: iid query expressions38213 -Node: iid commands40515 -Node: Index42745 - -End Tag Table diff --git a/id.texinfo b/id.texinfo deleted file mode 100644 index cdd9e56..0000000 --- a/id.texinfo +++ /dev/null @@ -1,1615 +0,0 @@ -\input texinfo -@comment %**start of header -@setfilename id.info -@settitle ID database utilities -@comment %**end of header - -@include version.texi - -@c Define new indices for filenames, commands and options. -@defcodeindex fl -@defcodeindex cm -@defcodeindex op - -@c Put everything in one index (arbitrarily chosen to be the concept index). -@syncodeindex fl cp -@syncodeindex fn cp -@syncodeindex ky cp -@syncodeindex op cp -@syncodeindex pg cp -@syncodeindex vr cp - -@ifinfo -@set Francois Franc,ois -@end ifinfo -@tex -@set Francois Fran\noexpand\ptexc cois -@end tex - -@ifinfo -@format -START-INFO-DIR-ENTRY -* ID database: (id). Identifier database utilities. -* aid: (id)aid invocation:: Matching strings. -* eid: (id)eid invocation:: Invoking an editor on matches. -* fid: (id)fid invocation:: Listing a file's identifiers. 
-* gid: (id)gid invocation:: Listing all matching lines. -* idx: (id)idx invocation:: Testing mkid scanners. -* iid: (id)iid invocation:: Interactive complex queries. -* lid: (id)lid invocation:: Matching patterns. -* mkid: (id)mkid invocation:: Creating an ID database. -* pid: (id)pid invocation:: Looking up filenames. -END-INFO-DIR-ENTRY -@end format -@end ifinfo - -@ifinfo -This file documents the @code{mkid} identifier database utilities. - -Copyright (C) 1991, 1995 Tom Horsley. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). - -@end ignore -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation. -@end ifinfo - -@titlepage -@title ID database utilities -@subtitle Programs for simple, fast, high-capacity cross-referencing -@subtitle for version @value{VERSION} -@author Tom Horsley - -@page -@vskip 0pt plus 1filll -Copyright @copyright{} 1991, 1995 Tom Horsley. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. 
- -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation. -@end titlepage - - -@ifinfo -@node Top -@top ID database utilities - -This manual documents version @value{VERSION} of the ID database -utilities. - -@menu -* Introduction:: Overview of the tools, and authors. -* mkid invocation:: Creating an ID database. -* Common query arguments:: Common lookup options and search patterns. -* gid invocation:: Listing all matching lines. -* Looking up identifiers:: lid, aid, eid, and fid. -* pid invocation:: Looking up filenames. -* iid invocation:: Interactive and complex queries. -* Index:: General index. -@end menu -@end ifinfo - - -@node Introduction -@chapter Introduction - -@cindex overview -@cindex introduction - -@cindex ID database, definition of -An @dfn{ID database} is a binary file containing a list of filenames, a -list of identifiers, and a matrix indicating which identifiers appear in -which files. With this database and some tools to manipulate it -(described in this manual), a host of tasks become simpler and faster. -For example, you can list all files containing a particular -@code{#include} throughout a huge source hierarchy, search for all the -memos containing references to a project, or automatically invoke an -editor on all files containing references to some function. Anyone with -a large software project to maintain, or a large set of text files to -organize, can benefit from an ID database. 
- -Although the ID utilities are most commonly used with identifiers, -numeric constants are also stored in the database, and can be searched -for in the same way (independent of radix, if desired). - -There are a number of programs in the ID family: - -@table @code - -@item mkid -scans files for identifiers and numeric constants and builds the ID -database file. - -@item gid -lists all lines that match given patterns. - -@item lid -lists the filenames containing identifiers that match given patterns. - -@item aid -lists the filenames containing identifiers that contain given strings, -independent of case. - -@item eid -invokes an editor on each file containing identifiers that match given -patterns. - -@item fid -lists all identifiers recorded in the database for given files, or -identifiers common to two files. - -@item pid -matches the filenames in the database, rather than the identifiers. - -@item iid -interactively supports more complex queries, such as intersection and -union. - -@item idx -helps with testing of new @code{mkid} scanners. - -@end table - -@cindex bugs, reporting -Please report bugs to @samp{gkm@@magilla.cichlid.com}. Remember to -include the version number, machine architecture, input files, and any -other information needed to reproduce the bug: your input, what you -expected, what you got, and why it is wrong. Diffs are welcome, but -please include a description of the problem as well, since this is -sometimes difficult to infer. @xref{Bugs, , , gcc, GNU CC}. - -@menu -* Past and future:: How the ID tools came about, and where they're going. -@end menu - - -@node Past and future -@section Past and future - -@cindex history - -@pindex look @r{and @code{mkid} 1} -@cindex McGary, Greg -Greg McGary conceived of the ideas behind mkid when he began hacking the -Unix kernel in 1984. He needed a navigation tool to help him find his -way through the expansive, unfamiliar landscape. 
The first @code{mkid}-like -tools were shell scripts, and produced an ASCII database that looks much -like the output of @code{lid} with no arguments. It took over an hour -on a VAX 11/750 to build a database for a 4.1BSD-ish kernel. Lookups -were done with the system utility @code{look}, modified to handle very -long lines. - -In 1986, Greg rewrote @code{mkid}, @code{lid}, @code{fid} and @code{idx} -in C to improve performance. Database-build times were shortened by an -order of magnitude. The @code{mkid} tools were first posted to -@samp{comp.sources.unix} in September 1987. - -@cindex Horsley, Tom -@cindex Scofield, Doug -@cindex Leonard, Bill -@cindex Berry, Karl -Over the next few years, several versions diverged from the original -source. Tom Horsley at Harris Computer Systems Division stepped forward -to take over maintenance and integrated some of the fixes from divergent -versions. He also wrote the @code{iid} program. A first release of -@code{mkid} @w{version 2} was posted to @file{alt.sources} near the end -of 1990. At that time, Tom wrote this Texinfo manual with the -encouragement of the net community. (Tom especially thanks Doug Scofield -and Bill Leonard whom he dragooned into helping poorfraed and -edit---they found several problems in the initial version.) Karl Berry -revamped the manual for Texinfo style, indexing, and organization in -1995. - -@pindex cscope -@pindex grep -@cindex future -In January 1995, Greg McGary reemerged as the primary maintainer and -launched development of @code{mkid} version 3, whose primary new feature -is an efficient algorithm for building databases that is linear in both -time and space over the size of the input text. (The old algorithm was -quadratic in space and therefore choked on very large source trees.) -The code is released under the GNU General Public License, and might become a -part of the GNU system. 
@code{mkid} 3 is an interim release, since -several significant enhancements are still in the works: an optional -coupling with GNU @code{grep}, so that @code{grep} can use an ID -database for hints; a @code{cscope} work-alike query interface; -incremental update of the ID database; and an automatic file-tree walker -so you need not explicitly supply every filename argument to the -@code{mkid} program. - - -@node mkid invocation -@chapter @code{mkid}: Creating ID databases - -@pindex mkid -@cindex creating databases -@cindex databases, creating - -@pindex cron -The @code{mkid} program builds an ID database. To do this, it must scan -each file you tell it to include in the database. This takes some time, -but once the work is done the query programs run very rapidly. (You can -run @code{mkid} as a @code{cron} job to regularly update your -databases.) - -The @code{mkid} program knows how to extract identifiers from various -types of files. For example, it can recognize and skip over comments -and string constants in a C program. - -@cindex numbers, in databases -Identifiers are not the only thing included in the database. Numbers -are also recognized and included in the database indexed by their binary -value. This feature allows you to find uses of constants without regard -to the radix used to specify them, since the same number can frequently -be written in many different ways (for instance, @samp{47}, @samp{0x2f}, -@samp{057} in C). - -All the places in this document which mention identifiers should really -mention both identifiers and numbers, but that gets fairly clumsy after -a while, so you just need to keep in mind that numbers are included in -the database as well as identifiers. - -@cindex ID file format -@cindex architecture-independence -@cindex sharing ID files -The ID files that @code{mkid} creates are architecture- and -byte-order-independent; you can share them at will across systems. - -@menu -* mkid options:: Command-line options to mkid. 
-* Scanners:: Built-in and defining your own. -* mkid examples:: Examples of mkid usage. -@end menu - - -@node mkid options -@section @code{mkid} options - -@cindex options for @code{mkid} -@pindex mkid @r{options} - -By default, @code{mkid} scans the files you specify and writes the -database to a file named @file{ID} in the current directory. - -@example -mkid [-v] [-S@var{scanarg}] [-a@var{argfile}] [-] [-f@var{idfile}] @c -@var{files}@dots{} -@end example - -The program accepts the following options. - -@table @samp - -@item -v -@opindex -v -@cindex statistics -Verbose. @code{mkid} tells you as it scans each file and indicates -which scanner it is using. It also summarizes some statistics about the -database at the end. - -@item -S@var{scanarg} -@opindex -S@var{scanarg} -Specify options regarding @code{mkid}'s scanners. @xref{Scanner option -formats}. - -@item -a@var{argfile} -@opindex -a@var{argfile} -Read additional command line arguments from @var{argfile}. This is -typically used to specify lists of filenames longer than will fit on a -command line; some systems have severe limitations on the total length -of a command line. - -@item - -@opindex - -Read additional command line arguments from standard input. - -@item -f@var{idfile} -Write the database to the file @var{idfile}, instead of @file{ID}. The -database stores filenames relative to the directory containing the -database, so if you move the database to a different directory after -creating it, you may have trouble finding files. - -@c @item -u -@c @opindex -u -@c The @code{-u} option updates an existing database by rescanning any -@c files that have changed since the database was written. Unfortunately -@c you cannot incrementally add new files to a database. -@c Greg is reimplementing this ... - -@end table - -The remaining arguments @var{files} are the files to be scanned and -included in the database. 
If no files are given at all (either on -command line or via @samp{-a} or @samp{-}), @code{mkid} does nothing. - - -@node Scanners -@section Scanners - -@cindex scanners - -To determine which identifiers to extract from a file and store in the -database, @code{mkid} calls a @dfn{scanner}; we say a scanner -@dfn{recognizes} a particular language. Scanners for several languages -are built-in to @code{mkid}; you can add your own scanners as well, as -explained in the sections below. - -@cindex suffixes of filenames -@code{mkid} determines which scanner to use for a particular file by -looking at the suffix of the filename. This @dfn{suffix} is everything -after and including the last @samp{.} in a filename; for example, the -suffix of @file{foo.c} is @file{.c}. @code{mkid} has a built-in list of -bindings from some suffixes to corresponding scanners; for example, -@file{.c} files are (not surprisingly) scanned by the predefined C -language scanner. - -@findex .default @r{scanner} -If @code{mkid} cannot determine what scanner to use for a particular -file, either because the file has no suffix (e.g., @file{foo}) or -because @code{mkid} has no binding for the file's suffix (e.g., -@file{foo.bar}), it uses the scanner bound to the @samp{.default} -suffix. By default, this is the plain text scanner (@pxref{Plain text -scanner}), but you can change this with the @samp{-S} option, as -explained below. - -@menu -* Scanner option formats:: Overview of the -S option. -* Predefined scanners:: The C, plain text, and assembler scanners. -* Defining new scanners:: Either in source code or at runtime with -S. -* idx invocation:: Testing mkid scanners. -@end menu - - -@node Scanner option formats -@subsection Scanner option formats - -@cindex scanner options -@opindex -S @r{scanner option} - -With the @samp{-S} option, you can change which language scanner to use -for which files, give language-specific options, and get some limited -online help about scanner options. 
- -Here are the different forms of the @samp{-S} option: - -@table @samp - -@item -S.@var{suffix}=@var{scanner} -@opindex -S. -Use @var{scanner} for a file with the given @samp{.@var{suffix}}. For -example, @samp{-S.yacc=c} tells @code{mkid} to use the @samp{c} language -scanner for all files ending in @samp{.yacc}. - -@item -S.@var{suffix}=? -Display which scanner is used for the given @samp{.@var{suffix}}. - -@item -S?=@var{scanner} -@opindex -S? -Display which suffixes @var{scanner} is used for. - -@item -S?=? -Display the scanner binding for every known suffix. - -@item -S@var{scanner}+@var{arg} -@itemx -S@var{scanner}-@var{arg} -Each scanner accepts certain scanner-dependent arguments. These options -all have one of these forms. @xref{Predefined scanners}. - -@item -S@var{scanner}? -Display the scanner-specific options accepted by @var{scanner}. - -@item -S@var{new-scanner}/@var{old-scanner}/@var{filter-command} -Define @var{new-scanner} in terms of @var{old-scanner} and -@var{filter-command}. @xref{Defining scanners with options}. - -@end table - - -@node Predefined scanners -@subsection Predefined scanners - -@cindex predefined scanners -@cindex scanners, predefined - -@code{mkid} has built-in scanners for several types of languages; you -can get the list by running @code{mkid -S?=?}. -The supported languages are documented -below@footnote{This is not strictly true: @samp{vhil} is a supported -language, but it is an obsolete and arcane dialect of C and should be -ignored.}. - -@menu -* C scanner:: For the C programming language. -* Plain text scanner:: For documents or other non-source code. -* Assembler scanner:: For assembly language. -@end menu - - -@node C scanner -@subsubsection C scanner - -@cindex C scanner, predefined -@flindex .[chly] @r{files, scanning} - -The C scanner is the most commonly used. 
Files with the usual @file{.c} -and @file{.h} suffixes, and the @file{.y} (yacc) and @file{.l} (lex) -suffixes, are processed with this scanner (by default). - -Scanner-specific options: - -@table @samp - -@item -Sc-s@var{character} -@kindex $ @r{in identifiers} -@opindex -Sc-s -Allow the specified @var{character} in identifiers. For example, if you -use @samp{$} in identifiers, you'll want to use @samp{-Sc-s$}. - -@item -Sc+u -@opindex -Sc+u -Strip leading underscores from identifiers. You might want to do this in -peculiar circumstances, such as trying to parse the output from -@code{nm} or some other system utility. - -@item -Sc-u -@opindex -Sc-u -Don't strip leading underscores from identifiers; this is the default. - -@end table - - -@node Plain text scanner -@subsubsection Plain text scanner - -@cindex plain text scanner - -The plain text scanner is intended for scanning most non-source-code -files. This is typically the scanner used when adding custom scanners -via @samp{-S} (@pxref{Defining scanners with options}). - -@c @code{mkid} predefines a troff scanner in terms of the plain text -@c scanner and -@c the @code{deroff} utility. -@c A compressed man page -@c scanner runs @code{pcat} piped into @code{col -b}, and a @TeX{} scanner -@c runs @code{detex}. - -Scanner-specific options: - -@table @samp - -@item -Stext+a@var{character} -@opindex -Stext+a -Include @var{character} in identifiers. By default, letters (a--z and -A--Z) and underscore are included. - -@item -Stext-a@var{character} -@opindex -Stext-a -Exclude @var{character} from identifiers. - -@item -Stext+s@var{character} -@opindex -Stext+s -@cindex squeezing characters from identifiers -Squeeze @var{character} from identifiers, i.e., do not terminate an -identifier when @var{character} is seen. By default, the characters -@samp{'}, @samp{-}, and @samp{.} are squeezed out of identifiers. For -example, the input @samp{fred's} leads to the identifier @samp{freds}. 
- -@item -Stext-s@var{character} -Do not squeeze @var{character}. - -@end table - - -@node Assembler scanner -@subsubsection Assembler scanner - -@cindex assembler scanner - -Since assembly languages come in several flavors, this scanner has a -number of options: - -@table @samp - -@item -Sasm-c@var{character} -@opindex -Sasm-c -@cindex comments in assembler -Define @var{character} as starting a comment that extends to the end of -the input line; no default. In many assemblers this is @samp{;} or -@samp{#}. - -@item -Sasm+u -@itemx -Sasm-u -@opindex -Sasm+u -Strip (@samp{+u}) or do not strip (@samp{-u}) leading underscores from -identifiers. The default is to strip them. - -@item -Sasm+a@var{character} -@opindex -Sasm+a -Allow @var{character} in identifiers. - -@item -Sasm-a@var{character} -Allow @var{character} in identifiers, but if an identifier contains -@var{character}, ignore that identifier. This is useful to ignore temporary labels, -which can be generated in great profusion; these often contain @samp{.} -or @samp{@@}. - -@item -Sasm+p -@itemx -Sasm-p -@opindex -Sasm+p -Recognize (@samp{+p}) or do not recognize (@samp{-p}) C preprocessor -directives in assembler source. The default is to recognize them. - -@item -Sasm+C -@itemx -Sasm-C -@opindex -Sasm+C -Skip over (@samp{+C}) or do not skip over (@samp{-C}) C-style comments -in assembler source. The default is to skip them. - -@end table - - -@node Defining new scanners -@subsection Defining new scanners - -@cindex scanners, adding new - -You can add new scanners to @code{mkid} in two ways: modify the source -code and recompile, or at runtime via the @samp{-S} option. Each has -its advantages and disadvantages, as explained below. - -If you create a new scanner that would be of use to others, please -consider sending it back to the maintainer, -@samp{gkm@@magilla.cichlid.com}, for inclusion in future releases of -@code{mkid}. 
- -@menu -* Defining scanners in source code:: -* Defining scanners with options:: -@end menu - - -@node Defining scanners in source code -@subsubsection Defining scanners in source code - -@flindex scanners.c -@cindex scanners, defining in source code - -@vindex languages_0 -@vindex suffixes_0 -To add a new scanner in source code, you should add a new section to the -file @file{scanners.c}. Copy one of the existing scanners (most likely -either C or plain text), and modify as necessary. Also add the new -scanner to the @code{languages_0} and @code{suffixes_0} tables near the -beginning of the file. - -This is not a terribly difficult programming task, but it requires -recompiling and installing the new version of @code{mkid}, which may be -inconvenient. - -This method leads to scanners which operate much more quickly than ones -that depend on external programs. It is also likely the easiest way -to define scanners for new programming languages. - - -@node Defining scanners with options -@subsubsection Defining scanners with options - -@cindex scanners, defining with options - -You can use the @samp{-S} option on the command line to define a new -language scanner: - -@example --S@var{new-scanner}/@var{existing-scanner}/@var{filter} -@end example - -@noindent -Here, @var{new-scanner} is the name of the new scanner being defined, -@var{existing-scanner} is the name of an existing scanner, and -@var{filter} is a shell command or pipeline. - -The new scanner works by passing the input file to @var{filter}, and -then arranging for the result to be passed through -@var{existing-scanner}. Typically, @var{existing-scanner} is @samp{text}. - -Somewhere within @var{filter}, the string @samp{%s} should occur. This -@samp{%s} is replaced by the name of the source file being scanned. - -@cindex Texinfo, scanning example of -For example, @code{mkid} has no built-in scanner for Texinfo files (like -this one). 
In indexing a Texinfo file, you most likely would want -to ignore the Texinfo @@-commands. Here's one way to specify a new -scanner to do this: - -@example --Stexinfo/text/sed s,@@[a-z]*,,g %s -@end example - -This defines a new language scanner (@samp{texinfo}) in terms of -a @code{sed} command to strip out Texinfo directives (an @samp{@@} -character followed by letters). Once the directives are stripped, the -remaining text is run through the plain text scanner. - -This is a minimal example; to do a complete job, you would need to -completely delete some lines, such as those beginning with @code{@@end} -or @code{@@node}. - - -@node idx invocation -@subsection @code{idx}: Testing @code{mkid} scanners - -@code{idx} prints the identifiers found in the files you specify to -standard output. This is useful in debugging new @code{mkid} scanners -(@pxref{Scanners}). Synopsis: - -@example -idx [-S@var{scanarg}] @var{files}@dots{} -@end example - -@code{idx} accepts the same @samp{-S} options as @code{mkid}. -@xref{Scanner option formats}. - -The name ``idx'' stands for ``ID eXtract''. The name may change in -future releases, since this is such an infrequently used program. - - -@node mkid examples -@section @code{mkid} examples - -@cindex examples of @code{mkid} - -The simplest example of @code{mkid} is something like: - -@example -mkid *.[chy] -@end example - -This will build an ID database indexing identifiers and numbers in -all the @file{.c}, @file{.h}, and @file{.y} files in the current -directory. Because @code{mkid} already knows how to scan files with -those suffixes, no additional options are needed. - -@cindex man pages, compressed -@cindex compressed files, building ID from -Here's a more complex example. 
Suppose you want to build a database -indexing the contents of all the @code{man} pages, and further suppose -that your system is using @code{gzip} (@pxref{Top, , , gzip, Gzip}) to -store compressed @code{cat} versions of the @code{man} pages in the -directory @file{/usr/catman}. The @code{gzip} program creates files -with a @file{.gz} suffix, so you must tell @code{mkid} how to scan -@file{.gz} files. Here are the commands to do the job: - -@example -cd /usr/catman -find . -name \*.gz -print | mkid '-Sman/text/gzip <%s' -S.gz=man - -@end example - -@noindent Explanation: - -@enumerate - -@item -We first @code{cd} to @file{/usr/catman} so the ID database -will store the correct relative filenames. - -@item -The @code{find} command prints the names of all @file{.gz} files under -the current directory. @xref{find invocation, , , sh-utils, GNU shell -utilities}. - -@item -This list is piped to @code{mkid}; the @code{-} option (at the end of -the line) tells @code{mkid} to read arguments (in this case, as is -typical, the list of filenames) from standard input. @xref{mkid options}. - -@item -The @samp{-Sman/text/gzip @dots{}} defines a new language @samp{man} in -terms of the @code{gzip} program and @code{mkid}'s existing text -scanner. @xref{Defining scanners with options}. - -@item -The @samp{-S.gz=man} tells @code{mkid} to treat all @file{.gz} files as -this new language @code{man}. @xref{Scanner option formats}. - -@end enumerate - -As a further complication, @code{cat} pages typically contain -underlining and backspace sequences, which will confuse @code{mkid}. To -handle this, the @code{gzip} command becomes a pipeline, like this: - -@example -mkid '-Sman/text/gzip <%s | col -b' -S.gz=man - -@end example - - -@node Common query arguments -@chapter Common query arguments - -@cindex common query arguments - -Certain options, and regular expression syntax, are shared by the ID -query tools. 
So we describe those things in the sections below, instead -of repeating the description for each tool. - -@menu -* Query options:: -f -r -c -ew -kg -n -doxa -m -F -u. -* Patterns:: Regular expression syntax for searches. -* Examples: Query examples. Some common uses. -@end menu - - -@node Query options -@section Query options - -@cindex query options, common -@cindex common query options - -The ID query tools (@emph{not} @code{mkid}) share certain command line -options. Not all of these options are recognized by all programs, but -if an option is used by more than one program, it is described below. -The description of each program gives the options that program uses. - -@table @samp - -@item -f@var{idfile} -@opindex -f@var{idfile} -@cindex database name, specifying -@cindex parent directories, searched for ID -Read the database from @var{idfile}, in the current directory or in any -directory above the current directory. The default database name is -@file{ID}. Searching parent directories lets you have a single ID -database at the root of a large source tree and then use the query tools -from anywhere within that tree. - -@item -r@var{directory} -@opindex -r@var{directory} -Find files relative to @var{directory}, instead of the directory in -which the ID database was found. This is useful if the ID database was -moved after its creation. - -@item -c -@opindex -c -Equivalent to @code{-r`pwd`}, i.e., find files relative to the current -directory, instead of the directory in which the ID database was found. - -@item -e -@itemx -w -@opindex -e -@opindex -w -@cindex regular expressions, forcing evaluation as -@cindex strings, forcing evaluation as -@cindex constant strings, forcing evaluation as -@samp{-e} forces pattern arguments to be treated as regular expressions, -and @samp{-w} forces pattern arguments to be treated as constant -strings. 
By default, the query tools guess whether a pattern is regular -expressions or constant strings by looking for special characters. -@xref{Patterns}. - -@item -k -@itemx -g -@opindex -k -@opindex -g -@cindex brace notation in filename lists -@cindex shell brace notation in filename lists -@samp{-k} suppresses use of shell brace notation in the output. By -default, the query tools that generate lists of filenames attempt to -compress the lists using the usual shell brace notation, e.g., -@file{@{foo,bar@}.c} to mean @file{foo.c} and @file{bar.c}. (This is -useful if you use @code{ksh} or the original (not GNU) @code{sh} and -want to feed the list of names to another command, since those shells do -not support this brace notation; the name of the @code{-k} option comes -from the @code{k} in @code{ksh}). - -@samp{-g} turns on use of brace notation; this is only needed if the -query tools were compiled with @samp{-k} as the default behavior. - -@item -n -@opindex -n -@cindex suppressing matching identifier -Suppress the matching identifier before each list of filenames that the -query tools output by default. This is useful if you want a list of just -the names to feed to another command. - -@item -d -@itemx -o -@itemx -x -@itemx -a -@opindex -d -@opindex -o -@opindex -x -@opindex -a -@cindex radix of numeric matches, specifying -@cindex numeric matches, specifying radix of -These options may be used in any combination to specify the radix of -numeric matches. @samp{-d} allows matching on decimal numbers, -@samp{-o} on octal numbers, and @samp{-x} on hexadecimal numbers. The -@code{-a} option is equivalent to specifying all three; this is the -default. Any combination of these options may be used. - -@item -m -@opindex -m -@cindex multiple lines, merging -Merge multiple lines of output into a single line. If your query -matches more than one identifier, the default is to generate a separate -line of output for each matching identifier. 
- -@item -F- -@itemx -F@var{n} -@itemx -F-@var{m} -@itemx -F@var{n}-@var{m} -@opindex -F -@cindex single matches, showing -Show identifiers matching at least @var{n} and at most @var{m} times. -@samp{-F-} is equivalent to @samp{-F1}, i.e., find identifiers that -appear only once in the database. (This is useful to locate identifiers -that are defined but never used, or used once and never defined.) - -@item -u@var{number} -@opindex -u -@cindex conflicting identifiers, finding -List identifiers that conflict in the first @var{number} characters. -This could be useful in porting programs to brain-dead computers that -refuse to support long identifiers, but your best long term option is to -set such computers on fire. - -@end table - - -@node Patterns -@section Patterns - -@cindex patterns -@cindex regular expression syntax - -@dfn{Patterns}, also called @dfn{regular expressions}, allow you to -match many different identifiers in a single query. - -The same regular expression syntax is recognized by all the query tools -that handle regular expressions. The exact syntax depends on how the ID -tools were compiled, but the following constructs should always be -supported: - -@table @samp - -@item . -Match any single character. - -@item [@var{chars}] -Match any of the characters specified within the brackets. You can -match any characters @emph{except} the ones in brackets by typing -@samp{^} as the first character. A range of characters can be specified -using @samp{-}. For example, @samp{[abc]} and @samp{[a-c]} both match -@samp{a}, @samp{b}, or @samp{c}, and @samp{[^abc]} matches anything -@emph{except} @samp{a}, @samp{b}, or @samp{c}. - -@item * -Match the previous construct zero or more times. - -@item ^ -@itemx $ -@samp{^} (@samp{$}) at the beginning (end) of a pattern anchors the -match to the first (last) character of the identifier. 
- -@end table - -The query programs use either the @code{regex}/@code{regcmp} or -@code{re_comp}/@code{re_exec} functions, depending on which are -available in the library on your system. These do not always support -the exact same regular expression syntax, so consult your local -@code{man} pages to find out. - - -@node Query examples -@section Query examples - -@cindex examples, queries -@cindex query examples -Here are some examples of the options described in the previous -sections. - -To restrict searches to exact matches, use @samp{^@dots{}$}. For example: - -@example -prompt$ gid '^FILE$' -ansi2knr.c:144: @{ FILE *in, *out; -ansi2knr.c:315: FILE *out; -fid.c:38: FILE *id_FILE; -filenames.c:576: FILE * -@dots{} -@end example - -To show identifiers not unique in the first 16 characters: - -@example -prompt$ lid -u16 -RE_CONTEXT_INDEP_ANCHORS regex.c -RE_CONTEXT_INDEP_OPS regex.c -RE_SYNTAX_POSIX_BASIC regex.c -RE_SYNTAX_POSIX_EXTENDED regex.c -@dots{} -@end example - -@cindex numeric searches -Numbers are searched for numerically rather than textually. For example: - -@example -prompt$ lid 0xff -0377 @{lid,regex@}.c -0xff @{bitops,fid,lid,mkid@}.c -255 regex.c -@end example - -On the other hand, you can restrict a numeric search to a particular -radix if you want: - -@example -laurie$ lid -x 0xff -0xff @{bitops,fid,lid,mkid@}.c -@end example - -Filenames in the output are always adjusted to be correct for the -current working directory. 
For example: - -@example -prompt$ lid bdevsw -bdevsw sys/conf.h cf/conf.c io/bio.c os/@{fio,main,prf,sys3@}.c -prompt$ cd io -prompt$ lid bdevsw -bdevsw ../sys/conf.h ../cf/conf.c bio.c ../os/@{fio,main,prf,sys3@}.c -@end example - - -@node gid invocation -@chapter @code{gid}: Listing matching lines - -Synopsis: - -@example -gid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-doxasc] [@var{pattern}@dots{}] -@end example - -@code{gid} finds the identifiers in the database that match the -specified @var{pattern}s, then searches for all occurrences of those -identifiers, in only the files containing matches. In a large source -tree, this saves an enormous amount of time (compared to searching every -source file). - -With no @var{pattern} arguments, @code{gid} prints every line of every -source file. - -The name ``gid'' stands for ``grep for identifiers'', @code{grep} being -the standard utility to search regular files. - -@xref{Common query arguments}, for a description of the command-line -options and @var{pattern} arguments. - -@code{gid} uses the standard GNU output format for identifying source lines: - -@example -@var{filename}:@var{linenum}: @var{text} -@end example - -Here is an example: - -@example -prompt$ gid FILE -ansi2knr.c:144: @{ FILE *in, *out; -ansi2knr.c:315: FILE *out; -fid.c:38: FILE *id_FILE; -@dots{} -@end example - -@menu -* GNU Emacs gid interface:: Using next-error with gid. -@end menu - - -@node GNU Emacs gid interface -@section GNU Emacs @code{gid} interface - -@cindex Emacs interface to @code{gid} -@flindex gid.el @r{interface to Emacs} - -@vindex load-path -The @code{mkid} source distribution comes with a file @file{gid.el}, -which defines a GNU Emacs interface to @code{gid}. To install it, put -@file{gid.el} somewhere that Emacs will find it (i.e., in your -@code{load-path}) and put - -@example -(autoload 'gid "gid" nil t) -@end example - -@noindent in one of Emacs' initialization files, e.g., @file{~/.emacs}. 
-You will then be able to use @kbd{M-x gid} to run the command. - -@findex gid @r{Emacs function} -The @code{gid} function prompts you with the word around point. If you -want to search for something else, simply delete the line and type the -pattern of interest. - -@flindex *scratch* @r{Emacs buffer} -The function then runs the @code{gid} program in a @samp{*compilation*} -buffer, so the normal @code{next-error} function can be used to visit -all the places the identifier is found (@pxref{Compilation,,, emacs, The -GNU Emacs Manual}). - - -@node Looking up identifiers -@chapter Looking up identifiers - -These commands look up identifiers in the ID database and operate on the -files containing matches. - -@menu -* lid invocation:: Matching patterns. -* aid invocation:: Matching strings. -* eid invocation:: Invoking an editor on matches. -* fid invocation:: Listing a file's identifiers. -@end menu - - -@node lid invocation -@section @code{lid}: Matching patterns - -@pindex lid - -Synopsis: - -@example -lid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-mewdoxaskgnc] @c -@var{pattern}@dots{} -@end example - -@code{lid} searches the database for identifiers matching the given -@var{pattern} arguments and prints the names of the files that match -each @var{pattern}. With no @var{pattern}s, @code{lid} lists every -entry in the database. - -The name ``lid'' stands for ``lookup identifier''. - -@xref{Common query arguments}, for a description of the command-line -options and @var{pattern} arguments. - -By default, each line of output consists of an identifier and all the -files containing that identifier. 
- -Here is an example showing a search for a single identifier (omitting -some output to keep lines short): - -@example -prompt$ lid FILE -FILE extern.h @{fid,gets0,getsFF,idx,init,lid,mkid,@dots{}@}.c -@end example - -This example shows a regular expression search: - -@example -prompt$ lid 'FILE$' -AF_FILE mkid.c -AF_IDFILE mkid.c -FILE extern.h @{fid,gets0,getsFF,idx,init,lid,mkid,@dots{}@}.c -IDFILE id.h @{fid,lid,mkid@}.c -IdFILE @{fid,lid@}.c -@dots{} -@end example - -@noindent As you can see, when a regular expression is used, it is -possible to get more than one line of output. To merge multiple lines -into one, use @samp{-m}: - -@example -prompt$ lid -m ^get -^get extern.h @{bitsvec,fid,gets0,getsFF,getscan,idx,lid,@dots{}@}.c -@end example - - -@node aid invocation -@section @code{aid}: Matching strings - -@pindex aid - -Synopsis: - -@example -aid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-mewdoxaskgnc] @c -@var{string}@dots{} -@end example - -@cindex case-insensitive searching -@cindex string searching -@code{aid} searches the database for identifiers containing the given -@var{string} arguments. The search is case-insensitive. - -@flindex whatis -The name ``aid'' stands for ``apropos identifier'', @code{apropos} -being a command that does a similar search of the @code{whatis} database -of @code{man} descriptions. - -For example, @samp{aid get} matches the identifiers @code{fgets}, -@code{GETLINE}, and @code{getchar}. - -The default output format is the same as @code{lid}; see the previous -section. - -@xref{Common query arguments}, for a description of the command-line -options and @var{pattern} arguments. - - -@node eid invocation -@section @code{eid}: Invoking an editor on matches - -@pindex eid - -Synopsis: - -@example -eid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-doxasc] [@var{pattern}]@dots{} -@end example - -@code{eid} runs the usual search (@pxref{lid invocation}) on the given -arguments, shows you the output, and then asks: - -@example -Edit? 
[y1-9^S/nq] -@end example - -@noindent -You can respond with: - -@table @samp -@item y -Edit all files listed. - -@item 1@dots{}9 -Start editing at the @math{@var{n} + 1}'st file. - -@item /@var{string} @r{or} @kbd{CTRL-S}@var{string} -Start editing at the first filename containing @var{string}. - -@item n -Go on to the next @var{pattern}, i.e., edit nothing for this one. - -@item q -Quit @code{eid}. - -@end table - -@code{eid} invokes the editor defined by the @samp{EDITOR} environment -variable to edit a file. If this editor can accept an initial search -argument on the command line, @code{eid} can move automatically to the -location of the match, via the environment variables below. - -@xref{Common query arguments}, for a description of the command-line -options and @var{pattern} arguments. - -Here are the environment variables relevant to @code{eid}: - -@table @samp - -@item EDITOR -@vindex EDITOR -The name of the editor program to invoke. - -@item EIDARG -@vindex EIDARG -@cindex search for identifier, initial -The argument to pass to the editor to search for the matching -identifier. For @code{vi}, this should be @samp{+/%s/'}. - -@item EIDLDEL -@vindex EIDLDEL -@cindex left delimiter editor argument -@cindex beginning-of-word editor argument -A regular expression to force a match at the beginning of a word (``left -delimiter''). @code{eid} inserts this in front of the matching identifier -when composing the search argument. For @code{vi}, this should be -@samp{\<}. - -@item EIDRDEL -@vindex EIDRDEL -@cindex right delimiter editor argument -@cindex end-of-word editor argument -The end-of-word regular expression. For @code{vi}, this should be -@samp{\>}. - -@end table - -For Emacs users, the interface in @code{gid.el} is probably preferable -to @code{eid}. @xref{GNU Emacs gid interface}. - - -Here is an example: - -@example -prompt$ eid FILE \^print -FILE @{ansi2knr,fid,filenames,idfile,idx,iid,lid,misc,@dots{}@}.c -Edit? 
[y1-9^S/nq] n -^print @{ansi2knr,fid,getopt,getopt1,iid,lid,mkid,regex,scanners@}.c -Edit? [y1-9^S/nq] 2 -@end example - -@noindent This will start editing at @file{getopt.c}. - - -@node fid invocation -@section @code{fid}: Listing a file's identifiers - -@pindex fid -@cindex identifiers in a file - -@code{fid} lists the identifiers found in a given file. Synopsis: - -@example -fid [-f@var{dbfile}] @var{file1} [@var{file2}] -@end example - -@table @samp - -@item -f@var{dbfile} -Read the database from @var{dbfile} instead of @file{ID}. - -@item @var{file1} -List all the identifiers contained in @var{file1}. - -@item @var{file2} -With a second file argument, list only the identifiers both files have -in common. - -@end table - -The output is simply one identifier (or number) per line. - - -@node pid invocation -@chapter @code{pid}: Looking up filenames - -@pindex pid -@cindex filenames, matching -@cindex matching filenames - -@code{pid} matches the filenames stored in the ID database, rather than -the identifiers. Synopsis: - -@example -pid [-f@var{dbfile}] [-r@var{dir}] [-ebkgnc] @var{wildcard}@dots{} -@end example - -By default, the @var{wildcard} patterns are treated as shell globbing -patterns, rather than the regular expressions the other utilities -accept. See the section below for details. - -Besides the standard options given in the synopsis (@pxref{Query -options}), @code{pid} accepts the following: - -@table @samp - -@item -e -@opindex -e -Do the usual regular expression matching (@pxref{Patterns}), instead -of shell wildcard matching. - -@item -b -@opindex -b -@cindex basename match -Match the basenames of the files in the database. For example, -@samp{pid -b foo} will match the stored filename @file{dir/foo}, but not -@file{foo/file}. - -@end table - -For example, the command: - -@example -pid \*.c -@end example - -@noindent lists all the @file{.c} files in the database. (The @samp{\} -here protects the @samp{*} from being expanded by the shell.) 
- -@menu -* Wildcard patterns:: Shell-style globbing patterns. -@end menu - - -@node Wildcard patterns -@section Wildcard patterns - -@cindex globbing patterns -@cindex shell wildcard patterns -@cindex wildcard patterns - -@code{pid} does simplified shell wildcard matching (unless the @samp{-e} -option is specified), rather than the regular expression matching done -by the other utilities. Here is a description of wildcard matching, -also called @dfn{globbing}: - -@itemize - -@item -@kindex * @r{in globbing} -@samp{*} matches zero or more characters. - -@item -@kindex ? @r{in globbing} -@samp{?} matches any single character. - -@item -@kindex \ @r{in globbing} -@samp{\} forces the next character to be taken literally. - -@item -@kindex [@dots{}] @r{in globbing} -@samp{[@var{chars}]} matches any single character listed in @var{chars}. - -@item -@kindex [!@dots{}] @r{in globbing} -@samp{[!@var{chars}]} matches any character @emph{not} listed in @var{chars}. - -@end itemize - -Most shells treat @samp{/} and leading @samp{.} characters -specially. @code{pid} does not do this. It simply matches the filename -in the database against the wildcard pattern. - - -@node iid invocation -@chapter @code{iid}: Complex interactive queries - -@pindex iid -@cindex interactive queries -@cindex complex queries - -@code{iid} is an interactive query utility for ID databases. It -operates by running another query program (@code{lid} by default, -@code{aid} if @samp{-a} is specified) and manipulating the sets of -filenames returned by these queries. - -@menu -* iid command line options:: Command-line options. -* iid query expressions:: Operands to the commands. -* iid commands:: Printing matching filenames, etc. 
-@end menu - - -@node iid command line options -@section @code{iid} command line options - -@cindex options for @code{iid} -@pindex iid @r{options} - -@code{iid} recognizes the following options (the standard query options -described in @ref{Query options} are inapplicable): - -@table @samp - -@item -a -@opindex -a -@pindex aid @r{used for @code{iid} searches} -Use @code{aid} for searches, instead of @code{lid}. - -@item -c@var{command} -@pindex -c -Execute @var{command} and exit, instead of prompting for interactive -commands. - -@item -H -@pindex -H -@cindex help for @code{iid} -Print a usage message and exit successfully. The @code{help} command -inside @code{iid} gives more information. @xref{iid commands}. - -@end table - - -@node iid query expressions -@section @code{iid} query expressions - -@cindex queries for @code{iid} -@pindex iid @r{query expressions} - -An @code{iid} @dfn{query expression} generates a set of filenames or -manipulates existing sets. These expressions are operands to some of -the @code{iid} commands (see the next section), not commands themselves. - -Here are the possible constructs, highest precedence first: - -@table @samp - -@item s@var{set-number} -Refer to a set previously created by a query operation. During each -@code{iid} session, every query generates a different set number, so -any previously generated set may be used as part of any new query by -reference to its set number. - -@item @var{pattern} -@code{iid} treats any non-keyword input (i.e., anything not in this -table) as an identifier to be searched for in the database. It is -passed to the search program (@code{lid} by default, @code{aid} if the -@code{-a} option was specified). The result of this operation is a set -of filenames, and it is assigned a unique set number. - -@item lid @var{identifier-list} -@cmindex lid @r{iid operator} -Invoke the @code{lid} program on @var{identifier-list} and construct a -new set from the result. 
- -@item aid @var{identifier-list} -@cmindex aid @r{iid operator} -Like @code{lid}, but use the @code{aid} program. - -@item match @var{wildcards} -@cmindex match @r{iid operator} -Invoke the @code{pid} program on @var{wildcards}, therefore matching on -the filenames in the database instead of the identifiers. The resulting -set contains the filenames that match the specified patterns. @xref{pid -invocation}. - -@item not @var{expr} -@cmindex not @r{iid operator} -The result is those filenames in the database that are not in -@var{expr}. - -@item @var{expr1} and @var{expr2} -@cmindex and @r{iid operator} -The result is the intersection of the sets @var{expr1} and @var{expr2}, -i.e., only those filenames contained in both. - -@item @var{expr1} or @var{expr2} -@cmindex or @r{iid operator} -The result is the union of the sets @var{expr1} and @var{expr2}, i.e., -all the filenames contained in either or both. - -@end table - -Operator names are recognized independent of case, so @code{AND}, -@code{and}, and @code{aNd} are all the same as far as @code{iid} is -concerned. - -To pass a keyword as an operand, you must enclose it in double quotes: -the command @samp{lid "lid"} generates the set of all filenames matching -the string @samp{lid}. - -Patterns containing shell metacharacters (such as @samp{*} or @samp{?}) -must also be properly quoted, since the query commands are run by -invoking them with the shell. - -@c Summary of query expression syntax: -@c -@c A @var{query} is: -@c @example -@c <set number> -@c <identifier> -@c lid <identifier list> -@c aid <identifier list> -@c match <wildcard list> -@c <query> or <query> -@c <query> and <query> -@c not <query> -@c ( <query> ) -@c @end example - - -@node iid commands -@section @code{iid} commands - -@cindex commands for @code{iid} -@pindex iid @r{commands} - -This section describes the interactive commands that @code{iid} -recognizes. 
The database query expressions you can pass to the -@samp{ss} and @samp{files} commands are described in the previous -section. - -Some commands output a @dfn{summary line} for sets. These lines show the -set number, the number of filenames in the set, and the command that -generated it. - -@table @samp - -@item ss @var{query} -@cmindex ss iid @r{command} -Build the set(s) of filenames resulting from the query expression -@var{query}. The output is a summary line for each set. - -@item files @var{query} -@itemx f @var{query} -@cmindex files iid @r{command} -@cmindex f iid @r{command} -Evaluate the query expression @var{query} as in @code{ss}, but output -the full list of matching filenames instead of a summary. - -@item sets -@cmindex sets iid @r{command} -Output a summary line for each extant set. - -@item show @var{set} -@itemx p @var{set} -@cmindex show iid @r{command} -@cmindex p iid @r{command} -@vindex PAGER -@pindex emacsclient -Pass the filenames in set number @var{set} to the program named in -the @code{PAGER} environment variable. Typically, this is a -page-at-a-time display program like @code{less} or @code{more}. If you -use Emacs, you might want to set @samp{PAGER} to @code{emacsclient} -(@pxref{Emacs Server,,, emacs, The GNU Emacs Manual}). - -@item @r{anything else} -@cindex shell commands in @code{iid} -When @code{iid} does not recognize the first word on an input line as a -builtin @code{iid} command, it assumes the input is a shell command -which will write a list of filenames to standard output, which it -gathers into a set as usual. - -Any set numbers that appear in the input are expanded into the lists of -filenames they represent prior to running the command. - -@item !@var{shell-command} -@cmindex ! iid @r{command} -@cindex shell escape -Expand set numbers that appear in @var{shell-command} into the filenames they -represent, and pass the result to @file{/bin/sh}. The output is not -interpreted. 
- -@item begin @var{directory} -@itemx b @var{directory} -@cmindex begin iid @r{command} -@cmindex b iid @r{command} -Begin a new @code{iid} session in a different directory (which -presumably contains a different database). It deletes all the sets -created so far and switches to the specified directory. It is -equivalent to exiting @code{iid}, changing directories in the shell, and -running @code{iid} again. - -@item help -@itemx h -@itemx ? -@cmindex help iid @r{command} -@cmindex h iid @r{command} -@cmindex ? iid @r{command} -Display a short help file using the program named in @samp{PAGER}. - -@item quit -@itemx q -@itemx off -@cmindex quit iid @r{command} -@cmindex q iid @r{command} -@cmindex off iid @r{command} -Quit @code{iid}. An end-of-file character (usually @kbd{CTRL-D}) also exits. - -@end table - - -@node Index -@unnumbered Index - -@printindex cp - -@contents -@bye diff --git a/idarg.h b/idarg.h deleted file mode 100644 index 7570ebd..0000000 --- a/idarg.h +++ /dev/null @@ -1,33 +0,0 @@ -/* idarg.h -- defs for internal form of command-line arguments - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _idarg_h_ -#define _idarg_h_ - -struct idarg -{ - struct idarg *ida_next; - char *ida_arg; - int ida_index; - char ida_flags; -#define IDA_RELATIVE 0x01 /* file name is now relative (lid) */ -#define IDA_SCAN_ME 0x01 /* file should be scanned (mkid) */ -#define IDA_PREFIX_US 0x02 /* file has names with prefixed underscores */ -}; - -#endif /* not _idarg_h_ */ diff --git a/idfile.c b/idfile.c deleted file mode 100644 index f244a0f..0000000 --- a/idfile.c +++ /dev/null @@ -1,246 +0,0 @@ -/* idfile.c -- read & write mkid database file header - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include <stdio.h> -#include <string.h> - -#include <config.h> -#include "alloc.h" -#include "idfile.h" -#include "strxtra.h" - -typedef int (*iof_t) __P((FILE *, void *, unsigned int, int)); -static int io_idhead __P((FILE *fp, iof_t iof, struct idhead *idh)); -static int io_write __P((FILE *output_FILE, void *addr, unsigned int size, int is_int)); -static int io_read __P((FILE *input_FILE, void *addr, unsigned int size, int is_int)); -static int io_size __P((FILE *, void *, unsigned int size, int)); - -extern char *program_name; - -/* init_id_file opens the ID file, reads header fields into idh, - verifies the magic number and version, and reads the constituent - file names. 
Any errors are considered fatal and cause an exit. */ - -FILE * -init_id_file (char const *id_file_name, struct idhead *idh) -{ - FILE *id_FILE = maybe_init_id_file (id_file_name, idh); - if (id_FILE) - return id_FILE; - error (1, errno, "Can't open `%s'", id_file_name); - return NULL; -} - -/* maybe_init_id_file does everything that init_id_file does, but is - tolerant of errors opening the ID file, returning NULL in this case - (this is called from mkid where an ID might or might not already - exist). All other errors are considered fatal. */ - -FILE * -maybe_init_id_file (char const *id_file_name, struct idhead *idh) -{ - FILE *id_FILE; - unsigned int i; - char *strings; - struct idarg *ida; - - id_FILE = fopen (id_file_name, "r"); - if (id_FILE == NULL) - return NULL; - - read_idhead (id_FILE, idh); - if (idh->idh_magic[0] != IDH_MAGIC_0 || idh->idh_magic[1] != IDH_MAGIC_1) - error (1, 0, "`%s' is not an ID file! (bad magic #)", id_file_name); - if (idh->idh_version != IDH_VERSION) - error (1, 0, "`%s' is version %d, but I only grok version %d", - id_file_name, idh->idh_version, IDH_VERSION); - - fseek (id_FILE, idh->idh_args_offset, 0); - /* NEEDSWORK */ - fseek (id_FILE, idh->idh_files_offset, 0); - - i = idh->idh_tokens_offset - idh->idh_args_offset; - strings = malloc (i); - fread (strings, i, 1, id_FILE); - ida = *id_args = CALLOC (struct idarg, idh->idh_files); - for (i = 0; i < idh->idh_files; i++) - { - while (*strings == '+' || *strings == '-') - { - while (*strings++) - ; - } - ida->ida_flags = 0; - ida->ida_arg = strings; - ida->ida_next = ida + 1; - ida->ida_index = i; - ida++; - while (*strings++) - ; - } - (--ida)->ida_next = NULL; - return id_FILE; -} - - -unsigned long -file_link_hash_1 (void const *key) -{ - unsigned long result = 0; - ADDRESS_HASH_1 (((struct file_link const *) key)->fl_parent, result); - STRING_HASH_1 (((struct file_link const *) key)->fl_name, result); - return result; -} - -unsigned long -file_link_hash_2 (void const *key) 
-{ - unsigned long result = 0; - ADDRESS_HASH_2 (((struct file_link const *) key)->fl_parent, result); - STRING_HASH_2 (((struct file_link const *) key)->fl_name, result); - return result; -} - -int -file_link_hash_cmp (void const *x, void const *y) -{ - int result; - ADDRESS_CMP (((struct file_link const *) x)->fl_parent, - ((struct file_link const *) y)->fl_parent, result); - if (result) - return result; - STRING_CMP (((struct file_link const *) x)->fl_name, - ((struct file_link const *) y)->fl_name, result); - return result; -} - - -int -read_idhead (FILE *input_FILE, struct idhead *idh) -{ - return io_idhead (input_FILE, io_read, idh); -} - -int -write_idhead (FILE *input_FILE, struct idhead *idh) -{ - return io_idhead (input_FILE, io_write, idh); -} - -int -sizeof_idhead () -{ - return io_idhead (0, io_size, 0); -} - -static int -io_size (FILE *ignore_FILE, void *ignore_addr, unsigned int size, int ignore_int) -{ - return size; -} - -static int -io_read (FILE *input_FILE, void *addr, unsigned int size, int is_int) -{ - if (is_int) - { - switch (size) - { - case 4: /* This must be a literal 4. Don't use sizeof (unsigned long)! */ - *(unsigned long *)addr = getc (input_FILE); - *(unsigned long *)addr += getc (input_FILE) << 010; - *(unsigned long *)addr += getc (input_FILE) << 020; - *(unsigned long *)addr += getc (input_FILE) << 030; - break; - case 2: - *(unsigned short *)addr = getc (input_FILE); - *(unsigned short *)addr += getc (input_FILE) << 010; - break; - case 1: - *(unsigned char *)addr = getc (input_FILE); - break; - default: - fprintf (stderr, "Unsupported size in io_write (): %d\n", size); - abort (); - } - } - else if (size > 1) - fread (addr, size, 1, input_FILE); - else - *(char *)addr = getc (input_FILE); - return size; -} - -static int -io_write (FILE *output_FILE, void *addr, unsigned int size, int is_int) -{ - if (is_int) - { - switch (size) - { - case 4: /* This must be a literal 4. Don't use sizeof (unsigned long)! 
*/ - putc (*(unsigned long *)addr, output_FILE); - putc (*(unsigned long *)addr >> 010, output_FILE); - putc (*(unsigned long *)addr >> 020, output_FILE); - putc (*(unsigned long *)addr >> 030, output_FILE); - break; - case 2: - putc (*(unsigned short *)addr, output_FILE); - putc (*(unsigned short *)addr >> 010, output_FILE); - break; - case 1: - putc (*(unsigned char *)addr, output_FILE); - break; - default: - fprintf (stderr, "Unsupported size in io_write (): %d\n", size); - abort (); - } - } - else if (size > 1) - fwrite (addr, size, 1, output_FILE); - else - putc (*(char *)addr, output_FILE); - return size; -} - -/* The sizes of the fields must be hard-coded. They aren't - necessarily the sizes of the struct members, because some - architectures don't have any way to declare 4-byte integers - (e.g., Cray) */ - -static int -io_idhead (FILE *fp, iof_t iof, struct idhead *idh) -{ - unsigned int size = 0; - unsigned char pad = 0; - if (fp) - fseek (fp, 0L, 0); - size += iof (fp, idh->idh_magic, 2, 0); - size += iof (fp, &pad, 1, 0); - size += iof (fp, &idh->idh_version, 1, 0); - size += iof (fp, &idh->idh_flags, 2, 1); - size += iof (fp, &idh->idh_links, 4, 1); - size += iof (fp, &idh->idh_files, 4, 1); - size += iof (fp, &idh->idh_tokens, 4, 1); - size += iof (fp, &idh->idh_buf_size, 4, 1); - size += iof (fp, &idh->idh_vec_size, 4, 1); - size += iof (fp, &idh->idh_args_offset, 4, 1); - size += iof (fp, &idh->idh_tokens_offset, 4, 1); - size += iof (fp, &idh->idh_end_offset, 4, 1); - return size; -} diff --git a/idfile.h b/idfile.h deleted file mode 100644 index be5b00e..0000000 --- a/idfile.h +++ /dev/null @@ -1,102 +0,0 @@ -/* idfile.h -- decls for ID file header and constituent file names - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _idfile_h_ -#define _idfile_h_ 1 - -#include <sys/types.h> -#include <stdio.h> -#include "hash.h" - -#define IDFILE "ID" - -struct idhead -{ - unsigned char idh_magic[2]; -#define IDH_MAGIC_0 ('I'|0x80) -#define IDH_MAGIC_1 ('D'|0x80) - unsigned char idh_version; -#define IDH_VERSION 3 - unsigned short idh_flags; -#define IDH_COUNTS 0x0001 /* include occurrence counts for each token */ -#define IDH_FOLLOW_SL 0x0002 /* follow symlinks to directories */ -#define IDH_COMMENTS 0x0004 /* include tokens found in comments */ -#define IDH_LOCALS 0x0008 /* include names of formal params & local vars */ -#define IDH_DECL_DEFN_USE 0x0100 /* include decl/defn/use info */ -#define IDH_L_R_VALUE 0x0200 /* include lvalue/rvalue info */ -#define IDH_CALL_ER_EE 0x0400 /* include caller/callee relationship info */ - unsigned long idh_links; /* total # of file name components */ - unsigned long idh_files; /* total # of constituent source files */ - unsigned long idh_tokens; /* total # of constituent tokens */ - /* idh_*_size: max buffer-sizes for ID file reading programs */ - unsigned long idh_buf_size; /* # of bytes in longest entry */ - unsigned long idh_vec_size; /* # of hits in longest entry */ - unsigned long idh_path_size; /* # of bytes in longest file name path */ - /* idh_*_offset: ID file offsets for start of various sections */ - long idh_args_offset; /* command-line options section */ - long idh_files_offset; /* constituent file & directory names section */ - long idh_tokens_offset; /* constituent tokens 
section */ - long idh_end_offset; /* end of tokens section */ - /* */ - struct hash_table ia_link_table; /* all file and dir name name links */ - struct arg_file **ia_file_order; /* sequence in ID file */ - struct arg_file **ia_scan_order; /* sequence in summaries */ -}; - -struct file_link -{ - struct file_link *fl_parent; - unsigned char fl_flags; -#define FL_IS_ARG 0x01 /* is an explicit command-line argument */ -#define FL_SYM_LINK 0x02 /* is a symlink (only used for dirs) */ -#define FL_TYPE_MASK 0x10 -# define FL_TYPE_DIR 0x00 -# define FL_TYPE_FILE 0x10 - char fl_name[1]; -}; - -struct arg_file -{ - struct file_link *af_name; - short af_old_index; /* order in extant ID file */ - short af_new_index; /* order in new ID file */ - short af_scan_index; /* order of scanning in summary */ -}; - -#if HAVE_LINK - -/* If the system supports filesystem links (e.g., any UN*X variant), - we should detect file name aliases. */ - -struct dev_ino -{ - dev_t di_dev; - ino_t di_ino; - struct file_link *di_file_link; -}; - -extern struct hash_table dev_ino_table; - -#endif - -FILE *init_id_file __P((char const *id_file, struct idhead *idh)); -int read_idhead __P((FILE *input_FILE, struct idhead *idh)); -int write_idhead __P((FILE *input_FILE, struct idhead *idh)); -int sizeof_idhead __P((void)); - -#endif /* not _idfile_h_ */ @@ -1,95 +0,0 @@ -/* static char copyright[] = "@(#)Copyright (c) 1986, Greg McGary"; - static char sccsid[] = "@(#)idx.c 1.2 86/10/17"; */ - -#include <stdio.h> -#include <string.h> - -#include <config.h> -#include "misc.h" -#include "filenames.h" -#include "scanners.h" - -void idxtract __P((char *path)); - -char const *program_name; - -static void -usage (void) -{ - fprintf (stderr, "\ -Usage: %s [(+|-)S<scanarg>] files\n\ - -S<lang>-<arg> Pass arg to <lang> scanner\n\ - -S.<suffix>=<lang> Scan files with .<suffix> as <lang>\n\ - -S<lang>? 
Print usage documentation for <lang>\n", - program_name); - - exit (1); -} - -int -main (int argc, char **argv) -{ - char *arg; - int op; - - program_name = basename ((argc--, *argv++)); - - init_scanners (); - - while (argc) - { - arg = (argc--, *argv++); - switch (op = *arg++) - { - case '-': - case '+': - break; - default: - (argc++, --argv); - goto argsdone; - } - switch (*arg++) - { - case 'S': - set_scan_args (op, arg); - break; - default: - usage (); - } - } -argsdone: - - if (argc == 0) - usage (); - - while (argc) - idxtract ((argc--, *argv++)); - - return 0; -} - -void -idxtract (char *file_name) -{ - char const *key; - FILE *source_FILE; - int flags; - char const *suffix; - char const *filter; - char const *lang_name; - get_token_t scanner; - - suffix = strrchr (file_name, '.'); - lang_name = get_lang_name (suffix); - scanner = get_scanner (lang_name); - if (scanner == NULL) - return; - source_FILE = open_source_FILE (file_name, filter = get_filter (suffix)); - if (source_FILE == NULL) - return; - - while ((key = (*scanner) (source_FILE, &flags)) != NULL) - puts (key); - - close_source_FILE (source_FILE, filter); -} @@ -1,235 +0,0 @@ -.TH IID 1 -.SH NAME -iid \- interactive query for ID database -.SH SYNOPSIS -.PP -.B iid -.RB [ \-a] -.RB [ \-c \^command] -.RB [ \-H] -.SH DESCRIPTION -This command provides an interactive query interface to the -.I ID -database. -.I Iid\^ -allows you to query an -.I ID -database in a fashion similar to using \fIDIALOG\fP. Any individual -query command results in a list of files that satisfy that query, -each set of files is retained by -.I iid -and assigned a set number. The sets may be combined with -.IR AND , -.I OR -and -.I NOT -operators to produce additional sets. The primitive operators that -produce sets are invocations of the -.I lid -or -.I aid -programs. -.SH OPTIONS -Normally -.I iid -runs interactively. Options may be used to run it in batch mode. 
-.TP 8 -.B \-a -Use the -.I aid -program as the default query program, normally -.I lid -is used. -.TP 8 -.B \-c -Accept a single command as an argument, run that command, and exit -.IR Iid . -.TP -.B \-H -Print a brief help message and exit. -.SH SUBCOMMANDS -The subcommands are used to carry on a dialog with -.I iid -after invoking the program. -.PP -Two basic query commands are available: -.B SS -and -.BR FILES . -The -.B SS -command shows the sets generated by a query, but does not display -the actual file names that satisfy the query. -The -.B FILES -command only displays the list of files, it does not show any -of the sets created during the query. -.PP -Queries consist of keywords and identifier strings. The keywords are: -.B and or not lid aid match -and -.B s<number> -where -.B s<number> -is a set number consisting of the letter -.B s -followed (with no space) by a decimal set number. -A clause of the form -.B lid <identifier list> -invokes -.I lid -with the -.B <identifier list> -as arguments and produces a set of files as a result. -Substituting -.B aid -for -.B lid -runs the -.I aid -program to generate the list of files. -As a shorthand notation for -.B lid <identifier> -you may simply use -.B <identifier>. -The -.B match -operator runs the standard system -.I ls -utility to produce a set of files. This allows sets to be -constructed based on the names of files (using wild cards) -rather than contents. -The -.B and or -and -.B not -operators can be used to combine sets in the obvious fashion. -If you need to pass any of the keywords as actual arguments to -programs, or if the search strings contain any shell escape -characters place the argument in quotes. -.PP -The -.B NOT -operator has highest precedence, followed by -.B AND -and -.B OR -in that order. Parenthesis may be used for grouping. -.PP -The remaining commands are: -.PP -.B BEGIN <directory> -accepts a directory name and switches to that directory. 
By changing -directories you control which -.I ID -database is searched. Changing directories automatically deletes -all the sets constructed so far. The -.B BEGIN -command may be abbreviated as -.BR B . -.PP -.B SETS -shows the description of all the sets created so far. Each set -description has the set number, the number of files in the set, -and a symbolic description of the query that created the set. -.PP -.B SHOW <set number> -runs a pager program, passing as arguments all the files in -the specified set. The pager program comes from the -.B $PAGER -environment variable. This command may be abbreviated -.BR P . -.PP -.B HELP -runs the pager on the help file. The commands -.B H -and -.B ? -also act as help commands. -.PP -.B OFF -exits the program. -.B Q -is short for -.BR OFF . -.PP -All commands and keywords are case insensitive, so that -.B SHOW ShOW -and -.B show -all work equally well. -.SH INTERFACE -Two forms of commands are provided for interface with arbitrary -programs. Any command that is not recognized as one -of the above built in -.I iid -commands, is assumed to be a program which, when run, will print -a list of file names. -.I Iid -runs the command as typed, and records the output as a new set -which may be combined with other sets in subsequent queries. -.PP -If the command starts with a -.BR !, -.I iid -strips off the leading -.B ! -and simply runs the command. Any output goes to stdout and -is not recorded as a set. -.PP -In both types of shell commands, any set numbers specified as -arguments are expanded into a list of file names before running -the command. -.SH EXAMPLE -.nf -.ft L -===> iid -iid> ss lid "^get" or lid "Arg$" - S0 14 lid -kmn "^get" - S1 3 lid -kmn "Arg$" - S2 15 (lid -kmn "^get") OR (lid -kmn "Arg$") -iid> f s1 -lid.c -paths.c -init.c -iid> off -.FT P -.fi -.EX off -.PP -In this example the -.B ss -command displays the sets it creates as it -does the parts of the query. 
In this case 3 sets are created, set S0 -has 14 files in it, set S1 has 3 files and the union of the two sets, -S2, has 15 files. A description of the query that created any given -set is kept along with the set and displayed when sets are printed. -.PP -The -.B f s1 -command lists the three files in set S1. -.PP -The -.B off -command terminates the example session. -.SH HINTS -The shell interface commands can be used to generate file sets by -running the -.I find -or -.I ls -utilities, or compiles of a selected group of files can be done -using the -.BR ! cc -command with a set number as the argument. -.BR ! lp -can be used to print a selected group of files. -.PP -This program interfaces nicely with -.I emacs -if you run the server program and specify the client program -as your $PAGER. -.SH SEE ALSO -mkid(1), -lid(1), -aid(1). @@ -1,2329 +0,0 @@ - -/* A Bison parser, made from ./iid.y with Bison version GNU Bison version 1.22 - */ - -#define YYBISON 1 /* Identify Bison output. */ - -#define SET 258 -#define ID 259 -#define SHELL_QUERY 260 -#define SHELL_COMMAND 261 -#define LID 262 -#define AID 263 -#define BEGIN 264 -#define SETS 265 -#define SS 266 -#define FILES 267 -#define SHOW 268 -#define HELP 269 -#define OFF 270 -#define MATCH 271 -#define OR 272 -#define AND 273 -#define NOT 274 - -#line 1 "./iid.y" - -/* iid.y -- interactive mkid query language - Copyright (C) 1991 Tom Horsley - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include <stdio.h> -#include <ctype.h> -#include <string.h> -#include <stdlib.h> -#include <unistd.h> -#include <getopt.h> - -#include <config.h> -#include "strxtra.h" -#include "obstack.h" -#include "xmalloc.h" - -FILE *popen (); - -#define obstack_chunk_alloc xmalloc -#define obstack_chunk_free free - -#if HAVE_ALLOCA - -#if HAVE_ALLOCA_H -#include <alloca.h> -#endif -#define TEMP_ALLOC(s) alloca(s) -#define TEMP_FREE(s) - -#else /* not HAVE_ALLOCA */ - -#define TEMP_ALLOC(s) malloc(s) -#define TEMP_FREE(s) free(s) - -#endif /* not HAVE_ALLOCA */ - -#define HASH_SIZE 947 /* size of hash table for file names */ -#define INIT_FILES 8000 /* start with bits for this many */ -#define INIT_SETSPACE 500 /* start with room for this many */ -#define MAXCMD 1024 /* input command buffer size */ - -#define MAX(a,b) (((a)<(b))?(b):(a)) -#define MIN(a,b) (((a)>(b))?(b):(a)) - -#ifndef PAGER -#define PAGER "pg" -#endif - -#define PROMPT "iid> " - -/* set_type is the struct defining a set of file names - * The file names are stored in a symbol table and assigned - * unique numbers. The set is a bit set of file numbers. - * One of these set structs is calloced for each new set - * constructed, the size allocated depends on the max file - * bit number. An array of pointers to sets are kept to - * represent the complete set of sets. 
- */ - -struct set_struct { - char * set_desc ; /* string describing the set */ - int set_num ; /* the set number */ - int set_size ; /* number of long words in set */ - unsigned long int set_tail ; /* set extended with these bits */ - unsigned long int set_data[1] ;/* the actual set data (calloced) */ -} ; -typedef struct set_struct set_type ; - -/* id_type is one element of an id_list - */ - -struct id_struct { - struct id_struct * next_id ; /* Linked list of IDs */ - char id [ 1 ] ; /* calloced data holding id string */ -} ; -typedef struct id_struct id_type ; - -/* id_list_type is used during parsing to build lists of - * identifiers that will eventually represent arguments - * to be passed to the database query programs. - */ - -struct id_list_struct { - int id_count ; /* count of IDs in the list */ - id_type * * end_ptr_ptr ;/* pointer to link word at end of list */ - id_type * id_list ; /* pointer to list of IDs */ -} ; -typedef struct id_list_struct id_list_type ; - -/* symtab_type is used to record file names in the symbol table. - */ -struct symtab_struct { - struct symtab_struct * hash_link ; /* list of files with same hash code */ - int mask_word ; /* word in bit vector */ - unsigned long mask_bit ; /* bit in word */ - char name [ 1 ] ; /* the file name */ -} ; -typedef struct symtab_struct symtab_type ; - -/* LidCommand is the command to run for a Lid_group. It is set - * to "lid -kmn" if explicitly preceeded by "lid", otherwise - * it is the default command which is determined by an option. - */ -char const * LidCommand ; - -/* DefaultCommand is the default command for a Lid_group. If - * the -a option is given to iid, it is set to use 'aid'. - */ -char const * DefaultCommand = "lid -kmn" ; - -/* FileList is a lexically ordered list of file symbol table - * pointers. It is dynamically expanded when necessary. - */ -symtab_type * * FileList = NULL ; - -/* FileSpace is the number of long ints in TheFiles array. 
- */ -int FileSpace = 0 ; - -/* HashTable is the symbol table used to store file names. Each - * new name installed is assigned the next consecutive file number. - */ -symtab_type * HashTable [ HASH_SIZE ] ; - -/* HelpSet is a dummy set containing only one bit set which corresponds - * to the help file name. Simply a cheesy way to maximize sharing of - * the code that runs the pager. - */ -set_type * HelpSet ; - -/* high_bit is a unsigned long with the most significant bit set. - */ -unsigned long high_bit ; - -/* ListSpace is the amount of space avail in the FileList. - */ -int ListSpace = 0 ; - -/* MaxCurFile - max word that has any bit currently set in the - * TheFiles array. - */ -int MaxCurFile = 0 ; - -/* NextFileNum is the file number that will be assigned to the next - * new file name seen when it is installed in the symtab. - */ -int NextFileNum = 0 ; - -/* NextMaskBit is the bit within the next mask word that will - * correspond to the next file added to the symbol table. - */ -unsigned long NextMaskBit ; - -/* NextMaskWord is the next word number to be assigned to a file - * bit mask entry. - */ -int NextMaskWord = 0 ; - -/* NextSetNum is the number that will be assigned to the next set - * created. Starts at 0 because I am a C programmer. - */ -int NextSetNum = 0 ; - -/* The PAGER program to run on a SHOW command. - */ -char Pager[MAXCMD] ; - -/* Prompt - the string to use for a prompt. - */ -char Prompt[MAXCMD] ; - -/* SetSpace is the number of pointers available in TheSets. TheSets - * is realloced when we run out of space. - */ -int SetSpace = 0 ; - -/* TheFiles is a bit set used to construct the initial set of files - * generated while running one of the subprograms. It is copied to - * the alloced set once we know how many bits are set. - */ -unsigned long * TheFiles = NULL ; - -/* TheSets is a dynamically allocated array of pointers pointing - * the sets that have been allocated. It represents the set of - * sets. 
- */ -set_type * * TheSets = NULL ; - -/* VerboseQuery controls the actions of the semantic routines during - * the process of a query. If TRUE the sets are described as they - * are constructed. - */ -int VerboseQuery ; - -char const *program_name ; - -int yyerror __P(( char const * s )) ; -void ScanInit __P(( char * line )) ; -int yylex __P(( void )) ; -int ArgListSize __P(( id_list_type * idlp )) ; -int SetListSize __P(( set_type * sp )) ; -void FlushFiles __P(( void )) ; -void fatal __P(( char const * s )) ; -int CountBits __P(( set_type * sp )) ; -void OneDescription __P(( set_type * sp )) ; -void DescribeSets __P(( void )) ; -id_list_type * SetList __P(( id_list_type * idlp , set_type * sp )) ; -void PrintSet __P(( set_type * sp )) ; -void FlushSets __P(( void )) ; -id_list_type * InitList __P(( void )) ; -id_list_type * ExtendList __P(( id_list_type * idlp , id_type * idp )) ; -void InitIid __P(( void )) ; -symtab_type * InstallFile __P(( char const * fp )) ; -void RunPager __P(( char * pp , set_type * sp )) ; -void AddSet __P(( set_type * sp )) ; -set_type * RunProg __P(( char const * pp , id_list_type * idlp )) ; -void SetDirectory __P(( id_type * dir )) ; -set_type * SetIntersect __P(( set_type * sp1 , set_type * sp2 )) ; -set_type * SetUnion __P(( set_type * sp1 , set_type * sp2 )) ; -set_type * SetInverse __P(( set_type * sp )) ; -void RunShell __P(( char * pp , id_list_type * idlp )) ; - - -#line 240 "./iid.y" -typedef union { - set_type * setdef ; - id_type * strdef ; - id_list_type * listdef ; -} YYSTYPE; - -#ifndef YYLTYPE -typedef - struct yyltype - { - int timestamp; - int first_line; - int first_column; - int last_line; - int last_column; - char *text; - } - yyltype; - -#define YYLTYPE yyltype -#endif - -#include <stdio.h> - -#ifndef __cplusplus -#ifndef __STDC__ -#define const -#endif -#endif - - - -#define YYFINAL 46 -#define YYFLAG -32768 -#define YYNTBASE 22 - -#define YYTRANSLATE(x) ((unsigned)(x) <= 274 ? 
yytranslate[x] : 31) - -static const char yytranslate[] = { 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 20, - 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19 -}; - -#if YYDEBUG != 0 -static const short yyprhs[] = { 0, - 0, 3, 6, 9, 12, 14, 16, 18, 21, 24, - 26, 28, 30, 34, 38, 41, 43, 45, 47, 50, - 54, 56, 59, 62, 64, 66, 69, 72, 74 -}; - -static const short yyrhs[] = { 9, - 4, 0, 23, 25, 0, 24, 25, 0, 13, 3, - 0, 10, 0, 14, 0, 15, 0, 5, 29, 0, - 6, 29, 0, 11, 0, 12, 0, 26, 0, 25, - 18, 25, 0, 25, 17, 25, 0, 19, 25, 0, - 3, 0, 27, 0, 28, 0, 16, 30, 0, 20, - 25, 21, 0, 4, 0, 7, 30, 0, 8, 30, - 0, 4, 0, 3, 0, 29, 4, 0, 29, 3, - 0, 4, 0, 30, 4, 0 -}; - -#endif - -#if YYDEBUG != 0 -static const short yyrline[] = { 0, - 266, 274, 275, 281, 287, 293, 299, 303, 310, 319, - 328, 337, 344, 353, 362, 373, 380, 389, 398, 406, - 414, 423, 432, 441, 449, 456, 462, 470, 478 -}; - -static const char * const yytname[] = { "$","error","$illegal.","SET","ID", -"SHELL_QUERY","SHELL_COMMAND","LID","AID","BEGIN","SETS","SS","FILES","SHOW", -"HELP","OFF","MATCH","OR","AND","NOT","'('","')'","Command","Set_query","File_query", -"Query","Primitive","Lid_group","Aid_group","Command_list","Id_list","" -}; 
-#endif - -static const short yyr1[] = { 0, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, - 24, 25, 25, 25, 25, 26, 26, 26, 26, 26, - 27, 27, 28, 29, 29, 29, 29, 30, 30 -}; - -static const short yyr2[] = { 0, - 2, 2, 2, 2, 1, 1, 1, 2, 2, 1, - 1, 1, 3, 3, 2, 1, 1, 1, 2, 3, - 1, 2, 2, 1, 1, 2, 2, 1, 2 -}; - -static const short yydefact[] = { 0, - 0, 0, 0, 5, 10, 11, 0, 6, 7, 0, - 0, 25, 24, 8, 9, 1, 4, 16, 21, 0, - 0, 0, 0, 0, 2, 12, 17, 18, 3, 27, - 26, 28, 22, 23, 19, 15, 0, 0, 0, 29, - 20, 14, 13, 0, 0, 0 -}; - -static const short yydefgoto[] = { 44, - 10, 11, 25, 26, 27, 28, 14, 33 -}; - -static const short yypact[] = { 10, - 5, 5, 22,-32768,-32768,-32768, 28,-32768,-32768, -2, - -2,-32768,-32768, 7, 7,-32768,-32768,-32768,-32768, 30, - 30, 30, -2, -2, 12,-32768,-32768,-32768, 12,-32768, --32768,-32768, 31, 31, 31,-32768, -14, -2, -2,-32768, --32768, 18,-32768, 37, 38,-32768 -}; - -static const short yypgoto[] = {-32768, --32768,-32768, -11,-32768,-32768,-32768, 39, 11 -}; - - -#define YYLAST 41 - - -static const short yytable[] = { 29, - 18, 19, 38, 39, 20, 21, 41, 12, 13, 30, - 31, 36, 37, 22, 1, 2, 23, 24, 3, 4, - 5, 6, 7, 8, 9, 16, 42, 43, 38, 39, - 17, 34, 35, 32, 40, 39, 45, 46, 0, 0, - 15 -}; - -static const short yycheck[] = { 11, - 3, 4, 17, 18, 7, 8, 21, 3, 4, 3, - 4, 23, 24, 16, 5, 6, 19, 20, 9, 10, - 11, 12, 13, 14, 15, 4, 38, 39, 17, 18, - 3, 21, 22, 4, 4, 18, 0, 0, -1, -1, - 2 -}; -/* -*-C-*- Note some compilers choke on comments on `#line' lines. */ -#line 3 "/usr/lib/bison.simple" - -/* Skeleton output parser for bison, - Copyright (C) 1984, 1989, 1990 Bob Corbett and Richard Stallman - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - - -#ifndef alloca -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not GNU C. */ -#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || defined (__sgi) -#include <alloca.h> -#else /* not sparc */ -#if defined (MSDOS) && !defined (__TURBOC__) -#include <malloc.h> -#else /* not MSDOS, or __TURBOC__ */ -#if defined(_AIX) -#include <malloc.h> - #pragma alloca -#else /* not MSDOS, __TURBOC__, or _AIX */ -#ifdef __hpux -#ifdef __cplusplus -extern "C" { -void *alloca (unsigned int); -}; -#else /* not __cplusplus */ -void *alloca (); -#endif /* not __cplusplus */ -#endif /* __hpux */ -#endif /* not _AIX */ -#endif /* not MSDOS, or __TURBOC__ */ -#endif /* not sparc. */ -#endif /* not GNU C. */ -#endif /* alloca not defined. */ - -/* This is the parser code that is written into each bison parser - when the %semantic_parser declaration is not specified in the grammar. - It was written by Richard Stallman by simplifying the hairy parser - used when %semantic_parser is specified. */ - -/* Note: there must be only one dollar sign in this file. - It is replaced by the list of actions, each action - as one case of the switch. */ - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY -2 -#define YYEOF 0 -#define YYACCEPT return(0) -#define YYABORT return(1) -#define YYERROR goto yyerrlab1 -/* Like YYERROR except do call yyerror. - This remains here temporarily to ease the - transition to the new meaning of YYERROR, for GCC. 
- Once GCC version 2 has supplanted version 1, this can go. */ -#define YYFAIL goto yyerrlab -#define YYRECOVERING() (!!yyerrstatus) -#define YYBACKUP(token, value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { yychar = (token), yylval = (value); \ - yychar1 = YYTRANSLATE (yychar); \ - YYPOPSTACK; \ - goto yybackup; \ - } \ - else \ - { yyerror ("syntax error: cannot back up"); YYERROR; } \ -while (0) - -#define YYTERROR 1 -#define YYERRCODE 256 - -#ifndef YYLEX -#ifndef YYPURE -#define YYLEX yylex() -#else -#ifdef YYLSP_NEEDED -#define YYLEX yylex(&yylval, &yylloc) -#else -#define YYLEX yylex(&yylval) -#endif -#endif -#endif - -/* If nonreentrant, generate the variables here */ - -#ifndef YYPURE - -int yychar; /* the lookahead symbol */ -YYSTYPE yylval; /* the semantic value of the */ - /* lookahead symbol */ - -#ifdef YYLSP_NEEDED -YYLTYPE yylloc; /* location data for the lookahead */ - /* symbol */ -#endif - -int yynerrs; /* number of parse errors so far */ -#endif /* not YYPURE */ - -#if YYDEBUG != 0 -int yydebug; /* nonzero means print parse trace */ -/* Since this is uninitialized, it does not stop multiple parsers - from coexisting. */ -int yydebug_reducing = 0; -#endif - -/* YYINITDEPTH indicates the initial size of the parser's stacks */ - -#ifndef YYINITDEPTH -#define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH is the maximum size the stacks can grow to - (effective only if the built-in stack extension method is used). */ - -#if YYMAXDEPTH == 0 -#undef YYMAXDEPTH -#endif - -#ifndef YYMAXDEPTH -#define YYMAXDEPTH 10000 -#endif - -/* Prevent warning if -Wstrict-prototypes. */ -#ifdef __GNUC__ -int yyparse (void); -#endif - -#if __GNUC__ > 1 /* GNU C and GNU C++ define this. */ -#define __yy_bcopy(FROM,TO,COUNT) __builtin_memcpy(TO,FROM,COUNT) -#else /* not GNU C or C++ */ -#ifndef __cplusplus - -/* This is the most reliable way to avoid incompatibilities - in available built-in functions on various systems. 
*/ -static void -__yy_bcopy (from, to, count) - char *from; - char *to; - int count; -{ - register char *f = from; - register char *t = to; - register int i = count; - - while (i-- > 0) - *t++ = *f++; -} - -#else /* __cplusplus */ - -/* This is the most reliable way to avoid incompatibilities - in available built-in functions on various systems. */ -static void -__yy_bcopy (char *from, char *to, int count) -{ - register char *f = from; - register char *t = to; - register int i = count; - - while (i-- > 0) - *t++ = *f++; -} - -#endif -#endif - -#line 185 "/usr/lib/bison.simple" -int -yyparse() -{ - register int yystate; - register int yyn; - register short *yyssp; - register YYSTYPE *yyvsp; - int yyerrstatus; /* number of tokens to shift before error messages enabled */ - int yychar1 = 0; /* lookahead token as an internal (translated) token number */ - - short yyssa[YYINITDEPTH]; /* the state stack */ - YYSTYPE yyvsa[YYINITDEPTH]; /* the semantic value stack */ - - short *yyss = yyssa; /* refer to the stacks thru separate pointers */ - YYSTYPE *yyvs = yyvsa; /* to allow yyoverflow to reallocate them elsewhere */ - -#ifdef YYLSP_NEEDED - YYLTYPE yylsa[YYINITDEPTH]; /* the location stack */ - YYLTYPE *yyls = yylsa; - YYLTYPE *yylsp; - -#define YYPOPSTACK (yyvsp--, yyssp--, yylsp--) -#else -#define YYPOPSTACK (yyvsp--, yyssp--) -#endif - - int yystacksize = YYINITDEPTH; - -#ifdef YYPURE - int yychar; - YYSTYPE yylval; - int yynerrs; -#ifdef YYLSP_NEEDED - YYLTYPE yylloc; -#endif -#endif - - YYSTYPE yyval; /* the variable used to return */ - /* semantic values from the action */ - /* routines */ - - int yylen; - -#if YYDEBUG != 0 - if (yydebug) -#ifndef __cplusplus - fprintf(stderr, "Starting parse\n"); -#else /* __cplusplus */ - clog << "Starting parse" << endl; -#endif /* __cplusplus */ -#endif - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. 
- Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ - - yyssp = yyss - 1; - yyvsp = yyvs; -#ifdef YYLSP_NEEDED - yylsp = yyls; -#endif - -/* Push a new state, which is found in yystate . */ -/* In all cases, when you get here, the value and location stacks - have just been pushed. so pushing a state here evens the stacks. */ -yynewstate: - - *++yyssp = yystate; - - if (yyssp >= yyss + yystacksize - 1) - { - /* Give user a chance to reallocate the stack */ - /* Use copies of these so that the &'s don't force the real ones into memory. */ - YYSTYPE *yyvs1 = yyvs; - short *yyss1 = yyss; -#ifdef YYLSP_NEEDED - YYLTYPE *yyls1 = yyls; -#endif - - /* Get the current used size of the three stacks, in elements. */ - int size = yyssp - yyss + 1; - -#ifdef yyoverflow - /* Each stack pointer address is followed by the size of - the data in use in that stack, in bytes. */ -#ifdef YYLSP_NEEDED - /* This used to be a conditional around just the two extra args, - but that might be undefined if yyoverflow is a macro. */ - yyoverflow("parser stack overflow", - &yyss1, size * sizeof (*yyssp), - &yyvs1, size * sizeof (*yyvsp), - &yyls1, size * sizeof (*yylsp), - &yystacksize); -#else - yyoverflow("parser stack overflow", - &yyss1, size * sizeof (*yyssp), - &yyvs1, size * sizeof (*yyvsp), - &yystacksize); -#endif - - yyss = yyss1; yyvs = yyvs1; -#ifdef YYLSP_NEEDED - yyls = yyls1; -#endif -#else /* no yyoverflow */ - /* Extend the stack our own way. 
*/ - if (yystacksize >= YYMAXDEPTH) - { - yyerror("parser stack overflow"); - return 2; - } - yystacksize *= 2; - if (yystacksize > YYMAXDEPTH) - yystacksize = YYMAXDEPTH; - yyss = (short *) alloca (yystacksize * sizeof (*yyssp)); - __yy_bcopy ((char *)yyss1, (char *)yyss, size * sizeof (*yyssp)); - yyvs = (YYSTYPE *) alloca (yystacksize * sizeof (*yyvsp)); - __yy_bcopy ((char *)yyvs1, (char *)yyvs, size * sizeof (*yyvsp)); -#ifdef YYLSP_NEEDED - yyls = (YYLTYPE *) alloca (yystacksize * sizeof (*yylsp)); - __yy_bcopy ((char *)yyls1, (char *)yyls, size * sizeof (*yylsp)); -#endif -#endif /* no yyoverflow */ - - yyssp = yyss + size - 1; - yyvsp = yyvs + size - 1; -#ifdef YYLSP_NEEDED - yylsp = yyls + size - 1; -#endif - -#if YYDEBUG != 0 - if (yydebug >= 3) -#ifndef __cplusplus - fprintf(stderr, "Stack size increased to %d\n", yystacksize); -#else /* __cplusplus */ - clog << "Stack size increased to " << yystacksize << endl; -#endif /* __cplusplus */ -#endif - - if (yyssp >= yyss + yystacksize - 1) - YYABORT; - } - -#if YYDEBUG != 0 - if (yydebug >= 3) -#ifndef __cplusplus - fprintf(stderr, "Entering state %d\n", yystate); -#else /* __cplusplus */ - clog << "Entering state " << yystate << endl; -#endif /* __cplusplus */ -#endif - - goto yybackup; - yybackup: - -/* Do appropriate processing given the current state. */ -/* Read a lookahead token if we need one and don't already have one. */ -/* yyresume: */ - - /* First try to decide what to do without reference to lookahead token. */ - - yyn = yypact[yystate]; - if (yyn == YYFLAG) - goto yydefault; - - /* Not known => get a lookahead token if don't already have one. */ - - /* yychar is either YYEMPTY or YYEOF - or a valid token in external form. 
*/ - - if (yychar == YYEMPTY) - { -#if YYDEBUG != 0 - if (yydebug >= 3) -#ifndef __cplusplus - fprintf(stderr, "Reading a token: "); -#else /* __cplusplus */ - clog << "Reading a token: "; -#endif /* __cplusplus */ -#endif - yychar = YYLEX; - } - - /* Convert token to internal form (in yychar1) for indexing tables with */ - - if (yychar <= 0) /* This means end of input. */ - { - yychar1 = 0; - yychar = YYEOF; /* Don't call YYLEX any more */ - -#if YYDEBUG != 0 - if (yydebug) -#ifndef __cplusplus - fprintf(stderr, "Now at end of input.\n"); -#else /* __cplusplus */ - clog << "Now at end of input." << endl; -#endif /* __cplusplus */ -#endif - } - else - { - yychar1 = YYTRANSLATE(yychar); - -#if YYDEBUG != 0 - if (yydebug >= 3) - { -#ifndef __cplusplus - fprintf (stderr, "Next token is %d (%s", yychar, yytname[yychar1]); -#else /* __cplusplus */ - clog << "Next token is " << yychar << " (" << yytname[yychar1]; -#endif /* __cplusplus */ -#ifdef YYPRINT -#ifndef __cplusplus - YYPRINT (stderr, yychar, yylval); -#else /* __cplusplus */ - YYPRINT (yychar, yylval); -#endif /* __cplusplus */ -#endif -#ifndef __cplusplus - fprintf (stderr, ")\n"); -#else /* __cplusplus */ - clog << ')' << endl; -#endif /* __cplusplus */ - } -#endif - } - - yyn += yychar1; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) - goto yydefault; - - yyn = yytable[yyn]; - - /* yyn is what to do for this token type in this state. - Negative => reduce, -yyn is rule number. - Positive => shift, yyn is new state. - New state is final state => don't bother to shift, - just return success. - 0, or most negative number => error. */ - - if (yyn < 0) - { - if (yyn == YYFLAG) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - else if (yyn == 0) - goto yyerrlab; - - if (yyn == YYFINAL) - YYACCEPT; - - /* Shift the lookahead token. 
*/ - -#if YYDEBUG != 0 - if (yydebug) - { - if (yydebug_reducing) - { -#ifndef __cplusplus - fprintf(stderr, "\nShift:"); -#else /* __cplusplus */ - clog << endl << "Shift:"; -#endif /* __cplusplus */ - yydebug_reducing = 0; - } - if (yydebug >= 2) -#ifndef __cplusplus - fprintf (stderr, "Shifting token %d: %s", yychar, yytname[yychar1]); -#else /* __cplusplus */ - clog << "Shifting token " << yychar << ": " << yytname[yychar1]; -#endif /* __cplusplus */ - else -#ifndef __cplusplus - fprintf (stderr, " %s", yytname[yychar1]); -#else /* __cplusplus */ - clog << ' ' << yytname[yychar1]; -#endif /* __cplusplus */ -#ifdef YYPRINT -#ifndef __cplusplus - YYPRINT (stderr, yychar, yylval); -#else /* __cplusplus */ - YYPRINT (yychar, yylval); -#endif /* __cplusplus */ -#endif - if (yydebug >= 2) -#ifndef __cplusplus - fputc ('\n', stderr); -#else /* __cplusplus */ - clog << endl; -#endif /* __cplusplus */ - } -#endif - - /* Discard the token being shifted unless it is eof. */ - if (yychar != YYEOF) - yychar = YYEMPTY; - - *++yyvsp = yylval; -#ifdef YYLSP_NEEDED - *++yylsp = yylloc; -#endif - - /* count tokens shifted since error; after three, turn off error status. */ - if (yyerrstatus) yyerrstatus--; - - yystate = yyn; - goto yynewstate; - -/* Do the default action for the current state. */ -yydefault: - - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - -/* Do a reduction. yyn is the number of a rule to reduce with. 
*/ -yyreduce: - yylen = yyr2[yyn]; - if (yylen > 0) - yyval = yyvsp[1-yylen]; /* implement default value of the action */ - -#if YYDEBUG != 0 - if (yydebug) - { - int i; - if (!yydebug_reducing) - { -#ifndef __cplusplus - fputc('\n', stderr); -#else /* __cplusplus */ - clog << endl; -#endif /* __cplusplus */ - yydebug_reducing = 1; - } - if (yydebug >= 2) -#ifndef __cplusplus - fprintf (stderr, "Reducing via rule %d (line %d): ", yyn, yyrline[yyn]); -#else /* __cplusplus */ - clog << "Reducing via rule " << yyn << " (line " << yyrline[yyn] << " ): "; -#endif /* __cplusplus */ - else -#ifndef YYFILE -#define YYFILE "<parser>" -#endif -#ifndef __cplusplus - fprintf (stderr, YYFILE ":%d: ", yyrline[yyn]); -#else /* __cplusplus */ - clog << YYFILE ":" << yyrline[yyn] << ": "; -#endif /* __cplusplus */ - - /* Print the symbols being reduced, and their result. */ -#ifdef __cplusplus - clog << yytname[yyr1[yyn]] << " <-"; -#endif /* __cplusplus */ - for (i = yyprhs[yyn]; yyrhs[i] > 0; i++) -#ifndef __cplusplus - fprintf (stderr, "%s ", yytname[yyrhs[i]]); - fprintf (stderr, "-> %s\n", yytname[yyr1[yyn]]); -#else /* __cplusplus */ - clog << ' ' << yytname[yyrhs[i]]; - clog << endl; -#endif /* __cplusplus */ - } -#endif - - - switch (yyn) { - -case 1: -#line 268 "./iid.y" -{ - /* cd to the directory specified as argument, flush sets */ - - SetDirectory(yyvsp[0]. strdef ) ; - FlushSets() ; - ; - break;} -case 3: -#line 276 "./iid.y" -{ - /* print the list of files resulting from Query */ - - PrintSet(yyvsp[0]. setdef ) ; - ; - break;} -case 4: -#line 282 "./iid.y" -{ - /* run PAGER on the list of files in SET */ - - RunPager(Pager, yyvsp[0]. 
setdef ) ; - ; - break;} -case 5: -#line 288 "./iid.y" -{ - /* describe sets created so far */ - - DescribeSets() ; - ; - break;} -case 6: -#line 294 "./iid.y" -{ - /* run PAGER on the help file */ - - RunPager(Pager, HelpSet) ; - ; - break;} -case 7: -#line 300 "./iid.y" -{ - exit(0) ; - ; - break;} -case 8: -#line 304 "./iid.y" -{ - /* run the shell command and eat the results as a file set */ - - OneDescription(RunProg(yyvsp[-1]. strdef ->id, yyvsp[0]. listdef )) ; - free(yyvsp[-1]. strdef ) ; - ; - break;} -case 9: -#line 311 "./iid.y" -{ - /* run the shell command */ - - RunShell(yyvsp[-1]. strdef ->id, yyvsp[0]. listdef ) ; - free(yyvsp[-1]. strdef ) ; - ; - break;} -case 10: -#line 321 "./iid.y" -{ - /* Turn on verbose query flag */ - - VerboseQuery = 1 ; - ; - break;} -case 11: -#line 330 "./iid.y" -{ - /* Turn off verbose query flag */ - - VerboseQuery = 0 ; - ; - break;} -case 12: -#line 339 "./iid.y" -{ - /* value of query is set associated with primitive */ - - yyval. setdef = yyvsp[0]. setdef ; - ; - break;} -case 13: -#line 345 "./iid.y" -{ - /* value of query is intersection of the two query sets */ - - yyval. setdef = SetIntersect(yyvsp[-2]. setdef , yyvsp[0]. setdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 14: -#line 354 "./iid.y" -{ - /* value of query is union of the two query sets */ - - yyval. setdef = SetUnion(yyvsp[-2]. setdef , yyvsp[0]. setdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 15: -#line 363 "./iid.y" -{ - /* value of query is inverse of other query */ - - yyval. setdef = SetInverse(yyvsp[0]. setdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 16: -#line 375 "./iid.y" -{ - /* Value of primitive is value of recorded set */ - - yyval. setdef = yyvsp[0]. setdef ; - ; - break;} -case 17: -#line 381 "./iid.y" -{ - /* Value of primitive is obtained by running an lid query */ - - yyval. 
setdef = RunProg(LidCommand, yyvsp[0]. listdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 18: -#line 390 "./iid.y" -{ - /* Value of primitive is obtained by running an aid query */ - - yyval. setdef = RunProg("aid -kmn", yyvsp[0]. listdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 19: -#line 399 "./iid.y" -{ - /* Match names from database against pattern */ - yyval. setdef = RunProg("pid -kmn", yyvsp[0]. listdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 20: -#line 407 "./iid.y" -{ - /* value of primitive is value of query */ - - yyval. setdef = yyvsp[-1]. setdef ; - ; - break;} -case 21: -#line 416 "./iid.y" -{ - /* make arg list holding single ID */ - - yyval. listdef = InitList() ; - yyval. listdef = ExtendList(yyval. listdef , yyvsp[0]. strdef ) ; - LidCommand = DefaultCommand ; - ; - break;} -case 22: -#line 424 "./iid.y" -{ - /* arg list is Id_list */ - - yyval. listdef = yyvsp[0]. listdef ; - LidCommand = "lid -kmn" ; - ; - break;} -case 23: -#line 434 "./iid.y" -{ - /* arg list is Id_list */ - - yyval. listdef = yyvsp[0]. listdef ; - ; - break;} -case 24: -#line 443 "./iid.y" -{ - /* make arg list holding single ID */ - - yyval. listdef = InitList() ; - yyval. listdef = ExtendList(yyval. listdef , yyvsp[0]. strdef ) ; - ; - break;} -case 25: -#line 450 "./iid.y" -{ - /* make arg list holding names from set */ - - yyval. listdef = InitList() ; - yyval. listdef = SetList(yyval. listdef , yyvsp[0]. setdef ) ; - ; - break;} -case 26: -#line 457 "./iid.y" -{ - /* extend arg list with additional ID */ - - yyval. listdef = ExtendList(yyvsp[-1]. listdef , yyvsp[0]. strdef ) ; - ; - break;} -case 27: -#line 463 "./iid.y" -{ - /* extend arg list with additional file names */ - - yyval. listdef = SetList(yyvsp[-1]. listdef , yyvsp[0]. setdef ) ; - ; - break;} -case 28: -#line 472 "./iid.y" -{ - /* make arg list holding single ID */ - - yyval. 
listdef = InitList() ; - yyval. listdef = ExtendList(yyval. listdef , yyvsp[0]. strdef ) ; - ; - break;} -case 29: -#line 479 "./iid.y" -{ - /* extend arg list with additional ID */ - - yyval. listdef = ExtendList(yyvsp[-1]. listdef , yyvsp[0]. strdef ) ; - ; - break;} -} - /* the action file gets copied in in place of this dollarsign */ -#line 557 "/usr/lib/bison.simple" - - yyvsp -= yylen; - yyssp -= yylen; -#ifdef YYLSP_NEEDED - yylsp -= yylen; -#endif - -#if YYDEBUG != 0 - if (yydebug >= 3) - { - short *ssp1 = yyss - 1; -#ifndef __cplusplus - fprintf (stderr, "state stack now"); -#else /* __cplusplus */ - clog << "state stack now"; -#endif /* __cplusplus */ - while (ssp1 != yyssp) -#ifndef __cplusplus - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); -#else /* __cplusplus */ - clog << ' ' << *++ssp1; - clog << endl; -#endif /* __cplusplus */ - } -#endif - - *++yyvsp = yyval; - -#ifdef YYLSP_NEEDED - yylsp++; - if (yylen == 0) - { - yylsp->first_line = yylloc.first_line; - yylsp->first_column = yylloc.first_column; - yylsp->last_line = (yylsp-1)->last_line; - yylsp->last_column = (yylsp-1)->last_column; - yylsp->text = 0; - } - else - { - yylsp->last_line = (yylsp+yylen-1)->last_line; - yylsp->last_column = (yylsp+yylen-1)->last_column; - } -#endif - - /* Now "shift" the result of the reduction. - Determine what state that goes to, - based on the state we popped back to - and the rule number reduced by. */ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTBASE] + *yyssp; - if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTBASE]; - - goto yynewstate; - -yyerrlab: /* here on detecting error */ - - if (! yyerrstatus) - /* If not already recovering from an error, report this error. 
*/ - { - ++yynerrs; - -#ifdef YYERROR_VERBOSE - yyn = yypact[yystate]; - - if (yyn > YYFLAG && yyn < YYLAST) - { - int size = 0; - char *msg; - int x, count; - - count = 0; - /* Start X at -yyn if nec to avoid negative indexes in yycheck. */ - for (x = (yyn < 0 ? -yyn : 0); - x < (sizeof(yytname) / sizeof(char *)); x++) - if (yycheck[x + yyn] == x) - size += strlen(yytname[x]) + 15, count++; - msg = (char *) malloc(size + 15); - if (msg != 0) - { - strcpy(msg, "parse error"); - - if (count < 5) - { - count = 0; - for (x = (yyn < 0 ? -yyn : 0); - x < (sizeof(yytname) / sizeof(char *)); x++) - if (yycheck[x + yyn] == x) - { - strcat(msg, count == 0 ? ", expecting `" : " or `"); - strcat(msg, yytname[x]); - strcat(msg, "'"); - count++; - } - } - yyerror(msg); - free(msg); - } - else - yyerror ("parse error; also virtual memory exceeded"); - } - else -#endif /* YYERROR_VERBOSE */ - yyerror("parse error"); - } - - goto yyerrlab1; -yyerrlab1: /* here on error raised explicitly by an action */ - - if (yyerrstatus == 3) - { - /* if just tried and failed to reuse lookahead token after an error, discard it. */ - - /* return failure if at end of input */ - if (yychar == YYEOF) - YYABORT; - -#if YYDEBUG != 0 - if (yydebug) -#ifndef __cplusplus - fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]); -#else /* __cplusplus */ - clog << "Discarding token " << yychar << " (" << yytname[yychar1] << ")." << endl; -#endif /* __cplusplus */ -#endif - - yychar = YYEMPTY; - } - - /* Else will try to reuse lookahead token - after shifting the error token. */ - - yyerrstatus = 3; /* Each real token shifted decrements this */ - - goto yyerrhandle; - -yyerrdefault: /* current state does not do anything special for the error token. */ - -#if 0 - /* This is wrong; only states that explicitly want error tokens - should shift them. */ - yyn = yydefact[yystate]; /* If its default is to accept any token, ok. 
Otherwise pop it.*/ - if (yyn) goto yydefault; -#endif - -yyerrpop: /* pop the current state because it cannot handle the error token */ - - if (yyssp == yyss) YYABORT; - yyvsp--; - yystate = *--yyssp; -#ifdef YYLSP_NEEDED - yylsp--; -#endif - -#if YYDEBUG != 0 - if (yydebug) - { - short *ssp1 = yyss - 1; -#ifndef __cplusplus - fprintf (stderr, "Error: state stack now"); -#else /* __cplusplus */ - clog << "Error: state stack now"; -#endif /* __cplusplus */ - while (ssp1 != yyssp) -#ifndef __cplusplus - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); -#else /* __cplusplus */ - clog << ' ' << *++ssp1; - clog << endl; -#endif /* __cplusplus */ - } -#endif - -yyerrhandle: - - yyn = yypact[yystate]; - if (yyn == YYFLAG) - goto yyerrdefault; - - yyn += YYTERROR; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR) - goto yyerrdefault; - - yyn = yytable[yyn]; - if (yyn < 0) - { - if (yyn == YYFLAG) - goto yyerrpop; - yyn = -yyn; - goto yyreduce; - } - else if (yyn == 0) - goto yyerrpop; - - if (yyn == YYFINAL) - YYACCEPT; - -#if YYDEBUG != 0 - if (yydebug) -#ifndef __cplusplus - fprintf(stderr, "Shifting error token, "); -#else /* __cplusplus */ - clog << "Shifting error token, "; -#endif /* __cplusplus */ -#endif - - *++yyvsp = yylval; -#ifdef YYLSP_NEEDED - *++yylsp = yylloc; -#endif - - yystate = yyn; - goto yynewstate; -} -#line 486 "./iid.y" - - -/* ScanLine - a global variable holding a pointer to the current - * command being scanned. - */ -char * ScanLine ; - -/* ScanPtr - a global pointer to the current scan position in ScanLine. - */ -char * ScanPtr ; - -/* yytext - buffer holding the token. - */ -char yytext [ MAXCMD ] ; - -/* yyerror - process syntax errors. - */ -int -yyerror( char const * s ) -{ - if (*ScanPtr == '\0') { - fprintf(stderr,"Syntax error near end of command.\n") ; - } else { - fprintf(stderr,"Syntax error on or before %s\n",ScanPtr) ; - } - return(0) ; -} - -/* ScanInit - initialize the yylex routine for the new line of input. 
- * Basically just initializes the global variables that hold the char - * ptrs the scanner uses. - */ -void -ScanInit( char * line ) -{ - /* skip the leading white space - the yylex routine is sensitive - * to keywords in the first position on the command line. - */ - - while (isspace(*line)) ++line ; - ScanLine = line ; - ScanPtr = line ; -} - -/* yylex - the scanner for iid. Basically a kludge ad-hoc piece of junk, - * but what the heck, if it works... - * - * Mostly just scans for non white space strings and returns ID for them. - * Does check especially for '(' and ')'. Just before returning ID it - * checks for command names if it is the first token on line or - * AND, OR, LID, AID if it is in the middle of a line. - */ -int -yylex( void ) -{ - char * bp ; - char c ; - int code = ID ; - char * dp ; - char * sp ; - int val ; - - bp = ScanPtr ; - while (isspace(*bp)) ++bp ; - sp = bp ; - c = *sp++ ; - if ((c == '(') || (c == ')') || (c == '\0')) { - ScanPtr = sp ; - if (c == '\0') { - --ScanPtr ; - } - return(c) ; - } else { - dp = yytext ; - while (! 
((c == '(') || (c == ')') || (c == '\0') || isspace(c))) { - *dp++ = c ; - c = *sp++ ; - } - *dp++ = '\0' ; - ScanPtr = sp - 1 ; - if (bp == ScanLine) { - - /* first token on line, check for command names */ - - if (strcaseequ(yytext, "SS")) return(SS) ; - if (strcaseequ(yytext, "FILES")) return(FILES) ; - if (strcaseequ(yytext, "F")) return(FILES) ; - if (strcaseequ(yytext, "HELP")) return(HELP) ; - if (strcaseequ(yytext, "H")) return(HELP) ; - if (strcaseequ(yytext, "?")) return(HELP) ; - if (strcaseequ(yytext, "BEGIN")) return(BEGIN) ; - if (strcaseequ(yytext, "B")) return(BEGIN) ; - if (strcaseequ(yytext, "SETS")) return(SETS) ; - if (strcaseequ(yytext, "SHOW")) return(SHOW) ; - if (strcaseequ(yytext, "P")) return(SHOW) ; - if (strcaseequ(yytext, "OFF")) return(OFF) ; - if (strcaseequ(yytext, "Q")) return(OFF) ; - if (strcaseequ(yytext, "QUIT")) return(OFF) ; - if (yytext[0] == '!') { - code = SHELL_COMMAND ; - } else { - code = SHELL_QUERY ; - } - } else { - - /* not first token, check for operator names */ - - if (strcaseequ(yytext, "LID")) return(LID) ; - if (strcaseequ(yytext, "AID")) return(AID) ; - if (strcaseequ(yytext, "AND")) return(AND) ; - if (strcaseequ(yytext, "OR")) return(OR) ; - if (strcaseequ(yytext, "NOT")) return(NOT) ; - if (strcaseequ(yytext, "MATCH")) return(MATCH) ; - if ((yytext[0] == 's' || yytext[0] == 'S') && isdigit(yytext[1])) { - - /* this might be a set specification */ - - sp = &yytext[1] ; - val = 0 ; - for ( ; ; ) { - c = *sp++ ; - if (c == '\0') { - if (val < NextSetNum) { - yylval.setdef = TheSets[val] ; - return(SET) ; - } - } - if (isdigit(c)) { - val = (val * 10) + (c - '0') ; - } else { - break ; - } - } - } - } - yylval.strdef = (id_type *)malloc(sizeof(id_type) + strlen(yytext)) ; - if (yylval.strdef == NULL) { - fatal("Out of memory in yylex") ; - } - yylval.strdef->next_id = NULL ; - if (code == SHELL_COMMAND) { - strcpy(yylval.strdef->id, &yytext[1]) ; - } else { - strcpy(yylval.strdef->id, yytext) ; - } - 
return(code) ; - } -} - -/* The main program for iid - parse the command line, initialize processing, - * loop processing one command at a time. - */ -int -main( int argc , char * argv [ ] ) -{ - int c ; /* current option */ - char * CmdPtr = NULL ; /* Points to the command string */ - char Command [ MAXCMD ] ; /* Buffer for reading commands */ - int DoPrompt ; /* 1 if should write a prompt */ - int errors = 0 ; /* error count */ - - program_name = argv[0]; - DoPrompt = isatty(fileno(stdin)) ; - while ((c = getopt(argc, argv, "Hac:")) != EOF) { - switch(c) { - case 'a': - DefaultCommand = "aid -kmn" ; - break ; - case 'c': - CmdPtr = optarg ; - break ; - case 'H': - fputs("\ -iid: interactive ID database query tool. Call with:\n\ - iid [-a] [-c] [-H]\n\ -\n\ --a\tUse the aid as the default query command (not lid).\n\ --c cmd\tExecute the single query cmd and exit.\n\ --H\tPrint this message and exit.\n\ -\n\ -To get help after starting program type 'help'.\n\ -",stderr) ; - exit(0) ; - default: - ++errors ; - break ; - } - } - if (argc != optind) { - fputs("iid: Excess arguments ignored.\n",stderr) ; - ++errors ; - } - if (errors) { - fputs("run iid -H for help.\n",stderr) ; - exit(1) ; - } - - /* initialize global data */ - - InitIid() ; - - /* run the parser */ - - if (CmdPtr) { - ScanInit(CmdPtr) ; - exit(yyparse()) ; - } else { - for ( ; ; ) { - if (DoPrompt) { - fputs(Prompt, stdout) ; - fflush(stdout) ; - } - gets(Command) ; - if (feof(stdin)) { - if (DoPrompt) fputs("\n", stdout) ; - strcpy(Command, "off") ; - } - ScanInit(Command) ; - errors += yyparse() ; - } - } -} - - -/* ArgListSize - count the size of an arg list so can alloca() enough - * space for the command. 
- */ -int -ArgListSize( id_list_type * idlp ) -{ - id_type * idep ; - int size = 0; - - idep = idlp->id_list ; - while (idep != NULL) { - size += 1 + strlen(idep->id); - idep = idep->next_id; - } - return size; -} - -/* SetListSize - count the size of a string build up from a set so we can - * alloca() enough space for args. - */ -int -SetListSize( set_type * sp ) -{ - int i ; - int size = 0 ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - size += 1 + strlen(FileList[i]->name); - } - } - } - return size; -} - -/* FlushFiles - clear out the TheFiles array for the start of a new - * query. - */ -void -FlushFiles( void ) -{ - int i ; - - if (TheFiles != NULL) { - for (i = 0; i <= MaxCurFile; ++i) { - TheFiles[i] = 0 ; - } - } - MaxCurFile = 0 ; -} - -/* fatal - sometimes the only thing to do is die... - */ -void -fatal( char const * s ) -{ - fprintf(stderr,"Fatal error: %s\n", s) ; - exit(1) ; -} - -/* CountBits - count the number of bits in a bit set. Actually fairly - * tricky since it needs to deal with sets having infinite tails - * as a result of a NOT operation. - */ -int -CountBits( set_type * sp ) -{ - unsigned long bit_mask ; - int count = 0 ; - int i ; - - i = 0; - for ( ; ; ) { - for (bit_mask = high_bit; bit_mask != 0; bit_mask >>= 1) { - if (bit_mask == NextMaskBit && i == NextMaskWord) { - return(count) ; - } - if (i < sp->set_size) { - if (sp->set_data[i] & bit_mask) { - ++count ; - } - } else { - if (sp->set_tail == 0) return count; - if (sp->set_tail & bit_mask) { - ++count; - } - } - } - ++i; - } -} - -/* OneDescription - Print a description of a set. This includes - * the set number, the number of files in the set, and the - * set description string. 
- */ -void -OneDescription( set_type * sp ) -{ - int elt_count ; - char setnum[20] ; - - sprintf(setnum,"S%d",sp->set_num) ; - elt_count = CountBits(sp) ; - printf("%5s %6d %s\n",setnum,elt_count,sp->set_desc) ; -} - -/* DescribeSets - Print description of all the sets. - */ -void -DescribeSets( void ) -{ - int i ; - - if (NextSetNum > 0) { - for (i = 0; i < NextSetNum; ++i) { - OneDescription(TheSets[i]) ; - } - } else { - printf("No sets defined yet.\n") ; - } -} - -/* SetList - Go through the bit set and add the file names in - * it to an identifier list. - */ -id_list_type * -SetList( id_list_type * idlp , set_type * sp ) -{ - int i ; - id_type * idep ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - idep = (id_type *)malloc(sizeof(id_type) + - strlen(FileList[i]->name)) ; - if (idep == NULL) { - fatal("Out of memory in SetList") ; - } - idep->next_id = NULL ; - strcpy(idep->id, FileList[i]->name) ; - idlp = ExtendList(idlp, idep) ; - } - } - } - return(idlp) ; -} - -/* PrintSet - Go through the bit set and print the file names - * corresponding to all the set bits. - */ -void -PrintSet( set_type * sp ) -{ - int i ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - printf("%s\n",FileList[i]->name) ; - } - } - } -} - -/* Free up all space used by current set of sets and reset all - * set numbers. - */ -void -FlushSets( void ) -{ - int i ; - - for (i = 0; i < NextSetNum; ++i) { - free(TheSets[i]->set_desc) ; - free(TheSets[i]) ; - } - NextSetNum = 0 ; -} - -/* InitList - create an empty identifier list. 
- */ -id_list_type * -InitList( void ) -{ - id_list_type * idlp ; - - idlp = (id_list_type *)malloc(sizeof(id_list_type)) ; - if (idlp == NULL) { - fatal("Out of memory in InitList") ; - } - idlp->id_count = 0 ; - idlp->end_ptr_ptr = & (idlp->id_list) ; - idlp->id_list = NULL ; - return(idlp) ; -} - -/* ExtendList - add one identifier to an ID list. - */ -id_list_type * -ExtendList( id_list_type * idlp , id_type * idp ) -{ - *(idlp->end_ptr_ptr) = idp ; - idlp->end_ptr_ptr = &(idp->next_id) ; - return(idlp) ; -} - -/* InitIid - do all initial processing for iid. - * 1) Determine the size of a unsigned long for bit set stuff. - * 2) Find out the name of the pager program to use. - * 3) Create the HelpSet (pointing to the help file). - * 4) Setup the prompt. - */ -void -InitIid( void ) -{ - unsigned long bit_mask = 1 ; /* find number of bits in long */ - int i ; - char const * page ; /* pager program */ - - do { - high_bit = bit_mask ; - bit_mask <<= 1 ; - } while (bit_mask != 0) ; - - NextMaskBit = high_bit ; - - page = getenv("PAGER") ; - if (page == NULL) { - page = PAGER ; - } - strcpy(Pager, page) ; - - FlushFiles() ; - InstallFile(IID_HELP_FILE) ; - HelpSet = (set_type *) - malloc(sizeof(set_type) + sizeof(unsigned long) * MaxCurFile) ; - if (HelpSet == NULL) { - fatal("No memory for set in InitIid") ; - } - HelpSet->set_tail = 0 ; - HelpSet->set_desc = NULL ; - HelpSet->set_size = MaxCurFile + 1 ; - for (i = 0; i <= MaxCurFile; ++i) { - HelpSet->set_data[i] = TheFiles[i] ; - } - - page = getenv("PS1") ; - if (page == NULL) { - page = PROMPT ; - } - strcpy(Prompt, page) ; -} - -/* InstallFile - install a file name in the symtab. Return the - * symbol table pointer of the file. 
- */ -symtab_type * -InstallFile( char const * fp ) -{ - char c ; - unsigned long hash_code ; - int i ; - char const * sp ; - symtab_type * symp ; - - hash_code = 0 ; - sp = fp ; - while ((c = *sp++) != '\0') { - hash_code <<= 1 ; - hash_code ^= (unsigned long)(c) ; - if (hash_code & high_bit) { - hash_code &= ~ high_bit ; - hash_code ^= 1 ; - } - } - hash_code %= HASH_SIZE ; - symp = HashTable[hash_code] ; - while (symp != NULL && strcmp(symp->name, fp)) { - symp = symp->hash_link ; - } - if (symp == NULL) { - symp = (symtab_type *)malloc(sizeof(symtab_type) + strlen(fp)) ; - if (symp == NULL) { - fatal("No memory for symbol table entry in InstallFile") ; - } - strcpy(symp->name, fp) ; - symp->hash_link = HashTable[hash_code] ; - HashTable[hash_code] = symp ; - if (NextMaskWord >= FileSpace) { - FileSpace += 1000 ; - if (TheFiles != NULL) { - TheFiles = (unsigned long *) - realloc(TheFiles, sizeof(unsigned long) * FileSpace) ; - } else { - TheFiles = (unsigned long *) - malloc(sizeof(unsigned long) * FileSpace) ; - } - if (TheFiles == NULL) { - fatal("No memory for TheFiles in InstallFile") ; - } - for (i = NextMaskWord; i < FileSpace; ++i) { - TheFiles[i] = 0 ; - } - } - symp->mask_word = NextMaskWord ; - symp->mask_bit = NextMaskBit ; - NextMaskBit >>= 1 ; - if (NextMaskBit == 0) { - NextMaskBit = high_bit ; - ++NextMaskWord ; - } - if (NextFileNum >= ListSpace) { - ListSpace += 1000 ; - if (FileList == NULL) { - FileList = (symtab_type **) - malloc(sizeof(symtab_type *) * ListSpace) ; - } else { - FileList = (symtab_type **) - realloc(FileList, ListSpace * sizeof(symtab_type *)) ; - } - if (FileList == NULL) { - fatal("No memory for FileList in InstallFile") ; - } - } - FileList[NextFileNum++] = symp ; - /* put code here to sort the file list by name someday */ - } - TheFiles[symp->mask_word] |= symp->mask_bit ; - if (symp->mask_word > MaxCurFile) { - MaxCurFile = symp->mask_word ; - } - return(symp) ; -} - -/* RunPager - run the users pager program on the list 
of files - * in the set. - */ -void -RunPager( char * pp , set_type * sp ) -{ - char * cmd ; - int i ; - - cmd = (char *)TEMP_ALLOC(SetListSize(sp) + strlen(pp) + 2); - strcpy(cmd, pp) ; - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - strcat(cmd, " ") ; - strcat(cmd, FileList[i]->name) ; - } - } - } - system(cmd) ; - TEMP_FREE(cmd) ; -} - -/* AddSet - add a new set to the universal list of sets. Assign - * it the next set number. - */ -void -AddSet( set_type * sp ) -{ - if (NextSetNum >= SetSpace) { - SetSpace += 1000 ; - if (TheSets != NULL) { - TheSets = (set_type **) - realloc(TheSets, sizeof(set_type *) * SetSpace) ; - } else { - TheSets = (set_type **) - malloc(sizeof(set_type *) * SetSpace) ; - } - if (TheSets == NULL) { - fatal("No memory for TheSets in AddSet") ; - } - } - sp->set_num = NextSetNum ; - TheSets[NextSetNum++] = sp ; -} - -/* RunProg - run a program with arguments from id_list and - * accept list of file names back from the program which - * are installed in the symbol table and used to construct - * a new set. - */ -set_type * -RunProg( char const * pp , id_list_type * idlp ) -{ - int c ; - char * cmd ; - char * dp ; - struct obstack pipe_output_obstack; - int i ; - id_type * idep ; - id_type * next_id ; - FILE * prog ; - set_type * sp ; - - cmd = (char *)TEMP_ALLOC(ArgListSize(idlp) + strlen(pp) + 2); - FlushFiles() ; - strcpy(cmd, pp) ; - idep = idlp->id_list ; - while (idep != NULL) { - strcat(cmd, " ") ; - strcat(cmd, idep->id) ; - next_id = idep->next_id ; - free(idep) ; - idep = next_id ; - } - free(idlp) ; - - /* run program with popen, reading the output. Assume each - * white space terminated string is a file name. 
- */ - - prog = popen(cmd, "r") ; - obstack_init (&pipe_output_obstack); - - while (1) - { - c = getc (prog); - if (c == EOF || isspace (c)) - { - int n; - if ((n = obstack_object_size (&pipe_output_obstack)) > 0) - { - char *_file; - - obstack_1grow (&pipe_output_obstack, 0); - ++n; - _file = obstack_finish (&pipe_output_obstack); - InstallFile(_file) ; - if (n != strlen (_file) + 1) - abort (); - obstack_free (&pipe_output_obstack, _file); - } - if (c == EOF) - break; - } - else - { - obstack_1grow (&pipe_output_obstack, c); - } - } - obstack_free (&pipe_output_obstack, NULL); - - if (pclose(prog) != 0) { - /* if there was an error make an empty set, who knows what - * garbage the program printed. - */ - FlushFiles() ; - } - - sp = (set_type *) - malloc(sizeof(set_type) + sizeof(unsigned long) * MaxCurFile) ; - if (sp == NULL) { - fatal("No memory for set in RunProg") ; - } - sp->set_tail = 0 ; - sp->set_desc = (char *)malloc(strlen(cmd) + 1) ; - if (sp->set_desc == NULL) { - fatal("No memory for set description in RunProg") ; - } - strcpy(sp->set_desc, cmd) ; - sp->set_size = MaxCurFile + 1 ; - for (i = 0; i <= MaxCurFile; ++i) { - sp->set_data[i] = TheFiles[i] ; - } - AddSet(sp) ; - TEMP_FREE(cmd); - return(sp) ; -} - -/* SetDirectory - change the working directory. This will - * determine which ID file is found by the subprograms. - */ -void -SetDirectory( id_type * dir ) -{ - if (chdir(dir->id) != 0) { - fprintf(stderr,"Directory %s not accessible.\n", dir->id) ; - } - free(dir) ; -} - -/* SetIntersect - construct a new set from the intersection - * of two others. Also construct a new description string. 
- */ -set_type * -SetIntersect( set_type * sp1 , set_type * sp2 ) -{ - char * desc ; - int i ; - int len1 ; - int len2 ; - set_type * new_set ; - int new_size ; - - if (sp1->set_tail || sp2->set_tail) { - new_size = MAX(sp1->set_size, sp2->set_size) ; - } else { - new_size = MIN(sp1->set_size, sp2->set_size) ; - } - new_set = (set_type *)malloc(sizeof(set_type) + - (new_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetIntersect") ; - } - len1 = strlen(sp1->set_desc) ; - len2 = strlen(sp2->set_desc) ; - desc = (char *)malloc(len1 + len2 + 10) ; - if (desc == NULL) { - fatal("No memory for set description in SetIntersect") ; - } - new_set->set_desc = desc ; - strcpy(desc,"(") ; - ++desc ; - strcpy(desc, sp1->set_desc) ; - desc += len1 ; - strcpy(desc, ") AND (") ; - desc += 7 ; - strcpy(desc, sp2->set_desc) ; - desc += len2 ; - strcpy(desc, ")") ; - AddSet(new_set) ; - new_set->set_size = new_size ; - for (i = 0; i < new_size; ++i) { - new_set->set_data[i] = - ((i < sp1->set_size) ? sp1->set_data[i] : sp1->set_tail) & - ((i < sp2->set_size) ? sp2->set_data[i] : sp2->set_tail) ; - } - new_set->set_tail = sp1->set_tail & sp2->set_tail ; - return(new_set) ; -} - -/* SetUnion - construct a new set from the union of two others. - * Also construct a new description string. 
- */ -set_type * -SetUnion( set_type * sp1 , set_type * sp2 ) -{ - char * desc ; - int i ; - int len1 ; - int len2 ; - set_type * new_set ; - int new_size ; - - new_size = MAX(sp1->set_size, sp2->set_size) ; - new_set = (set_type *)malloc(sizeof(set_type) + - (new_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetUnion") ; - } - len1 = strlen(sp1->set_desc) ; - len2 = strlen(sp2->set_desc) ; - desc = (char *)malloc(len1 + len2 + 9) ; - if (desc == NULL) { - fatal("No memory for set description in SetUnion") ; - } - new_set->set_desc = desc ; - strcpy(desc,"(") ; - ++desc ; - strcpy(desc, sp1->set_desc) ; - desc += len1 ; - strcpy(desc, ") OR (") ; - desc += 6 ; - strcpy(desc, sp2->set_desc) ; - desc += len2 ; - strcpy(desc, ")") ; - AddSet(new_set) ; - new_set->set_size = new_size ; - for (i = 0; i < new_size; ++i) { - new_set->set_data[i] = - ((i < sp1->set_size) ? (sp1->set_data[i]) : sp1->set_tail) | - ((i < sp2->set_size) ? (sp2->set_data[i]) : sp2->set_tail) ; - } - new_set->set_tail = sp1->set_tail | sp2->set_tail ; - return(new_set) ; -} - -/* SetInverse - construct a new set from the inverse of another. - * Also construct a new description string. - * - * This is kind of tricky. An inverse set in iid may grow during - * the course of a session. By NOTing the set_tail extension the - * inverse at any given time will be defined as the inverse against - * a universe that grows as additional queries are made and new files - * are added to the database. - * - * Several alternative definitions were possible (snapshot the - * universe at the time of the NOT, go read the ID file to - * determine the complete universe), but this one was the one - * I picked. 
- */ -set_type * -SetInverse( set_type * sp ) -{ - char * desc ; - int i ; - set_type * new_set ; - - new_set = (set_type *)malloc(sizeof(set_type) + - (sp->set_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetInverse") ; - } - desc = (char *)malloc(strlen(sp->set_desc) + 5) ; - if (desc == NULL) { - fatal("No memory for set description in SetInverse") ; - } - new_set->set_desc = desc ; - strcpy(desc,"NOT ") ; - desc += 4 ; - strcpy(desc, sp->set_desc) ; - AddSet(new_set) ; - new_set->set_size = sp->set_size ; - for (i = 0; i < sp->set_size; ++i) { - new_set->set_data[i] = ~ sp->set_data[i] ; - } - new_set->set_tail = ~ sp->set_tail ; - return(new_set) ; -} - -/* RunShell - run a program with arguments from id_list. - */ -void -RunShell( char * pp , id_list_type * idlp ) -{ - char * cmd ; - id_type * idep ; - id_type * next_id ; - - cmd = (char *)TEMP_ALLOC(ArgListSize(idlp) + strlen(pp) + 2); - strcpy(cmd, pp) ; - idep = idlp->id_list ; - while (idep != NULL) { - strcat(cmd, " ") ; - strcat(cmd, idep->id) ; - next_id = idep->next_id ; - free(idep) ; - idep = next_id ; - } - free(idlp) ; - system(cmd) ; - TEMP_FREE(cmd); -} diff --git a/iid.help b/iid.help deleted file mode 100644 index 6ec102c..0000000 --- a/iid.help +++ /dev/null @@ -1,92 +0,0 @@ -The iid program is an interactive shell on top of the mkid, lid, aid -database programs. It allows interactive queries of an ID database in -a fashion similar to a DIALOG session. Iid remembers the sets of files -that were reported by any lid or aid request. These sets are refered -to by set numbers. The commands available are: - -BEGIN <directory> cd to directory (presumably containing an ID file). 
-B short for BEGIN -SS <query> run query displaying the sets generated -FILES <query> run query listing the files in the final set -F short for FILES -SHOW <set number> run pager program on files in set -P short for SHOW -SETS show currently defined sets -HELP run pager on this file -? or H short commands for HELP -OFF exit iid -<cmd> run a shell command as a file name query -!<cmd> run a shell command - -A <set number> is the letter 's' (or 'S') followed (with no space) by -a number. Set numbers may be used as terms in a query. - -A <query> is: - <set number> - <identifier> - lid <identifier list> - aid <identifier list> - match <wild card list> - <query> or <query> - <query> and <query> - -The words "lid", "aid", "match", "or", and "and" are keywords, along -with any word that looks like a set number. If you have to use one of -these (or in arguments to lid, aid or match, shell escape characters) -then quote the name. - -The "match" operator constructs a set of files by running the "pid" -program with the wild card pattern as an argument. This is the only -operator which constructs sets based on file names rather than -contents. - -An identifier by itself is simply shorthand for "lid identifier". (If -the -a option was used to invoke iid, then a simple identifier is -shorthand for "aid identifier"). - -Example run: - -===> iid -===> ss lid "^get" or lid "Arg$" - S0 14 lid -kmn "^get" - S1 3 lid -kmn "Arg$" - S2 15 (lid -kmn "^get") OR (lid -kmn "Arg$") -===> f s1 -lid.c -paths.c -init.c -===> ls *.c - S3 28 ls *.c -===> ls s* - S4 9 ls s* -===> ss s3 and s4 - S5 4 (ls *.c) AND (ls s*) -===> !grep vhil s5 -scan-c.c: setCArgs("vhil",'+',"v"); -scan-c.c: setCArgs("vhil",'+',"v"); -===> off - -In this example the 'ss' command displays the sets it creats as it -does the parts of the query. In this case 3 sets are created, set S0 -has 14 files in it, set S1 has 3 files and the union of the two sets, -S2, has 15 files. 
A description of the query that created any given -set is kept along with the set and displayed when sets are printed. - -The 'f s1' command says list the files in set S1, and the three files -in the set are displayed. - -The 'ls' commands are examples of using arbitrary shell commands to -generate lists of files. In this case the 'ls' command. (This could -have been done as part of another query using the 'match' operator). - -The '!grep vhil s5' command runs the 'grep' shell command passing as -arguments 'vhil' and the names of all the files in s5. - -The 'off' command terminated the example session. - -Keywords, commands, and set numbers are recognized regardless of case -(and is And is aNd). Other parameters are case sensitive. - -The iid program can also be run in a batch mode using the -c option. -For more information on command line options, run "iid -H", or use the -Unix 'man' command. @@ -1,1359 +0,0 @@ -%{ -/* iid.y -- interactive mkid query language - Copyright (C) 1991 Tom Horsley - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include <stdio.h> -#include <ctype.h> -#include <string.h> -#include <stdlib.h> -#include <unistd.h> -#include <getopt.h> - -#include <config.h> -#include "strxtra.h" -#include "obstack.h" -#include "xmalloc.h" - -FILE *popen (); - -#define obstack_chunk_alloc xmalloc -#define obstack_chunk_free free - -#if HAVE_ALLOCA - -#if HAVE_ALLOCA_H -#include <alloca.h> -#endif -#define TEMP_ALLOC(s) alloca(s) -#define TEMP_FREE(s) - -#else /* not HAVE_ALLOCA */ - -#define TEMP_ALLOC(s) malloc(s) -#define TEMP_FREE(s) free(s) - -#endif /* not HAVE_ALLOCA */ - -#define HASH_SIZE 947 /* size of hash table for file names */ -#define INIT_FILES 8000 /* start with bits for this many */ -#define INIT_SETSPACE 500 /* start with room for this many */ -#define MAXCMD 1024 /* input command buffer size */ - -#define MAX(a,b) (((a)<(b))?(b):(a)) -#define MIN(a,b) (((a)>(b))?(b):(a)) - -#ifndef PAGER -#define PAGER "pg" -#endif - -#define PROMPT "iid> " - -/* set_type is the struct defining a set of file names - * The file names are stored in a symbol table and assigned - * unique numbers. The set is a bit set of file numbers. - * One of these set structs is calloced for each new set - * constructed, the size allocated depends on the max file - * bit number. An array of pointers to sets are kept to - * represent the complete set of sets. 
- */ - -struct set_struct { - char * set_desc ; /* string describing the set */ - int set_num ; /* the set number */ - int set_size ; /* number of long words in set */ - unsigned long int set_tail ; /* set extended with these bits */ - unsigned long int set_data[1] ;/* the actual set data (calloced) */ -} ; -typedef struct set_struct set_type ; - -/* id_type is one element of an id_list - */ - -struct id_struct { - struct id_struct * next_id ; /* Linked list of IDs */ - char id [ 1 ] ; /* calloced data holding id string */ -} ; -typedef struct id_struct id_type ; - -/* id_list_type is used during parsing to build lists of - * identifiers that will eventually represent arguments - * to be passed to the database query programs. - */ - -struct id_list_struct { - int id_count ; /* count of IDs in the list */ - id_type * * end_ptr_ptr ;/* pointer to link word at end of list */ - id_type * id_list ; /* pointer to list of IDs */ -} ; -typedef struct id_list_struct id_list_type ; - -/* symtab_type is used to record file names in the symbol table. - */ -struct symtab_struct { - struct symtab_struct * hash_link ; /* list of files with same hash code */ - int mask_word ; /* word in bit vector */ - unsigned long mask_bit ; /* bit in word */ - char name [ 1 ] ; /* the file name */ -} ; -typedef struct symtab_struct symtab_type ; - -/* LidCommand is the command to run for a Lid_group. It is set - * to "lid -kmn" if explicitly preceeded by "lid", otherwise - * it is the default command which is determined by an option. - */ -char const * LidCommand ; - -/* DefaultCommand is the default command for a Lid_group. If - * the -a option is given to iid, it is set to use 'aid'. - */ -char const * DefaultCommand = "lid -kmn" ; - -/* FileList is a lexically ordered list of file symbol table - * pointers. It is dynamically expanded when necessary. - */ -symtab_type * * FileList = NULL ; - -/* FileSpace is the number of long ints in TheFiles array. 
- */ -int FileSpace = 0 ; - -/* HashTable is the symbol table used to store file names. Each - * new name installed is assigned the next consecutive file number. - */ -symtab_type * HashTable [ HASH_SIZE ] ; - -/* HelpSet is a dummy set containing only one bit set which corresponds - * to the help file name. Simply a cheesy way to maximize sharing of - * the code that runs the pager. - */ -set_type * HelpSet ; - -/* high_bit is a unsigned long with the most significant bit set. - */ -unsigned long high_bit ; - -/* ListSpace is the amount of space avail in the FileList. - */ -int ListSpace = 0 ; - -/* MaxCurFile - max word that has any bit currently set in the - * TheFiles array. - */ -int MaxCurFile = 0 ; - -/* NextFileNum is the file number that will be assigned to the next - * new file name seen when it is installed in the symtab. - */ -int NextFileNum = 0 ; - -/* NextMaskBit is the bit within the next mask word that will - * correspond to the next file added to the symbol table. - */ -unsigned long NextMaskBit ; - -/* NextMaskWord is the next word number to be assigned to a file - * bit mask entry. - */ -int NextMaskWord = 0 ; - -/* NextSetNum is the number that will be assigned to the next set - * created. Starts at 0 because I am a C programmer. - */ -int NextSetNum = 0 ; - -/* The PAGER program to run on a SHOW command. - */ -char Pager[MAXCMD] ; - -/* Prompt - the string to use for a prompt. - */ -char Prompt[MAXCMD] ; - -/* SetSpace is the number of pointers available in TheSets. TheSets - * is realloced when we run out of space. - */ -int SetSpace = 0 ; - -/* TheFiles is a bit set used to construct the initial set of files - * generated while running one of the subprograms. It is copied to - * the alloced set once we know how many bits are set. - */ -unsigned long * TheFiles = NULL ; - -/* TheSets is a dynamically allocated array of pointers pointing - * the sets that have been allocated. It represents the set of - * sets. 
- */ -set_type * * TheSets = NULL ; - -/* VerboseQuery controls the actions of the semantic routines during - * the process of a query. If TRUE the sets are described as they - * are constructed. - */ -int VerboseQuery ; - -char const *program_name ; - -int yyerror __P(( char const * s )) ; -void ScanInit __P(( char * line )) ; -int yylex __P(( void )) ; -int ArgListSize __P(( id_list_type * idlp )) ; -int SetListSize __P(( set_type * sp )) ; -void FlushFiles __P(( void )) ; -void fatal __P(( char const * s )) ; -int CountBits __P(( set_type * sp )) ; -void OneDescription __P(( set_type * sp )) ; -void DescribeSets __P(( void )) ; -id_list_type * SetList __P(( id_list_type * idlp , set_type * sp )) ; -void PrintSet __P(( set_type * sp )) ; -void FlushSets __P(( void )) ; -id_list_type * InitList __P(( void )) ; -id_list_type * ExtendList __P(( id_list_type * idlp , id_type * idp )) ; -void InitIid __P(( void )) ; -symtab_type * InstallFile __P(( char const * fp )) ; -void RunPager __P(( char * pp , set_type * sp )) ; -void AddSet __P(( set_type * sp )) ; -set_type * RunProg __P(( char const * pp , id_list_type * idlp )) ; -void SetDirectory __P(( id_type * dir )) ; -set_type * SetIntersect __P(( set_type * sp1 , set_type * sp2 )) ; -set_type * SetUnion __P(( set_type * sp1 , set_type * sp2 )) ; -set_type * SetInverse __P(( set_type * sp )) ; -void RunShell __P(( char * pp , id_list_type * idlp )) ; - -%} - -%union { - set_type * setdef ; - id_type * strdef ; - id_list_type * listdef ; -} - -%token < setdef > SET - -%token < strdef > ID SHELL_QUERY SHELL_COMMAND - -%type < setdef > Query Primitive - -%type < listdef > Lid_group Aid_group Id_list Command_list - -%token LID AID BEGIN SETS SS FILES SHOW HELP OFF MATCH - -%left OR - -%left AND - -%left NOT - -%start Command - -%% - -Command : - BEGIN ID - { - /* cd to the directory specified as argument, flush sets */ - - SetDirectory($2) ; - FlushSets() ; - } -| Set_query Query -| File_query Query - { - /* print the 
list of files resulting from Query */ - - PrintSet($2) ; - } -| SHOW SET - { - /* run PAGER on the list of files in SET */ - - RunPager(Pager, $2) ; - } -| SETS - { - /* describe sets created so far */ - - DescribeSets() ; - } -| HELP - { - /* run PAGER on the help file */ - - RunPager(Pager, HelpSet) ; - } -| OFF - { - exit(0) ; - } -| SHELL_QUERY Command_list - { - /* run the shell command and eat the results as a file set */ - - OneDescription(RunProg($1->id, $2)) ; - free($1) ; - } -| SHELL_COMMAND Command_list - { - /* run the shell command */ - - RunShell($1->id, $2) ; - free($1) ; - } -; - -Set_query : - SS - { - /* Turn on verbose query flag */ - - VerboseQuery = 1 ; - } -; - -File_query : - FILES - { - /* Turn off verbose query flag */ - - VerboseQuery = 0 ; - } -; - -Query : - Primitive - { - /* value of query is set associated with primitive */ - - $$ = $1 ; - } -| Query AND Query - { - /* value of query is intersection of the two query sets */ - - $$ = SetIntersect($1, $3) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| Query OR Query - { - /* value of query is union of the two query sets */ - - $$ = SetUnion($1, $3) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| NOT Query - { - /* value of query is inverse of other query */ - - $$ = SetInverse($2) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -; - -Primitive : - SET - { - /* Value of primitive is value of recorded set */ - - $$ = $1 ; - } -| Lid_group - { - /* Value of primitive is obtained by running an lid query */ - - $$ = RunProg(LidCommand, $1) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| Aid_group - { - /* Value of primitive is obtained by running an aid query */ - - $$ = RunProg("aid -kmn", $1) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| MATCH Id_list - { - /* Match names from database against pattern */ - $$ = RunProg("pid -kmn", $2) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| '(' Query ')' - { - /* value of primitive is 
value of query */ - - $$ = $2 ; - } -; - -Lid_group : - ID - { - /* make arg list holding single ID */ - - $$ = InitList() ; - $$ = ExtendList($$, $1) ; - LidCommand = DefaultCommand ; - } -| LID Id_list - { - /* arg list is Id_list */ - - $$ = $2 ; - LidCommand = "lid -kmn" ; - } -; - -Aid_group : - AID Id_list - { - /* arg list is Id_list */ - - $$ = $2 ; - } -; - -Command_list : - ID - { - /* make arg list holding single ID */ - - $$ = InitList() ; - $$ = ExtendList($$, $1) ; - } -| SET - { - /* make arg list holding names from set */ - - $$ = InitList() ; - $$ = SetList($$, $1) ; - } -| Command_list ID - { - /* extend arg list with additional ID */ - - $$ = ExtendList($1, $2) ; - } -| Command_list SET - { - /* extend arg list with additional file names */ - - $$ = SetList($1, $2) ; - } -; - -Id_list : - ID - { - /* make arg list holding single ID */ - - $$ = InitList() ; - $$ = ExtendList($$, $1) ; - } -| Id_list ID - { - /* extend arg list with additional ID */ - - $$ = ExtendList($1, $2) ; - } -; - -%% - -/* ScanLine - a global variable holding a pointer to the current - * command being scanned. - */ -char * ScanLine ; - -/* ScanPtr - a global pointer to the current scan position in ScanLine. - */ -char * ScanPtr ; - -/* yytext - buffer holding the token. - */ -char yytext [ MAXCMD ] ; - -/* yyerror - process syntax errors. - */ -int -yyerror( char const * s ) -{ - if (*ScanPtr == '\0') { - fprintf(stderr,"Syntax error near end of command.\n") ; - } else { - fprintf(stderr,"Syntax error on or before %s\n",ScanPtr) ; - } - return(0) ; -} - -/* ScanInit - initialize the yylex routine for the new line of input. - * Basically just initializes the global variables that hold the char - * ptrs the scanner uses. - */ -void -ScanInit( char * line ) -{ - /* skip the leading white space - the yylex routine is sensitive - * to keywords in the first position on the command line. 
- */ - - while (isspace(*line)) ++line ; - ScanLine = line ; - ScanPtr = line ; -} - -/* yylex - the scanner for iid. Basically a kludge ad-hoc piece of junk, - * but what the heck, if it works... - * - * Mostly just scans for non white space strings and returns ID for them. - * Does check especially for '(' and ')'. Just before returning ID it - * checks for command names if it is the first token on line or - * AND, OR, LID, AID if it is in the middle of a line. - */ -int -yylex( void ) -{ - char * bp ; - char c ; - int code = ID ; - char * dp ; - char * sp ; - int val ; - - bp = ScanPtr ; - while (isspace(*bp)) ++bp ; - sp = bp ; - c = *sp++ ; - if ((c == '(') || (c == ')') || (c == '\0')) { - ScanPtr = sp ; - if (c == '\0') { - --ScanPtr ; - } - return(c) ; - } else { - dp = yytext ; - while (! ((c == '(') || (c == ')') || (c == '\0') || isspace(c))) { - *dp++ = c ; - c = *sp++ ; - } - *dp++ = '\0' ; - ScanPtr = sp - 1 ; - if (bp == ScanLine) { - - /* first token on line, check for command names */ - - if (strcaseequ(yytext, "SS")) return(SS) ; - if (strcaseequ(yytext, "FILES")) return(FILES) ; - if (strcaseequ(yytext, "F")) return(FILES) ; - if (strcaseequ(yytext, "HELP")) return(HELP) ; - if (strcaseequ(yytext, "H")) return(HELP) ; - if (strcaseequ(yytext, "?")) return(HELP) ; - if (strcaseequ(yytext, "BEGIN")) return(BEGIN) ; - if (strcaseequ(yytext, "B")) return(BEGIN) ; - if (strcaseequ(yytext, "SETS")) return(SETS) ; - if (strcaseequ(yytext, "SHOW")) return(SHOW) ; - if (strcaseequ(yytext, "P")) return(SHOW) ; - if (strcaseequ(yytext, "OFF")) return(OFF) ; - if (strcaseequ(yytext, "Q")) return(OFF) ; - if (strcaseequ(yytext, "QUIT")) return(OFF) ; - if (yytext[0] == '!') { - code = SHELL_COMMAND ; - } else { - code = SHELL_QUERY ; - } - } else { - - /* not first token, check for operator names */ - - if (strcaseequ(yytext, "LID")) return(LID) ; - if (strcaseequ(yytext, "AID")) return(AID) ; - if (strcaseequ(yytext, "AND")) return(AND) ; - if 
(strcaseequ(yytext, "OR")) return(OR) ; - if (strcaseequ(yytext, "NOT")) return(NOT) ; - if (strcaseequ(yytext, "MATCH")) return(MATCH) ; - if ((yytext[0] == 's' || yytext[0] == 'S') && isdigit(yytext[1])) { - - /* this might be a set specification */ - - sp = &yytext[1] ; - val = 0 ; - for ( ; ; ) { - c = *sp++ ; - if (c == '\0') { - if (val < NextSetNum) { - yylval.setdef = TheSets[val] ; - return(SET) ; - } - } - if (isdigit(c)) { - val = (val * 10) + (c - '0') ; - } else { - break ; - } - } - } - } - yylval.strdef = (id_type *)malloc(sizeof(id_type) + strlen(yytext)) ; - if (yylval.strdef == NULL) { - fatal("Out of memory in yylex") ; - } - yylval.strdef->next_id = NULL ; - if (code == SHELL_COMMAND) { - strcpy(yylval.strdef->id, &yytext[1]) ; - } else { - strcpy(yylval.strdef->id, yytext) ; - } - return(code) ; - } -} - -/* The main program for iid - parse the command line, initialize processing, - * loop processing one command at a time. - */ -int -main( int argc , char * argv [ ] ) -{ - int c ; /* current option */ - char * CmdPtr = NULL ; /* Points to the command string */ - char Command [ MAXCMD ] ; /* Buffer for reading commands */ - int DoPrompt ; /* 1 if should write a prompt */ - int errors = 0 ; /* error count */ - - program_name = argv[0]; - DoPrompt = isatty(fileno(stdin)) ; - while ((c = getopt(argc, argv, "Hac:")) != EOF) { - switch(c) { - case 'a': - DefaultCommand = "aid -kmn" ; - break ; - case 'c': - CmdPtr = optarg ; - break ; - case 'H': - fputs("\ -iid: interactive ID database query tool. 
Call with:\n\ - iid [-a] [-c] [-H]\n\ -\n\ --a\tUse the aid as the default query command (not lid).\n\ --c cmd\tExecute the single query cmd and exit.\n\ --H\tPrint this message and exit.\n\ -\n\ -To get help after starting program type 'help'.\n\ -",stderr) ; - exit(0) ; - default: - ++errors ; - break ; - } - } - if (argc != optind) { - fputs("iid: Excess arguments ignored.\n",stderr) ; - ++errors ; - } - if (errors) { - fputs("run iid -H for help.\n",stderr) ; - exit(1) ; - } - - /* initialize global data */ - - InitIid() ; - - /* run the parser */ - - if (CmdPtr) { - ScanInit(CmdPtr) ; - exit(yyparse()) ; - } else { - for ( ; ; ) { - if (DoPrompt) { - fputs(Prompt, stdout) ; - fflush(stdout) ; - } - gets(Command) ; - if (feof(stdin)) { - if (DoPrompt) fputs("\n", stdout) ; - strcpy(Command, "off") ; - } - ScanInit(Command) ; - errors += yyparse() ; - } - } -} - - -/* ArgListSize - count the size of an arg list so can alloca() enough - * space for the command. - */ -int -ArgListSize( id_list_type * idlp ) -{ - id_type * idep ; - int size = 0; - - idep = idlp->id_list ; - while (idep != NULL) { - size += 1 + strlen(idep->id); - idep = idep->next_id; - } - return size; -} - -/* SetListSize - count the size of a string build up from a set so we can - * alloca() enough space for args. - */ -int -SetListSize( set_type * sp ) -{ - int i ; - int size = 0 ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - size += 1 + strlen(FileList[i]->name); - } - } - } - return size; -} - -/* FlushFiles - clear out the TheFiles array for the start of a new - * query. - */ -void -FlushFiles( void ) -{ - int i ; - - if (TheFiles != NULL) { - for (i = 0; i <= MaxCurFile; ++i) { - TheFiles[i] = 0 ; - } - } - MaxCurFile = 0 ; -} - -/* fatal - sometimes the only thing to do is die... 
- */ -void -fatal( char const * s ) -{ - fprintf(stderr,"Fatal error: %s\n", s) ; - exit(1) ; -} - -/* CountBits - count the number of bits in a bit set. Actually fairly - * tricky since it needs to deal with sets having infinite tails - * as a result of a NOT operation. - */ -int -CountBits( set_type * sp ) -{ - unsigned long bit_mask ; - int count = 0 ; - int i ; - - i = 0; - for ( ; ; ) { - for (bit_mask = high_bit; bit_mask != 0; bit_mask >>= 1) { - if (bit_mask == NextMaskBit && i == NextMaskWord) { - return(count) ; - } - if (i < sp->set_size) { - if (sp->set_data[i] & bit_mask) { - ++count ; - } - } else { - if (sp->set_tail == 0) return count; - if (sp->set_tail & bit_mask) { - ++count; - } - } - } - ++i; - } -} - -/* OneDescription - Print a description of a set. This includes - * the set number, the number of files in the set, and the - * set description string. - */ -void -OneDescription( set_type * sp ) -{ - int elt_count ; - char setnum[20] ; - - sprintf(setnum,"S%d",sp->set_num) ; - elt_count = CountBits(sp) ; - printf("%5s %6d %s\n",setnum,elt_count,sp->set_desc) ; -} - -/* DescribeSets - Print description of all the sets. - */ -void -DescribeSets( void ) -{ - int i ; - - if (NextSetNum > 0) { - for (i = 0; i < NextSetNum; ++i) { - OneDescription(TheSets[i]) ; - } - } else { - printf("No sets defined yet.\n") ; - } -} - -/* SetList - Go through the bit set and add the file names in - * it to an identifier list. 
- */ -id_list_type * -SetList( id_list_type * idlp , set_type * sp ) -{ - int i ; - id_type * idep ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - idep = (id_type *)malloc(sizeof(id_type) + - strlen(FileList[i]->name)) ; - if (idep == NULL) { - fatal("Out of memory in SetList") ; - } - idep->next_id = NULL ; - strcpy(idep->id, FileList[i]->name) ; - idlp = ExtendList(idlp, idep) ; - } - } - } - return(idlp) ; -} - -/* PrintSet - Go through the bit set and print the file names - * corresponding to all the set bits. - */ -void -PrintSet( set_type * sp ) -{ - int i ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - printf("%s\n",FileList[i]->name) ; - } - } - } -} - -/* Free up all space used by current set of sets and reset all - * set numbers. - */ -void -FlushSets( void ) -{ - int i ; - - for (i = 0; i < NextSetNum; ++i) { - free(TheSets[i]->set_desc) ; - free(TheSets[i]) ; - } - NextSetNum = 0 ; -} - -/* InitList - create an empty identifier list. - */ -id_list_type * -InitList( void ) -{ - id_list_type * idlp ; - - idlp = (id_list_type *)malloc(sizeof(id_list_type)) ; - if (idlp == NULL) { - fatal("Out of memory in InitList") ; - } - idlp->id_count = 0 ; - idlp->end_ptr_ptr = & (idlp->id_list) ; - idlp->id_list = NULL ; - return(idlp) ; -} - -/* ExtendList - add one identifier to an ID list. - */ -id_list_type * -ExtendList( id_list_type * idlp , id_type * idp ) -{ - *(idlp->end_ptr_ptr) = idp ; - idlp->end_ptr_ptr = &(idp->next_id) ; - return(idlp) ; -} - -/* InitIid - do all initial processing for iid. - * 1) Determine the size of a unsigned long for bit set stuff. - * 2) Find out the name of the pager program to use. - * 3) Create the HelpSet (pointing to the help file). - * 4) Setup the prompt. 
- */ -void -InitIid( void ) -{ - unsigned long bit_mask = 1 ; /* find number of bits in long */ - int i ; - char const * page ; /* pager program */ - - do { - high_bit = bit_mask ; - bit_mask <<= 1 ; - } while (bit_mask != 0) ; - - NextMaskBit = high_bit ; - - page = getenv("PAGER") ; - if (page == NULL) { - page = PAGER ; - } - strcpy(Pager, page) ; - - FlushFiles() ; - InstallFile(IID_HELP_FILE) ; - HelpSet = (set_type *) - malloc(sizeof(set_type) + sizeof(unsigned long) * MaxCurFile) ; - if (HelpSet == NULL) { - fatal("No memory for set in InitIid") ; - } - HelpSet->set_tail = 0 ; - HelpSet->set_desc = NULL ; - HelpSet->set_size = MaxCurFile + 1 ; - for (i = 0; i <= MaxCurFile; ++i) { - HelpSet->set_data[i] = TheFiles[i] ; - } - - page = getenv("PS1") ; - if (page == NULL) { - page = PROMPT ; - } - strcpy(Prompt, page) ; -} - -/* InstallFile - install a file name in the symtab. Return the - * symbol table pointer of the file. - */ -symtab_type * -InstallFile( char const * fp ) -{ - char c ; - unsigned long hash_code ; - int i ; - char const * sp ; - symtab_type * symp ; - - hash_code = 0 ; - sp = fp ; - while ((c = *sp++) != '\0') { - hash_code <<= 1 ; - hash_code ^= (unsigned long)(c) ; - if (hash_code & high_bit) { - hash_code &= ~ high_bit ; - hash_code ^= 1 ; - } - } - hash_code %= HASH_SIZE ; - symp = HashTable[hash_code] ; - while (symp != NULL && strcmp(symp->name, fp)) { - symp = symp->hash_link ; - } - if (symp == NULL) { - symp = (symtab_type *)malloc(sizeof(symtab_type) + strlen(fp)) ; - if (symp == NULL) { - fatal("No memory for symbol table entry in InstallFile") ; - } - strcpy(symp->name, fp) ; - symp->hash_link = HashTable[hash_code] ; - HashTable[hash_code] = symp ; - if (NextMaskWord >= FileSpace) { - FileSpace += 1000 ; - if (TheFiles != NULL) { - TheFiles = (unsigned long *) - realloc(TheFiles, sizeof(unsigned long) * FileSpace) ; - } else { - TheFiles = (unsigned long *) - malloc(sizeof(unsigned long) * FileSpace) ; - } - if (TheFiles == 
NULL) { - fatal("No memory for TheFiles in InstallFile") ; - } - for (i = NextMaskWord; i < FileSpace; ++i) { - TheFiles[i] = 0 ; - } - } - symp->mask_word = NextMaskWord ; - symp->mask_bit = NextMaskBit ; - NextMaskBit >>= 1 ; - if (NextMaskBit == 0) { - NextMaskBit = high_bit ; - ++NextMaskWord ; - } - if (NextFileNum >= ListSpace) { - ListSpace += 1000 ; - if (FileList == NULL) { - FileList = (symtab_type **) - malloc(sizeof(symtab_type *) * ListSpace) ; - } else { - FileList = (symtab_type **) - realloc(FileList, ListSpace * sizeof(symtab_type *)) ; - } - if (FileList == NULL) { - fatal("No memory for FileList in InstallFile") ; - } - } - FileList[NextFileNum++] = symp ; - /* put code here to sort the file list by name someday */ - } - TheFiles[symp->mask_word] |= symp->mask_bit ; - if (symp->mask_word > MaxCurFile) { - MaxCurFile = symp->mask_word ; - } - return(symp) ; -} - -/* RunPager - run the users pager program on the list of files - * in the set. - */ -void -RunPager( char * pp , set_type * sp ) -{ - char * cmd ; - int i ; - - cmd = (char *)TEMP_ALLOC(SetListSize(sp) + strlen(pp) + 2); - strcpy(cmd, pp) ; - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - strcat(cmd, " ") ; - strcat(cmd, FileList[i]->name) ; - } - } - } - system(cmd) ; - TEMP_FREE(cmd) ; -} - -/* AddSet - add a new set to the universal list of sets. Assign - * it the next set number. 
- */ -void -AddSet( set_type * sp ) -{ - if (NextSetNum >= SetSpace) { - SetSpace += 1000 ; - if (TheSets != NULL) { - TheSets = (set_type **) - realloc(TheSets, sizeof(set_type *) * SetSpace) ; - } else { - TheSets = (set_type **) - malloc(sizeof(set_type *) * SetSpace) ; - } - if (TheSets == NULL) { - fatal("No memory for TheSets in AddSet") ; - } - } - sp->set_num = NextSetNum ; - TheSets[NextSetNum++] = sp ; -} - -/* RunProg - run a program with arguments from id_list and - * accept list of file names back from the program which - * are installed in the symbol table and used to construct - * a new set. - */ -set_type * -RunProg( char const * pp , id_list_type * idlp ) -{ - int c ; - char * cmd ; - char * dp ; - struct obstack pipe_output_obstack; - int i ; - id_type * idep ; - id_type * next_id ; - FILE * prog ; - set_type * sp ; - - cmd = (char *)TEMP_ALLOC(ArgListSize(idlp) + strlen(pp) + 2); - FlushFiles() ; - strcpy(cmd, pp) ; - idep = idlp->id_list ; - while (idep != NULL) { - strcat(cmd, " ") ; - strcat(cmd, idep->id) ; - next_id = idep->next_id ; - free(idep) ; - idep = next_id ; - } - free(idlp) ; - - /* run program with popen, reading the output. Assume each - * white space terminated string is a file name. - */ - - prog = popen(cmd, "r") ; - obstack_init (&pipe_output_obstack); - - while (1) - { - c = getc (prog); - if (c == EOF || isspace (c)) - { - int n; - if ((n = obstack_object_size (&pipe_output_obstack)) > 0) - { - char *_file; - - obstack_1grow (&pipe_output_obstack, 0); - ++n; - _file = obstack_finish (&pipe_output_obstack); - InstallFile(_file) ; - if (n != strlen (_file) + 1) - abort (); - obstack_free (&pipe_output_obstack, _file); - } - if (c == EOF) - break; - } - else - { - obstack_1grow (&pipe_output_obstack, c); - } - } - obstack_free (&pipe_output_obstack, NULL); - - if (pclose(prog) != 0) { - /* if there was an error make an empty set, who knows what - * garbage the program printed. 
- */ - FlushFiles() ; - } - - sp = (set_type *) - malloc(sizeof(set_type) + sizeof(unsigned long) * MaxCurFile) ; - if (sp == NULL) { - fatal("No memory for set in RunProg") ; - } - sp->set_tail = 0 ; - sp->set_desc = (char *)malloc(strlen(cmd) + 1) ; - if (sp->set_desc == NULL) { - fatal("No memory for set description in RunProg") ; - } - strcpy(sp->set_desc, cmd) ; - sp->set_size = MaxCurFile + 1 ; - for (i = 0; i <= MaxCurFile; ++i) { - sp->set_data[i] = TheFiles[i] ; - } - AddSet(sp) ; - TEMP_FREE(cmd); - return(sp) ; -} - -/* SetDirectory - change the working directory. This will - * determine which ID file is found by the subprograms. - */ -void -SetDirectory( id_type * dir ) -{ - if (chdir(dir->id) != 0) { - fprintf(stderr,"Directory %s not accessible.\n", dir->id) ; - } - free(dir) ; -} - -/* SetIntersect - construct a new set from the intersection - * of two others. Also construct a new description string. - */ -set_type * -SetIntersect( set_type * sp1 , set_type * sp2 ) -{ - char * desc ; - int i ; - int len1 ; - int len2 ; - set_type * new_set ; - int new_size ; - - if (sp1->set_tail || sp2->set_tail) { - new_size = MAX(sp1->set_size, sp2->set_size) ; - } else { - new_size = MIN(sp1->set_size, sp2->set_size) ; - } - new_set = (set_type *)malloc(sizeof(set_type) + - (new_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetIntersect") ; - } - len1 = strlen(sp1->set_desc) ; - len2 = strlen(sp2->set_desc) ; - desc = (char *)malloc(len1 + len2 + 10) ; - if (desc == NULL) { - fatal("No memory for set description in SetIntersect") ; - } - new_set->set_desc = desc ; - strcpy(desc,"(") ; - ++desc ; - strcpy(desc, sp1->set_desc) ; - desc += len1 ; - strcpy(desc, ") AND (") ; - desc += 7 ; - strcpy(desc, sp2->set_desc) ; - desc += len2 ; - strcpy(desc, ")") ; - AddSet(new_set) ; - new_set->set_size = new_size ; - for (i = 0; i < new_size; ++i) { - new_set->set_data[i] = - ((i < sp1->set_size) ? 
sp1->set_data[i] : sp1->set_tail) & - ((i < sp2->set_size) ? sp2->set_data[i] : sp2->set_tail) ; - } - new_set->set_tail = sp1->set_tail & sp2->set_tail ; - return(new_set) ; -} - -/* SetUnion - construct a new set from the union of two others. - * Also construct a new description string. - */ -set_type * -SetUnion( set_type * sp1 , set_type * sp2 ) -{ - char * desc ; - int i ; - int len1 ; - int len2 ; - set_type * new_set ; - int new_size ; - - new_size = MAX(sp1->set_size, sp2->set_size) ; - new_set = (set_type *)malloc(sizeof(set_type) + - (new_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetUnion") ; - } - len1 = strlen(sp1->set_desc) ; - len2 = strlen(sp2->set_desc) ; - desc = (char *)malloc(len1 + len2 + 9) ; - if (desc == NULL) { - fatal("No memory for set description in SetUnion") ; - } - new_set->set_desc = desc ; - strcpy(desc,"(") ; - ++desc ; - strcpy(desc, sp1->set_desc) ; - desc += len1 ; - strcpy(desc, ") OR (") ; - desc += 6 ; - strcpy(desc, sp2->set_desc) ; - desc += len2 ; - strcpy(desc, ")") ; - AddSet(new_set) ; - new_set->set_size = new_size ; - for (i = 0; i < new_size; ++i) { - new_set->set_data[i] = - ((i < sp1->set_size) ? (sp1->set_data[i]) : sp1->set_tail) | - ((i < sp2->set_size) ? (sp2->set_data[i]) : sp2->set_tail) ; - } - new_set->set_tail = sp1->set_tail | sp2->set_tail ; - return(new_set) ; -} - -/* SetInverse - construct a new set from the inverse of another. - * Also construct a new description string. - * - * This is kind of tricky. An inverse set in iid may grow during - * the course of a session. By NOTing the set_tail extension the - * inverse at any given time will be defined as the inverse against - * a universe that grows as additional queries are made and new files - * are added to the database. 
- * - * Several alternative definitions were possible (snapshot the - * universe at the time of the NOT, go read the ID file to - * determine the complete universe), but this one was the one - * I picked. - */ -set_type * -SetInverse( set_type * sp ) -{ - char * desc ; - int i ; - set_type * new_set ; - - new_set = (set_type *)malloc(sizeof(set_type) + - (sp->set_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetInverse") ; - } - desc = (char *)malloc(strlen(sp->set_desc) + 5) ; - if (desc == NULL) { - fatal("No memory for set description in SetInverse") ; - } - new_set->set_desc = desc ; - strcpy(desc,"NOT ") ; - desc += 4 ; - strcpy(desc, sp->set_desc) ; - AddSet(new_set) ; - new_set->set_size = sp->set_size ; - for (i = 0; i < sp->set_size; ++i) { - new_set->set_data[i] = ~ sp->set_data[i] ; - } - new_set->set_tail = ~ sp->set_tail ; - return(new_set) ; -} - -/* RunShell - run a program with arguments from id_list. - */ -void -RunShell( char * pp , id_list_type * idlp ) -{ - char * cmd ; - id_type * idep ; - id_type * next_id ; - - cmd = (char *)TEMP_ALLOC(ArgListSize(idlp) + strlen(pp) + 2); - strcpy(cmd, pp) ; - idep = idlp->id_list ; - while (idep != NULL) { - strcat(cmd, " ") ; - strcat(cmd, idep->id) ; - next_id = idep->next_id ; - free(idep) ; - idep = next_id ; - } - free(idlp) ; - system(cmd) ; - TEMP_FREE(cmd); -} @@ -1,211 +0,0 @@ -.TH LID 1 -.SH NAME -lid, gid, eid, aid, pid \- query id database -.SH SYNOPSIS -.B lid -.RB [ \-f \^file] -.RB [ \-u \^n] -.RB [ \-r \^dir] -.RB [ \-edoxamseknc] -patterns... -.PP -.B gid -.RB [ \-f \^file] -.RB [ \-r \^dir] -.RB [ \-edoxamsec] -patterns... -.PP -.B eid -.RB [ \-f \^file] -.RB [ \-r \^dir] -.RB [ \-doxamsec] -patterns... -.PP -.B aid -.RB [ \-f \^file] -.RB [ \-r \^dir] -.RB [ \-doxamsc] -patterns... -.PP -.B pid -.RB [ \-f \^file] -.RB [ \-r \^dir] -.RB [ \-ekncb] -patterns... 
-.SH DESCRIPTION -These commands provide a flexible query interface to the -.I id -database. -.I Lid\^ -does a lookup on -.IR patters -and prints out lines in this way: -.PP -.nf -idname ../hdir/hfile.h ../cdir/{cfile1,cfile2}.c -.fi -.PP -Notice that multiple files with the same directory prefix -and suffix are concatenated in the globbing-set-notation of -.IR csh (1). -Also notice that all of the -.I id -database query commands adjust the list of pathnames to be relative -to your current working directory, provided that -.IR mkid (1) -was used to build the database, and your working directory -is located within the sub-tree covered by the -.I id -database. -.PP -If multiple names match on pattern, then there will be one line -of output per name. The mnemonic significance of the name is -\fI\|l(ookup) id\fP. -.PP -.I Gid -does a lookup and then searches for the names it matches in the -files where they occur. The mnemonic for this name is -\fI\|g(rep)id\fP. -.PP -.I Eid -does a lookup, and then invokes an editor on all files with -the matched name as an initial search string. Of course, this -name stands for -\fI\|e(dit) id\fP. -.PP -.I Eid -uses four environment variables to control its invocation of the -editor. -Naturally, -.B EDITOR -is used to locate the editing program. -.B EIDARG -is a -.IR printf (3S) -string used to specify the form of the initial-search-string -argument. If the editor does not support such an argument, -this variable may be left unset. -.B EIDLDEL -and -.B EIDRDEL -specify the form of the left and right word-delimiters respectively. -The best way to explain the use of these last three variables is -with an example. Here are the proper settings for vi(1): -.nf -EIDARG='+/%s/' # initial search argument template -EIDLDEL='\\<' # left word-delimiter -EIDRDEL='\\>' # right word-delimiter -.fi -.PP -.I Patterns -may be simple alpha-numeric strings, or regular expressions in the -style of -.IR regcmp (3). 
-If the string contains no regular-expression meta-characters, it is -searched for as a -.IR word . -If the string contains meta-characters, or if the \-e argument is -supplied, it is searched for as regular-expression. -.PP -.I Aid\^ -produces output in the style of -.I lid\^ -but its pattern arguments are searched for as substrings within -the identifiers in the database. No regular-expression search -is performed, even if the pattern contains meta-characters. -The search is conducted in an alphabetic case insensitive manner. -The mnemonic for this name is -\fI\|a(propos) id\fP. -.PP -.I Pid\^ -is used to match the input patterns against the names of the files -in the database rather than the contents of the files. The pattern -is assumed to be a simple shell wild card pattern unless the -.B \-e -option is given, in which case full regular expression matching -is used. -The -.B \-b -option can be used to restrict the match to just the basename portion -of the full absolute path name of the file. -The mnemonic for this name is -\fI\|p(ath) id\fP. -.PP -The following options are recognized: -.TP 10 -.BR \-f file\^ -Use -.I file\^ -as the database instead of the default -.BR ID . -.TP 10 -.BR \-u n -Lists all identifiers in the database that are non-unique within the first -.I n -characters. This facility is particularly helpful when porting a program -to a system whose compiler or linker has fewer significant characters -for identifiers. -.TP 10 -.BR \-r dir\^ -Assume the names stored in the database are relative to this directory. -This option is useful if you create the database in one place, then move -it somewhere else. Normally all the query tools assume the names in -the database are relative to the location of the database. -.TP 10 -.B \-c -This option is similar to -.BR \-r , -but it tells the id query tool to assume the names in the ID database -are stored relative to the current working directory. 
-.TP 10 -.B \-k -Suppresses the use of \fL{\fP and \fL}\fP as a shorthand in the -generated list of file names. Each name is output in full. -.TP 10 -.B \-n -Suppresses printing the name of the search string, only the names of -the files containing the string are printed. Together with the \fB\-k\fP -option this can be used to generate lists of files to pass to other -programs. -.PP -.TP 10 -.B \-b -In the -.I pid -program, the -.B \-b -option is used to force pattern matching on just the base names of the -file, otherwise the pattern matching is done on the full absolute file -name. -.PP -The remaining options are for use in conjunction with numeric patterns: -.TP 10 -.B \-doxa -These options may be specified in any combination. -They limit numeric matches to specific radixes. -The -.BR \-d , -.BR \-o , -and -.B \-x -options limit matches to decimal, octal, and hexadecimal respectively. -The -.BR \-a -option is a shorthand for specifying all three radixes. -.PP -Searches for numbers -are conducted numerically rather than lexically, so that all -representations for a given number are potentially available -from a single search. -.TP 10 -.B \-m -Merge multiple lines of output into a single line. -.TP 10 -.B \-s -Limit the results of the search to identifiers that occur only -once in the entire set of sources covered by the database. -This option is useful for finding identifiers that are defined -but never used. -.SH SEE ALSO -mkid(1), -fid(1). @@ -1,1365 +0,0 @@ -/* lid.c -- primary query interface for mkid database - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include <stdlib.h> -#include <unistd.h> -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#include <signal.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <assert.h> -#include <limits.h> -#include <regex.h> - -#include <config.h> -#include "alloc.h" -#include "idfile.h" -#include "token.h" -#include "bitops.h" -#include "strxtra.h" -#include "misc.h" -#include "filenames.h" - -typedef void (*doit_t) __P((char const *name, char **argv)); - -unsigned char *tree8_to_bits __P((unsigned char *bits_vec, unsigned char const *hits_tree8)); -void tree8_to_bits_1 __P((unsigned char **bits_vec, unsigned char const **hits_tree8, int level)); -char **tree8_to_argv __P((unsigned char const *hits_tree8)); -char **bits_to_argv __P((unsigned char const *bits_vec)); - -static void usage __P((void)); -int common_prefix_suffix __P((char const *path1, char const *path2)); -void look_id __P((char const *name, char **argv)); -void grep_id __P((char const *name, char **argv)); -void edit_id __P((char const *name, char **argv)); -int skip_to_argv __P((char **argv)); -int find_plain __P((char const *arg, doit_t doit)); -int find_anchor __P((char const *arg, doit_t doit)); -int find_regexp __P((char const *arg, doit_t doit)); -int find_number __P((char const *arg, doit_t doit)); -int find_non_unique __P((unsigned int, doit_t doit)); -int find_apropos __P((char const *arg, doit_t doit)); -void parse_frequency_arg __P((char const *arg)); -int frequency_wanted __P((char const *tok)); -char const 
*strcpos __P((char const *s1, char const *s2)); -char const *file_regexp __P((char const *name0, char const *left_delimit, char const *right_delimit)); -off_t find_token __P((char const *token)); -int is_regexp __P((char *name)); -char **vec_to_argv __P((int const *vec)); -int file_name_wildcard __P((char const *re, char const *fn)); -int match_file_names __P((char const *re, doit_t doit)); -int word_match __P((char const *name0, char const *line)); -int radix __P((char const *name)); -int stoi __P((char const *name)); -int otoi __P((char const *name)); -int dtoi __P((char const *name)); -int xtoi __P((char const *name)); -void savetty __P((void)); -void restoretty __P((void)); -void linetty __P((void)); -void chartty __P((void)); - -enum radix { - RADIX_OCT = 1, - RADIX_DEC = 2, - RADIX_HEX = 4, - RADIX_ALL = RADIX_DEC | RADIX_OCT | RADIX_HEX -}; - -#define TOLOWER(c) (isupper (c) ? tolower (c) : (c)) -#define IS_ALNUM(c) (isalnum (c) || (c) == '_') - -#ifndef BRACE_NOTATION_DEFAULT -#define BRACE_NOTATION_DEFAULT 1 -#endif - -/* Sorry about all the globals, but it's really cleaner this way. 
*/ -FILE *id_FILE; -int merging; -int radix_arg; -int echo_on = 1; -int brace_notation_on = BRACE_NOTATION_DEFAULT; -int file_name_regexp = 0; -int match_base = 0; -char *anchor_dir; -int tree8_levels; -unsigned int bits_vec_size; -char PWD_buf[MAXPATHLEN]; -struct idhead idh; -int (*find_func) __P((char const *, doit_t)); -unsigned short frequency_low = 1; -unsigned short frequency_high = USHRT_MAX; -char *buf; -char *buf2; -unsigned char *bits_vec; - -char const *program_name; - -static void -usage (void) -{ - fprintf (stderr, "Usage: %s [-f<file>] [-u<n>] [-r<dir>] [-mewdoxaskncg] patterns...\n", program_name); - exit (1); -} - -int -main (int argc, char **argv) -{ - char const *id_file_name = IDFILE; - doit_t doit = look_id; - int force_merge = 0; - unsigned int unique_limit = 0; - int (*forced_find_func) __P((char const *, doit_t)) = NULL; - - program_name = basename ((argc--, *argv++)); - - while (argc) - { - char const *arg = (argc--, *argv++); - int op = *arg++; - switch (op) - { - case '-': - case '+': - break; - default: - (argc++, --argv); - goto argsdone; - } - while (*arg) - switch (*arg++) - { - case 'f': - id_file_name = arg; - goto nextarg; - case 'u': - unique_limit = stoi (arg); - goto nextarg; - case 'm': - force_merge = 1; - break; - case 'e': - forced_find_func = find_regexp; - file_name_regexp = 1; - break; - case 'w': - forced_find_func = find_plain; - break; - case 'd': - radix_arg |= RADIX_DEC; - break; - case 'o': - radix_arg |= RADIX_OCT; - break; - case 'x': - radix_arg |= RADIX_HEX; - break; - case 'a': - radix_arg |= RADIX_ALL; - break; - case 'F': - parse_frequency_arg (arg); - goto nextarg; - case 'k': - brace_notation_on = 0; - break; - case 'g': - brace_notation_on = 1; - break; - case 'n': - echo_on = 0; - break; - case 'b': - match_base = 1; - break; - case 'c': - maybe_anchor_usage (); - anchor_dir = PWD_buf; - break; - case 'r': - maybe_anchor_usage (); - anchor_dir = arg; - goto nextarg; - default: - usage (); - } - nextarg:; 
- } -argsdone: - - get_PWD (PWD_buf); - id_file_name = find_id_file (id_file_name); - - if (anchor_dir == NULL) - anchor_dir = strdup (span_dir_name (PWD_buf, id_file_name)); - else if (anchor_dir != PWD_buf) - anchor_dir = strdup (span_dir_name (PWD_buf, anchor_dir)); - - id_FILE = init_id_file (id_file_name, &idh); - bits_vec_size = (idh.idh_files + 7) >> 3; - tree8_levels = tree8_count_levels (idh.idh_files); - - switch (program_name[0]) - { - case 'a': - forced_find_func = find_apropos; - /*FALLTHROUGH*/ - case 'l': - doit = look_id; - break; - case 'g': - doit = grep_id; - break; - case 'e': - doit = edit_id; - break; - case 'p': - forced_find_func = match_file_names; - doit = look_id; - break; - default: - program_name = "[algep]id"; - usage (); - } - - if (argc == 0) - { - (argc++, --argv); - *(char const **)argv = "."; - } - - while (argc) - { - long val = -1; - char *arg = (argc--, *argv++); - - if (forced_find_func) - find_func = forced_find_func; - else if (radix (arg) && (val = stoi (arg)) >= 0) - find_func = find_number; - else if (is_regexp (arg)) - find_func = find_regexp; - else if (arg[0] == '^') - find_func = find_anchor; - else - find_func = find_plain; - - if ((doit == look_id && !force_merge) - || (find_func == find_number - && val > 7 - && radix_arg != RADIX_DEC - && radix_arg != RADIX_OCT - && radix_arg != RADIX_HEX)) - merging = 0; - else - merging = 1; - - buf = malloc (idh.idh_buf_size); - buf2 = malloc (idh.idh_buf_size); - bits_vec = MALLOC (unsigned char, bits_vec_size); - - if (unique_limit) - { - if (!find_non_unique (unique_limit, doit)) - fprintf (stderr, "All identifiers are unique within the first %d characters\n", unique_limit); - exit (0); - } - else if (!(*find_func) (arg, doit)) - { - fprintf (stderr, "%s: not found\n", arg); - continue; - } - } - exit (0); -} - -/* common_prefix_suffix returns non-zero if two file names have a - fully common directory prefix and a common suffix (i.e., they're - eligible for coalescing with 
brace notation. */ - -int -common_prefix_suffix (char const *file_name_1, char const *file_name_2) -{ - char const *slash_1; - char const *slash_2; - - slash_1 = strrchr (file_name_1, '/'); - slash_2 = strrchr (file_name_2, '/'); - - if (slash_1 == NULL && slash_2 == NULL) - return strequ (suff_name (file_name_1), suff_name (file_name_2)); - if ((slash_1 - file_name_1) != (slash_2 - file_name_2)) - return 0; - if (!strnequ (file_name_1, file_name_2, slash_1 - file_name_1)) - return 0; - return strequ (suff_name (slash_1), suff_name (slash_2)); -} - -void -look_id (char const *name, char **argv) -{ - char const *arg; - char const *dir; - int using_braces = 0; - - if (echo_on) - printf ("%-14s ", name); - while (*argv) - { - arg = *argv++; - if (*argv && brace_notation_on && common_prefix_suffix (arg, *argv)) - { - if (using_braces) - printf (",%s", root_name (arg)); - else - { - dir = dirname (arg); - if (dir && !strequ (dir, ".")) - printf ("%s/", dir); - printf ("{%s", root_name (arg)); - } - using_braces = 1; - } - else - { - if (using_braces) - printf (",%s}%s", root_name (arg), suff_name (arg)); - else - fputs (arg, stdout); - using_braces = 0; - if (*argv) - putchar (' '); - } - } - putchar ('\n'); -} - -void -grep_id (char const *name, char **argv) -{ - char line[BUFSIZ]; - char const *re = NULL; - int line_number; - - if (merging) - { - re = file_regexp (name, "[^a-zA-Z0-9_]_*", "[^a-zA-Z0-9_]"); - if (re) - { - char const *regexp_error = re_comp (re); - if (regexp_error) - { - fprintf (stderr, "%s: Syntax Error: %s (%s)\n", program_name, re, regexp_error); - return; - } - } - } - - line[0] = ' '; /* sentry */ - while (*argv) - { - char const *file_name = *argv++; - FILE *gid_FILE = fopen (file_name, "r"); - - if (gid_FILE == NULL) - { - filerr ("open", file_name); - continue; - } - line_number = 0; - while (fgets (&line[1], sizeof (line), gid_FILE)) - { - line_number++; - if (re) - { - if (!re_exec (line)) - continue; - } - else if (!word_match (name, 
line)) - continue; - printf ("%s:%d: %s", file_name, line_number, &line[1]); - } - fclose (gid_FILE); - } -} - -void -edit_id (char const *name, char **argv) -{ - char re_buffer[BUFSIZ]; - char ed_arg_buffer[BUFSIZ]; - char const *re; - int c; - int skip; - static char const *editor; - static char const *eid_arg; - static char const *eid_right_del; - static char const *eid_left_del; - - if (editor == NULL) - { - editor = getenv ("EDITOR"); - if (editor == NULL) - { - editor = "vi"; - eid_arg = "+1;/%s/"; - eid_left_del = "\\<"; - eid_right_del = "\\>"; - } - } - if (eid_left_del == NULL) - { - eid_arg = getenv ("EIDARG"); - eid_left_del = getenv ("EIDLDEL"); - if (eid_left_del == NULL) - eid_left_del = ""; - eid_right_del = getenv ("EIDRDEL"); - if (eid_right_del == NULL) - eid_right_del = ""; - } - - look_id (name, argv); - savetty (); - for (;;) - { - printf ("Edit? [y1-9^S/nq] "); - fflush (stdout); - chartty (); - c = (getchar () & 0177); - restoretty (); - switch (TOLOWER (c)) - { - case '/': - case ('s' & 037): - putchar ('/'); - skip = skip_to_argv (argv); - if (skip < 0) - continue; - argv += skip; - goto editit; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - putchar (c); - skip = c - '0'; - break; - case 'y': - putchar (c); - /*FALLTHROUGH*/ - case '\n': - case '\r': - skip = 0; - break; - case 'q': - putchar (c); - putchar ('\n'); - exit (0); - case 'n': - putchar (c); - putchar ('\n'); - return; - default: - putchar (c); - putchar ('\n'); - continue; - } - - putchar ('\n'); - while (skip--) - if (*++argv == NULL) - continue; - break; - } -editit: - - if (merging) - re = file_regexp (name, eid_left_del, eid_right_del); - else - re = NULL; - if (re == NULL) - { - re = re_buffer; - sprintf (re_buffer, "%s%s%s", eid_left_del, name, eid_right_del); - } - - switch (fork ()) - { - case -1: - fprintf (stderr, "%s: Cannot fork (%s)\n", program_name, strerror (errno)); - exit (1); - case 0: - argv--; - 
if (eid_arg) - { - argv--; - sprintf (ed_arg_buffer, eid_arg, re); - argv[1] = ed_arg_buffer; - } - *(char const **) argv = editor; - execvp (editor, argv); - filerr ("exec", editor); - default: - { - void (*oldint) __P((int)) = signal (SIGINT, SIG_IGN); - void (*oldquit) __P((int)) = signal (SIGQUIT, SIG_IGN); - - while (wait (0) == -1 && errno == EINTR) - ; - - signal (SIGINT, oldint); - signal (SIGQUIT, oldquit); - } - break; - } -} - -int -skip_to_argv (char **argv) -{ - char pattern[BUFSIZ]; - unsigned int count; - - if (gets (pattern) == NULL) - return -1; - - for (count = 0; *argv; count++, argv++) - if (strcpos (*argv, pattern)) - return count; - return -1; -} - -int -find_plain (char const *arg, doit_t doit) -{ - if (find_token (arg) == 0) - return 0; - gets_past_00 (buf, id_FILE); - assert (*buf); - if (!frequency_wanted (buf)) - return 0; - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - return 1; -} - -int -find_anchor (char const *arg, doit_t doit) -{ - int count; - unsigned int length; - - if (find_token (++arg) == 0) - return 0; - - length = strlen (arg); - count = 0; - if (merging) - memset (bits_vec, 0, bits_vec_size); - while (gets_past_00 (buf, id_FILE) > 0) - { - assert (*buf); - if (!frequency_wanted (buf)) - continue; - if (!strnequ (arg, buf, length)) - break; - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (buf)); - else - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - count++; - } - if (merging && count) - (*doit) (--arg, bits_to_argv (bits_vec)); - - return count; -} - -int -find_regexp (char const *re, doit_t doit) -{ - int count; - char const *regexp_error; - - regexp_error = re_comp (re); - if (regexp_error) - { - fprintf (stderr, "%s: Syntax Error: %s (%s)\n", program_name, re, regexp_error); - return 0; - } - fseek (id_FILE, idh.idh_tokens_offset, SEEK_SET); - - count = 0; - if (merging) - memset (bits_vec, 0, bits_vec_size); - while (gets_past_00 (buf, id_FILE) > 0) - { - assert (*buf); - if (!frequency_wanted 
(buf)) - continue; - if (!re_exec (buf)) - continue; - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (buf)); - else - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - count++; - } - if (merging && count) - (*doit) (re, bits_to_argv (bits_vec)); - - return count; -} - -int -find_number (char const *arg, doit_t doit) -{ - int count; - int rdx; - int val; - int hit_digits = 0; - - rdx = (val = stoi (arg)) ? RADIX_ALL : radix (arg); - fseek (id_FILE, idh.idh_tokens_offset, SEEK_SET); - - count = 0; - if (merging) - memset (bits_vec, 0, bits_vec_size); - while (gets_past_00 (buf, id_FILE) > 0) - { - if (hit_digits) - { - if (!isdigit (*buf)) - break; - } - else - { - if (isdigit (*buf)) - hit_digits = 1; - } - - if (!((radix_arg ? radix_arg : rdx) & radix (buf)) - || stoi (buf) != val) - continue; - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (buf)); - else - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - count++; - } - if (merging && count) - (*doit) (arg, bits_to_argv (bits_vec)); - - return count; -} - -/* Find identifiers that are non-unique within the first `count' - characters. 
*/ -int -find_non_unique (unsigned int limit, doit_t doit) -{ - char *old = buf; - char *new = buf2; - int consecutive = 0; - int count = 0; - char name[1024]; - - if (limit <= 1) - usage (); - assert (limit < sizeof(name)); - - name[0] = '^'; - *new = '\0'; - fseek (id_FILE, idh.idh_tokens_offset, SEEK_SET); - while (gets_past_00 (old, id_FILE) > 0) - { - char *tmp; - if (!(tok_flags (old) & TOK_NAME)) - continue; - tmp = old; - old = new; - new = tmp; - if (!strnequ (new, old, limit)) - { - if (consecutive && merging) - { - strncpy (&name[1], old, limit); - (*doit) (name, bits_to_argv (bits_vec)); - } - consecutive = 0; - continue; - } - if (!consecutive++) - { - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (old)); - else - (*doit) (old, tree8_to_argv (tok_hits_addr (old))); - count++; - } - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (new)); - else - (*doit) (new, tree8_to_argv (tok_hits_addr (new))); - count++; - } - if (consecutive && merging) - { - strncpy (&name[1], new, limit); - (*doit) (name, bits_to_argv (bits_vec)); - } - return count; -} - -int -find_apropos (char const *arg, doit_t doit) -{ - int count; - - fseek (id_FILE, idh.idh_tokens_offset, SEEK_SET); - - count = 0; - if (merging) - memset (bits_vec, 0, bits_vec_size); - while (gets_past_00 (buf, id_FILE) > 0) - { - assert (*buf); - if (!frequency_wanted (buf)) - continue; - if (strcpos (buf, arg) == NULL) - continue; - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (buf)); - else - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - count++; - } - if (merging && count) - (*doit) (arg, bits_to_argv (bits_vec)); - - return count; -} - -void -parse_frequency_arg (char const *arg) -{ - if (*arg == '-') - frequency_low = 1; - else - { - frequency_low = atoi (arg); - while (isdigit (*arg)) - arg++; - if (*arg == '-') - arg++; - } - if (*arg) - frequency_high = atoi (arg); - else if (arg[-1] == '-') - frequency_high = USHRT_MAX; - else - frequency_high = frequency_low; - if 
(frequency_low > frequency_high) - fprintf (stderr, "Bogus frequencies: %u > %u\n", frequency_low, frequency_high); -} - -int -frequency_wanted (char const *tok) -{ - unsigned int count = tok_count (tok); - return (frequency_low <= count && count <= frequency_high); -} - -/* if string `s2' occurs in `s1', return a pointer to the first match. - Ignore differences in alphabetic case. */ -char const * -strcpos (char const *s1, char const *s2) -{ - char const *s1p; - char const *s2p; - char const *s1last; - - for (s1last = &s1[strlen (s1) - strlen (s2)]; s1 <= s1last; s1++) - for (s1p = s1, s2p = s2; TOLOWER (*s1p) == TOLOWER (*s2p); s1p++) - if (*++s2p == '\0') - return s1; - return NULL; -} - -/* Convert the regular expression that we used to locate identifiers - in the id database into one suitable for locating the identifiers - in files. */ -char const * -file_regexp (char const *name0, char const *left_delimit, char const *right_delimit) -{ - static char re_buffer[BUFSIZ]; - char *name = (char *) name0; - - if (find_func == find_number && merging) - { - sprintf (re_buffer, "%s0*[Xx]*0*%d[Ll]*%s", left_delimit, stoi (name), right_delimit); - return re_buffer; - } - - if (!is_regexp (name) && name[0] != '^') - return NULL; - - if (name[0] == '^') - name0++; - else - left_delimit = ""; - while (*++name) - ; - if (*--name == '$') - *name = '\0'; - else - right_delimit = ""; - - sprintf (re_buffer, "%s%s%s", left_delimit, name0, right_delimit); - return re_buffer; -} - -off_t -find_token (char const *token_0) -{ - off_t offset = 0; - off_t start = idh.idh_tokens_offset - 2; - off_t end = idh.idh_end_offset; - off_t anchor_offset = 0; - int order = -1; - - while (start < end) - { - int c; - int incr = 1; - char const *token; - - offset = start + (end - start) / 2; - fseek (id_FILE, offset, SEEK_SET); - offset += skip_past_00 (id_FILE); - if (offset >= end) - { - offset = start + 2; - fseek (id_FILE, offset, SEEK_SET); - } - - /* compare the token names */ - token = 
token_0; - while (*token == (c = getc (id_FILE)) && *token && c) - { - token++; - incr++; - } - if (c && !*token && find_func == find_anchor) - anchor_offset = offset; - order = *token - c; - - if (order < 0) - end = offset - 2; - else if (order > 0) - start = offset + incr + skip_past_00 (id_FILE) - 2; - else - break; - } - - if (order) - { - if (anchor_offset) - offset = anchor_offset; - else - return 0; - } - fseek (id_FILE, offset, SEEK_SET); - return offset; -} - -/* Are there any regexp meta-characters in name?? */ -int -is_regexp (char *name) -{ - int backslash = 0; - - if (*name == '^') - name++; - while (*name) - { - if (*name == '\\') - { - if (strchr ("<>", name[1])) - return 1; - name++, backslash++; - } - else if (strchr ("[]{}().*+^$", *name)) - return 1; - name++; - } - if (backslash) - while (*name) - { - if (*name == '\\') - strcpy (name, name + 1); - name++; - } - return 0; -} - -/* file_name_wildcard implements a simple pattern matcher that - emulates the shell wild card capability. - - * - any string of chars - ? 
- any char - [] - any char in set (if first char is !, any not in set) - \ - literal match next char */ -int -file_name_wildcard (char const *re, char const *fn) -{ - int c; - int i; - char set[256]; - int revset; - - while ((c = *re++) != '\0') - { - if (c == '*') - { - if (*re == '\0') - return 1; /* match anything at end */ - while (*fn != '\0') - { - if (file_name_wildcard (re, fn)) - return 1; - ++fn; - } - return 0; - } - else if (c == '?') - { - if (*fn++ == '\0') - return 0; - } - else if (c == '[') - { - c = *re++; - memset (set, 0, 256); - if (c == '!') - { - revset = 1; - c = *re++; - } - else - revset = 0; - while (c != ']') - { - if (c == '\\') - c = *re++; - set[c] = 1; - if ((*re == '-') && (*(re + 1) != ']')) - { - re += 1; - while (++c <= *re) - set[c] = 1; - ++re; - } - c = *re++; - } - if (revset) - for (i = 1; i < 256; ++i) - set[i] = !set[i]; - if (!set[(int)*fn++]) - return 0; - } - else - { - if (c == '\\') - c = *re++; - if (c != *fn++) - return 0; - } - } - return (*fn == '\0'); -} - -/* match_file_names implements the pid tool. This matches the *names* - of files in the database against the input pattern rather than the - *contents* of the files. 
*/ - -int -match_file_names (char const *re, doit_t doit) -{ - char const *abs_name; - struct idarg *ida = id_args; - int i; - int count = 0; - int matched; - - if (file_name_regexp) - { - char const *regexp_error = re_comp (re); - if (regexp_error) - { - fprintf (stderr, "%s: Syntax Error: %s (%s)\n", program_name, re, regexp_error); - return 0; - } - } - - for (i = 0; i < idh.idh_files; i++, ida++) - { - if (*ida->ida_arg == 0) - continue; - if (match_base) - { - abs_name = strrchr (ida->ida_arg, '/'); - if (abs_name == NULL) - abs_name = ida->ida_arg; - } - else - abs_name = span_file_name (anchor_dir, ida->ida_arg); - if (file_name_regexp) - matched = re_exec (abs_name); - else - matched = file_name_wildcard (re, abs_name); - if (matched) - { - BITSET (bits_vec, i); - ++count; - } - } - if (count) - (*doit) (re, bits_to_argv (bits_vec)); - return count; -} - -/* Does `name' occur in `line' delimited by non-alphanumerics?? */ -int -word_match (char const *name0, char const *line) -{ - char const *name = name0; - - for (;;) - { - /* find an initial-character match */ - while (*line != *name) - { - if (*line == '\0' || *line == '\n') - return 0; - line++; - } - /* do we have a word delimiter on the left ?? */ - if (isalnum (line[-1])) - { - line++; - continue; - } - /* march down both strings as long as we match */ - while (*++name == *++line) - ; - /* is this the end of `name', is there a word delimiter ?? */ - if (*name == '\0' && !IS_ALNUM (*line)) - return 1; - name = name0; - } -} - -/* Use the C lexical rules to determine an ascii number's radix. The - radix is returned as a bit map, so that more than one radix may - apply. In particular, it is impossible to determine the radix of - 0, so return all possibilities. 
*/ -int -radix (char const *name) -{ - if (!isdigit (*name)) - return 0; - if (*name != '0') - return RADIX_DEC; - name++; - if (*name == 'x' || *name == 'X') - return RADIX_HEX; - while (*name && *name == '0') - name++; - return (RADIX_OCT | ((*name) ? 0 : RADIX_DEC)); -} - -/* Convert an ascii string number to an integer. Determine the radix - before converting. */ -int -stoi (char const *name) -{ - switch (radix (name)) - { - case RADIX_DEC: - return (dtoi (name)); - case RADIX_OCT: - return (otoi (&name[1])); - case RADIX_HEX: - return (xtoi (&name[2])); - case RADIX_DEC | RADIX_OCT: - return 0; - default: - return -1; - } -} - -/* Convert an ascii octal number to an integer. */ -int -otoi (char const *name) -{ - int n = 0; - - while (*name >= '0' && *name <= '7') - { - n *= 010; - n += *name++ - '0'; - } - if (*name == 'l' || *name == 'L') - name++; - return (*name ? -1 : n); -} - -/* Convert an ascii decimal number to an integer. */ -int -dtoi (char const *name) -{ - int n = 0; - - while (isdigit (*name)) - { - n *= 10; - n += *name++ - '0'; - } - if (*name == 'l' || *name == 'L') - name++; - return (*name ? -1 : n); -} - -/* Convert an ascii hex number to an integer. */ -int -xtoi (char const *name) -{ - int n = 0; - - while (isxdigit (*name)) - { - n *= 0x10; - if (isdigit (*name)) - n += *name++ - '0'; - else if (islower (*name)) - n += 0xa + *name++ - 'a'; - else - n += 0xA + *name++ - 'A'; - } - if (*name == 'l' || *name == 'L') - name++; - return (*name ? 
-1 : n); -} - -unsigned char * -tree8_to_bits (unsigned char *bv_0, unsigned char const *hits_tree8) -{ - unsigned char* bv = bv_0; - tree8_to_bits_1 (&bv, &hits_tree8, tree8_levels); - return bv_0; -} - -void -tree8_to_bits_1 (unsigned char **bv, unsigned char const **hits_tree8, int level) -{ - int hits = *(*hits_tree8)++; - - if (--level) - { - int incr = 1 << ((level - 1) * 3); - int bit; - for (bit = 1; bit & 0xff; bit <<= 1) - { - if (bit & hits) - tree8_to_bits_1 (bv, hits_tree8, level); - else - *bv += incr; - } - } - else - *(*bv)++ |= hits; -} - -char ** -bits_to_argv (unsigned char const *bv) -{ - int const reserved_argv_slots = 3; - static char **argv_0; - char **argv; - struct idarg *ida = id_args; - struct idarg *end = &id_args[idh.idh_files]; - - if (argv_0 == NULL) - argv_0 = MALLOC (char *, idh.idh_files + reserved_argv_slots + 2); - argv = &argv_0[reserved_argv_slots]; - - for (;;) - { - int hits; - int bit; - - while (*bv == 0) - { - bv++; - ida += 8; - if (ida >= end) - goto out; - } - hits = *bv++; - for (bit = 1; bit & 0xff; bit <<= 1) - { - if (bit & hits) - { - if (!(ida->ida_flags & IDA_RELATIVE)) - { - char const *abs_name = span_file_name (anchor_dir, ida->ida_arg); - char const *rel_name = relative_file_name (PWD_buf, abs_name); - char const *short_name = (strlen (rel_name) > strlen (abs_name) - ? 
abs_name : rel_name); - if (!strequ (short_name, ida->ida_arg)) - ida->ida_arg = strdup (short_name); - ida->ida_flags |= IDA_RELATIVE; - } - *argv++ = ida->ida_arg; - } - if (++ida >= end) - goto out; - } - } -out: - *argv = NULL; - return &argv_0[reserved_argv_slots]; -} - -char ** -tree8_to_argv (unsigned char const *hits_tree8) -{ - memset (bits_vec, 0, bits_vec_size); - return bits_to_argv (tree8_to_bits (bits_vec, hits_tree8)); -} - -#if HAVE_TERMIOS_H - -#include <termios.h> -struct termios linemode; -struct termios charmode; -struct termios savemode; -#define GET_TTY_MODES(modes) tcgetattr (0, (modes)) -#define SET_TTY_MODES(modes) tcsetattr(0, TCSANOW, (modes)) - -#else /* not HAVE_TERMIOS_H */ - -# if HAVE_SYS_IOCTL_H -# include <sys/ioctl.h> -# endif - -# if HAVE_TERMIO_H - -# include <termio.h> -struct termio linemode; -struct termio charmode; -struct termio savemode; -#define GET_TTY_MODES(modes) ioctl (0, TCGETA, (modes)) -#define SET_TTY_MODES(modes) ioctl (0, TCSETA, (modes)) - -# else /* not HAVE_TERMIO_H */ - -# if HAVE_SGTTY_H - -# include <sgtty.h> -struct sgttyb linemode; -struct sgttyb charmode; -struct sgttyb savemode; - -# ifdef TIOCGETP -#define GET_TTY_MODES(modes) ioctl (0, TIOCGETP, (modes)) -#define SET_TTY_MODES(modes) ioctl (0, TIOCSETP, (modes)) -# else -#define GET_TTY_MODES(modes) gtty (0, (modes)) -#define SET_TTY_MODES(modes) stty (0, (modes)) -# endif - -void -savetty (void) -{ -# ifdef TIOCGETP - ioctl(0, TIOCGETP, &savemode); -# else - gtty(0, &savemode); -# endif - charmode = linemode = savemode; - - charmode.sg_flags &= ~ECHO; - charmode.sg_flags |= RAW; - - linemode.sg_flags |= ECHO; - linemode.sg_flags &= ~RAW; -} - -# endif /* not HAVE_SGTTY_H */ -# endif /* not HAVE_TERMIO_H */ -#endif /* not HAVE_TERMIOS_H */ - -#if HAVE_TERMIOS_H || HAVE_TERMIO_H - -void -savetty (void) -{ - GET_TTY_MODES (&savemode); - charmode = linemode = savemode; - - charmode.c_lflag &= ~(ECHO | ICANON | ISIG); - charmode.c_cc[VMIN] = 1; - 
charmode.c_cc[VTIME] = 0; - - linemode.c_lflag |= (ECHO | ICANON | ISIG); - linemode.c_cc[VEOF] = 'd' & 037; - linemode.c_cc[VEOL] = 0377; -} - -#endif - -#if HAVE_TERMIOS_H || HAVE_TERMIO_H || HAVE_SGTTY_H - -void -restoretty (void) -{ - SET_TTY_MODES (&savemode); -} - -void -linetty (void) -{ - SET_TTY_MODES (&linemode); -} - -void -chartty (void) -{ - SET_TTY_MODES (&charmode); -} - -#endif @@ -1,126 +0,0 @@ -/* misc.c -- miscellaneous common functions - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include <stdio.h> -#include <string.h> -#include <errno.h> - -#include <config.h> -#include "strxtra.h" -#include "misc.h" - -#if !HAVE_BASENAME -char * -basename (char const *path) -{ - char *base; - - base = strrchr (path, '/'); - if (base) - return ++base; - else - return path; -} -#endif - -#if !HAVE_DIRNAME -char * -dirname (char const *path) -{ - char *base; - - base = strrchr (path, '/'); - if (base) - return strndup (path, base - path); - else - return "."; -} -#endif - -/* This is like fgets(3s), except that lines are delimited by NULs - rather than newlines. Also, we return the number of characters - gotten rather than the address of buf0. 
*/ -int -fgets0 (char *buf0, int size, FILE * in_FILE) -{ - char *buf; - int c; - char *end; - - buf = buf0; - end = &buf[size]; - while ((c = getc (in_FILE)) > 0 && buf < end) - *buf++ = c; - *buf = '\0'; - return (buf - buf0); -} - -extern char const *program_name; - -void -filerr (char const *syscall, char const *file_name) -{ - fprintf (stderr, "%s: Cannot %s `%s' (%s)\n", program_name, syscall, file_name, strerror (errno)); -} - -int -tree8_count_levels (unsigned int cardinality) -{ - int levels = 1; - cardinality--; - while (cardinality >>= 3) - ++levels; - return levels; -} - -int -gets_past_00 (char *tok, FILE *input_FILE) -{ - int got = 0; - int c; - do - { - do - { - got++; - c = getc (input_FILE); - *tok++ = c; - } - while (c > 0); - got++; - c = getc (input_FILE); - *tok++ = c; - } - while (c > 0); - return got - 2; -} - -int -skip_past_00 (FILE *input_FILE) -{ - int skipped = 0; - do - { - do - skipped++; - while (getc (input_FILE) > 0); - skipped++; - } - while (getc (input_FILE) > 0); - return skipped; -} @@ -1,38 +0,0 @@ -/* misc.c -- defs for interface to misc.c - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _misc_h_ -#define _misc_h_ - -#if HAVE_BASENAME -char *basename (); -#else -char *basename __P((char const *path)); -#endif -#if HAVE_DIRNAME -char *dirname (); -#else -char *dirname __P((char const *path)); -#endif -int fgets0 __P((char *buf0, int size, FILE *in_FILE)); -void filerr __P((char const *syscall, char const *file_name)); -int tree8_count_levels __P((unsigned int cardinality)); -int gets_past_00 __P((char *tok, FILE *input_FILE)); -int skip_past_00 __P((FILE *input_FILE)); - -#endif /* not _misc_h_ */ @@ -1,187 +0,0 @@ -.TH MKID 1 -.SH NAME -mkid \- make an id database -.SH SYNOPSIS -.B mkid -.RB [ \-v ] -.RB [ \-f \^out-file] -.RB [ \-s \^directory] -.RB [ \-r \^directory] -.RB [ \-S \^scanarg] -.RB [ \-a \^arg-file] -.RB [ \- ] -.RB [ \-u ] -.RB [ files... ] -.SH DESCRIPTION -.I Mkid\^ -builds a database that stores numbers and identifier names, as well -as the names of the files in which they occur. -.I Mkid\^ -is particularly useful with large programs spread out across multiple -source files. It serves as an aid for program maintenance and as a -.I guide\^ -for perusing a program. -.PP -The following options are recognized: -.TP 10 -.B \-v -Verbose. -Report -.IR mkid 's -progress in building the database. The output comes on standard error. -.TP 10 -.BI \-f out-file\^ -Write the finished database into -.IR out-file . -.B ID\^ -is the default. -Normally the names of the files scanned are written to the database -as specified in the argument list. If the database sepcified with -.B \-f -is not located in the current directory, then the file names are -adjusted so that they are relative to the directory that the -database is located in. -.TP 10 -.BI \-s directory\^ -.TP 10 -.BI \-r directory\^ -If -.IR mkid 's -attempt to open a source-file fails, it will try to checkout the -corresponding SCCS or RCS file if present. The -.B \-s -option tells -.I mkid\^ -which directory holds the SCCS file. 
-Similarly, the -.B \-r -option tells -.I mkid\^ -which directory holds the RCS file. -If neither the RCS or SCCS directories are specified, -.I mkid\^ -will first look for an SCCS file in the current directory, then in -.BI sccs , -and finally in -.BI SCCS . -It will then look for an RCS file in the current directory, and finally in -.BI RCS . -.TP 10 -.BI \-a arg-file\^ -Open and read -.I arg-file\^ -in order to obtain a list of source file arguments. Source file names -must appear one to a line. -.BI \-S , -.BI \-r , -and -.BI \-s -arguments may also be placed one per line in -.IR file . -They are distinguished from source file names by their leading `-'. If a file name begins -with `-', it can be distinguished from an argument by explicitly prepending the current -directory string: `./'. -.TP 10 -.B \- -This operates in the same manner as the -.B \-a -option described above, but reads from the standard input instead of a file. -.TP 10 -.B \-u -Update an existing database. Only those files that have been modified -since the database was built will be rescanned. This is a significant -time-saver for updating large databases where few sources have changed. -.TP 10 -.B files... -If neither the -.BI \-a , -.BI \- , -nor -.BI \-u , -arguments have been specified, take file names from the command line. -.TP 10 -.BI \-S scanarg\^ -.I Mkid\^ -scans source files in order to obtain numbers and identifier names. -Since the lexical rules of languages differ, -.I mkid\^ -applies a different scanning function to each language in order -to conform to that language's lexical rules. -.I Mkid\^ -determines the source file's language by examining its filename -suffix which commonly occurs after a dot (`.'). -The -.B \-S -argument is a way of passing language specific arguments to the -scanner for that language. 
This argument takes a number of forms: -.br --S<suffix>=<language> -.br --S<language>-<arg> -.br -+S-<arg> -.br --S<lang>/<lang>/<filter> -.br -The first form associates a suffix with a language. -For example -S.c=vhil would cause all .c files to be scanned -as though they were language vhil rather than c. -You may find -out which suffixes are defined for which languages with the following -options: `-S<suffix>=?' tells which language is bound to -.IR <suffix> , -`-S?=<language>' tells which suffixes are bound to -.IR <language> , -and `-S?=?' reports all bindings between suffixes and languages. -.PP -The second form passes an argument for processing by the scanner -for a specific language. The third form passes an argument to -all scanners. -.PP -Finally, the <lang>/<lang>/<filter> form defines a shell command -to filter the file with. This can be used to run an arbitrary -program to filter the contents of a file before it is passed -to one of the existing language scanners. It is typically -used in conjunction with the plain text scanner. -The first <lang> defines a new language, the second <lang> -specifies an existing language whose scanner will be used, -and the remaining <filter> is an arbitrary shell command. -.PP -You may get a brief summary of the scanner-specific options for a -language by supplying the following option: `-S<language>?'. -.PP -Here is a brief summary of the options for the -.I `asm'\^ -(assembler) language. -.PP -The -.B \-u\^ -option controls whether or not the assembler scanner should strip -off a leading -.I underscore\^ -(`_') character. If your assembler prepends an -.I underscore\^ -to external symbols, then you should tell the scanner to strip it -off, so that references to the same symbol from assembly and from -a high-level language will look the same. -.PP -The -.B \-c<cc>\^ -option supplies the character(s) used to begin a comment that extends -to the end of the line. 
-.PP -The -.B \-a<cc>\^ -option indicates character(s) that are legal in names, in addition to -the alpha-numeric characters. If the option appears as `-a', names -that contain these characters are ignored. If it appears as `+a', these -names are added to the database. -.SH BUGS -This manual page needs to be more complete about the scanner-specific -arguments. -.PP -At the moment, the only scanners implemented are for C, assembly -language, and plain text. There ought to be scanners for Ada, Pascal, -Fortran, and Lisp. -.SH SEE ALSO -lid(1), deroff(1), detex(1). @@ -1,999 +0,0 @@ -/* mkid.c -- build an identifer database - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include <sys/types.h> -#include <sys/stat.h> -#include <stdlib.h> -#include <stddef.h> -#include <unistd.h> -#include <limits.h> -#include <assert.h> -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#include <errno.h> - -#include <config.h> -#include "strxtra.h" -#include "alloc.h" -#include "idfile.h" -#include "token.h" -#include "bitops.h" -#include "misc.h" -#include "filenames.h" -#include "hash.h" -#include "scanners.h" - -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#endif - -struct summary -{ - struct token **sum_tokens; - unsigned char const *sum_hits; - struct summary *sum_parent; - union { - struct summary *u_kids[8]; /* when sum_level > 0 */ - struct idarg *u_files[8]; /* when sum_level == 0 */ - } sum_u; -#define sum_kids sum_u.u_kids -#define sum_files sum_u.u_files - unsigned long sum_tokens_size; - unsigned long sum_hits_count; - int sum_free_index; - int sum_level; -}; - -#define MAX_LEVELS 5 /* log_8 of the max # of files: log_8(32768) == 5 */ - -struct token -{ - unsigned short tok_count; - unsigned char tok_flags; - unsigned char tok_hits[MAX_LEVELS]; - char tok_name[1]; -}; - -char *bitsset __P((char *s1, char const *s2, int n)); -char *bitsclr __P((char *s1, char const *s2, int n)); -char *bitsand __P((char *s1, char const *s2, int n)); -char *bitsxor __P((char *s1, char const *s2, int n)); -int bitstst __P((char const *s1, char const *s2, int n)); -int bitsany __P((char const *s, int n)); -struct token *make_token __P((char const *name, int)); -void scan_1_file __P((char const *(*get_token) (FILE*, int*), FILE *source_FILE)); -struct idarg *parse_idargs __P((int argc, char **argv)); -struct idarg *parse_idargs_from_FILE __P((FILE *arg_FILE, struct idarg *idarg)); -void scan_files __P((struct idarg *idarg)); -void report_statistics __P((void)); - -unsigned long token_hash_1 __P((void const *key)); -unsigned long token_hash_2 __P((void const *key)); -int token_hash_cmp __P((void const 
*x, void const *y)); - -void write_idfile __P((char const *id_file, struct idarg *idargs)); -void bump_current_hits_signature __P((void)); -void init_hits_signature __P((int i)); -int bit_to_index __P((int bit)); -int token_qsort_cmp __P((void const *x, void const *y)); -void free_summary_tokens __P((void)); -void summarize __P((void)); -void assert_hits __P((struct summary *summary)); -void write_hits __P((FILE *fp, struct summary *summary, unsigned char const *tail_hits)); -void sign_token __P((struct token *token)); -void add_token_to_summary __P((struct summary *summary, struct token *token)); -void init_summary __P((void)); -struct summary *make_sibling_summary __P((struct summary *summary)); -int count_vec_size __P((struct summary *summary, unsigned char const *tail_hits)); -int count_buf_size __P((struct summary *summary, unsigned char const *tail_hits)); -void usage __P((void)); - -struct hash_table token_table; -struct hash_table file_table; - -/* Miscellaneous statistics */ -long input_chars; -long name_tokens; -long number_tokens; -long string_tokens; -long literal_tokens; -long comment_tokens; -long occurrences; -long heap_size; -long hits_length = 0; -long tokens_length = 0; -long output_length = 0; - -int verbose_flag = 0; -int statistics_flag = 1; - -int args_count = 0; /* # of args to save */ -int scan_count = 0; /* # of files to scan */ -int file_name_count = 0; /* # of files in database */ -int levels = 0; /* ceil(log(8)) of file_name_count */ - -unsigned char current_hits_signature[MAX_LEVELS]; -#define INIT_TOKENS_SIZE(level) (1 << ((level) + 13)) -struct summary *summary_root; -struct summary *summary_leaf; - -char PWD_buf[BUFSIZ]; /* The current working directory */ -char absolute_idfile_name[BUFSIZ]; /* The absolute name of the database */ -char const *id_file_name = IDFILE; - -char const *program_name; - -void -usage (void) -{ - fprintf (stderr, "\ -Usage: %s [-v] [-f<idfile>] [(+|-)l[<lang>]] [(+|-)S<scanarg>] [-a<argfile>] [-] 
[files...]\n\ - -v Verbose: print reports of progress\n\ - -a<file> Open file for arguments\n\ - - Read newline-separated args from stdin\n\ - -l<lang> Force files to be scanned as <lang> until +l<lang>\n\ - -S<lang>-<arg> Pass arg to <lang> scanner\n\ - -S.<suffix>=<lang> Scan files with .<suffix> as <lang>\n\ - -S<lang>? Print usage documentation for <lang>\n\ -\n\ -Version %s", - program_name, VERSION); -#ifdef __DATE__ - fprintf (stderr, "; Made %s %s", __DATE__, __TIME__); -#endif - fputc ('\n', stderr); - exit (1); -} - -void *sbrk (); - -int -main (int argc, char **argv) -{ - struct idarg *idarg_0; - char const *sbrk0; - - program_name = basename ((argc--, *argv++)); - init_scanners (); - - idarg_0 = parse_idargs (argc, argv); - if (idarg_0 == NULL) - { - fprintf (stderr, "Nothing to do...\n"); - return 0; - } - - sbrk0 = (char const *) sbrk (0); - hash_init (&token_table, scan_count * 64, token_hash_1, token_hash_2, token_hash_cmp); - - get_PWD (PWD_buf); - strcpy (absolute_idfile_name, span_file_name (PWD_buf, id_file_name)); - if (access (id_file_name, 06) < 0 - && (errno != ENOENT || access (dirname (id_file_name), 06) < 0)) - { - filerr ("modify", id_file_name); - return 1; - } - - init_hits_signature (0); - init_summary (); - - scan_files (idarg_0); - - if (token_table.ht_fill == 0) - return 0; - - free_summary_tokens (); - free (token_table.ht_vec); - - write_idfile (id_file_name, idarg_0); - heap_size = (char const *) sbrk (0) - sbrk0; - - if (statistics_flag) - report_statistics (); - return 0; -} - -void -scan_files (struct idarg *idarg) -{ - int keep_lang = 0; - - for ( ; idarg->ida_next; idarg = idarg->ida_next) - { - char const *(*scanner) __P((FILE*, int*)); - FILE *source_FILE; - char *arg = idarg->ida_arg; - char const *lang_name = NULL; - char const *suff; - char const *filter; - - if (idarg->ida_index < 0) - { - int op = *arg++; - switch (*arg++) - { - case 'l': - if (*arg == '\0') - { - keep_lang = 0; - lang_name = NULL; - break; - } - if 
(op == '+') - keep_lang = 1; - lang_name = arg; - break; - case 'S': - set_scan_args (op, strdup (arg)); - break; - default: - usage (); - } - continue; - } - if (!(idarg->ida_flags & IDA_SCAN_ME)) - goto skip; - - suff = strrchr (arg, '.'); - if (lang_name == NULL) - { - if (suff == NULL) - suff = ""; - lang_name = get_lang_name (suff); - if (lang_name == NULL) - lang_name = get_lang_name (""); - if (lang_name == NULL) - { - fprintf (stderr, "%s: No language assigned to suffix: `%s'\n", program_name, suff); - goto skip; - } - } - scanner = get_scanner (lang_name); - if (scanner == NULL) - { - fprintf (stderr, "%s: No scanner for language: `%s'\n", program_name, lang_name); - goto skip; - } - filter = get_filter (suff); - source_FILE = open_source_FILE (arg, filter); - if (source_FILE == NULL) - goto skip; - if (verbose_flag) - { - printf ("%s: ", lang_name); - printf (filter ? filter : "%s", arg); - fflush (stdout); - } - scan_1_file (scanner, source_FILE); - if (verbose_flag) - putchar ('\n'); - close_source_FILE (source_FILE, filter); - skip: - if (!keep_lang) - lang_name = NULL; - if (idarg->ida_index < file_name_count) - { - if (current_hits_signature[0] & 0x80) - summarize (); - bump_current_hits_signature (); - } - } -} - -void -report_statistics (void) -{ - printf ("Name=%ld, ", name_tokens); - printf ("Number=%ld, ", number_tokens); - printf ("String=%ld, ", string_tokens); - printf ("Literal=%ld, ", literal_tokens); - printf ("Comment=%ld\n", comment_tokens); - - printf ("Files=%d, ", scan_count); - printf ("Tokens=%ld, ", occurrences); - printf ("Bytes=%ld Kb, ", input_chars / 1024); - printf ("Heap=%ld Kb, ", heap_size / 1024); - printf ("Output=%ld (%ld tok, %ld hit)\n", output_length, tokens_length, hits_length); - - printf ("Load=%ld/%ld=%.2f, ", token_table.ht_fill, token_table.ht_size, - (double) token_table.ht_fill / (double) token_table.ht_size); - printf ("Rehash=%d, ", token_table.ht_rehashes); - printf ("Probes=%ld/%ld=%.2f, ", 
token_table.ht_probes, token_table.ht_lookups, - (double) token_table.ht_probes / (double) token_table.ht_lookups); - printf ("Freq=%ld/%ld=%.2f\n", occurrences, token_table.ht_fill, - (double) occurrences / (double) token_table.ht_fill); -} - -struct idarg * -parse_idargs (int argc, char **argv) -{ - struct idarg *idarg; - struct idarg *idarg_0; - char *arg; - int op; - FILE *arg_FILE = NULL; - int args_from = 0; - enum { - AF_CMDLINE = 0x1, /* file args came on command line */ - AF_FILE = 0x2, /* file args came from a file (-f<file>) */ - AF_USAGE = 0x8 - }; /* no file args necessary: usage query */ - - idarg = idarg_0 = CALLOC (struct idarg, 1); - - /* Process some arguments, and snarf-up some others for processing - later. */ - while (argc) - { - arg = (argc--, *argv++); - if (*arg != '-' && *arg != '+') - { - /* arguments are from command line (not pipe) */ - args_from |= AF_CMDLINE; - idarg->ida_arg = arg; - idarg->ida_flags = IDA_SCAN_ME; - idarg->ida_index = file_name_count++; - scan_count++; - idarg = (idarg->ida_next = CALLOC (struct idarg, 1)); - - continue; - } - op = *arg++; - switch (*arg++) - { - case '\0': - args_from |= AF_FILE; - idarg = parse_idargs_from_FILE (stdin, idarg); - break; - case 'a': - arg_FILE = fopen (arg, "r"); - if (arg_FILE == NULL) - filerr ("open", arg); - else - { - args_from |= AF_FILE; - idarg = parse_idargs_from_FILE (arg_FILE, idarg); - } - break; - case 'f': - id_file_name = arg; - break; - case 'v': - verbose_flag = 1; - break; - case 'S': - if (strchr (&arg[-2], '?')) - { - set_scan_args (op, arg); - args_from |= AF_USAGE; - } - /*FALLTHROUGH */ - case 'l': - idarg->ida_arg = &arg[-2]; - idarg->ida_index = -1; - idarg = (idarg->ida_next = CALLOC (struct idarg, 1)); - - args_count++; - break; - default: - usage (); - } - } - - if (args_from & AF_USAGE) - exit (0); - /* File args should only come from one place. Ding the user if - arguments came from multiple places, or if none were supplied at - all. 
*/ - switch (args_from) - { - case AF_CMDLINE: - case AF_FILE: - if (file_name_count > 0) - break; - /*FALLTHROUGH */ - case 0: - fprintf (stderr, "%s: Use -u, -f<file>, or cmd-line for file args!\n", program_name); - usage (); - default: - fprintf (stderr, "%s: Use only one of: -u, -f<file>, or cmd-line for file args!\n", program_name); - usage (); - } - - if (scan_count == 0) - return NULL; - - return idarg_0; -} - - -/* Cons up a list of idarg as supplied in a file. */ -struct idarg * -parse_idargs_from_FILE (FILE *arg_FILE, struct idarg *idarg) -{ - int file_count; - char buf[BUFSIZ]; - char *arg; - - file_count = 0; - while (fgets (buf, sizeof (buf), arg_FILE)) - { - idarg->ida_arg = arg = strndup (buf, strlen (buf) - 1); - if (*arg == '+' || *arg == '-') - idarg->ida_index = -1; - else - { - idarg->ida_flags = IDA_SCAN_ME; - idarg->ida_index = file_name_count++; - scan_count++; - } - idarg = idarg->ida_next = CALLOC (struct idarg, 1); - } - return idarg; -} - -void -scan_1_file (get_token_t get_token, FILE *source_FILE) -{ - struct stat stat_buf; - struct token **slot; - char const *key; - int bytes = 0; - int total_tokens = 0; - int new_tokens = 0; - int distinct_tokens = 0; - int flags; - struct token *token; - - if (fstat (fileno (source_FILE), &stat_buf) == 0) - { - bytes = stat_buf.st_size; - input_chars += bytes; - } - - while ((key = (*get_token) (source_FILE, &flags)) != NULL) - { - if (*key == '\0') - continue; - total_tokens++; - slot = (struct token **) hash_lookup (&token_table, key - offsetof (struct token, tok_name)); - token = *slot; - if (token) - { - token->tok_flags |= flags; - if (token->tok_count < USHRT_MAX) - token->tok_count++; - if (!(token->tok_hits[0] & current_hits_signature[0])) - { - sign_token (token); - distinct_tokens++; - } - } else { - *slot = token = make_token (key, flags); - sign_token (token); - distinct_tokens++; - new_tokens++; - if (token_table.ht_fill++ >= token_table.ht_capacity) - rehash (&token_table); - } - } - if 
(verbose_flag) - { - printf (" uniq=%d/%d", distinct_tokens, total_tokens); - if (total_tokens != 0) - printf ("=%.2f", (double) distinct_tokens / (double) total_tokens); - printf (", new=%d/%d", new_tokens, distinct_tokens); - if (distinct_tokens != 0) - printf ("=%.2f", (double) new_tokens / (double) distinct_tokens); - } -} - -/* As the database is written, may need to adjust the file names. If - we are generating the ID file in a remote directory, then adjust - the file names to be relative to the location of the ID database. - - (This would be a common useage if you want to make a database for a - directory which you have no write access to, so you cannot create - the ID file.) */ -void -write_idfile (char const *file_name, struct idarg *idarg) -{ - struct token **tokens; - int i; - FILE *id_FILE; - struct idhead idh; - int fixup_names; - char *lsl; - int buf_size; - int vec_size; - int tok_size; - int max_buf_size = 0; - int max_vec_size = 0; - - if (verbose_flag) - printf ("Sorting tokens...\n"); - assert (summary_root->sum_hits_count == token_table.ht_fill); - tokens = REALLOC (summary_root->sum_tokens, struct token *, token_table.ht_fill); - qsort (tokens, token_table.ht_fill, sizeof (struct token *), token_qsort_cmp); - - if (verbose_flag) - printf ("Writing `%s'...\n", file_name); - lsl = strrchr (relative_file_name (PWD_buf, absolute_idfile_name), '/'); - if (lsl == NULL) - { - /* The database is in the cwd, don't adjust the names */ - fixup_names = 0; - } - else - { - /* The database is not in cwd, adjust names so they are relative - to the location of the database, make absolute_idfile_name just be the - directory path to ID. 
*/ - fixup_names = 1; - *(lsl + 1) = '\0'; - } - id_FILE = fopen (file_name, "w+b"); - if (id_FILE == NULL) - { - filerr ("create", file_name); - exit (1); - } - idh.idh_magic[0] = IDH_MAGIC_0; - idh.idh_magic[1] = IDH_MAGIC_1; - idh.idh_version = IDH_VERSION; - idh.idh_flags = IDH_COUNTS; - - /* write out the list of pathnames */ - fseek (id_FILE, sizeof_idhead (), 0); - idh.idh_args_offset = ftell (id_FILE); - for ( ; idarg->ida_next; idarg = idarg->ida_next) - { - if (*idarg->ida_arg != '-' && fixup_names) - fputs (relative_file_name (absolute_idfile_name, span_file_name (PWD_buf, idarg->ida_arg)), id_FILE); - else - fputs (idarg->ida_arg, id_FILE); - putc ('\0', id_FILE); - } - idh.idh_files = file_name_count; - - /* write out the list of identifiers */ - - putc ('\0', id_FILE); - putc ('\0', id_FILE); - idh.idh_tokens_offset = ftell (id_FILE); - - for (i = 0; i < token_table.ht_fill; i++, tokens++) - { - struct token *token = *tokens; - occurrences += token->tok_count; - if (token->tok_flags & TOK_NUMBER) - number_tokens++; - if (token->tok_flags & TOK_NAME) - name_tokens++; - if (token->tok_flags & TOK_STRING) - string_tokens++; - if (token->tok_flags & TOK_LITERAL) - literal_tokens++; - if (token->tok_flags & TOK_COMMENT) - comment_tokens++; - - fputs (token->tok_name, id_FILE); - putc ('\0', id_FILE); - if (token->tok_count > 0xff) - token->tok_flags |= TOK_SHORT_COUNT; - putc (token->tok_flags, id_FILE); - putc (token->tok_count & 0xff, id_FILE); - if (token->tok_flags & TOK_SHORT_COUNT) - putc (token->tok_count >> 8, id_FILE); - - vec_size = count_vec_size (summary_root, token->tok_hits + levels); - buf_size = count_buf_size (summary_root, token->tok_hits + levels); - hits_length += buf_size; - tok_size = strlen (token->tok_name) + 1; - tokens_length += tok_size; - buf_size += tok_size + sizeof (token->tok_flags) + sizeof (token->tok_count) + 2; - if (buf_size > max_buf_size) - max_buf_size = buf_size; - if (vec_size > max_vec_size) - max_vec_size = 
vec_size; - - write_hits (id_FILE, summary_root, token->tok_hits + levels); - putc ('\0', id_FILE); - putc ('\0', id_FILE); - } - assert_hits (summary_root); - idh.idh_tokens = token_table.ht_fill; - output_length = ftell (id_FILE); - idh.idh_end_offset = output_length - 2; - idh.idh_buf_size = max_buf_size; - idh.idh_vec_size = max_vec_size; - - write_idhead (id_FILE, &idh); - fclose (id_FILE); -} - -unsigned long -token_hash_1 (void const *key) -{ - return_STRING_HASH_1 (((struct token const *) key)->tok_name); -} - -unsigned long -token_hash_2 (void const *key) -{ - return_STRING_HASH_2 (((struct token const *) key)->tok_name); -} - -int -token_hash_cmp (void const *x, void const *y) -{ - return_STRING_COMPARE (((struct token const *) x)->tok_name, - ((struct token const *) y)->tok_name); -} - -int -token_qsort_cmp (void const *x, void const *y) -{ - return_STRING_COMPARE ((*(struct token const *const *) x)->tok_name, - (*(struct token const *const *) y)->tok_name); -} - -struct token * -make_token (char const *name, int flags) -{ - struct token *token = (struct token *) malloc (sizeof (struct token) + strlen (name)); - - if (!token) - { - fprintf (stderr, "malloc failure! 
\n"); - exit (1); - } - token->tok_count = 1; - token->tok_flags = flags; - memset (token->tok_hits, 0, sizeof (token->tok_hits)); - strcpy (token->tok_name, name); - - return token; -} - -/* ///////////// summary stuff //////////////////////////////////////////// */ - -void -bump_current_hits_signature (void) -{ - unsigned char *hits = current_hits_signature; - while (*hits & 0x80) - *hits++ = 1; - *hits <<= 1; -} - -void -init_hits_signature (int i) -{ - unsigned char *hits = current_hits_signature; - unsigned char const *end = ¤t_hits_signature[MAX_LEVELS]; - while (hits < end) - { - *hits = 1 << (i & 7); - i >>= 3; - hits++; - } -} - -int -bit_to_index (int bit) -{ - int i = 0; - while (bit >>= 1) - i++; - return i; -} - -void -free_summary_tokens (void) -{ - struct summary *summary = summary_leaf; - while (summary != summary_root) - { - free (summary->sum_tokens); - summary = summary->sum_parent; - } -} - -void -summarize (void) -{ - unsigned char const *hits_sig = current_hits_signature; - struct summary *summary = summary_leaf; - - do - { - unsigned long count = summary->sum_hits_count; - unsigned char *hits = MALLOC (unsigned char, count + 1); - unsigned int level = summary->sum_level; - struct token **tokens = summary->sum_tokens; - unsigned long init_size = INIT_TOKENS_SIZE (summary->sum_level); - - if (verbose_flag) - { - char const *fmt; - if (count < init_size / 2) - fmt = "level %d: %ld < %ld/2\n"; - else if (count > init_size * 2) - fmt = "level %d: %ld > %ld*2\n"; - else if (count < init_size) - fmt = "level %d: %ld < %ld\n"; - else if (count > init_size) - fmt = "level %d: %ld > %ld\n"; - else - fmt = "level %d: %ld == %ld\n"; - printf (fmt, summary->sum_level, count, init_size); - } - - qsort (tokens, count, sizeof (struct token *), token_qsort_cmp); - summary->sum_hits = hits; - while (count--) - { - unsigned char *hit = &(*tokens++)->tok_hits[level]; - *hits++ = *hit; - *hit = 0; - } - *hits++ = 0; - if (summary->sum_parent) - { - free 
(summary->sum_tokens); - summary->sum_tokens = 0; - } - summary = summary->sum_parent; - } - while (*++hits_sig & 0x80); - summary_leaf = make_sibling_summary (summary_leaf); -} - -void -init_summary (void) -{ - unsigned long size = INIT_TOKENS_SIZE (0); - summary_root = summary_leaf = CALLOC (struct summary, 1); - summary_root->sum_tokens_size = size; - summary_root->sum_tokens = MALLOC (struct token *, size); -} - -struct summary * -make_sibling_summary (struct summary *summary) -{ - struct summary *parent = summary->sum_parent; - unsigned long size; - - if (parent == NULL) - { - levels++; - summary_root = summary->sum_parent = parent = CALLOC (struct summary, 1); - parent->sum_level = levels; - parent->sum_kids[0] = summary; - parent->sum_hits_count = summary->sum_hits_count; - parent->sum_free_index = 1; - size = INIT_TOKENS_SIZE (levels); - if (summary->sum_tokens_size >= size) - { - parent->sum_tokens_size = summary->sum_tokens_size; - parent->sum_tokens = summary->sum_tokens; - } - else - { - parent->sum_tokens_size = size; - parent->sum_tokens = REALLOC (summary->sum_tokens, struct token *, size); - } - summary->sum_tokens = 0; - } - if (parent->sum_free_index == 8) - parent = make_sibling_summary (parent); - summary = CALLOC (struct summary, 1); - summary->sum_level = parent->sum_level - 1; - parent->sum_kids[parent->sum_free_index++] = summary; - summary->sum_parent = parent; - size = INIT_TOKENS_SIZE (summary->sum_level); - summary->sum_tokens_size = size; - summary->sum_tokens = MALLOC (struct token *, size); - return summary; -} - -int -count_vec_size (struct summary *summary, unsigned char const *tail_hits) -{ - struct summary **kids; - unsigned int hits = (summary->sum_hits ? 
*summary->sum_hits : *tail_hits); - - kids = summary->sum_kids; - if (*kids == NULL) - { - static char bits_per_nybble[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 }; - return bits_per_nybble[hits & 0xf] + bits_per_nybble[hits >> 4]; - } - else - { - int bit; - int count = 0; - --tail_hits; - for (bit = 1; bit & 0xff; bit <<= 1, ++kids) - if (bit & hits) - count += count_vec_size (*kids, tail_hits); - return count; - } -} - -int -count_buf_size (struct summary *summary, unsigned char const *tail_hits) -{ - struct summary **kids; - unsigned int hits = (summary->sum_hits ? *summary->sum_hits : *tail_hits); - - kids = summary->sum_kids; - if (*kids == NULL) - return 1; - else - { - int bit; - int count = 1; - --tail_hits; - for (bit = 1; bit & 0xff; bit <<= 1, ++kids) - if (bit & hits) - count += count_buf_size (*kids, tail_hits); - return count; - } -} - -void -assert_hits (struct summary* summary) -{ - struct summary **kids = summary->sum_kids; - struct summary **end = &kids[8]; - - assert (summary->sum_hits == NULL || *summary->sum_hits == 0); - - if (end[-1] == 0) - while (*--end == 0) - ; - while (kids < end) - assert_hits (*kids++); -} - -void -write_hits (FILE *fp, struct summary *summary, unsigned char const *tail_hits) -{ - struct summary **kids; - unsigned int hits = (summary->sum_hits ? 
*summary->sum_hits++ : *tail_hits); - - assert (hits); - putc (hits, fp); - - kids = summary->sum_kids; - if (*kids) - { - int bit; - --tail_hits; - for (bit = 1; (bit & 0xff) && *kids; bit <<= 1, ++kids) - if (bit & hits) - write_hits (fp, *kids, tail_hits); - } -} - -void -sign_token (struct token *token) -{ - unsigned char *tok_hits = token->tok_hits; - unsigned char *hits_sig = current_hits_signature; - unsigned char *end = &current_hits_signature[MAX_LEVELS]; - struct summary *summary = summary_leaf; - - while (summary) - { - if (*tok_hits == 0) - add_token_to_summary (summary, token); - if (*tok_hits & *hits_sig) - break; - *tok_hits |= *hits_sig; - summary = summary->sum_parent; - tok_hits++; - hits_sig++; - } - while (hits_sig < end) - { - if (*tok_hits & *hits_sig) - break; - *tok_hits |= *hits_sig; - tok_hits++; - hits_sig++; - } -} - -void -add_token_to_summary (struct summary *summary, struct token *token) -{ - unsigned long size = summary->sum_tokens_size; - - if (summary->sum_hits_count >= size) - { - size *= 2; - summary->sum_tokens = REALLOC (summary->sum_tokens, struct token *, size); - summary->sum_tokens_size = size; - } - summary->sum_tokens[summary->sum_hits_count++] = token; -} - -int -bitsany (char const *s, int n) -{ - while (n--) - if (*s++) - return 1; - - return 0; -} - -char * -bitsset (char *s1, char const *s2, int n) -{ - while (n--) - *s1++ |= *s2++; - - return s1; -} - -char * -bitsclr (char *s1, char const *s2, int n) -{ - while (n--) - *s1++ &= ~*s2++; - - return s1; -} - -#if 0 - -char * -bitsand (char *s1, char const *s2, int n) -{ - while (n--) - *s1++ &= *s2++; - - return s1; -} - -char * -bitsxor (char *s1, char const *s2, int n) -{ - while (n--) - *s1++ ^= *s2++; - - return s1; -} - -int -bitstst (char const *s1, char const *s2, int n) -{ - while (n--) - if (*s1++ & *s2++) - return 1; - - return 0; -} - -#endif diff --git a/mkid.info b/mkid.info deleted file mode 100644 index 72eee25..0000000 --- a/mkid.info +++ /dev/null @@ 
-1,1097 +0,0 @@ -This is Info file mkid.info, produced by Makeinfo-1.55 from the input -file mkid.texinfo. - -START-INFO-DIR-ENTRY -* mkid: (mkid). Identifier database utilities -END-INFO-DIR-ENTRY - - This file documents the `mkid' identifier database utilities. - - Copyright (C) 1991 Tom Horsley - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - - Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation. - - -File: mkid.info, Node: Top, Next: Overview, Prev: (dir), Up: (dir) - -GNU `mkid' -********** - -* Menu: - -* Overview:: What is an ID database and what tools manipulate it? -* Mkid:: Mkid -* Database Query Tools:: Database Query Tools -* Iid:: Iid -* Other Tools:: Other Tools -* Command Index:: Command Index - - -File: mkid.info, Node: Overview, Next: Mkid, Prev: Top, Up: Top - -Overview -******** - - An ID database is simply a file containing a list of file names, a -list of identifiers, and a binary relation (stored as a bit matrix) -indicating which of the identifiers appear in each file. With this -database and some tools to manipulate the data, a host of tasks become -simpler and faster. You can `grep' through hundreds of files for a -name, skipping the files that don't contain the name. You can search -for all the memos containing references to a project. You can edit -every file that calls some function, adding a new required argument. 
-Anyone with a large software project to maintain, or a large set of -text files to organize can benefit from the ID database and the tools -that manipulate it. - - There are several programs in the ID family. The `mkid' program -scans the files, finds the identifiers and builds the ID database. The -`lid' and `aid' tools are used to generate lists of file names -containing an identifier (perhaps to recompile every file that -references a macro which just changed). The `eid' program will invoke -an editor on each of the files containing an identifier and the `gid' -program will `grep' for an identifier in the subset of files known to -contain it. The `pid' tool is used to query the path names of the -files in the database (rather than the contents). Finally, the `iid' -tool is an interactive program supporting complex queries to intersect -and join sets of file names. - -* Menu: - -* History:: History - - -File: mkid.info, Node: History, Prev: Overview, Up: Overview - -History -======= - - Greg McGary conceived of the ideas behind mkid when he began hacking -the UNIX kernel in 1984. He needed a navigation tool to help him find -his way around the expansive, unfamiliar landscape. The first mkid-like tools -were built with shell scripts, and produced an ascii database that looks -much like the output of `lid' with no arguments. It took over an hour -on a VAX 11/750 to build a database for a 4.1BSDish kernel. Lookups -were done with the UNIX command `look', modified to handle very long -lines. - - In 1986, Greg rewrote mkid, lid, fid and idx in C to improve -performance. Database-build times were shortened by an order of -magnitude. The mkid tools were first posted to `comp.sources.unix' -in September of 1987. - - Over the next few years, several versions diverged from the original -source. Tom Horsley at Harris Computer Systems Division stepped forward -to take over maintenance and integrated some of the fixes from divergent -versions. He also wrote the `iid' program. 
A pre-release of `mkid2' -was posted to `alt.sources' near the end of 1990. At that time Tom -wrote this texinfo manual with the encouragement of the net community. -(Tom thanks Doug Scofield and Bill Leonard whom I dragooned into -helping me poorf raed and edit -- they found several problems in the -initial version.) - - In January, 1995, Greg McGary reemerged as the primary maintainer and -is hereby launching `mkid-3' whose primary new feature is an efficient -algorithm for building databases that is linear over the size of the -input text for both time and space. (The old algorithm was quadratic -for space and choked on very large source trees.) The code is now under -GPL and might become a part of the GNU system. `Mkid-3' is an interim -release, since several significant enhancements are in the works. These -include an optional coupling with GNU grep, so that grep can use an ID -database for hints; a cscope work-alike query interface; incremental -update of the ID database; and an automatic file-tree walker so you -need not explicitly supply every file name argument to the `mkid' -program. - - -File: mkid.info, Node: Mkid, Next: Database Query Tools, Prev: Overview, Up: Top - -Mkid -**** - - The `mkid' program builds the ID database. To do this it must scan -each of the files included in the database. This takes some time, but -once the work is done the query programs run very rapidly. - - The `mkid' program knows how to scan a variety of files. For -example, it knows how to skip over comments and strings in a C program, -only picking out the identifiers used in the code. - - Identifiers are not the only thing included in the database. -Numbers are also scanned and included in the database indexed by their -binary value. Since the same number can be written many different ways -(47, 0x2f, 057 in a C program for instance), this feature allows you to -find hard coded uses of constants without regard to the radix used to -specify them. 
- - All the places in this document where identifiers are written about -should really mention identifiers and numbers, but that gets fairly -clumsy after a while, so you should always keep in mind that numbers are -included in the database as well as identifiers. - -* Menu: - -* Mkid Command Line Options:: Mkid Command Line Options -* Builtin Scanners:: Builtin Scanners -* Adding Your Own Scanner:: Adding Your Own Scanner -* Mkid Examples:: Mkid Examples - - -File: mkid.info, Node: Mkid Command Line Options, Next: Builtin Scanners, Prev: Mkid, Up: Mkid - -Mkid Command Line Options -========================= - - - Command: mkid [`-v'] [`-SSCANARG'] [`-aARG-FILE'] [`-'] - [`-fOUT-FILE'] [`-u'] [`files'...] - `-v' - Verbose. Mkid tells you as it scans each file and indicates - which scanner it is using. It also summarizes some statistics - about the database at the end. - - `-SSCANARG' - The `-S' option is used to specify arguments to the various - language scanners. *Note Scanner Arguments::, for details. - - `-aARG-FILE' - Name a file containing additional command line arguments (one - per line). This may be used to specify lists of file names - longer than will fit on a command line. - - `-' - A simple `-' by itself means read arguments from stdin. - - `-fOUT-FILE' - Specify the name of the database file to create. The default - name is `ID' (in the current directory), but you may specify - any name. The file names stored in the database will be - stored relative to the directory containing the database, so - if you move the database after creating it, you may have - trouble finding files unless they remain in the same relative - position. - - `-u' - The `-u' option updates an existing database by rescanning - any files that have changed since the database was written. - Unfortunately you cannot incrementally add new files to a - database. - - `files' - Remaining arguments are names of files to be scanned and - included in the database. 
- -* Menu: - -* Scanner Arguments:: Scanner Arguments - - -File: mkid.info, Node: Scanner Arguments, Prev: Mkid Command Line Options, Up: Mkid Command Line Options - -Scanner Arguments ------------------ - - Scanner arguments all start with `-S'. Scanner arguments are used to -tell `mkid' which language scanner to use for which files, to pass -language specific options to the individual scanners, and to get some -limited online help about scanner options. - - `Mkid' usually determines which language scanner to use on a file by -looking at the suffix of the file name. The suffix starts at the last -`.' in a file name and includes the `.' and all remaining characters -(for example the suffix of `fred.c' is `.c'). Not all files have a -suffix, and not all suffixes are bound to a specific language by mkid. -If `mkid' cannot determine what language a file is, it will use the -language bound to the `.default' suffix. The plain text scanner is -normally bound to `.default', but the `-S' option can be used to change -any language bindings. - - There are several different forms for scanner options: -`-S.<SUFFIX>=<LANGUAGE>' - `Mkid' determines which language scanner to use on a file by - examining the file name suffix. The `.' is part of the suffix and - must be specified in this form of the `-S' option. For example - `-S.y=c' tells `mkid' to use the `c' language scanner for all - files ending in the `.y' suffix. - -`-S.<SUFFIX>=?' - `Mkid' has several built in suffixes it already recognizes. Passing - a `?' will cause it to print the language it will use to scan files - with that suffix. - -`-S?=<LANGUAGE>' - This form will print which suffixes are scanned with the given - language. - -`-S?=?' - This prints all the suffix==>language bindings recognized by - `mkid'. - -`-S<LANGUAGE>-<ARG>' - Each language scanner accepts scanner dependent arguments. This - form of the `-S' option is used to pass arbitrary arguments to the - language scanners. - -`-S<LANGUAGE>?' 
- Passing a `?' instead of a language option will print a brief - summary of the options recognized by the specified language - scanner. - -`-S<NEW LANGUAGE>/<BUILTIN LANGUAGE>/<FILTER COMMAND>' - This form specifies a new language defined in terms of a builtin - language and a shell command that will be used to filter the file - prior to passing on to the builtin language scanner. - - -File: mkid.info, Node: Builtin Scanners, Next: Adding Your Own Scanner, Prev: Mkid Command Line Options, Up: Mkid - -Builtin Scanners -================ - - If you run `mkid -S?=?' you will find bindings for a number of -languages; unfortunately pascal, though mentioned in the list, is not -actually supported. The supported languages are documented below (1). - -* Menu: - -* C:: C -* Plain Text:: Plain Text -* Assembler:: Assembler - - ---------- Footnotes ---------- - - (1) This is not strictly true -- vhil is a supported language, but -it is an obsolete and arcane dialect of C and should be ignored - - -File: mkid.info, Node: C, Next: Plain Text, Prev: Builtin Scanners, Up: Builtin Scanners - -C -- - - The C scanner is probably the most popular. It scans identifiers out -of C programs, skipping over comments and strings in the process. The -normal `.c' and `.h' suffixes are automatically recognized as C -language, as well as the more obscure `.y' (yacc) and `.l' (lex) -suffixes. - - The `-S' options recognized by the C scanner are: - -`-Sc-s<CHARACTER>' - Allow the specified <CHARACTER> in identifiers (some dialects of C - allow `$' in identifiers, so you could say `-Sc-s$' to accept that - dialect). - -`-Sc-u' - Don't strip leading underscores from identifier names (this is the - default mode of operation). - -`-Sc+u' - Do strip leading underscores from identifier names (I don't know - why you would want to do this in C programs, but the option is - available). 
- - -File: mkid.info, Node: Plain Text, Next: Assembler, Prev: C, Up: Builtin Scanners - -Plain Text ----------- - - The plain text scanner is designed for scanning documents. This is -typically the scanner used when adding custom scanners, and several -custom scanners are built in to `mkid' and defined in terms of filters -and the text scanner. A troff scanner runs `deroff' over the file then -feeds the result to the text scanner. A compressed man page scanner -runs `pcat' piped into `col -b', and a TeX scanner runs `detex'. - - Options: - -`-Stext+a<CHARACTER>' - Include the specified character in identifiers. By default, - standard C identifiers are recognized. - -`-Stext-a<CHARACTER>' - Exclude the specified character from identifiers. - -`-Stext+s<CHARACTER>' - Squeeze the specified character out of identifiers. By default, the - characters `'', `-', and `.' are squeezed out of identifiers. - This generates transformations like FRED'S==>FREDS or - A.S.P.C.A.==>ASPCA. - -`-Stext-s<CHARACTER>' - Do not squeeze out the specified character. - - -File: mkid.info, Node: Assembler, Prev: Plain Text, Up: Builtin Scanners - -Assembler ---------- - - Assemblers come in several flavors, so there are several options to -control scanning of assembly code: - -`-Sasm-c<CHARACTER>' - The specified character starts a comment that extends to end of - line (in many assemblers this is a semicolon or number sign -- - there is no default value for this). - -`-Sasm+u' - Strip the leading underscores off identifiers (the default - behavior). - -`-Sasm-u' - Do not strip the leading underscores. - -`-Sasm+a<CHARACTER>' - The specified character is allowed in identifiers. - -`-Sasm-a<CHARACTER>' - The specified character is allowed in identifiers, but any - identifier containing that character is ignored (often a `.' or `@' - will be used to indicate an internal temp label, you may want to - ignore these). - -`-Sasm+p' - Recognize C preprocessor directives in assembler source (default). 
- -`-Sasm-p' - Do not recognize C preprocessor directives in assembler source. - -`-Sasm+C' - Skip over C style comments in assembler source (default). - -`-Sasm-C' - Do not skip over C style comments in assembler source. - - -File: mkid.info, Node: Adding Your Own Scanner, Next: Mkid Examples, Prev: Builtin Scanners, Up: Mkid - -Adding Your Own Scanner -======================= - - There are two ways to add new scanners to `mkid'. The first is to -modify the code in `getscan.c' and add a new `scan-*.c' file with the -code for your scanner. This is not too hard, but it requires relinking -and installing a new version of `mkid', which might be inconvenient, -and would lead to the proliferation of `mkid' versions. - - The second technique uses the `-S<lang>/<lang>/<filter>' form of -the `-S' option to specify a new language scanner. In this form the -first language is the name of the new language to be defined, the -second language is the name of an existing language scanner to be -invoked on the output of the filter command specified as the third -component of the `-S' option. - - The filter is an arbitrary shell command. Somewhere in the filter -string, a `%s' should occur. This `%s' is replaced by the name of the -source file being scanned, the shell command is invoked, and whatever -comes out on STDOUT is scanned using the builtin scanner. - - For example, no scanner is provided for texinfo files (like this -one). If I wished to index the contents of this file, but avoid -indexing the texinfo directives, I would need a filter that stripped -out the texinfo directives, but left the remainder of the file intact. -I could then use the plain text scanner on the remainder. A quick way -to specify this might be: - - '-Stexinfo/text/sed s,@[a-z]*,,g < %s' - - This defines a new language scanner (TEXINFO) defined in terms of a -`sed' command to strip out texinfo directives (at signs followed by -letters). 
Once the directives are stripped, the remaining text is run -through the plain text scanner. - - This is just an example; to do a better job I would actually need to -delete some lines (such as those beginning with `@end') as well as -deleting the `@' directives embedded in the text. - - -File: mkid.info, Node: Mkid Examples, Prev: Adding Your Own Scanner, Up: Mkid - -Mkid Examples -============= - - The simplest example of `mkid' is something like: - - mkid *.[chy] - - This will build an ID database indexing all the identifiers and -numbers in the `.c', `.h', and `.y' files in the current directory. -Because those suffixes are already known to `mkid' as C language files, -no other special arguments are required. - - From a simple example, let's go to a more complex one. Suppose you -want to build a database indexing the contents of all the MAN pages. -Since `mkid' already knows how to deal with `.z' files, let's assume -your system is using the `compress' program to store compressed -cattable versions of the MAN pages. The `compress' program creates -files with a `.Z' suffix, so `mkid' will have to be told how to scan -`.Z' files. The following code shows how to combine the `find' command -with the special scanner arguments to `mkid' to generate the required ID -database: - - cd /usr/catman - find . -name '*.Z' -print | mkid '-Sman/text/uncompress -c < %s' -S.Z=man - - - This example first switches to the `/usr/catman' directory where the -compressed MAN pages are stored. The `find' command then finds all the -`.Z' files under that directory and prints their names. This list is -piped into the `mkid' program. The `-' argument by itself (at the end -of the line) tells `mkid' to read arguments (in this case the list of -file names) from STDIN. The first `-S' argument defines a new language -(MAN) in terms of the `uncompress' utility and the existing text -scanner. The second `-S' argument tells `mkid' to treat all `.Z' files -as language MAN. 
In practice, you might find the `mkid' arguments need -to be even more complex, something like: - - mkid '-Sman/text/uncompress -c < %s | col -b' -S.Z=man - - - This will take the additional step of getting rid of any underlining -and backspacing which might be present in the compressed MAN pages. - - -File: mkid.info, Node: Database Query Tools, Next: Iid, Prev: Mkid, Up: Top - -Database Query Tools -******************** - - The ID database is useless without database query tools. The -remainder of this document describes those tools. - - The `lid', `gid', `aid', `eid', and `pid' programs are all the same -program installed with links to different names. The name used to -invoke the program determines how it will act. - - The `iid' program is an interactive query shell that sits on top of -the other query tools. - -* Menu: - -* Common Options:: Common command line options -* Patterns:: Identifier pattern matching -* Lid:: Look up identifiers -* Aid:: Case insensitive lid -* Gid:: Grep for identifiers -* Eid:: Edit files with matching identifiers -* Pid:: Look up path names in database - - -File: mkid.info, Node: Common Options, Next: Patterns, Prev: Database Query Tools, Up: Database Query Tools - -Common Options -============== - - Since many of the programs are really links to one common program, it -is only reasonable to expect that most of the query tools would share -common command line options. Not all options make sense for all -programs, but they are all described here. The description of each -program gives the options that program uses. - -`-f<FILE>' - Read the database specified by <FILE>. Normally the tools look for - a file named `ID' in either the current directory or in any of the - directories above the current directory. This means you can keep a - global `ID' database in the root of a large source tree and use - the query tools from anywhere within that tree. 
- -`-r<DIRECTORY>' - The query tools usually assume the file names in the database are - relative to the directory holding the database. The `-r' option - tells the tools to look for the files relative to <DIRECTORY> - regardless of the location of the database. - -`-c' - This is shorthand for `-r`pwd`'. It tells the query tools to assume - the file names are stored relative to the current working - directory. - -`-e' - Force the pattern arguments to be treated as regular expressions. - Normally the query tools attempt to guess if the patterns are - regular expressions or simple identifiers by looking for special - characters in the pattern. - -`-w' - Force the pattern arguments to be treated as simple words even if - they contain special regular expression characters. - -`-k' - Normally the query tools that generate lists of file names attempt - to compress the lists using the `csh' brace notation. This option - suppresses the file name compression and outputs each name in full. - (This is particularly useful if you are a `ksh' user and want to - feed the list of names to another command -- the `-k' option comes - from the `k' in `ksh'). - -`-g' - It is possible to build the query tools so the `-k' option is the - default behavior. If this is the case for your system, the `-g' - option turns on the globbing of file names using the `csh' brace - notation. - -`-n' - Normally the query tools that generate lists of file names also - list the matching identifier at the head of the list of names. - This is irritating if you want just a list of names to feed to - another command, so the `-n' option suppresses the identifier and - lists only file names. - -`-b' - This option is only used by the `pid' tool. It restricts `pid' to - pattern match only the basename part of a file name. Normally the - absolute file name is matched against the pattern. - -`-d -o -x -a' - These options may be used in any combination to limit the radix of - numeric matches. 
The `-d' option will allow matches on decimal - numbers, `-o' on octal, and `-x' on hexadecimal numbers. The `-a' - option is shorthand for specifying all three. Any combination of - these options may be used. - -`-m' - Merge multiple lines of output into a single line. (If your query - matches more than one identifier the default action is to generate - a separate line of output for each matching identifier). - -`-s' - Search for identifiers that appear only once in the database. This - helps to locate identifiers that are defined but never used. - -`-u<NUMBER>' - List identifiers that conflict in the first <NUMBER> characters. - This could be useful porting programs to brain-dead computers that - refuse to support long identifiers, but your best long term option - is to set such computers on fire. - - -File: mkid.info, Node: Patterns, Next: Lid, Prev: Common Options, Up: Database Query Tools - -Patterns -======== - - You can attempt to match either simple identifiers or numbers in a -query, or you can specify a regular expression pattern which may match -many different identifiers in the database. The query programs use -either REGEX and REGCMP or RE_COMP and RE_EXEC, depending on which one -is available in the library on your system. These might not always -support the exact same regular expression syntax, so consult your local -MAN pages to find out. Any regular expression routines should support -the following syntax: - -`.' - A dot matches any character. - -`[ ]' - Brackets match any of the characters specified within the - brackets. You can match any characters *except* the ones in - brackets by typing `^' as the first character. A range of - characters can be specified using `-'. - -`*' - An asterisk means repeat the previous pattern zero or more times. - -`^' - An `^' at the beginning of a pattern means the pattern must match - starting at the first character of the identifier. 
- -`$' - A `$' at the end of the pattern means the pattern must match ending - at the last character in the identifier. - - -File: mkid.info, Node: Lid, Next: Aid, Prev: Patterns, Up: Database Query Tools - -Lid -=== - - - Command: lid [`-f<FILE>'] [`-u<N>'] [`-r<DIR>'] [`-ewdoxamskgnc'] - PATTERNS... - - The `lid' program stands for LOOKUP IDENTIFIER. It searches the -database for any identifiers matching the patterns and prints the names -of the files that match each pattern. The exact format of the output -depends on the options. - - -File: mkid.info, Node: Aid, Next: Gid, Prev: Lid, Up: Database Query Tools - -Aid -=== - - - Command: aid [`-f<FILE>'] [`-u<N>'] [`-r<DIR>'] [`-doxamskgnc'] - PATTERNS... - - The `aid' command is an abbreviation for APROPOS IDENTIFIER. The -patterns cannot be regular expressions, but it looks for them using a -case insensitive match, and any pattern that is a substring of an -identifier in the database will match that identifier. - - For example `aid get' might match the identifiers `fgets', -`GETLINE', and `getchar'. - - -File: mkid.info, Node: Gid, Next: Eid, Prev: Aid, Up: Database Query Tools - -Gid -=== - - - Command: gid [`-f<FILE>'] [`-u<N>'] [`-r<DIR>'] [`-doxasc'] - PATTERNS... - - The `gid' command stands for GREP FOR IDENTIFIERS. It finds -identifiers in the database that match the specified patterns, then -`greps' for those identifiers in just the set of files containing -matches. In a large source tree, this saves a fantastic amount of time. - - There is an EMACS interface to this program (*note GNU Emacs -Interface::.). If you are an EMACS user, you will probably prefer the -EMACS interface over the `eid' tool. - - -File: mkid.info, Node: Eid, Next: Pid, Prev: Gid, Up: Database Query Tools - -Eid -=== - - - Command: eid [`-f<FILE>'] [`-u<N>'] [`-r<DIR>'] [`-doxasc'] - PATTERNS... - - The `eid' command allows you to invoke an editor on each file -containing a matching pattern. 
The `EDITOR' environment variable is the -name of the program to be invoked. If the specified editor can accept -an initial search argument on the command line, you can use the -`EIDARG', `EIDLDEL', and `EIDRDEL' environment variables to specify the -form of that argument. - -`EDITOR' - The name of the editor program to invoke. - -`EIDARG' - A printf string giving the form of the argument to pass containing - the initial search string (the matching identifier). For `vi' it - should be set to `+/%s/''. - -`EIDLDEL' - A string giving the regular expression pattern that forces a match - at the beginning (left end) of a word. This string is inserted in - front of the matching identifier when composing the search - argument. For `vi', this should be `\<'. - -`EIDRDEL' - The matching right end word delimiter. For `vi', use `\>'. - - -File: mkid.info, Node: Pid, Prev: Eid, Up: Database Query Tools - -Pid -=== - - - Command: pid [`-f<FILE>'] [`-u<N>'] [`-r<DIR>'] [`-ebkgnc'] - PATTERNS... - - The `pid' tool is unlike all the other tools. It matches the -patterns against the file names in the database rather than the -identifiers in the database. Patterns are treated as shell wild card -patterns unless the `-e' option is given, in which case full regular -expression matching is done. - - The wild card pattern is matched against the absolute path name of -the file. Most shells treat slashes `/' and file names that start with -dot `.' specially, `pid' does not do this. It simply attempts to match -the absolute path name string against the wild card pattern. - - The `-b' option restricts the pattern matching to the base name of -the file (all the leading directory names are stripped prior to pattern -matching). - - -File: mkid.info, Node: Iid, Next: Other Tools, Prev: Database Query Tools, Up: Top - -Iid -*** - - - Command: iid [`-a'] [`-c<COMMAND>'] [`-H'] - `-a' - Normally `iid' uses the `lid' command to search for names. 
- If you give the `-a' option on the command line, then it will - use `aid' as the default search engine. - - `-c<COMMAND>' - In normal operation, `iid' starts up and prompts you for - commands used to build sets of files. The `-c' option is used - to pass a single query command to `iid' which it then - executes and exits. - - `-H' - The `-H' option prints a short help message and exits. To get - more help use the `help' command from inside `iid'. - - The `iid' program is an interactive ID query tool. It operates by -running the other query programs (such as `lid' and `aid') and creating -sets of file names returned by these queries. It also provides -operators for `anding' and `oring' these sets to create new sets. - - The `PAGER' environment variable names the program `iid' uses to -display files. If you use `emacs', you might want to set `PAGER' so it -invokes the `emacsclient' program. Check the file `lisp/server.el' in -the emacs source tree for documentation on this. It is useful not only -with X windows, but also when running `iid' from an emacs shell buffer. -There is also a somewhat spiffier version called gnuserv by Andy Norman -(`ange%anorman@hplabs.hp.com') which appeared in `comp.emacs' sometime -in 1989. - -* Menu: - -* Ss and Files commands:: Ss and Files commands -* Sets:: Sets -* Show:: Show -* Begin:: Begin -* Help:: Help -* Off:: Off -* Shell Commands as Queries:: Shell Commands as Queries -* Shell Escape:: Shell Escape - - -File: mkid.info, Node: Ss and Files commands, Next: Sets, Prev: Iid, Up: Iid - -Ss and Files commands -===================== - - The primary query commands are `ss' (for select sets) and `files' -(for show file names). These commands both take a query expression as an -argument. - - - Subcommand: ss QUERY - The `ss' command runs a query and builds a set (or sets) of file - names. The result is printed as a summary of the sets constructed - showing how many file names are in each set. 
- - - Subcommand: files QUERY - The `files' command is like the `ss' command, but rather than - printing a summary, it displays the full list of matching file - names. - - - Subcommand: f QUERY - The `f' command is merely a shorthand notation for `files'. - - Database queries are simple expressions with operators like `and' -and `or'. Parentheses can be used to group operations. The complete set -of operators is summarized below: - -`PATTERN' - Any pattern not recognized as one of the keywords in this table is - treated as an identifier to be searched for in the database. It is - passed as an argument to the default search program (normally - `lid', but `aid' is used if the `-a' option was given when `iid' - was started). The result of this operation is a set of file - names, and it is assigned a unique set number. - -`lid' - `lid' is a keyword. It is used to invoke `lid' with the list of - identifiers following it as arguments. This forces the use of `lid' - regardless of the state of the `-a' option (*note Lid::.). - -`aid' - The `aid' keyword is like the `lid' keyword, but it forces the use - of the `aid' program (*note Aid::.). - -`match' - The `match' operator invokes the `pid' program to do pattern - matching on file names rather than identifiers. The set generated - contains the file names that match the specified patterns (*note - Pid::.). - -`or' - The `or' operator takes two sets of file names as arguments and - generates a new set containing all the files from both sets. - -`and' - The `and' operator takes two sets of file names and generates a new - set containing only files from both sets. - -`not' - The `not' operator inverts a set of file names, producing the set - of all files not in the input set. - -`set number' - A set number consists of the letter `s' followed immediately by a - number. This refers to one of the sets created by a previous - query operation. 
During one `iid' session, each query generates a - unique set number, so any previously generated set may be used as - part of any new query by referring to the set number. - - The `not' operator has the highest precedence with `and' coming in -the middle and `or' having the lowest precedence. The operator names -are recognized using case insensitive matching, so `AND', `and', and -`aNd' are all the same as far as `iid' is concerned. If you wish to use -a keyword as an operand to one of the query programs, you must enclose -it in quotes. Any patterns containing shell special characters must -also be properly quoted or escaped, since the query commands are run by -invoking them with the shell. - - Summary of query expression syntax: - - A <query> is: - <set number> - <identifier> - lid <identifier list> - aid <identifier list> - match <wild card list> - <query> or <query> - <query> and <query> - not <query> - ( <query> ) - - -File: mkid.info, Node: Sets, Next: Show, Prev: Ss and Files commands, Up: Iid - -Sets -==== - - - Subcommand: sets - - The `sets' command displays all the sets created so far. Each one is -described by the query command that generated it. - - -File: mkid.info, Node: Show, Next: Begin, Prev: Sets, Up: Iid - -Show -==== - - - Subcommand: show SET - - - Subcommand: p SET - - The `show' and `p' commands are equivalent. They both accept a set -number as an argument and run the program given in the `PAGER' -environment variable with the file names in that set as arguments. - - -File: mkid.info, Node: Begin, Next: Help, Prev: Show, Up: Iid - -Begin -===== - - - Subcommand: begin DIRECTORY - - - Subcommand: b DIRECTORY - - The `begin' command (and its abbreviated version `b') is used to -begin a new `iid' session in a different directory (which presumably -contains a different database). It flushes all the sets created so far -and switches to the specified directory. 
It is equivalent to exiting -`iid', changing directories in the shell, and running `iid' again. - - -File: mkid.info, Node: Help, Next: Off, Prev: Begin, Up: Iid - -Help -==== - - - Subcommand: help - - - Subcommand: h - - - Subcommand: ? - - The `help', `h', and `?' commands are three different ways to ask for -help. They all invoke the `PAGER' program to display a short help file. - - -File: mkid.info, Node: Off, Next: Shell Commands as Queries, Prev: Help, Up: Iid - -Off -=== - - - Subcommand: off - - - Subcommand: quit - - - Subcommand: q - - These three commands (or just an end of file) all cause `iid' to exit. - - -File: mkid.info, Node: Shell Commands as Queries, Next: Shell Escape, Prev: Off, Up: Iid - -Shell Commands as Queries -========================= - - When the first word on an `iid' command is not recognized as a -builtin `iid' command, `iid' assumes the command is a shell command -which will write a list of file names to STDOUT. This list of file -names is used to generate a new set of files. - - Any set numbers that appear as arguments to this command are expanded -into lists of file names prior to running the command. - - -File: mkid.info, Node: Shell Escape, Prev: Shell Commands as Queries, Up: Iid - -Shell Escape -============ - - If a command starts with a bang (`!') character, the remainder of -the line is run as a shell command. Any set numbers that appear as -arguments to this command are expanded into lists of file names prior to -running the command. - - -File: mkid.info, Node: Other Tools, Next: Command Index, Prev: Iid, Up: Top - -Other Tools -*********** - - This chapter describes some support tools that work with the other ID -programs. - -* Menu: - -* GNU Emacs Interface:: Using gid.el -* Fid:: List identifiers in a file. -* Idx:: Extract identifiers from source file.
- - -File: mkid.info, Node: GNU Emacs Interface, Next: Fid, Prev: Other Tools, Up: Other Tools - -GNU Emacs Interface -=================== - - The source distribution comes with a file named `gid.el'. This is a -GNU emacs interface to the `gid' tool. If you put the file where emacs -can find it (somewhere in your `EMACSLOADPATH') and put `(autoload 'gid -"gid" nil t)' in your `.emacs' file, you will be able to invoke the -`gid' function using `M-x gid'. - - This function prompts you with the word the cursor is on. If you want -to search for a different pattern, simply delete the line and type the -pattern of interest. - - It runs `gid' in a `*compilation*' buffer, so the normal -`next-error' function can be used to visit all the places the -identifier is found (*note Compilation: (emacs)Compilation.). - - -File: mkid.info, Node: Fid, Next: Idx, Prev: GNU Emacs Interface, Up: Other Tools - -Fid -=== - - - Command: fid [`-f<FILE>'] FILE1 [FILE2] - `-f<FILE>' - Look in the named database. - - `FILE1' - List the identifiers contained in file1 according to the - database. - - `FILE2' - If a second file is given, list only the identifiers both - files have in common. - - The `fid' program provides an inverse query. Instead of listing -files containing some identifier, it lists the identifiers found in a -file. - - -File: mkid.info, Node: Idx, Prev: Fid, Up: Other Tools - -Idx -=== - - - Command: idx [`-s<DIRECTORY>'] [`-r<DIRECTORY>'] [`-S<SCANARG>'] - FILES... - The `-s', `-r', and `-S' arguments to `idx' are identical to the - same arguments on `mkid' (*note Mkid Command Line Options::.). - - The `idx' command is more of a test frame for scanners than a tool -designed to be independently useful. It takes the same scanner arguments -as `mkid', but rather than building a database, it prints the -identifiers found to STDOUT, one per line. You can use it to try out a -scanner on a sample file to make sure it is extracting the identifiers -you believe it should extract. 
- - -File: mkid.info, Node: Command Index, Prev: Other Tools, Up: Top - -Command Index -************* - -* Menu: - -* ?: Help. -* aid: Aid. -* b: Begin. -* begin: Begin. -* eid: Eid. -* f: Ss and Files commands. -* fid: Fid. -* files: Ss and Files commands. -* gid: Gid. -* h: Help. -* help: Help. -* idx: Idx. -* iid: Iid. -* lid: Lid. -* mkid: Mkid Command Line Options. -* off: Off. -* p: Show. -* pid: Pid. -* q: Off. -* quit: Off. -* sets: Sets. -* show: Show. -* ss: Ss and Files commands. - - - -Tag Table: -Node: Top913 -Node: Overview1321 -Node: History2885 -Node: Mkid5050 -Node: Mkid Command Line Options6386 -Node: Scanner Arguments8147 -Node: Builtin Scanners10502 -Node: C11167 -Node: Plain Text12062 -Node: Assembler13130 -Node: Adding Your Own Scanner14318 -Node: Mkid Examples16295 -Node: Database Query Tools18272 -Node: Common Options19213 -Node: Patterns22929 -Node: Lid24171 -Node: Aid24593 -Node: Gid25124 -Node: Eid25744 -Node: Pid26868 -Node: Iid27758 -Node: Ss and Files commands29628 -Node: Sets33091 -Node: Show33331 -Node: Begin33659 -Node: Help34146 -Node: Off34427 -Node: Shell Commands as Queries34657 -Node: Shell Escape35183 -Node: Other Tools35525 -Node: GNU Emacs Interface35902 -Node: Fid36708 -Node: Idx37260 -Node: Command Index37935 - -End Tag Table diff --git a/mkid.texinfo b/mkid.texinfo deleted file mode 100644 index 076b313..0000000 --- a/mkid.texinfo +++ /dev/null @@ -1,957 +0,0 @@ -\input texinfo -@comment %**start of header (This is for running Texinfo on a region.) -@setfilename mkid.info -@settitle The ID Database -@setchapternewpage odd -@comment %**end of header (This is for running Texinfo on a region.) - -@include version.texi - -@ifinfo -@format -START-INFO-DIR-ENTRY -* mkid: (mkid). Identifier database utilities -END-INFO-DIR-ENTRY -@end format -@end ifinfo - -@ifinfo -This file documents the @code{mkid} identifier database utilities. 
- -Copyright (C) 1991 Tom Horsley - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). - -@end ignore -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation. -@end ifinfo - -@titlepage -@title The MKID Identifier Database, version @value{VERSION} -@subtitle A Simple, Fast, High-Capacity Cross-Referencer -@subtitle lid, gid, aid, eid, pid, iid -@author by Tom Horsley - -@page -@vskip 0pt plus 1filll -Copyright @copyright{} 1991 Tom Horsley - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation. 
-@end titlepage - -@ifinfo -@node Top, Overview, (dir), (dir) -@top GNU @code{mkid} - -@menu -* Overview:: What is an ID database and what tools manipulate it? -* Mkid:: Mkid -* Database Query Tools:: Database Query Tools -* Iid:: Iid -* Other Tools:: Other Tools -* Command Index:: Command Index -@end menu - -@end ifinfo - -@node Overview, Mkid, Top, Top -@chapter Overview -@cindex Reference to First Chapter -An ID database is simply a file containing a list of file names, a list of -identifiers, and a binary relation (stored as a bit matrix) indicating which -of the identifiers appear in each file. With this database and some tools -to manipulate the data, a host of tasks become simpler and faster. You can -@code{grep} through hundreds of files for a name, skipping the files that -don't contain the name. You can search for all the memos containing -references to a project. You can edit every file that calls some function, -adding a new required argument. Anyone with a large software project to -maintain, or a large set of text files to organize can benefit from the ID -database and the tools that manipulate it. - -There are several programs in the ID family. The @code{mkid} program -scans the files, finds the identifiers and builds the ID database. The -@code{lid} and @code{aid} tools are used to generate lists of file names -containing an identifier (perhaps to recompile every file that -references a macro which just changed). The @code{eid} program will -invoke an editor on each of the files containing an identifier and the -@code{gid} program will @code{grep} for an identifier in the subset of -files known to contain it. The @code{pid} tool is used to query the -path names of the files in the database (rather than the contents). -Finally, the @code{iid} tool is an interactive program supporting -complex queries to intersect and join sets of file names. 
- -@menu -* History:: History -@end menu - -@node History, , Overview, Overview -@section History -Greg McGary conceived of the ideas behind mkid when he began hacking -the UNIX kernel in 1984. He needed a navigation tool to help him find -his way around the expansive, unfamiliar landscape. The first mkid-like tools -were built with shell scripts, and produced an ascii database that looks -much like the output of `lid' with no arguments. It took over an hour -on a VAX 11/750 to build a database for a 4.1BSDish kernel. Lookups were -done with the UNIX command @code{look}, modified to handle very long lines. - -In 1986, Greg rewrote mkid, lid, fid and idx in C to improve -performance. Database-build times were shortened by an order of -magnitude. The mkid tools were first posted to @file{comp.sources.unix} -in September of 1987. - -Over the next few years, several versions diverged from the original -source. Tom Horsley at Harris Computer Systems Division stepped forward -to take over maintenance and integrated some of the fixes from divergent -versions. He also wrote the @code{iid} program. A pre-release of -@code{mkid2} was posted to @file{alt.sources} near the end of 1990. At -that time Tom wrote this texinfo manual with the encouragement of the net -community. (Tom thanks Doug Scofield and Bill Leonard whom I dragooned -into helping me poorf raed and edit --- they found several problems in -the initial version.) - -In January, 1995, Greg McGary reemerged as the primary maintainer and is -hereby launching @code{mkid-3} whose primary new feature is an efficient -algorithm for building databases that is linear over the size of the -input text for both time and space. (The old algorithm was quadratic -for space and choked on very large source trees.) The code is now under -GPL and might become a part of the GNU system. @code{Mkid-3} is an -interim release, since several significant enhancements are in the works.
-These include an optional coupling with GNU grep, so that grep can use -an ID database for hints; a cscope work-alike query interface; -incremental update of the ID database; and an automatic file-tree walker -so you need not explicitly supply every file name argument to -the @code{mkid} program. - -@node Mkid, Database Query Tools, Overview, Top -@chapter Mkid -The @code{mkid} program builds the ID database. To do this it must scan -each of the files included in the database. This takes some time, but -once the work is done the query programs run very rapidly. - -The @code{mkid} program knows how to scan a variety of files. For -example, it knows how to skip over comments and strings in a C program, -only picking out the identifiers used in the code. - -Identifiers are not the only thing included in the database. -Numbers are also scanned and included in the database indexed by -their binary value. Since the same number can be written many -different ways (47, 0x2f, 057 in a C program for instance), this -feature allows you to find hard coded uses of constants without -regard to the radix used to specify them. - -All the places in this document where identifiers are written about -should really mention identifiers and numbers, but that gets fairly -clumsy after a while, so you should always keep in mind that numbers are -included in the database as well as identifiers. - -@menu -* Mkid Command Line Options:: Mkid Command Line Options -* Builtin Scanners:: Builtin Scanners -* Adding Your Own Scanner:: Adding Your Own Scanner -* Mkid Examples:: Mkid Examples -@end menu - -@node Mkid Command Line Options, Builtin Scanners, Mkid, Mkid -@section Mkid Command Line Options -@deffn Command mkid [@code{-v}] [@code{-S@var{scanarg}}] [@code{-a@var{arg-file}}] [@code{-}] [@code{-f@var{out-file}}] [@code{-u}] [@code{files}@dots{}] -@table @code -@item -v -Verbose. Mkid tells you as it scans each file and indicates which scanner -it is using.
It also summarizes some statistics about the database at -the end. -@item -S@var{scanarg} -The @code{-S} option is used to specify arguments to the various language -scanners. @xref{Scanner Arguments}, for details. -@item -a@var{arg-file} -Name a file containing additional command line arguments (one per line). This -may be used to specify lists of file names longer than will fit on a command -line. -@item - -A simple @code{-} by itself means read arguments from stdin. -@item -f@var{out-file} -Specify the name of the database file to create. The default name is @code{ID} -(in the current directory), but you may specify any name. The file names -stored in the database will be stored relative to the directory containing -the database, so if you move the database after creating it, you may have -trouble finding files unless they remain in the same relative position. -@item -u -The @code{-u} option updates an existing database by rescanning any files -that have changed since the database was written. Unfortunately you cannot -incrementally add new files to a database. -@item files -Remaining arguments are names of files to be scanned and included in the -database. -@end table -@end deffn - -@menu -* Scanner Arguments:: Scanner Arguments -@end menu - -@node Scanner Arguments, , Mkid Command Line Options, Mkid Command Line Options -@subsection Scanner Arguments -Scanner arguments all start with @code{-S}. Scanner arguments are used to tell -@code{mkid} which language scanner to use for which files, to pass language -specific options to the individual scanners, and to get some limited -online help about scanner options. - -@code{Mkid} usually determines which language scanner to use on a file -by looking at the suffix of the file name. The suffix starts at the last -@samp{.} in a file name and includes the @samp{.} and all remaining -characters (for example the suffix of @file{fred.c} is @file{.c}). 
Not -all files have a suffix, and not all suffixes are bound to a specific -language by mkid. If @code{mkid} cannot determine what language a file -is, it will use the language bound to the @file{.default} suffix. The -plain text scanner is normally bound to @file{.default}, but the -@code{-S} option can be used to change any language bindings. - -There are several different forms for scanner options: -@table @code -@item -S.@var{<suffix>}=@var{<language>} -@code{Mkid} determines which language scanner to use on a file by examining the -file name suffix. The @samp{.} is part of the suffix and must be specified -in this form of the @code{-S} option. For example @samp{-S.y=c} tells -@code{mkid} to use the @samp{c} language scanner for all files ending in -the @samp{.y} suffix. -@item -S.@var{<suffix>}=? -@code{Mkid} has several built in suffixes it already recognizes. Passing -a @samp{?} will cause it to print the language it will use to scan files -with that suffix. -@item -S?=@var{<language>} -This form will print which suffixes are scanned with the given language. -@item -S?=? -This prints all the suffix@expansion{}language bindings recognized by -@code{mkid}. -@item -S@var{<language>}-@var{<arg>} -Each language scanner accepts scanner dependent arguments. This form of the -@code{-S} option is used to pass arbitrary arguments to the language scanners. -@item -S@var{<language>}? -Passing a @samp{?} instead of a language option will print a brief summary -of the options recognized by the specified language scanner. -@item -S@var{<new language>}/@var{<builtin language>}/@var{<filter command>} -This form specifies a new language defined in terms of a builtin language -and a shell command that will be used to filter the file prior to passing -on to the builtin language scanner. 
-@end table - -@node Builtin Scanners, Adding Your Own Scanner, Mkid Command Line Options, Mkid -@section Builtin Scanners -If you run @code{mkid -S?=?} you will find bindings for a number of -languages; unfortunately pascal, though mentioned in the list, is not -actually supported. The supported languages are documented below -@footnote{This is not strictly true --- vhil is a supported language, but -it is an obsolete and arcane dialect of C and should be ignored}. - -@menu -* C:: C -* Plain Text:: Plain Text -* Assembler:: Assembler -@end menu - -@node C, Plain Text, Builtin Scanners, Builtin Scanners -@subsection C - -The C scanner is probably the most popular. It scans identifiers out of -C programs, skipping over comments and strings in the process. The -normal @file{.c} and @file{.h} suffixes are automatically recognized as -C language, as well as the more obscure @file{.y} (yacc) and @file{.l} -(lex) suffixes. - -The @code{-S} options recognized by the C scanner are: - -@table @code -@item -Sc-s@var{<character>} -Allow the specified @var{<character>} in identifiers (some dialects of -C allow @code{$} in identifiers, so you could say @code{-Sc-s$} to -accept that dialect). -@item -Sc-u -Don't strip leading underscores from identifier names (this is the default -mode of operation). -@item -Sc+u -Do strip leading underscores from identifier names (I don't know why you -would want to do this in C programs, but the option is available). -@end table - -@node Plain Text, Assembler, C, Builtin Scanners -@subsection Plain Text -The plain text scanner is designed for scanning documents. This is -typically the scanner used when adding custom scanners, and several -custom scanners are built in to @code{mkid} and defined in terms of filters -and the text scanner. A troff scanner runs @code{deroff} over the file -then feeds the result to the text scanner. A compressed man page scanner -runs @code{pcat} piped into @code{col -b}, and a @TeX{} scanner runs -@code{detex}. 
- -Options: - -@table @code -@item -Stext+a@var{<character>} -Include the specified character in identifiers. By default, standard -C identifiers are recognized. -@item -Stext-a@var{<character>} -Exclude the specified character from identifiers. -@item -Stext+s@var{<character>} -Squeeze the specified character out of identifiers. By default, the -characters @samp{'}, @samp{-}, and @samp{.} are squeezed out of identifiers. -This generates transformations like @var{fred's}@expansion{}@var{freds} or -@var{a.s.p.c.a.}@expansion{}@var{aspca}. -@item -Stext-s@var{<character>} -Do not squeeze out the specified character. -@end table - -@node Assembler, , Plain Text, Builtin Scanners -@subsection Assembler -Assemblers come in several flavors, so there are several options to -control scanning of assembly code: - -@table @code -@item -Sasm-c@var{<character>} -The specified character starts a comment that extends to end of line -(in many assemblers this is a semicolon or number sign --- there is -no default value for this). -@item -Sasm+u -Strip the leading underscores off identifiers (the default behavior). -@item -Sasm-u -Do not strip the leading underscores. -@item -Sasm+a@var{<character>} -The specified character is allowed in identifiers. -@item -Sasm-a@var{<character>} -The specified character is allowed in identifiers, but any identifier -containing that character is ignored (often a @samp{.} or @samp{@@} -will be used to indicate an internal temp label, you may want to -ignore these). -@item -Sasm+p -Recognize C preprocessor directives in assembler source (default). -@item -Sasm-p -Do not recognize C preprocessor directives in assembler source. -@item -Sasm+C -Skip over C style comments in assembler source (default). -@item -Sasm-C -Do not skip over C style comments in assembler source. -@end table - -@node Adding Your Own Scanner, Mkid Examples, Builtin Scanners, Mkid -@section Adding Your Own Scanner - -There are two ways to add new scanners to @code{mkid}. 
The first is to -modify the code in @file{getscan.c} and add a new @file{scan-*.c} file -with the code for your scanner. This is not too hard, but it requires -relinking and installing a new version of @code{mkid}, which might be -inconvenient, and would lead to the proliferation of @code{mkid} -versions. - -The second technique uses the @code{-S<lang>/<lang>/<filter>} form -of the @code{-S} option to specify a new language scanner. In this form -the first language is the name of the new language to be defined, -the second language is the name of an existing language scanner to -be invoked on the output of the filter command specified as the -third component of the @code{-S} option. - -The filter is an arbitrary shell command. Somewhere in the filter string, -a @code{%s} should occur. This @code{%s} is replaced by the name of the -source file being scanned, the shell command is invoked, and whatever -comes out on @var{stdout} is scanned using the builtin scanner. - -For example, no scanner is provided for texinfo files (like this one). -If I wished to index the contents of this file, but avoid indexing the -texinfo directives, I would need a filter that stripped out the texinfo -directives, but left the remainder of the file intact. I could then use -the plain text scanner on the remainder. A quick way to specify this -might be: - -@example -'-Stexinfo/text/sed s,@@[a-z]*,,g < %s' -@end example - -This defines a new language scanner (@var{texinfo}) defined in terms of -a @code{sed} command to strip out texinfo directives (at signs followed -by letters). Once the directives are stripped, the remaining text is run -through the plain text scanner. - -This is just an example; to do a better job I would actually need to -delete some lines (such as those beginning with @code{@@end}) as well -as deleting the @code{@@} directives embedded in the text.
- -@node Mkid Examples, , Adding Your Own Scanner, Mkid -@section Mkid Examples - -The simplest example of @code{mkid} is something like: - -@example -mkid *.[chy] -@end example - -This will build an ID database indexing all the -identifiers and numbers in the @file{.c}, @file{.h}, and @file{.y} files -in the current directory. Because those suffixes are already known to -@code{mkid} as C language files, no other special arguments are required. - -From a simple example, let's go to a more complex one. Suppose you want -to build a database indexing the contents of all the @var{man} pages. -Since @code{mkid} already knows how to deal with @file{.z} files, let's -assume your system is using the @code{compress} program to store -compressed cattable versions of the @var{man} pages. The -@code{compress} program creates files with a @code{.Z} suffix, so -@code{mkid} will have to be told how to scan @file{.Z} files. The -following code shows how to combine the @code{find} command with the -special scanner arguments to @code{mkid} to generate the required ID -database: - -@example -cd /usr/catman -find . -name '*.Z' -print | mkid '-Sman/text/uncompress -c < %s' -S.Z=man - -@end example - -This example first switches to the @file{/usr/catman} directory where -the compressed @var{man} pages are stored. The @code{find} command then -finds all the @file{.Z} files under that directory and prints their -names. This list is piped into the @code{mkid} program. The @code{-} -argument by itself (at the end of the line) tells @code{mkid} to read -arguments (in this case the list of file names) from @var{stdin}. The -first @code{-S} argument defines a new language (@var{man}) in terms of -the @code{uncompress} utility and the existing text scanner. The second -@code{-S} argument tells @code{mkid} to treat all @file{.Z} files as -language @var{man}.
In practice, you might find the @code{mkid} -arguments need to be even more complex, something like: - -@example -mkid '-Sman/text/uncompress -c < %s | col -b' -S.Z=man - -@end example - -This will take the additional step of getting rid of any underlining and -backspacing which might be present in the compressed @var{man} pages. - -@node Database Query Tools, Iid, Mkid, Top -@chapter Database Query Tools - -The ID database is useless without database query tools. The remainder -of this document describes those tools. - -The @code{lid}, @code{gid}, -@code{aid}, @code{eid}, and @code{pid} programs are all the same program -installed with links to different names. The name used to invoke the -program determines how it will act. - -The @code{iid} program is an interactive query shell that sits on top -of the other query tools. - -@menu -* Common Options:: Common command line options -* Patterns:: Identifier pattern matching -* Lid:: Look up identifiers -* Aid:: Case insensitive lid -* Gid:: Grep for identifiers -* Eid:: Edit files with matching identifiers -* Pid:: Look up path names in database -@end menu - -@node Common Options, Patterns, Database Query Tools, Database Query Tools -@section Common Options - -Since many of the programs are really links to one common program, it -is only reasonable to expect that most of the query tools would share -common command line options. Not all options make sense for all programs, -but they are all described here. The description of each program -gives the options that program uses. - -@table @code -@item -f@var{<file>} -Read the database specified by @var{<file>}. Normally the tools look -for a file named @file{ID} in either the current directory or in any -of the directories above the current directory. This means you can keep -a global @file{ID} database in the root of a large source tree and use -the query tools from anywhere within that tree. 
-@item -r@var{<directory>} -The query tools usually assume the file names in the database are relative -to the directory holding the database. The @code{-r} option tells the -tools to look for the files relative to @var{<directory>} regardless -of the location of the database. -@item -c -This is shorthand for @code{-r`pwd`}. It tells the query tools to assume -the file names are stored relative to the current working directory. -@item -e -Force the pattern arguments to be treated as regular expressions. -Normally the query tools attempt to guess if the patterns are regular -expressions or simple identifiers by looking for special characters -in the pattern. -@item -w -Force the pattern arguments to be treated as simple words even if -they contain special regular expression characters. -@item -k -Normally the query tools that generate lists of file names attempt to -compress the lists using the @code{csh} brace notation. This option -suppresses the file name compression and outputs each name in full. -(This is particularly useful if you are a @code{ksh} user and want to -feed the list of names to another command --- the @code{-k} option -comes from the @code{k} in @code{ksh}). -@item -g -It is possible to build the query tools so the @code{-k} option is the -default behavior. If this is the case for your system, the @code{-g} -option turns on the globbing of file names using the @code{csh} brace -notation. -@item -n -Normally the query tools that generate lists of file names also list -the matching identifier at the head of the list of names. This is -irritating if you want just a list of names to feed to another command, -so the @code{-n} option suppresses the identifier and lists only -file names. -@item -b -This option is only used by the @code{pid} tool. It restricts @code{pid} -to pattern match only the basename part of a file name. Normally the -absolute file name is matched against the pattern. 
-@item -d -o -x -a -These options may be used in any combination to limit the radix of -numeric matches. The @code{-d} option will allow matches on decimal -numbers, @code{-o} on octal, and @code{-x} on hexadecimal numbers. -The @code{-a} option is shorthand for specifying all three. Any -combination of these options may be used. -@item -m -Merge multiple lines of output into a single line. (If your query -matches more than one identifier the default action is to generate -a separate line of output for each matching identifier). -@item -s -Search for identifiers that appear only once in the database. This -helps to locate identifiers that are defined but never used. -@item -u@var{<number>} -List identifiers that conflict in the first @var{<number>} characters. -This could be useful porting programs to brain-dead computers that -refuse to support long identifiers, but your best long term option -is to set such computers on fire. -@end table - -@node Patterns, Lid, Common Options, Database Query Tools -@section Patterns - -You can attempt to match either simple identifiers or numbers in a -query, or you can specify a regular expression pattern which may -match many different identifiers in the database. The query -programs use either @var{regex} and @var{regcmp} or @var{re_comp} -and @var{re_exec}, depending on which one is available in the library -on your system. These might not always support the exact same -regular expression syntax, so consult your local @var{man} pages -to find out. Any regular expression routines should support the following -syntax: - -@table @code -@item . -A dot matches any character. -@item [ ] -Brackets match any of the characters specified within the brackets. You -can match any characters @emph{except} the ones in brackets by typing -@code{^} as the first character. A range of characters can be specified -using @code{-}. -@item * -An asterisk means repeat the previous pattern zero or more times. 
-@item ^ -An @code{^} at the beginning of a pattern means the pattern must match -starting at the first character of the identifier. -@item $ -A @code{$} at the end of the pattern means the pattern must match ending -at the last character in the identifier. -@end table - -@node Lid, Aid, Patterns, Database Query Tools -@section Lid - -@deffn Command lid [@code{-f@var{<file>}}] [@code{-u@var{<n>}}] [@code{-r@var{<dir>}}] [@code{-ewdoxamskgnc}] patterns@dots{} -@end deffn - -The @code{lid} program stands for @var{lookup identifier}. -It searches the database for any identifiers matching the patterns -and prints the names of the files that match each pattern. The exact -format of the output depends on the options. - -@node Aid, Gid, Lid, Database Query Tools -@section Aid - -@deffn Command aid [@code{-f@var{<file>}}] [@code{-u@var{<n>}}] [@code{-r@var{<dir>}}] [@code{-doxamskgnc}] patterns@dots{} -@end deffn - -The @code{aid} command is an abbreviation for @var{apropos identifier}. -The patterns cannot be regular expressions, but it looks for them using -a case insensitive match, and any pattern that is a substring of an -identifier in the database will match that identifier. - -For example @samp{aid get} might match the identifiers @code{fgets}, -@code{GETLINE}, and @code{getchar}. - -@node Gid, Eid, Aid, Database Query Tools -@section Gid - -@deffn Command gid [@code{-f@var{<file>}}] [@code{-u@var{<n>}}] [@code{-r@var{<dir>}}] [@code{-doxasc}] patterns@dots{} -@end deffn - -The @code{gid} command stands for @var{grep for identifiers}. It finds -identifiers in the database that match the specified patterns, then -@code{greps} for those identifiers in just the set of files containing -matches. In a large source tree, this saves a fantastic amount of time. - -There is an @var{emacs} interface to this program (@pxref{GNU Emacs Interface}). -If you are an @var{emacs} user, you will probably prefer the @var{emacs} -interface over the @code{eid} tool. 
- -@node Eid, Pid, Gid, Database Query Tools -@section Eid - -@deffn Command eid [@code{-f@var{<file>}}] [@code{-u@var{<n>}}] [@code{-r@var{<dir>}}] [@code{-doxasc}] patterns@dots{} -@end deffn - -The @code{eid} command allows you to invoke an editor on each file containing -a matching pattern. The @code{EDITOR} environment variable is the name of the -program to be invoked. If the specified editor can accept an initial search -argument on the command line, you can use the @code{EIDARG}, @code{EIDLDEL}, -and @code{EIDRDEL} environment variables to specify the form of that argument. - -@table @code -@item EDITOR -The name of the editor program to invoke. -@item EIDARG -A printf string giving the form of the argument to pass containing the -initial search string (the matching identifier). For @code{vi} -it should be set to @samp{+/%s/'}. -@item EIDLDEL -A string giving the regular expression pattern that forces a match at -the beginning (left end) of a word. This string is inserted in front -of the matching identifier when composing the search argument. For @code{vi}, -this should be @samp{\<}. -@item EIDRDEL -The matching right end word delimiter. For @code{vi}, use @samp{\>}. -@end table - -@node Pid, , Eid, Database Query Tools -@section Pid - -@deffn Command pid [@code{-f@var{<file>}}] [@code{-u@var{<n>}}] [@code{-r@var{<dir>}}] [@code{-ebkgnc}] patterns@dots{} -@end deffn - -The @code{pid} tool is unlike all the other tools. It matches the -patterns against the file names in the database rather than the -identifiers in the database. Patterns are treated as shell wild card -patterns unless the @code{-e} option is given, in which case full -regular expression matching is done. - -The wild card pattern is matched against the absolute path name of the -file. Most shells treat slashes @samp{/} and file names that start with -dot @samp{.} specially, @code{pid} does not do this. It simply attempts -to match the absolute path name string against the wild card pattern. 
- -The @code{-b} option restricts the pattern matching to the base name of -the file (all the leading directory names are stripped prior to pattern -matching). - -@node Iid, Other Tools, Database Query Tools, Top -@chapter Iid - -@deffn Command iid [@code{-a}] [@code{-c@var{<command>}}] [@code{-H}] -@table @code -@item -a -Normally @code{iid} uses the @code{lid} command to search for names. -If you give the @code{-a} option on the command line, then it will -use @code{aid} as the default search engine. -@item -c@var{<command>} -In normal operation, @code{iid} starts up and prompts you for commands -used to build sets of files. The @code{-c} option is used to pass a -single query command to @code{iid} which it then executes and exits. -@item -H -The @code{-H} option prints a short help message and exits. To get more -help use the @code{help} command from inside @code{iid}. -@end table -@end deffn - -The @code{iid} program is an interactive ID query tool. It operates by -running the other query programs (such as @code{lid} and @code{aid}) -and creating sets of file names returned by these queries. It also -provides operators for @code{anding} and @code{oring} these sets to -create new sets. - -The @code{PAGER} environment variable names the program @code{iid} uses -to display files. If you use @code{emacs}, you might want to set -@code{PAGER} so it invokes the @code{emacsclient} program. Check the -file @file{lisp/server.el} in the emacs source tree for documentation on -this. It is useful not only with X windows, but also when running -@code{iid} from an emacs shell buffer. There is also a somewhat spiffier -version called gnuserv by Andy Norman -(@code{ange%anorman@@hplabs.hp.com}) which appeared in @file{comp.emacs} -sometime in 1989. 
- -@menu -* Ss and Files commands:: Ss and Files commands -* Sets:: Sets -* Show:: Show -* Begin:: Begin -* Help:: Help -* Off:: Off -* Shell Commands as Queries:: Shell Commands as Queries -* Shell Escape:: Shell Escape -@end menu - -@node Ss and Files commands, Sets, Iid, Iid -@section Ss and Files commands - -The primary query commands are @code{ss} (for select sets) and @code{files} -(for show file names). These commands both take a query expression as an -argument. - -@deffn Subcommand ss query -The @code{ss} command runs a query and builds a set (or sets) of file names. The -result is printed as a summary of the sets constructed showing how many file -names are in each set. -@end deffn - -@deffn Subcommand files query -The @code{files} command is like the @code{ss} command, but rather than printing -a summary, it displays the full list of matching file names. -@end deffn - -@deffn Subcommand f query -The @code{f} command is merely a shorthand notation for @code{files}. -@end deffn - -Database queries are simple expressions with operators like @code{and} -and @code{or}. Parentheses can be used to group operations. The complete -set of operators is summarized below: - -@table @code -@item @var{pattern} -Any pattern not recognized as one of the keywords in this table is treated -as an identifier to be searched for in the database. It is passed as an -argument to the default search program (normally @code{lid}, but @code{aid} -is used if the @code{-a} option was given when @code{iid} was started). -The result of this operation is a set of file names, and it is assigned a -unique set number. -@item lid -@code{lid} is a keyword. It is used to invoke @code{lid} with the list of -identifiers following it as arguments. This forces the use of @code{lid} -regardless of the state of the @code{-a} option (@pxref{Lid}). -@item aid -The @code{aid} keyword is like the @code{lid} keyword, but it forces the -use of the @code{aid} program (@pxref{Aid}). 
-@item match -The @code{match} operator invokes the @code{pid} program to do pattern -matching on file names rather than identifiers. The set generated contains -the file names that match the specified patterns (@pxref{Pid}). -@item or -The @code{or} operator takes two sets of file names as arguments and generates -a new set containing all the files from both sets. -@item and -The @code{and} operator takes two sets of file names and generates a new -set containing only the files that appear in both sets. -@item not -The @code{not} operator inverts a set of file names, producing the set of -all files not in the input set. -@item set number -A set number consists of the letter @code{s} followed immediately by a number. -This refers to one of the sets created by a previous query operation. During -one @code{iid} session, each query generates a unique set number, so any -previously generated set may be used as part of any new query by referring -to the set number. -@end table - -The @code{not} operator has the highest precedence with @code{and} -coming in the middle and @code{or} having the lowest precedence. The -operator names are recognized using case insensitive matching, so -@code{AND}, @code{and}, and @code{aNd} are all the same as far as -@code{iid} is concerned. If you wish to use a keyword as an operand to -one of the query programs, you must enclose it in quotes. Any patterns -containing shell special characters must also be properly quoted or -escaped, since the query commands are run by invoking them with the -shell. - -Summary of query expression syntax: - -@example -A <query> is: - <set number> - <identifier> - lid <identifier list> - aid <identifier list> - match <wild card list> - <query> or <query> - <query> and <query> - not <query> - ( <query> ) -@end example - -@node Sets, Show, Ss and Files commands, Iid -@section Sets - -@deffn Subcommand sets -@end deffn - -The @code{sets} command displays all the sets created so far. 
Each one -is described by the query command that generated it. - -@node Show, Begin, Sets, Iid -@section Show - -@deffn Subcommand show set -@end deffn - -@deffn Subcommand p set -@end deffn - -The @code{show} and @code{p} commands are equivalent. They both accept -a set number as an argument and run the program given in the @code{PAGER} -environment variable with the file names in that set as arguments. - -@node Begin, Help, Show, Iid -@section Begin - -@deffn Subcommand begin directory -@end deffn - -@deffn Subcommand b directory -@end deffn - -The @code{begin} command (and its abbreviated version @code{b}) is used -to begin a new @code{iid} session in a different directory (which presumably -contains a different database). It flushes all the sets created so far -and switches to the specified directory. It is equivalent to exiting @code{iid}, -changing directories in the shell, and running @code{iid} again. - -@node Help, Off, Begin, Iid -@section Help - -@deffn Subcommand help -@end deffn - -@deffn Subcommand h -@end deffn - -@deffn Subcommand ? -@end deffn - -The @code{help}, @code{h}, and @code{?} commands are three different ways to -ask for help. They all invoke the @code{PAGER} program to display a short -help file. - -@node Off, Shell Commands as Queries, Help, Iid -@section Off - -@deffn Subcommand off -@end deffn - -@deffn Subcommand quit -@end deffn - -@deffn Subcommand q -@end deffn - -These three commands (or just an end of file) all cause @code{iid} to exit. - -@node Shell Commands as Queries, Shell Escape, Off, Iid -@section Shell Commands as Queries - -When the first word on an @code{iid} command is not recognized as a -builtin @code{iid} command, @code{iid} assumes the command is a shell -command which will write a list of file names to @var{stdout}. This list -of file names is used to generate a new set of files. - -Any set numbers that appear as arguments to this command are expanded -into lists of file names prior to running the command. 
- -@node Shell Escape, , Shell Commands as Queries, Iid -@section Shell Escape - -If a command starts with a bang (@code{!}) character, the remainder of -the line is run as a shell command. Any set numbers that appear as -arguments to this command are expanded into lists of file names prior to -running the command. - -@node Other Tools, Command Index, Iid, Top -@chapter Other Tools - -This chapter describes some support tools that work with the other ID -programs. - -@menu -* GNU Emacs Interface:: Using gid.el -* Fid:: List identifiers in a file. -* Idx:: Extract identifiers from source file. -@end menu - -@node GNU Emacs Interface, Fid, Other Tools, Other Tools -@section GNU Emacs Interface - -The source distribution comes with a file named @file{gid.el}. This is -a GNU emacs interface to the @code{gid} tool. If you put the file where -emacs can find it (somewhere in your @code{EMACSLOADPATH}) and put -@code{(autoload 'gid "gid" nil t)} in your @file{.emacs} file, you will -be able to invoke the @code{gid} function using @kbd{M-x gid}. - -This function prompts you with the word the cursor is on. If you want -to search for a different pattern, simply delete the line and type the -pattern of interest. - -It runs @code{gid} in a @code{*compilation*} buffer, so the normal -@code{next-error} function can be used to visit all the places the -identifier is found (@pxref{Compilation,,,emacs,The GNU Emacs Manual}). - -@node Fid, Idx, GNU Emacs Interface, Other Tools -@section Fid - -@deffn Command fid [@code{-f@var{<file>}}] file1 [file2] -@table @code -@item -f@var{<file>} -Look in the named database. -@item @var{file1} -List the identifiers contained in file1 according to the database. -@item @var{file2} -If a second file is given, list only the identifiers both files have -in common. -@end table -@end deffn - -The @code{fid} program provides an inverse query. Instead of listing -files containing some identifier, it lists the identifiers found in -a file. 
- -@node Idx, , Fid, Other Tools -@section Idx - -@deffn Command idx [@code{-s@var{<directory>}}] [@code{-r@var{<directory>}}] [@code{-S@var{<scanarg>}}] files@dots{} -The @code{-s}, @code{-r}, and @code{-S} arguments to @code{idx} -are identical to the same arguments on @code{mkid} -(@pxref{Mkid Command Line Options}). -@end deffn - -The @code{idx} command is more of a test frame for scanners than a tool -designed to be independently useful. It takes the same scanner arguments -as @code{mkid}, but rather than building a database, it prints the -identifiers found to @var{stdout}, one per line. You can use it to try -out a scanner on a sample file to make sure it is extracting the -identifiers you believe it should extract. - -@node Command Index, , Other Tools, Top -@unnumbered Command Index - -@printindex fn - -@contents -@bye diff --git a/regex.c b/regex.c deleted file mode 100644 index 3900958..0000000 --- a/regex.c +++ /dev/null @@ -1,5244 +0,0 @@ -/* Extended regular expression matching and search library, - version 0.12. - (Implements POSIX draft P10003.2/D11.2, except for - internationalization features.) - - Copyright (C) 1993, 1994 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* AIX requires this to be the first thing in the file. 
*/ -#if defined (_AIX) && !defined (REGEX_MALLOC) - #pragma alloca -#endif - -#define _GNU_SOURCE - -#if HAVE_CONFIG_H -#include <config.h> -#endif - -/* We need this for `regex.h', and perhaps for the Emacs include files. */ -#include <sys/types.h> - -/* The `emacs' switch turns on certain matching commands - that make sense only in Emacs. */ -#ifdef emacs - -#include "lisp.h" -#include "buffer.h" -#include "syntax.h" - -/* Emacs uses `NULL' as a predicate. */ -#undef NULL - -#else /* not emacs */ - -#ifdef STDC_HEADERS -#include <stdlib.h> -#else -char *malloc (); -char *realloc (); -#endif - - -/* We used to test for `BSTRING' here, but only GCC and Emacs define - `BSTRING', as far as I know, and neither of them use this code. */ -#ifndef INHIBIT_STRING_HEADER -#if HAVE_STRING_H || STDC_HEADERS -#include <string.h> -#ifndef bcmp -#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) -#endif -#ifndef bcopy -#define bcopy(s, d, n) memcpy ((d), (s), (n)) -#endif -#ifndef bzero -#define bzero(s, n) memset ((s), 0, (n)) -#endif -#else -#include <strings.h> -#endif -#endif - -/* Define the syntax stuff for \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -#ifndef Sword -#define Sword 1 -#endif - -#ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -#else /* not SYNTAX_TABLE */ - -/* How many characters in the character set. 
*/ -#define CHAR_SET_SIZE 256 - -static char re_syntax_table[CHAR_SET_SIZE]; -/* Fill re_syntax_table the first time this is called: the entries for - `a'-`z', `A'-`Z', `0'-`9' and `_' are set to Sword, every other entry - is left at zero. Later calls return immediately. */ -static void -init_syntax_once () -{ - register int c; - static int done = 0; /* Set to 1 once the table has been filled. */ - - if (done) - return; - - bzero (re_syntax_table, sizeof re_syntax_table); - - for (c = 'a'; c <= 'z'; c++) - re_syntax_table[c] = Sword; - - for (c = 'A'; c <= 'Z'; c++) - re_syntax_table[c] = Sword; - - for (c = '0'; c <= '9'; c++) - re_syntax_table[c] = Sword; - - re_syntax_table['_'] = Sword; - - done = 1; -} - -#endif /* not SYNTAX_TABLE */ - -#define SYNTAX(c) re_syntax_table[c] - -#endif /* not emacs */ - -/* Get the interface, including the syntax bits. */ -#include "regex.h" - -/* isalpha etc. are used for the character classes. */ -#include <ctype.h> - -/* Jim Meyering writes: - - "... Some ctype macros are valid only for character codes that - isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when - using /bin/cc or gcc but without giving an ansi option). So, all - ctype uses should be through macros like ISPRINT... If - STDC_HEADERS is defined, then autoconf has verified that the ctype - macros don't need to be guarded with references to isascii. ... - Defining isascii to 1 should let any compiler worth its salt - eliminate the && through constant folding." 
*/ - -#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) -#define ISASCII(c) 1 -#else -#define ISASCII(c) isascii(c) -#endif - -#ifdef isblank -#define ISBLANK(c) (ISASCII (c) && isblank (c)) -#else -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -#define ISGRAPH(c) (ISASCII (c) && isgraph (c)) -#else -#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) -#endif - -#define ISPRINT(c) (ISASCII (c) && isprint (c)) -#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) -#define ISALNUM(c) (ISASCII (c) && isalnum (c)) -#define ISALPHA(c) (ISASCII (c) && isalpha (c)) -#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) -#define ISLOWER(c) (ISASCII (c) && islower (c)) -#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) -#define ISSPACE(c) (ISASCII (c) && isspace (c)) -#define ISUPPER(c) (ISASCII (c) && isupper (c)) -#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) - -#ifndef NULL -#define NULL 0 -#endif - -/* We remove any previous definition of `SIGN_EXTEND_CHAR', - since ours (we hope) works properly with all combinations of - machines, compilers, `char' and `unsigned char' argument types. - (Per Bothner suggested the basic approach.) */ -#undef SIGN_EXTEND_CHAR -#if __STDC__ -#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -#else /* not __STDC__ */ -/* As in Harbison and Steele. */ -#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -#endif - -/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we - use `alloca' instead of `malloc'. This is because using malloc in - re_search* or re_match* could cause memory leaks when C-g is used in - Emacs; also, malloc is slower and causes storage fragmentation. On - the other hand, malloc is more portable, and easier to debug. - - Because we sometimes use alloca, some routines have to be macros, - not functions -- `alloca'-allocated space disappears at the end of the - function it is called in. 
*/ - -#ifdef REGEX_MALLOC - -#define REGEX_ALLOCATE malloc -#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) - -#else /* not REGEX_MALLOC */ - -/* Emacs already defines alloca, sometimes. */ -#ifndef alloca - -/* Make alloca work the best possible way. */ -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not __GNUC__ */ -#if HAVE_ALLOCA_H -#include <alloca.h> -#else /* not __GNUC__ or HAVE_ALLOCA_H */ -#ifndef _AIX /* Already did AIX, up at the top. */ -char *alloca (); -#endif /* not _AIX */ -#endif /* not HAVE_ALLOCA_H */ -#endif /* not __GNUC__ */ - -#endif /* not alloca */ - -#define REGEX_ALLOCATE alloca - -/* Assumes a `char *destination' variable. */ -#define REGEX_REALLOCATE(source, osize, nsize) \ - (destination = (char *) alloca (nsize), \ - bcopy (source, destination, osize), \ - destination) - -#endif /* not REGEX_MALLOC */ - - -/* True if `size1' is non-NULL and PTR is pointing anywhere inside - `string1' or just past its end. This works if PTR is NULL, which is - a good thing. */ -#define FIRST_STRING_P(ptr) \ - (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) - -/* (Re)Allocate N items of type T using malloc, or fail. */ -#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -#define RETALLOC_IF(addr, n, t) \ - if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) -#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) - -#define BYTEWIDTH 8 /* In bits. */ - -#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) - -#undef MAX -#undef MIN -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -typedef char boolean; -#define false 0 -#define true 1 - -static int re_match_2_internal (); - -/* These are the command codes that appear in compiled regular - expressions. Some opcodes are followed by argument bytes. 
A - command code can specify any interpretation whatsoever for its - arguments. Zero bytes may appear in the compiled regular expression. */ - -typedef enum -{ - no_op = 0, - - /* Followed by one byte giving n, then by n literal bytes. */ - exactn, - - /* Matches any (more or less) character. */ - anychar, - - /* Matches any one char belonging to specified set. First - following byte is number of bitmap bytes. Then come bytes - for a bitmap saying which chars are in. Bits in each byte - are ordered low-bit-first. A character is in the set if its - bit is 1. A character too large to have a bit in the map is - automatically not in the set. */ - charset, - - /* Same parameters as charset, but match any character that is - not one of those specified. */ - charset_not, - - /* Start remembering the text that is matched, for storing in a - register. Followed by one byte with the register number, in - the range 0 to one less than the pattern buffer's re_nsub - field. Then followed by one byte with the number of groups - inner to this one. (This last has to be part of the - start_memory only because we need it in the on_failure_jump - of re_match_2.) */ - start_memory, - - /* Stop remembering the text that is matched and store it in a - memory register. Followed by one byte with the register - number, in the range 0 to one less than `re_nsub' in the - pattern buffer, and one byte with the number of inner groups, - just like `start_memory'. (We need the number of inner - groups here because we don't have any easy way of finding the - corresponding start_memory when we're at a stop_memory.) */ - stop_memory, - - /* Match a duplicate of something remembered. Followed by one - byte containing the register number. */ - duplicate, - - /* Fail unless at beginning of line. */ - begline, - - /* Fail unless at end of line. */ - endline, - - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). 
*/ - begbuf, - - /* Analogously, for end of buffer/string. */ - endbuf, - - /* Followed by two byte relative address to which to jump. */ - jump, - - /* Same as jump, but marks the end of an alternative. */ - jump_past_alt, - - /* Followed by two-byte relative address of place to resume at - in case of failure. */ - on_failure_jump, - - /* Like on_failure_jump, but pushes a placeholder instead of the - current string position when executed. */ - on_failure_keep_string_jump, - - /* Throw away latest failure point and then jump to following - two-byte relative address. */ - pop_failure_jump, - - /* Change to pop_failure_jump if know won't have to backtrack to - match; otherwise change to jump. This is used to jump - back to the beginning of a repeat. If what follows this jump - clearly won't match what the repeat does, such that we can be - sure that there is no use backtracking out of repetitions - already matched, then we change it to a pop_failure_jump. - Followed by two-byte address. */ - maybe_pop_jump, - - /* Jump to following two-byte address, and push a dummy failure - point. This failure point will be thrown away if an attempt - is made to use it for a failure. A `+' construct makes this - before the first repeat. Also used as an intermediary kind - of jump when compiling an alternative. */ - dummy_failure_jump, - - /* Push a dummy failure point and continue. Used at the end of - alternatives. */ - push_dummy_failure, - - /* Followed by two-byte relative address and two-byte number n. - After matching N times, jump to the address upon failure. */ - succeed_n, - - /* Followed by two-byte relative address, and two-byte number n. - Jump to the address N times, then fail. */ - jump_n, - - /* Set the following two-byte relative address to the - subsequent two-byte number. The address *includes* the two - bytes of number. */ - set_number_at, - - wordchar, /* Matches any word-constituent character. 
*/ - notwordchar, /* Matches any char that is not a word-constituent. */ - - wordbeg, /* Succeeds if at word beginning. */ - wordend, /* Succeeds if at word end. */ - - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ - -#ifdef emacs - ,before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. */ - - /* Matches any character whose syntax is specified. Followed by - a byte which contains a syntax code, e.g., Sword. */ - syntaxspec, - - /* Matches any character whose syntax is not that specified. */ - notsyntaxspec -#endif /* emacs */ -} re_opcode_t; - -/* Common operations on the compiled pattern. */ - -/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ - -#define STORE_NUMBER(destination, number) \ - do { \ - (destination)[0] = (number) & 0377; \ - (destination)[1] = (number) >> 8; \ - } while (0) - -/* Same as STORE_NUMBER, except increment DESTINATION to - the byte after where the number is stored. Therefore, DESTINATION - must be an lvalue. */ - -#define STORE_NUMBER_AND_INCR(destination, number) \ - do { \ - STORE_NUMBER (destination, number); \ - (destination) += 2; \ - } while (0) - -/* Put into DESTINATION a number stored in two contiguous bytes starting - at SOURCE. */ - -#define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source) & 0377; \ - (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ - } while (0) - -#ifdef DEBUG -static void -extract_number (dest, source) - int *dest; - unsigned char *source; -{ - int temp = SIGN_EXTEND_CHAR (*(source + 1)); - *dest = *source & 0377; - *dest += temp << 8; -} - -#ifndef EXTRACT_MACROS /* To debug the macros. */ -#undef EXTRACT_NUMBER -#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. 
- SOURCE must be an lvalue. */ - -#define EXTRACT_NUMBER_AND_INCR(destination, source) \ - do { \ - EXTRACT_NUMBER (destination, source); \ - (source) += 2; \ - } while (0) -/* Function form of EXTRACT_NUMBER_AND_INCR, compiled only when DEBUG is - defined: calls extract_number to store the number found at *SOURCE in - *DESTINATION, then advances *SOURCE by two bytes. */ -#ifdef DEBUG -static void -extract_number_and_incr (destination, source) - int *destination; - unsigned char **source; -{ - extract_number (destination, *source); - *source += 2; -} - -#ifndef EXTRACT_MACROS -#undef EXTRACT_NUMBER_AND_INCR -#define EXTRACT_NUMBER_AND_INCR(dest, src) \ - extract_number_and_incr (&dest, &src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* If DEBUG is defined, Regex prints many voluminous messages about what - it is doing (if the variable `debug' is nonzero). If linked with the - main program in `iregex.c', you can enter patterns and strings - interactively. And if linked with the main program in `main.c' and - the other test files, you can run the already-written tests. */ - -#ifdef DEBUG - -/* We use standard I/O for debugging. */ -#include <stdio.h> - -/* It is useful to test things that ``must'' be true when debugging. */ -#include <assert.h> - -static int debug = 0; - -#define DEBUG_STATEMENT(e) e -#define DEBUG_PRINT1(x) if (debug) printf (x) -#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) print_partial_compiled_pattern (s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) print_double_string (w, s1, sz1, s2, sz2) - - -extern void printchar (); - -/* Print the fastmap in human-readable form. 
*/ - -void -print_fastmap (fastmap) - char *fastmap; -{ - unsigned was_a_range = 0; - unsigned i = 0; - - while (i < (1 << BYTEWIDTH)) - { - if (fastmap[i++]) - { - was_a_range = 0; - printchar (i - 1); - while (i < (1 << BYTEWIDTH) && fastmap[i]) - { - was_a_range = 1; - i++; - } - if (was_a_range) - { - printf ("-"); - printchar (i - 1); - } - } - } - putchar ('\n'); -} - - -/* Print a compiled pattern string in human-readable form, starting at - the START pointer into it and ending just before the pointer END. */ - -void -print_partial_compiled_pattern (start, end) - unsigned char *start; - unsigned char *end; -{ - int mcnt, mcnt2; - unsigned char *p = start; - unsigned char *pend = end; - - if (start == NULL) - { - printf ("(null)\n"); - return; - } - - /* Loop over pattern commands. */ - while (p < pend) - { - printf ("%d:\t", p - start); - - switch ((re_opcode_t) *p++) - { - case no_op: - printf ("/no_op"); - break; - - case exactn: - mcnt = *p++; - printf ("/exactn/%d", mcnt); - do - { - putchar ('/'); - printchar (*p++); - } - while (--mcnt); - break; - - case start_memory: - mcnt = *p++; - printf ("/start_memory/%d/%d", mcnt, *p++); - break; - - case stop_memory: - mcnt = *p++; - printf ("/stop_memory/%d/%d", mcnt, *p++); - break; - - case duplicate: - printf ("/duplicate/%d", *p++); - break; - - case anychar: - printf ("/anychar"); - break; - - case charset: - case charset_not: - { - register int c, last = -100; - register int in_range = 0; - - printf ("/charset [%s", - (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); - - assert (p + *p < pend); - - for (c = 0; c < 256; c++) - if (c / 8 < *p - && (p[1 + (c/8)] & (1 << (c % 8)))) - { - /* Are we starting a range? */ - if (last + 1 == c && ! in_range) - { - putchar ('-'); - in_range = 1; - } - /* Have we broken a range? */ - else if (last + 1 != c && in_range) - { - printchar (last); - in_range = 0; - } - - if (! 
in_range) - printchar (c); - - last = c; - } - - if (in_range) - printchar (last); - - putchar (']'); - - p += 1 + *p; - } - break; - - case begline: - printf ("/begline"); - break; - - case endline: - printf ("/endline"); - break; - - case on_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_jump to %d", p + mcnt - start); - break; - - case on_failure_keep_string_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); - break; - - case dummy_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/dummy_failure_jump to %d", p + mcnt - start); - break; - - case push_dummy_failure: - printf ("/push_dummy_failure"); - break; - - case maybe_pop_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/maybe_pop_jump to %d", p + mcnt - start); - break; - - case pop_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/pop_failure_jump to %d", p + mcnt - start); - break; - - case jump_past_alt: - extract_number_and_incr (&mcnt, &p); - printf ("/jump_past_alt to %d", p + mcnt - start); - break; - - case jump: - extract_number_and_incr (&mcnt, &p); - printf ("/jump to %d", p + mcnt - start); - break; - - case succeed_n: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); - break; - - case jump_n: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); - break; - - case set_number_at: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); - break; - - case wordbound: - printf ("/wordbound"); - break; - - case notwordbound: - printf ("/notwordbound"); - break; - - case wordbeg: - printf ("/wordbeg"); - break; - - case wordend: - printf ("/wordend"); - -#ifdef emacs - case before_dot: - printf 
("/before_dot"); - break; - - case at_dot: - printf ("/at_dot"); - break; - - case after_dot: - printf ("/after_dot"); - break; - - case syntaxspec: - printf ("/syntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; - - case notsyntaxspec: - printf ("/notsyntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; -#endif /* emacs */ - - case wordchar: - printf ("/wordchar"); - break; - - case notwordchar: - printf ("/notwordchar"); - break; - - case begbuf: - printf ("/begbuf"); - break; - - case endbuf: - printf ("/endbuf"); - break; - - default: - printf ("?%d", *(p-1)); - } - - putchar ('\n'); - } - - printf ("%d:\tend of pattern.\n", p - start); -} - - -void -print_compiled_pattern (bufp) - struct re_pattern_buffer *bufp; -{ - unsigned char *buffer = bufp->buffer; - - print_partial_compiled_pattern (buffer, buffer + bufp->used); - printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); - - if (bufp->fastmap_accurate && bufp->fastmap) - { - printf ("fastmap: "); - print_fastmap (bufp->fastmap); - } - - printf ("re_nsub: %d\t", bufp->re_nsub); - printf ("regs_alloc: %d\t", bufp->regs_allocated); - printf ("can_be_null: %d\t", bufp->can_be_null); - printf ("newline_anchor: %d\n", bufp->newline_anchor); - printf ("no_sub: %d\t", bufp->no_sub); - printf ("not_bol: %d\t", bufp->not_bol); - printf ("not_eol: %d\t", bufp->not_eol); - printf ("syntax: %d\n", bufp->syntax); - /* Perhaps we should print the translate table? 
*/ -} - - -void -print_double_string (where, string1, size1, string2, size2) - const char *where; - const char *string1; - const char *string2; - int size1; - int size2; -{ - unsigned this_char; - - if (where == NULL) - printf ("(null)"); - else - { - if (FIRST_STRING_P (where)) - { - for (this_char = where - string1; this_char < size1; this_char++) - printchar (string1[this_char]); - - where = string2; - } - - for (this_char = where - string2; this_char < size2; this_char++) - printchar (string2[this_char]); - } -} - -#else /* not DEBUG */ - -#undef assert -#define assert(e) - -#define DEBUG_STATEMENT(e) -#define DEBUG_PRINT1(x) -#define DEBUG_PRINT2(x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) - -#endif /* not DEBUG */ - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; - return ret; -} - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. 
*/ - -static const char *re_error_msg[] = - { NULL, /* REG_NOERROR */ - "No match", /* REG_NOMATCH */ - "Invalid regular expression", /* REG_BADPAT */ - "Invalid collation character", /* REG_ECOLLATE */ - "Invalid character class name", /* REG_ECTYPE */ - "Trailing backslash", /* REG_EESCAPE */ - "Invalid back reference", /* REG_ESUBREG */ - "Unmatched [ or [^", /* REG_EBRACK */ - "Unmatched ( or \\(", /* REG_EPAREN */ - "Unmatched \\{", /* REG_EBRACE */ - "Invalid content of \\{\\}", /* REG_BADBR */ - "Invalid range end", /* REG_ERANGE */ - "Memory exhausted", /* REG_ESPACE */ - "Invalid preceding regular expression", /* REG_BADRPT */ - "Premature end of regular expression", /* REG_EEND */ - "Regular expression too big", /* REG_ESIZE */ - "Unmatched ) or \\)", /* REG_ERPAREN */ - }; - -/* Avoiding alloca during matching, to placate r_alloc. */ - -/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the - searching and matching functions should not call alloca. On some - systems, alloca is implemented in terms of malloc, and if we're - using the relocating allocator routines, then malloc could cause a - relocation, which might (if the strings being searched are in the - ralloc heap) shift the data out from underneath the regexp - routines. - - Here's another reason to avoid allocation: Emacs - processes input from X in a signal handler; processing X input may - call malloc; if input arrives while a matching routine is calling - malloc, then we're scrod. But Emacs can't just block input while - calling matching routines; then we don't notice interrupts when - they come in. So, Emacs blocks input around all regexp calls - except the matching calls, which it leaves unprotected, in the - faith that they will not malloc. */ - -/* Normally, this is fine. */ -#define MATCH_MAY_ALLOCATE - -/* The match routines may not allocate if (1) they would do it with malloc - and (2) it's not safe for them to use malloc. 
*/ -#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && (defined (emacs) || defined (REL_ALLOC)) -#undef MATCH_MAY_ALLOCATE -#endif - - -/* Failure stack declarations and macros; both re_compile_fastmap and - re_match_2 use a failure stack. These have to be macros because of - REGEX_ALLOCATE. */ - - -/* Number of failure points for which to initially allocate space - when matching. If this number is exceeded, we allocate more - space, so it is not a hard limit. */ -#ifndef INIT_FAILURE_ALLOC -#define INIT_FAILURE_ALLOC 5 -#endif - -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_SPACE each time we failed. - This is a variable only so users of regex can assign to it; we never - change it ourselves. */ -int re_max_failures = 2000; - -typedef unsigned char *fail_stack_elt_t; - -typedef struct -{ - fail_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} fail_stack_type; - -#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) -#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) -#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) -#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) - - -/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */ - -#ifdef MATCH_MAY_ALLOCATE -#define INIT_FAIL_STACK() \ - do { \ - fail_stack.stack = (fail_stack_elt_t *) \ - REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ - \ - if (fail_stack.stack == NULL) \ - return -2; \ - \ - fail_stack.size = INIT_FAILURE_ALLOC; \ - fail_stack.avail = 0; \ - } while (0) -#else -#define INIT_FAIL_STACK() \ - do { \ - fail_stack.avail = 0; \ - } while (0) -#endif - - -/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. - - Return 1 if succeeds, and 0 if either ran out of memory - allocating space for it or it was already too large. - - REGEX_REALLOCATE requires `destination' be declared. 
*/ - -#define DOUBLE_FAIL_STACK(fail_stack) \ - ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ - ? 0 \ - : ((fail_stack).stack = (fail_stack_elt_t *) \ - REGEX_REALLOCATE ((fail_stack).stack, \ - (fail_stack).size * sizeof (fail_stack_elt_t), \ - ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ - \ - (fail_stack).stack == NULL \ - ? 0 \ - : ((fail_stack).size <<= 1, \ - 1))) - - -/* Push PATTERN_OP on FAIL_STACK. - - Return 1 if was able to do so and 0 if ran out of memory allocating - space to do so. */ -#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ - ((FAIL_STACK_FULL () \ - && !DOUBLE_FAIL_STACK (fail_stack)) \ - ? 0 \ - : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ - 1)) - -/* This pushes an item onto the failure stack. Must be a four-byte - value. Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_ITEM(item) \ - fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item - -/* The complement operation. Assumes `fail_stack' is nonempty. */ -#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail] - -/* Used to omit pushing failure point id's when we're not debugging. */ -#ifdef DEBUG -#define DEBUG_PUSH PUSH_FAILURE_ITEM -#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM () -#else -#define DEBUG_PUSH(item) -#define DEBUG_POP(item_addr) -#endif - - -/* Push the information about the state we will need - if we ever fail back to it. - - Requires variables fail_stack, regstart, regend, reg_info, and - num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be - declared. - - Does `return FAILURE_CODE' if runs out of memory. */ - -#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ - do { \ - char *destination; \ - /* Must be int, so when we don't save any registers, the arithmetic \ - of 0 + -1 isn't done as unsigned. 
*/ \ - int this_reg; \ - \ - DEBUG_STATEMENT (failure_id++); \ - DEBUG_STATEMENT (nfailure_points_pushed++); \ - DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ - DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ - DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ - \ - DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ - DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ - \ - /* Ensure we have enough space allocated for what we will push. */ \ - while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ - { \ - if (!DOUBLE_FAIL_STACK (fail_stack)) \ - return failure_code; \ - \ - DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ - (fail_stack).size); \ - DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ - } \ - \ - /* Push the info, starting with the registers. */ \ - DEBUG_PRINT1 ("\n"); \ - \ - for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ - this_reg++) \ - { \ - DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ - DEBUG_STATEMENT (num_regs_pushed++); \ - \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - PUSH_FAILURE_ITEM (regstart[this_reg]); \ - \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - PUSH_FAILURE_ITEM (regend[this_reg]); \ - \ - DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ - DEBUG_PRINT2 (" match_null=%d", \ - REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ - DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ - DEBUG_PRINT2 (" matched_something=%d", \ - MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT2 (" ever_matched=%d", \ - EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT1 ("\n"); \ - PUSH_FAILURE_ITEM (reg_info[this_reg].word); \ - } \ - \ - DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ - PUSH_FAILURE_ITEM (lowest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ - PUSH_FAILURE_ITEM (highest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing pattern 
0x%x: ", pattern_place); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ - PUSH_FAILURE_ITEM (pattern_place); \ - \ - DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ - DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ - size2); \ - DEBUG_PRINT1 ("'\n"); \ - PUSH_FAILURE_ITEM (string_place); \ - \ - DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ - DEBUG_PUSH (failure_id); \ - } while (0) - -/* This is the number of items that are pushed and popped on the stack - for each register. */ -#define NUM_REG_ITEMS 3 - -/* Individual items aside from the registers. */ -#ifdef DEBUG -#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ -#else -#define NUM_NONREG_ITEMS 4 -#endif - -/* We push at most this many items on the stack. */ -#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS) - -/* We actually push this many items. */ -#define NUM_FAILURE_ITEMS \ - ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ - + NUM_NONREG_ITEMS) - -/* How many items can still be added to the stack without overflowing it. */ -#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) - - -/* Pops what PUSH_FAIL_STACK pushes. - - We restore into the parameters, all of which should be lvalues: - STR -- the saved data position. - PAT -- the saved pattern position. - LOW_REG, HIGH_REG -- the highest and lowest active registers. - REGSTART, REGEND -- arrays of string positions. - REG_INFO -- array of information about each subexpression. - - Also assumes the variables `fail_stack' and (if debugging), `bufp', - `pend', `string1', `size1', `string2', and `size2'. */ - -#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ -{ \ - DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ - int this_reg; \ - const unsigned char *string_temp; \ - \ - assert (!FAIL_STACK_EMPTY ()); \ - \ - /* Remove failure points and point to how many regs pushed. 
*/ \ - DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ - DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ - \ - assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ - \ - DEBUG_POP (&failure_id); \ - DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ - \ - /* If the saved string location is NULL, it came from an \ - on_failure_keep_string_jump opcode, and we want to throw away the \ - saved NULL, thus retaining our current position in the string. */ \ - string_temp = POP_FAILURE_ITEM (); \ - if (string_temp != NULL) \ - str = (const char *) string_temp; \ - \ - DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ - DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ - DEBUG_PRINT1 ("'\n"); \ - \ - pat = (unsigned char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ - \ - /* Restore register info. */ \ - high_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ - \ - low_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ - \ - for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ - { \ - DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ - \ - reg_info[this_reg].word = POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ - \ - regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - \ - regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - } \ - \ - DEBUG_STATEMENT (nfailure_points_popped++); \ -} /* POP_FAILURE_POINT */ - - - -/* Structure for per-register (a.k.a. per-group) information. - This must not be longer than one word, because we push this value - onto the failure stack. 
Other register information, such as the - starting and ending positions (which are addresses), and the list of - inner groups (which is a bits list) are maintained in separate - variables. - - We are making a (strictly speaking) nonportable assumption here: that - the compiler will pack our bit fields into something that fits into - the type of `word', i.e., is something that fits into one item on the - failure stack. */ -typedef union -{ - fail_stack_elt_t word; - struct - { - /* This field is one if this group can match the empty string, - zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ -#define MATCH_NULL_UNSET_VALUE 3 - unsigned match_null_string_p : 2; - unsigned is_active : 1; - unsigned matched_something : 1; - unsigned ever_matched_something : 1; - } bits; -} register_info_type; - -#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) -#define IS_ACTIVE(R) ((R).bits.is_active) -#define MATCHED_SOMETHING(R) ((R).bits.matched_something) -#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) - - -/* Call this when have matched a real character; it sets `matched' flags - for the subexpressions which we are currently inside. Also records - that those subexprs have matched. */ -#define SET_REGS_MATCHED() \ - do \ - { \ - unsigned r; \ - for (r = lowest_active_reg; r <= highest_active_reg; r++) \ - { \ - MATCHED_SOMETHING (reg_info[r]) \ - = EVER_MATCHED_SOMETHING (reg_info[r]) \ - = 1; \ - } \ - } \ - while (0) - - -/* Registers are set to a sentinel when they haven't yet matched. */ -#define REG_UNSET_VALUE ((char *) -1) -#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) - - - -/* How do we implement a missing MATCH_MAY_ALLOCATE? - We make the fail stack a global thing, and then grow it to - re_max_failures when we compile. 
*/ -#ifndef MATCH_MAY_ALLOCATE -static fail_stack_type fail_stack; - -static const char ** regstart, ** regend; -static const char ** old_regstart, ** old_regend; -static const char **best_regstart, **best_regend; -static register_info_type *reg_info; -static const char **reg_dummy; -static register_info_type *reg_info_dummy; -#endif - - -/* Subroutine declarations and macros for regex_compile. */ - -static void store_op1 (), store_op2 (); -static void insert_op1 (), insert_op2 (); -static boolean at_begline_loc_p (), at_endline_loc_p (); -static boolean group_in_compile_stack (); -static reg_errcode_t compile_range (); - -/* Fetch the next character in the uncompiled pattern---translating it - if necessary. Also cast from a signed character in the constant - string passed to us by the user to an unsigned char that we can use - as an array index (in, e.g., `translate'). */ -#define PATFETCH(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - if (translate) c = translate[c]; \ - } while (0) - -/* Fetch the next character in the uncompiled pattern, with no - translation. */ -#define PATFETCH_RAW(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - } while (0) - -/* Go backwards one character in the pattern. */ -#define PATUNFETCH p-- - - -/* If `translate' is non-null, return translate[D], else just D. We - cast the subscript to translate because some data is declared as - `char *', to avoid warnings when a string constant is passed. But - when we use a character as a subscript we must make it unsigned. */ -#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) - - -/* Macros for outputting the compiled pattern into `buffer'. */ - -/* If the buffer isn't allocated when it comes in, use this. */ -#define INIT_BUF_SIZE 32 - -/* Make sure we have at least N more bytes of space in buffer. 
*/ -#define GET_BUFFER_SPACE(n) \ - while (b - bufp->buffer + (n) > bufp->allocated) \ - EXTEND_BUFFER () - -/* Make sure we have one more byte of buffer space and then add C to it. */ -#define BUF_PUSH(c) \ - do { \ - GET_BUFFER_SPACE (1); \ - *b++ = (unsigned char) (c); \ - } while (0) - - -/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ -#define BUF_PUSH_2(c1, c2) \ - do { \ - GET_BUFFER_SPACE (2); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - } while (0) - - -/* As with BUF_PUSH_2, except for three bytes. */ -#define BUF_PUSH_3(c1, c2, c3) \ - do { \ - GET_BUFFER_SPACE (3); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - *b++ = (unsigned char) (c3); \ - } while (0) - - -/* Store a jump with opcode OP at LOC to location TO. We store a - relative address offset by the three bytes the jump itself occupies. */ -#define STORE_JUMP(op, loc, to) \ - store_op1 (op, loc, (to) - (loc) - 3) - -/* Likewise, for a two-argument jump. */ -#define STORE_JUMP2(op, loc, to, arg) \ - store_op2 (op, loc, (to) - (loc) - 3, arg) - -/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP(op, loc, to) \ - insert_op1 (op, loc, (to) - (loc) - 3, b) - -/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP2(op, loc, to, arg) \ - insert_op2 (op, loc, (to) - (loc) - 3, arg, b) - - -/* This is not an arbitrary limit: the arguments which represent offsets - into the pattern are two bytes long. So if 2^16 bytes turns out to - be too small, many things would have to change. */ -#define MAX_BUF_SIZE (1L << 16) - - -/* Extend the buffer by twice its current size via realloc and - reset the pointers that pointed into the old block to point to the - correct places in the new one. If extending the buffer results in it - being larger than MAX_BUF_SIZE, then flag memory exhausted. 
*/ -#define EXTEND_BUFFER() \ - do { \ - unsigned char *old_buffer = bufp->buffer; \ - if (bufp->allocated == MAX_BUF_SIZE) \ - return REG_ESIZE; \ - bufp->allocated <<= 1; \ - if (bufp->allocated > MAX_BUF_SIZE) \ - bufp->allocated = MAX_BUF_SIZE; \ - bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ - if (bufp->buffer == NULL) \ - return REG_ESPACE; \ - /* If the buffer moved, move all the pointers into it. */ \ - if (old_buffer != bufp->buffer) \ - { \ - b = (b - old_buffer) + bufp->buffer; \ - begalt = (begalt - old_buffer) + bufp->buffer; \ - if (fixup_alt_jump) \ - fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ - if (laststart) \ - laststart = (laststart - old_buffer) + bufp->buffer; \ - if (pending_exact) \ - pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ - } \ - } while (0) - - -/* Since we have one byte reserved for the register number argument to - {start,stop}_memory, the maximum number of groups we can report - things about is what fits in that byte. */ -#define MAX_REGNUM 255 - -/* But patterns can have more than `MAX_REGNUM' registers. We just - ignore the excess. */ -typedef unsigned regnum_t; - - -/* Macros for the compile stack. */ - -/* Since offsets can go either forwards or backwards, this type needs to - be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ -typedef int pattern_offset_t; - -typedef struct -{ - pattern_offset_t begalt_offset; - pattern_offset_t fixup_alt_jump; - pattern_offset_t inner_group_offset; - pattern_offset_t laststart_offset; - regnum_t regnum; -} compile_stack_elt_t; - - -typedef struct -{ - compile_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} compile_stack_type; - - -#define INIT_COMPILE_STACK_SIZE 32 - -#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) -#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) - -/* The next available element. 
*/ -#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) - - -/* Set the bit for character C in a list. */ -#define SET_LIST_BIT(c) \ - (b[((unsigned char) (c)) / BYTEWIDTH] \ - |= 1 << (((unsigned char) c) % BYTEWIDTH)) - - -/* Get the next unsigned number in the uncompiled pattern. */ -#define GET_UNSIGNED_NUMBER(num) \ - { if (p != pend) \ - { \ - PATFETCH (c); \ - while (ISDIGIT (c)) \ - { \ - if (num < 0) \ - num = 0; \ - num = num * 10 + c - '0'; \ - if (p == pend) \ - break; \ - PATFETCH (c); \ - } \ - } \ - } - -#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ - -#define IS_CHAR_CLASS(string) \ - (STREQ (string, "alpha") || STREQ (string, "upper") \ - || STREQ (string, "lower") || STREQ (string, "digit") \ - || STREQ (string, "alnum") || STREQ (string, "xdigit") \ - || STREQ (string, "space") || STREQ (string, "print") \ - || STREQ (string, "punct") || STREQ (string, "graph") \ - || STREQ (string, "cntrl") || STREQ (string, "blank")) - -/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. - Returns one of error codes defined in `regex.h', or zero for success. - - Assumes the `allocated' (and perhaps `buffer') and `translate' - fields are set in BUFP on entry. - - If it succeeds, results are put in BUFP (if it returns an error, the - contents of BUFP are undefined): - `buffer' is the compiled pattern; - `syntax' is set to SYNTAX; - `used' is set to the length of the compiled pattern; - `fastmap_accurate' is zero; - `re_nsub' is the number of subexpressions in PATTERN; - `not_bol' and `not_eol' are zero; - - The `fastmap' and `newline_anchor' fields are neither - examined nor set. */ - -/* Return, freeing storage we allocated. */ -#define FREE_STACK_RETURN(value) \ - return (free (compile_stack.stack), value) - -static reg_errcode_t -regex_compile (pattern, size, syntax, bufp) - const char *pattern; - int size; - reg_syntax_t syntax; - struct re_pattern_buffer *bufp; -{ - /* We fetch characters from PATTERN here. 
Even though PATTERN is - `char *' (i.e., signed), we declare these variables as unsigned, so - they can be reliably used as array indices. */ - register unsigned char c, c1; - - /* A random temporary spot in PATTERN. */ - const char *p1; - - /* Points to the end of the buffer, where we should append. */ - register unsigned char *b; - - /* Keeps track of unclosed groups. */ - compile_stack_type compile_stack; - - /* Points to the current (ending) position in the pattern. */ - const char *p = pattern; - const char *pend = pattern + size; - - /* How to translate the characters in the pattern. */ - char *translate = bufp->translate; - - /* Address of the count-byte of the most recently inserted `exactn' - command. This makes it possible to tell if a new exact-match - character can be added to that command or if the character requires - a new `exactn' command. */ - unsigned char *pending_exact = 0; - - /* Address of start of the most recently finished expression. - This tells, e.g., postfix * where to find the start of its - operand. Reset at the beginning of groups and alternatives. */ - unsigned char *laststart = 0; - - /* Address of beginning of regexp, or inside of last group. */ - unsigned char *begalt; - - /* Place in the uncompiled pattern (i.e., the {) to - which to go back if the interval is invalid. */ - const char *beg_interval; - - /* Address of the place where a forward jump should go to the end of - the containing expression. Each alternative of an `or' -- except the - last -- ends with a forward jump of this sort. */ - unsigned char *fixup_alt_jump = 0; - - /* Counts open-groups as they are encountered. Remembered for the - matching close-group on the compile stack, so the same register - number is put in the stop_memory as the start_memory. 
*/ - regnum_t regnum = 0; - -#ifdef DEBUG - DEBUG_PRINT1 ("\nCompiling pattern: "); - if (debug) - { - unsigned debug_count; - - for (debug_count = 0; debug_count < size; debug_count++) - printchar (pattern[debug_count]); - putchar ('\n'); - } -#endif /* DEBUG */ - - /* Initialize the compile stack. */ - compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); - if (compile_stack.stack == NULL) - return REG_ESPACE; - - compile_stack.size = INIT_COMPILE_STACK_SIZE; - compile_stack.avail = 0; - - /* Initialize the pattern buffer. */ - bufp->syntax = syntax; - bufp->fastmap_accurate = 0; - bufp->not_bol = bufp->not_eol = 0; - - /* Set `used' to zero, so that if we return an error, the pattern - printer (for debugging) will think there's no pattern. We reset it - at the end. */ - bufp->used = 0; - - /* Always count groups, whether or not bufp->no_sub is set. */ - bufp->re_nsub = 0; - -#if !defined (emacs) && !defined (SYNTAX_TABLE) - /* Initialize the syntax table. */ - init_syntax_once (); -#endif - - if (bufp->allocated == 0) - { - if (bufp->buffer) - { /* If zero allocated, but buffer is non-null, try to realloc - enough space. This loses if buffer's address is bogus, but - that is the user's responsibility. */ - RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); - } - else - { /* Caller did not allocate a buffer. Do it for them. */ - bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); - } - if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); - - bufp->allocated = INIT_BUF_SIZE; - } - - begalt = b = bufp->buffer; - - /* Loop through the uncompiled pattern until we're at the end. */ - while (p != pend) - { - PATFETCH (c); - - switch (c) - { - case '^': - { - if ( /* If at start of pattern, it's an operator. */ - p == pattern + 1 - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's come before. 
*/ - || at_begline_loc_p (pattern, p, syntax)) - BUF_PUSH (begline); - else - goto normal_char; - } - break; - - - case '$': - { - if ( /* If at end of pattern, it's an operator. */ - p == pend - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's next. */ - || at_endline_loc_p (p, pend, syntax)) - BUF_PUSH (endline); - else - goto normal_char; - } - break; - - - case '+': - case '?': - if ((syntax & RE_BK_PLUS_QM) - || (syntax & RE_LIMITED_OPS)) - goto normal_char; - handle_plus: - case '*': - /* If there is no previous pattern... */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - FREE_STACK_RETURN (REG_BADRPT); - else if (!(syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - } - - { - /* Are we optimizing this jump? */ - boolean keep_string_p = false; - - /* 1 means zero (many) matches is allowed. */ - char zero_times_ok = 0, many_times_ok = 0; - - /* If there is a sequence of repetition chars, collapse it - down to just one (the right one). We can't combine - interval operators with these because of, e.g., `a{2}*', - which should only match an even number of `a's. */ - - for (;;) - { - zero_times_ok |= c != '+'; - many_times_ok |= c != '?'; - - if (p == pend) - break; - - PATFETCH (c); - - if (c == '*' - || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) - ; - - else if (syntax & RE_BK_PLUS_QM && c == '\\') - { - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - PATFETCH (c1); - if (!(c1 == '+' || c1 == '?')) - { - PATUNFETCH; - PATUNFETCH; - break; - } - - c = c1; - } - else - { - PATUNFETCH; - break; - } - - /* If we get here, we found another repeat character. */ - } - - /* Star, etc. applied to an empty pattern is equivalent - to an empty pattern. */ - if (!laststart) - break; - - /* Now we know whether or not zero matches is allowed - and also whether or not two or more matches is allowed. 
*/ - if (many_times_ok) - { /* More than one repetition is allowed, so put in at the - end a backward relative jump from `b' to before the next - jump we're going to put in below (which jumps from - laststart to after this jump). - - But if we are at the `*' in the exact sequence `.*\n', - insert an unconditional jump backwards to the ., - instead of the beginning of the loop. This way we only - push a failure point once, instead of every time - through the loop. */ - assert (p - 1 > pattern); - - /* Allocate the space for the jump. */ - GET_BUFFER_SPACE (3); - - /* We know we are not at the first character of the pattern, - because laststart was nonzero. And we've already - incremented `p', by the way, to be the character after - the `*'. Do we have to do something analogous here - for null bytes, because of RE_DOT_NOT_NULL? */ - if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') - && zero_times_ok - && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') - && !(syntax & RE_DOT_NEWLINE)) - { /* We have .*\n. */ - STORE_JUMP (jump, b, laststart); - keep_string_p = true; - } - else - /* Anything else. */ - STORE_JUMP (maybe_pop_jump, b, laststart - 3); - - /* We've added more stuff to the buffer. */ - b += 3; - } - - /* On failure, jump from laststart to b + 3, which will be the - end of the buffer after this jump is inserted. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump - : on_failure_jump, - laststart, b + 3); - pending_exact = 0; - b += 3; - - if (!zero_times_ok) - { - /* At least one repetition is required, so insert a - `dummy_failure_jump' before the initial - `on_failure_jump' instruction of the loop. This - effects a skip over that instruction the first time - we hit that loop. 
*/ - GET_BUFFER_SPACE (3); - INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); - b += 3; - } - } - break; - - - case '.': - laststart = b; - BUF_PUSH (anychar); - break; - - - case '[': - { - boolean had_char_class = false; - - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - /* Ensure that we have enough space to push a charset: the - opcode, the length count, and the bitset; 34 bytes in all. */ - GET_BUFFER_SPACE (34); - - laststart = b; - - /* We test `*p == '^' twice, instead of using an if - statement, so we only need one BUF_PUSH. */ - BUF_PUSH (*p == '^' ? charset_not : charset); - if (*p == '^') - p++; - - /* Remember the first position in the bracket expression. */ - p1 = p; - - /* Push the number of bytes in the bitmap. */ - BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); - - /* Clear the whole map. */ - bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); - - /* charset_not matches newline according to a syntax bit. */ - if ((re_opcode_t) b[-2] == charset_not - && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) - SET_LIST_BIT ('\n'); - - /* Read in characters and ranges, setting map bits. */ - for (;;) - { - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - PATFETCH (c); - - /* \ might escape characters inside [...] and [^...]. */ - if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') - { - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - PATFETCH (c1); - SET_LIST_BIT (c1); - continue; - } - - /* Could be the end of the bracket expression. If it's - not (i.e., when the bracket expression is `[]' so - far), the ']' character bit gets set way below. */ - if (c == ']' && p != p1 + 1) - break; - - /* Look ahead to see if it's a range when the last thing - was a character class. */ - if (had_char_class && c == '-' && *p != ']') - FREE_STACK_RETURN (REG_ERANGE); - - /* Look ahead to see if it's a range when the last thing - was a character: if this is a hyphen not at the - beginning or the end of a list, then it's the range - operator. 
*/ - if (c == '-' - && !(p - 2 >= pattern && p[-2] == '[') - && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') - && *p != ']') - { - reg_errcode_t ret - = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); - } - - else if (p[0] == '-' && p[1] != ']') - { /* This handles ranges made up of characters only. */ - reg_errcode_t ret; - - /* Move past the `-'. */ - PATFETCH (c1); - - ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); - } - - /* See if we're at the beginning of a possible character - class. */ - - else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') - { /* Leave room for the null. */ - char str[CHAR_CLASS_MAX_LENGTH + 1]; - - PATFETCH (c); - c1 = 0; - - /* If pattern is `[[:'. */ - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - for (;;) - { - PATFETCH (c); - if (c == ':' || c == ']' || p == pend - || c1 == CHAR_CLASS_MAX_LENGTH) - break; - str[c1++] = c; - } - str[c1] = '\0'; - - /* If isn't a word bracketed by `[:' and:`]': - undo the ending character, the letters, and leave - the leading `:' and `[' (but set bits for them). */ - if (c == ':' && *p == ']') - { - int ch; - boolean is_alnum = STREQ (str, "alnum"); - boolean is_alpha = STREQ (str, "alpha"); - boolean is_blank = STREQ (str, "blank"); - boolean is_cntrl = STREQ (str, "cntrl"); - boolean is_digit = STREQ (str, "digit"); - boolean is_graph = STREQ (str, "graph"); - boolean is_lower = STREQ (str, "lower"); - boolean is_print = STREQ (str, "print"); - boolean is_punct = STREQ (str, "punct"); - boolean is_space = STREQ (str, "space"); - boolean is_upper = STREQ (str, "upper"); - boolean is_xdigit = STREQ (str, "xdigit"); - - if (!IS_CHAR_CLASS (str)) - FREE_STACK_RETURN (REG_ECTYPE); - - /* Throw away the ] at the end of the character - class. 
*/ - PATFETCH (c); - - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - for (ch = 0; ch < 1 << BYTEWIDTH; ch++) - { - /* This was split into 3 if's to - avoid an arbitrary limit in some compiler. */ - if ( (is_alnum && ISALNUM (ch)) - || (is_alpha && ISALPHA (ch)) - || (is_blank && ISBLANK (ch)) - || (is_cntrl && ISCNTRL (ch))) - SET_LIST_BIT (ch); - if ( (is_digit && ISDIGIT (ch)) - || (is_graph && ISGRAPH (ch)) - || (is_lower && ISLOWER (ch)) - || (is_print && ISPRINT (ch))) - SET_LIST_BIT (ch); - if ( (is_punct && ISPUNCT (ch)) - || (is_space && ISSPACE (ch)) - || (is_upper && ISUPPER (ch)) - || (is_xdigit && ISXDIGIT (ch))) - SET_LIST_BIT (ch); - } - had_char_class = true; - } - else - { - c1++; - while (c1--) - PATUNFETCH; - SET_LIST_BIT ('['); - SET_LIST_BIT (':'); - had_char_class = false; - } - } - else - { - had_char_class = false; - SET_LIST_BIT (c); - } - } - - /* Discard any (non)matching list bytes that are all 0 at the - end of the map. Decrease the map-length byte too. */ - while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) - b[-1]--; - b += b[-1]; - } - break; - - - case '(': - if (syntax & RE_NO_BK_PARENS) - goto handle_open; - else - goto normal_char; - - - case ')': - if (syntax & RE_NO_BK_PARENS) - goto handle_close; - else - goto normal_char; - - - case '\n': - if (syntax & RE_NEWLINE_ALT) - goto handle_alt; - else - goto normal_char; - - - case '|': - if (syntax & RE_NO_BK_VBAR) - goto handle_alt; - else - goto normal_char; - - - case '{': - if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) - goto handle_interval; - else - goto normal_char; - - - case '\\': - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - /* Do not translate the character after the \, so that we can - distinguish, e.g., \B from \b, even if we normally would - translate, e.g., B to b. 
*/ - PATFETCH_RAW (c); - - switch (c) - { - case '(': - if (syntax & RE_NO_BK_PARENS) - goto normal_backslash; - - handle_open: - bufp->re_nsub++; - regnum++; - - if (COMPILE_STACK_FULL) - { - RETALLOC (compile_stack.stack, compile_stack.size << 1, - compile_stack_elt_t); - if (compile_stack.stack == NULL) return REG_ESPACE; - - compile_stack.size <<= 1; - } - - /* These are the values to restore when we hit end of this - group. They are all relative offsets, so that if the - whole pattern moves because of realloc, they will still - be valid. */ - COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; - COMPILE_STACK_TOP.fixup_alt_jump - = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; - COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; - COMPILE_STACK_TOP.regnum = regnum; - - /* We will eventually replace the 0 with the number of - groups inner to this one. But do not push a - start_memory for groups beyond the last one we can - represent in the compiled pattern. */ - if (regnum <= MAX_REGNUM) - { - COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; - BUF_PUSH_3 (start_memory, regnum, 0); - } - - compile_stack.avail++; - - fixup_alt_jump = 0; - laststart = 0; - begalt = b; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. */ - pending_exact = 0; - break; - - - case ')': - if (syntax & RE_NO_BK_PARENS) goto normal_backslash; - - if (COMPILE_STACK_EMPTY) - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_backslash; - else - FREE_STACK_RETURN (REG_ERPAREN); - - handle_close: - if (fixup_alt_jump) - { /* Push a dummy failure point at the end of the - alternative for a possible future - `pop_failure_jump' to pop. See comments at - `push_dummy_failure' in `re_match_2'. */ - BUF_PUSH (push_dummy_failure); - - /* We allocated space for this jump when we assigned - to `fixup_alt_jump', in the `handle_alt' case below. 
*/ - STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); - } - - /* See similar code for backslashed left paren above. */ - if (COMPILE_STACK_EMPTY) - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - else - FREE_STACK_RETURN (REG_ERPAREN); - - /* Since we just checked for an empty stack above, this - ``can't happen''. */ - assert (compile_stack.avail != 0); - { - /* We don't just want to restore into `regnum', because - later groups should continue to be numbered higher, - as in `(ab)c(de)' -- the second group is #2. */ - regnum_t this_group_regnum; - - compile_stack.avail--; - begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; - fixup_alt_jump - = COMPILE_STACK_TOP.fixup_alt_jump - ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 - : 0; - laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; - this_group_regnum = COMPILE_STACK_TOP.regnum; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. */ - pending_exact = 0; - - /* We're at the end of the group, so now we know how many - groups were inside this one. */ - if (this_group_regnum <= MAX_REGNUM) - { - unsigned char *inner_group_loc - = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; - - *inner_group_loc = regnum - this_group_regnum; - BUF_PUSH_3 (stop_memory, this_group_regnum, - regnum - this_group_regnum); - } - } - break; - - - case '|': /* `\|'. */ - if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) - goto normal_backslash; - handle_alt: - if (syntax & RE_LIMITED_OPS) - goto normal_char; - - /* Insert before the previous alternative a jump which - jumps to this alternative if the former fails. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (on_failure_jump, begalt, b + 6); - pending_exact = 0; - b += 3; - - /* The alternative before this one has a jump after it - which gets executed if it gets matched. 
Adjust that - jump so it will jump to this alternative's analogous - jump (put in below, which in turn will jump to the next - (if any) alternative's such jump, etc.). The last such - jump jumps to the correct final destination. A picture: - _____ _____ - | | | | - | v | v - a | b | c - - If we are at `b', then fixup_alt_jump right now points to a - three-byte space after `a'. We'll put in the jump, set - fixup_alt_jump to right after `b', and leave behind three - bytes which we'll fill in when we get to after `c'. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - /* Mark and leave space for a jump after this alternative, - to be filled in later either by next alternative or - when know we're at the end of a series of alternatives. */ - fixup_alt_jump = b; - GET_BUFFER_SPACE (3); - b += 3; - - laststart = 0; - begalt = b; - break; - - - case '{': - /* If \{ is a literal. */ - if (!(syntax & RE_INTERVALS) - /* If we're at `\{' and it's not the open-interval - operator. */ - || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - || (p - 2 == pattern && p == pend)) - goto normal_backslash; - - handle_interval: - { - /* If got here, then the syntax allows intervals. */ - - /* At least (most) this many matches must be made. */ - int lower_bound = -1, upper_bound = -1; - - beg_interval = p - 1; - - if (p == pend) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - FREE_STACK_RETURN (REG_EBRACE); - } - - GET_UNSIGNED_NUMBER (lower_bound); - - if (c == ',') - { - GET_UNSIGNED_NUMBER (upper_bound); - if (upper_bound < 0) upper_bound = RE_DUP_MAX; - } - else - /* Interval such as `{1}' => match exactly once. 
*/ - upper_bound = lower_bound; - - if (lower_bound < 0 || upper_bound > RE_DUP_MAX - || lower_bound > upper_bound) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - FREE_STACK_RETURN (REG_BADBR); - } - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); - - PATFETCH (c); - } - - if (c != '}') - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - FREE_STACK_RETURN (REG_BADBR); - } - - /* We just parsed a valid interval. */ - - /* If it's invalid to have no preceding re. */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - FREE_STACK_RETURN (REG_BADRPT); - else if (syntax & RE_CONTEXT_INDEP_OPS) - laststart = b; - else - goto unfetch_interval; - } - - /* If the upper bound is zero, don't want to succeed at - all; jump from `laststart' to `b + 3', which will be - the end of the buffer after we insert the jump. */ - if (upper_bound == 0) - { - GET_BUFFER_SPACE (3); - INSERT_JUMP (jump, laststart, b + 3); - b += 3; - } - - /* Otherwise, we have a nontrivial interval. When - we're all done, the pattern will look like: - set_number_at <jump count> <upper bound> - set_number_at <succeed_n count> <lower bound> - succeed_n <after jump addr> <succeed_n count> - <body of loop> - jump_n <succeed_n addr> <jump count> - (The upper bound and `jump_n' are omitted if - `upper_bound' is 1, though.) */ - else - { /* If the upper bound is > 1, we need to insert - more at the end of the loop. */ - unsigned nbytes = 10 + (upper_bound > 1) * 10; - - GET_BUFFER_SPACE (nbytes); - - /* Initialize lower bound of the `succeed_n', even - though it will be set during matching by its - attendant `set_number_at' (inserted next), - because `re_compile_fastmap' needs to know. - Jump to the `jump_n' we might insert below. */ - INSERT_JUMP2 (succeed_n, laststart, - b + 5 + (upper_bound > 1) * 5, - lower_bound); - b += 5; - - /* Code to initialize the lower bound. Insert - before the `succeed_n'. 
The `5' is the last two - bytes of this `set_number_at', plus 3 bytes of - the following `succeed_n'. */ - insert_op2 (set_number_at, laststart, 5, lower_bound, b); - b += 5; - - if (upper_bound > 1) - { /* More than one repetition is allowed, so - append a backward jump to the `succeed_n' - that starts this interval. - - When we've reached this during matching, - we'll have matched the interval once, so - jump back only `upper_bound - 1' times. */ - STORE_JUMP2 (jump_n, b, laststart + 5, - upper_bound - 1); - b += 5; - - /* The location we want to set is the second - parameter of the `jump_n'; that is `b-2' as - an absolute address. `laststart' will be - the `set_number_at' we're about to insert; - `laststart+3' the number to set, the source - for the relative address. But we are - inserting into the middle of the pattern -- - so everything is getting moved up by 5. - Conclusion: (b - 2) - (laststart + 3) + 5, - i.e., b - laststart. - - We insert this at the beginning of the loop - so that if we fail during matching, we'll - reinitialize the bounds. */ - insert_op2 (set_number_at, laststart, b - laststart, - upper_bound - 1, b); - b += 5; - } - } - pending_exact = 0; - beg_interval = NULL; - } - break; - - unfetch_interval: - /* If an invalid interval, match the characters as literals. */ - assert (beg_interval); - p = beg_interval; - beg_interval = NULL; - - /* normal_char and normal_backslash need `c'. */ - PATFETCH (c); - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (p > pattern && p[-1] == '\\') - goto normal_backslash; - } - goto normal_char; - -#ifdef emacs - /* There is no way to specify the before_dot and after_dot - operators. rms says this is ok. 
--karl */ - case '=': - BUF_PUSH (at_dot); - break; - - case 's': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); - break; - - case 'S': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); - break; -#endif /* emacs */ - - - case 'w': - laststart = b; - BUF_PUSH (wordchar); - break; - - - case 'W': - laststart = b; - BUF_PUSH (notwordchar); - break; - - - case '<': - BUF_PUSH (wordbeg); - break; - - case '>': - BUF_PUSH (wordend); - break; - - case 'b': - BUF_PUSH (wordbound); - break; - - case 'B': - BUF_PUSH (notwordbound); - break; - - case '`': - BUF_PUSH (begbuf); - break; - - case '\'': - BUF_PUSH (endbuf); - break; - - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - if (syntax & RE_NO_BK_REFS) - goto normal_char; - - c1 = c - '0'; - - if (c1 > regnum) - FREE_STACK_RETURN (REG_ESUBREG); - - /* Can't back reference to a subexpression if inside of it. */ - if (group_in_compile_stack (compile_stack, c1)) - goto normal_char; - - laststart = b; - BUF_PUSH_2 (duplicate, c1); - break; - - - case '+': - case '?': - if (syntax & RE_BK_PLUS_QM) - goto handle_plus; - else - goto normal_backslash; - - default: - normal_backslash: - /* You might think it would be useful for \ to mean - not to translate; but if we don't translate it - it will never match anything. */ - c = TRANSLATE (c); - goto normal_char; - } - break; - - - default: - /* Expects the character in `c'. */ - normal_char: - /* If no exactn currently being built. */ - if (!pending_exact - - /* If last exactn not at current position. */ - || pending_exact + *pending_exact + 1 != b - - /* We have only one byte following the exactn for the count. */ - || *pending_exact == (1 << BYTEWIDTH) - 1 - - /* If followed by a repetition operator. */ - || *p == '*' || *p == '^' - || ((syntax & RE_BK_PLUS_QM) - ? 
*p == '\\' && (p[1] == '+' || p[1] == '?') - : (*p == '+' || *p == '?')) - || ((syntax & RE_INTERVALS) - && ((syntax & RE_NO_BK_BRACES) - ? *p == '{' - : (p[0] == '\\' && p[1] == '{')))) - { - /* Start building a new exactn. */ - - laststart = b; - - BUF_PUSH_2 (exactn, 0); - pending_exact = b - 1; - } - - BUF_PUSH (c); - (*pending_exact)++; - break; - } /* switch (c) */ - } /* while p != pend */ - - - /* Through the pattern now. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - if (!COMPILE_STACK_EMPTY) - FREE_STACK_RETURN (REG_EPAREN); - - free (compile_stack.stack); - - /* We have succeeded; set the length of the buffer. */ - bufp->used = b - bufp->buffer; - -#ifdef DEBUG - if (debug) - { - DEBUG_PRINT1 ("\nCompiled pattern: \n"); - print_compiled_pattern (bufp); - } -#endif /* DEBUG */ - -#ifndef MATCH_MAY_ALLOCATE - /* Initialize the failure stack to the largest possible stack. This - isn't necessary unless we're trying to avoid calling alloca in - the search and match routines. */ - { - int num_regs = bufp->re_nsub + 1; - - /* Since DOUBLE_FAIL_STACK refuses to double only if the current size - is strictly greater than re_max_failures, the largest possible stack - is 2 * re_max_failures failure points. */ - if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) - { - fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); - -#ifdef emacs - if (! fail_stack.stack) - fail_stack.stack - = (fail_stack_elt_t *) xmalloc (fail_stack.size - * sizeof (fail_stack_elt_t)); - else - fail_stack.stack - = (fail_stack_elt_t *) xrealloc (fail_stack.stack, - (fail_stack.size - * sizeof (fail_stack_elt_t))); -#else /* not emacs */ - if (! 
fail_stack.stack) - fail_stack.stack - = (fail_stack_elt_t *) malloc (fail_stack.size - * sizeof (fail_stack_elt_t)); - else - fail_stack.stack - = (fail_stack_elt_t *) realloc (fail_stack.stack, - (fail_stack.size - * sizeof (fail_stack_elt_t))); -#endif /* not emacs */ - } - - /* Initialize some other variables the matcher uses. */ - RETALLOC_IF (regstart, num_regs, const char *); - RETALLOC_IF (regend, num_regs, const char *); - RETALLOC_IF (old_regstart, num_regs, const char *); - RETALLOC_IF (old_regend, num_regs, const char *); - RETALLOC_IF (best_regstart, num_regs, const char *); - RETALLOC_IF (best_regend, num_regs, const char *); - RETALLOC_IF (reg_info, num_regs, register_info_type); - RETALLOC_IF (reg_dummy, num_regs, const char *); - RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); - } -#endif - - return REG_NOERROR; -} /* regex_compile */ - -/* Subroutines for `regex_compile'. */ - -/* Store OP at LOC followed by two-byte integer parameter ARG. */ - -static void -store_op1 (op, loc, arg) - re_opcode_t op; - unsigned char *loc; - int arg; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg); -} - - -/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -store_op2 (op, loc, arg1, arg2) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg1); - STORE_NUMBER (loc + 3, arg2); -} - - -/* Copy the bytes from LOC to END to open up three bytes of space at LOC - for OP followed by two-byte integer parameter ARG. */ - -static void -insert_op1 (op, loc, arg, end) - re_opcode_t op; - unsigned char *loc; - int arg; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 3; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op1 (op, loc, arg); -} - - -/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. 
*/ - -static void -insert_op2 (op, loc, arg1, arg2, end) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 5; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op2 (op, loc, arg1, arg2); -} - - -/* P points to just after a ^ in PATTERN. Return true if that ^ comes - after an alternative or a begin-subexpression. We assume there is at - least one character before the ^. */ - -static boolean -at_begline_loc_p (pattern, p, syntax) - const char *pattern, *p; - reg_syntax_t syntax; -{ - const char *prev = p - 2; - boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; - - return - /* After a subexpression? */ - (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) - /* After an alternative? */ - || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); -} - - -/* The dual of at_begline_loc_p. This one is for $. We assume there is - at least one character after the $, i.e., `P < PEND'. */ - -static boolean -at_endline_loc_p (p, pend, syntax) - const char *p, *pend; - int syntax; -{ - const char *next = p; - boolean next_backslash = *next == '\\'; - const char *next_next = p + 1 < pend ? p + 1 : NULL; - - return - /* Before a subexpression? */ - (syntax & RE_NO_BK_PARENS ? *next == ')' - : next_backslash && next_next && *next_next == ')') - /* Before an alternative? */ - || (syntax & RE_NO_BK_VBAR ? *next == '|' - : next_backslash && next_next && *next_next == '|'); -} - - -/* Returns true if REGNUM is in one of COMPILE_STACK's elements and - false if it's not. 
*/ - -static boolean -group_in_compile_stack (compile_stack, regnum) - compile_stack_type compile_stack; - regnum_t regnum; -{ - int this_element; - - for (this_element = compile_stack.avail - 1; - this_element >= 0; - this_element--) - if (compile_stack.stack[this_element].regnum == regnum) - return true; - - return false; -} - - -/* Read the ending character of a range (in a bracket expression) from the - uncompiled pattern *P_PTR (which ends at PEND). We assume the - starting character is in `P[-2]'. (`P[-1]' is the character `-'.) - Then we set the translation of all bits between the starting and - ending characters (inclusive) in the compiled pattern B. - - Return an error code. - - We use these short variable names so we can use the same macros as - `regex_compile' itself. */ - -static reg_errcode_t -compile_range (p_ptr, pend, translate, syntax, b) - const char **p_ptr, *pend; - char *translate; - reg_syntax_t syntax; - unsigned char *b; -{ - unsigned this_char; - - const char *p = *p_ptr; - int range_start, range_end; - - if (p == pend) - return REG_ERANGE; - - /* Even though the pattern is a signed `char *', we need to fetch - with unsigned char *'s; if the high bit of the pattern character - is set, the range endpoints will be negative if we fetch using a - signed char *. - - We also want to fetch the endpoints without translating them; the - appropriate translation is done in the bit-setting loop below. */ - /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ - range_start = ((const unsigned char *) p)[-2]; - range_end = ((const unsigned char *) p)[0]; - - /* Have to increment the pointer into the pattern string, so the - caller isn't still at the ending character. */ - (*p_ptr)++; - - /* If the start is after the end, the range is empty. */ - if (range_start > range_end) - return syntax & RE_NO_EMPTY_RANGES ? 
REG_ERANGE : REG_NOERROR; - - /* Here we see why `this_char' has to be larger than an `unsigned - char' -- the range is inclusive, so if `range_end' == 0xff - (assuming 8-bit characters), we would otherwise go into an infinite - loop, since all characters <= 0xff. */ - for (this_char = range_start; this_char <= range_end; this_char++) - { - SET_LIST_BIT (TRANSLATE (this_char)); - } - - return REG_NOERROR; -} - -/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in - BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible - characters can start a string that matches the pattern. This fastmap - is used by re_search to skip quickly over impossible starting points. - - The caller must supply the address of a (1 << BYTEWIDTH)-byte data - area as BUFP->fastmap. - - We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in - the pattern buffer. - - Returns 0 if we succeed, -2 if an internal error. */ - -int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; -{ - int j, k; -#ifdef MATCH_MAY_ALLOCATE - fail_stack_type fail_stack; -#endif -#ifndef REGEX_MALLOC - char *destination; -#endif - /* We don't push any register information onto the failure stack. */ - unsigned num_regs = 0; - - register char *fastmap = bufp->fastmap; - unsigned char *pattern = bufp->buffer; - unsigned long size = bufp->used; - unsigned char *p = pattern; - register unsigned char *pend = pattern + size; - - /* Assume that each path through the pattern can be null until - proven otherwise. We set this false at the bottom of switch - statement, to which we get only if a particular path doesn't - match the empty string. */ - boolean path_can_be_null = true; - - /* We aren't doing a `succeed_n' to begin with. */ - boolean succeed_n_p = false; - - assert (fastmap != NULL && p != NULL); - - INIT_FAIL_STACK (); - bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ - bufp->fastmap_accurate = 1; /* It will be when we're done. 
*/ - bufp->can_be_null = 0; - - while (p != pend || !FAIL_STACK_EMPTY ()) - { - if (p == pend) - { - bufp->can_be_null |= path_can_be_null; - - /* Reset for next path. */ - path_can_be_null = true; - - p = fail_stack.stack[--fail_stack.avail]; - } - - /* We should never be about to go beyond the end of the pattern. */ - assert (p < pend); - -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - - /* I guess the idea here is to simply not bother with a fastmap - if a backreference is used, since it's too hard to figure out - the fastmap for the corresponding group. Setting - `can_be_null' stops `re_search_2' from using the fastmap, so - that is all we do. */ - case duplicate: - bufp->can_be_null = 1; - return 0; - - - /* Following are the cases which match a character. These end - with `break'. */ - - case exactn: - fastmap[p[1]] = 1; - break; - - - case charset: - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) - fastmap[j] = 1; - break; - - - case charset_not: - /* Chars beyond end of map must be allowed. */ - for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) - fastmap[j] = 1; - break; - - - case wordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == Sword) - fastmap[j] = 1; - break; - - - case notwordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != Sword) - fastmap[j] = 1; - break; - - - case anychar: - { - int fastmap_newline = fastmap['\n']; - - /* `.' matches anything ... */ - for (j = 0; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - /* ... except perhaps newline. */ - if (!(bufp->syntax & RE_DOT_NEWLINE)) - fastmap['\n'] = fastmap_newline; - - /* Return if we have already set `can_be_null'; if we have, - then the fastmap is irrelevant. Something's wrong here. 
*/ - else if (bufp->can_be_null) - return 0; - - /* Otherwise, have to check alternative paths. */ - break; - } - -#ifdef emacs - case syntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - case notsyntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - /* All cases after this match the empty string. These end with - `continue'. */ - - - case before_dot: - case at_dot: - case after_dot: - continue; -#endif /* not emacs */ - - - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbound: - case notwordbound: - case wordbeg: - case wordend: - case push_dummy_failure: - continue; - - - case jump_n: - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case jump_past_alt: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - if (j > 0) - continue; - - /* Jump backward implies we just went through the body of a - loop and matched nothing. Opcode jumped to should be - `on_failure_jump' or `succeed_n'. Just treat it like an - ordinary jump. For a * loop, it has pushed its failure - point already; if so, discard that as redundant. */ - if ((re_opcode_t) *p != on_failure_jump - && (re_opcode_t) *p != succeed_n) - continue; - - p++; - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - - /* If what's on the stack is where we are now, pop it. */ - if (!FAIL_STACK_EMPTY () - && fail_stack.stack[fail_stack.avail - 1] == p) - fail_stack.avail--; - - continue; - - - case on_failure_jump: - case on_failure_keep_string_jump: - handle_on_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - - /* For some patterns, e.g., `(a?)?', `p+j' here points to the - end of the pattern. We don't want to push such a point, - since when we restore it above, entering the switch will - increment `p' past the end of the pattern. 
We don't need - to push such a point since we obviously won't find any more - fastmap entries beyond `pend'. Such a pattern can match - the null string, though. */ - if (p + j < pend) - { - if (!PUSH_PATTERN_OP (p + j, fail_stack)) - return -2; - } - else - bufp->can_be_null = 1; - - if (succeed_n_p) - { - EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ - succeed_n_p = false; - } - - continue; - - - case succeed_n: - /* Get to the number of times to succeed. */ - p += 2; - - /* Increment p past the n for when k != 0. */ - EXTRACT_NUMBER_AND_INCR (k, p); - if (k == 0) - { - p -= 4; - succeed_n_p = true; /* Spaghetti code alert. */ - goto handle_on_failure_jump; - } - continue; - - - case set_number_at: - p += 4; - continue; - - - case start_memory: - case stop_memory: - p += 2; - continue; - - - default: - abort (); /* We have listed all the cases. */ - } /* switch *p++ */ - - /* Getting here means we have found the possible starting - characters for one path of the pattern -- and that the empty - string does not match. We need not follow this path further. - Instead, look at the next alternative (remembered on the - stack), or quit if no more. The test at the top of the loop - does these things. */ - path_can_be_null = false; - p = pend; - } /* while p */ - - /* Set `can_be_null' for the last path (also the first path, if the - pattern is empty). */ - bufp->can_be_null |= path_can_be_null; - return 0; -} /* re_compile_fastmap */ - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use - this memory for recording register information. STARTS and ENDS - must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. 
- - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ - -void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - unsigned num_regs; - regoff_t *starts, *ends; -{ - if (num_regs) - { - bufp->regs_allocated = REGS_REALLOCATE; - regs->num_regs = num_regs; - regs->start = starts; - regs->end = ends; - } - else - { - bufp->regs_allocated = REGS_UNALLOCATED; - regs->num_regs = 0; - regs->start = regs->end = (regoff_t *) 0; - } -} - -/* Searching routines. */ - -/* Like re_search_2, below, but only one string is specified, and - doesn't let you say where to stop matching. */ - -int -re_search (bufp, string, size, startpos, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, startpos, range; - struct re_registers *regs; -{ - return re_search_2 (bufp, NULL, 0, string, size, startpos, range, - regs, size); -} - - -/* Using the compiled pattern in BUFP->buffer, first tries to match the - virtual concatenation of STRING1 and STRING2, starting first at index - STARTPOS, then at STARTPOS + 1, and so on. - - STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. - - RANGE is how far to scan while trying to match. RANGE = 0 means try - only at STARTPOS; in general, the last start tried is STARTPOS + - RANGE. - - In REGS, return the indices of the virtual concatenation of STRING1 - and STRING2 that matched the entire BUFP->buffer and its contained - subexpressions. - - Do not consider matching one past the index STOP in the virtual - concatenation of STRING1 and STRING2. - - We return either the position in the strings at which the match was - found, -1 if no match, or -2 if error (such as failure - stack overflow). 
*/ - -int -re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int startpos; - int range; - struct re_registers *regs; - int stop; -{ - int val; - register char *fastmap = bufp->fastmap; - register char *translate = bufp->translate; - int total_size = size1 + size2; - int endpos = startpos + range; - - /* Check for out-of-range STARTPOS. */ - if (startpos < 0 || startpos > total_size) - return -1; - - /* Fix up RANGE if it might eventually take us outside - the virtual concatenation of STRING1 and STRING2. */ - if (endpos < -1) - range = -1 - startpos; - else if (endpos > total_size) - range = total_size - startpos; - - /* If the search isn't to be a backwards one, don't waste time in a - search for a pattern that must be anchored. */ - if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) - { - if (startpos > 0) - return -1; - else - range = 1; - } - - /* Update the fastmap now if not correct already. */ - if (fastmap && !bufp->fastmap_accurate) - if (re_compile_fastmap (bufp) == -2) - return -2; - - /* Loop through the string, looking for a place to start matching. */ - for (;;) - { - /* If a fastmap is supplied, skip quickly over characters that - cannot be the start of a match. If the pattern can match the - null string, however, we don't need to skip characters; we want - the first null string. */ - if (fastmap && startpos < total_size && !bufp->can_be_null) - { - if (range > 0) /* Searching forwards. */ - { - register const char *d; - register int lim = 0; - int irange = range; - - if (startpos < size1 && startpos + range >= size1) - lim = range - (size1 - startpos); - - d = (startpos >= size1 ? string2 - size1 : string1) + startpos; - - /* Written out as an if-else to avoid testing `translate' - inside the loop. 
*/ - if (translate) - while (range > lim - && !fastmap[(unsigned char) - translate[(unsigned char) *d++]]) - range--; - else - while (range > lim && !fastmap[(unsigned char) *d++]) - range--; - - startpos += irange - range; - } - else /* Searching backwards. */ - { - register char c = (size1 == 0 || startpos >= size1 - ? string2[startpos - size1] - : string1[startpos]); - - if (!fastmap[(unsigned char) TRANSLATE (c)]) - goto advance; - } - } - - /* If can't match the null string, and that's all we have left, fail. */ - if (range >= 0 && startpos == total_size && fastmap - && !bufp->can_be_null) - return -1; - - val = re_match_2_internal (bufp, string1, size1, string2, size2, - startpos, regs, stop); -#ifndef REGEX_MALLOC -#ifdef C_ALLOCA - alloca (0); -#endif -#endif - - if (val >= 0) - return startpos; - - if (val == -2) - return -2; - - advance: - if (!range) - break; - else if (range > 0) - { - range--; - startpos++; - } - else - { - range++; - startpos--; - } - } - return -1; -} /* re_search_2 */ - -/* Declarations and macros for re_match_2. */ - -static int bcmp_translate (); -static boolean alt_match_null_string_p (), - common_op_match_null_string_p (), - group_match_null_string_p (); - -/* This converts PTR, a pointer into one of the search strings `string1' - and `string2' into an offset from the beginning of that string. */ -#define POINTER_TO_OFFSET(ptr) \ - (FIRST_STRING_P (ptr) \ - ? ((regoff_t) ((ptr) - string1)) \ - : ((regoff_t) ((ptr) - string2 + size1))) - -/* Macros for dealing with the split strings in re_match_2. */ - -#define MATCHING_IN_FIRST_STRING (dend == end_match_1) - -/* Call before fetching a character with *d. This switches over to - string2 if necessary. */ -#define PREFETCH() \ - while (d == dend) \ - { \ - /* End of string2 => fail. */ \ - if (dend == end_match_2) \ - goto fail; \ - /* End of string1 => advance to string2. 
*/ \ - d = string2; \ - dend = end_match_2; \ - } - - -/* Test if at very beginning or at very end of the virtual concatenation - of `string1' and `string2'. If only one string, it's `string2'. */ -#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) -#define AT_STRINGS_END(d) ((d) == end2) - - -/* Test if D points to a character which is word-constituent. We have - two special cases to check for: if past the end of string1, look at - the first character in string2; and if before the beginning of - string2, look at the last character in string1. */ -#define WORDCHAR_P(d) \ - (SYNTAX ((d) == end1 ? *string2 \ - : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ - == Sword) - -/* Test if the character before D and the one at D differ with respect - to being word-constituent. */ -#define AT_WORD_BOUNDARY(d) \ - (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ - || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) - - -/* Free everything we malloc. */ -#ifdef MATCH_MAY_ALLOCATE -#ifdef REGEX_MALLOC -#define FREE_VAR(var) if (var) free (var); var = NULL -#define FREE_VARIABLES() \ - do { \ - FREE_VAR (fail_stack.stack); \ - FREE_VAR (regstart); \ - FREE_VAR (regend); \ - FREE_VAR (old_regstart); \ - FREE_VAR (old_regend); \ - FREE_VAR (best_regstart); \ - FREE_VAR (best_regend); \ - FREE_VAR (reg_info); \ - FREE_VAR (reg_dummy); \ - FREE_VAR (reg_info_dummy); \ - } while (0) -#else /* not REGEX_MALLOC */ -/* This used to do alloca (0), but now we do that in the caller. */ -#define FREE_VARIABLES() /* Nothing */ -#endif /* not REGEX_MALLOC */ -#else -#define FREE_VARIABLES() /* Do nothing! */ -#endif /* not MATCH_MAY_ALLOCATE */ - -/* These values must meet several constraints. They must not be valid - register values; since we have a limit of 255 registers (because - we use only one byte in the pattern for the register number), we can - use numbers larger than 255. They must differ by 1, because of - NUM_FAILURE_ITEMS above. 
And the value for the lowest register must - be larger than the value for the highest register, so we do not try - to actually save any registers when none are active. */ -#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) -#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) - -/* Matching routines. */ - -#ifndef emacs /* Emacs never uses this. */ -/* re_match is like re_match_2 except it takes only a single string. */ - -int -re_match (bufp, string, size, pos, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, pos; - struct re_registers *regs; -{ - int result = re_match_2_internal (bufp, NULL, 0, string, size, - pos, regs, size); - alloca (0); - return result; -} -#endif /* not emacs */ - - -/* re_match_2 matches the compiled pattern in BUFP against the - the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 - and SIZE2, respectively). We start matching at POS, and stop - matching at STOP. - - If REGS is non-null and the `no_sub' field of BUFP is nonzero, we - store offsets for the substring each group matched in REGS. See the - documentation for exactly how many groups we fill. - - We return -1 if no match, -2 if an internal error (such as the - failure stack overflowing). Otherwise, we return the length of the - matched substring. */ - -int -re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; -{ - int result = re_match_2_internal (bufp, string1, size1, string2, size2, - pos, regs, stop); - alloca (0); - return result; -} - -/* This is a separate function so that we can force an alloca cleanup - afterwards. */ -static int -re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; -{ - /* General temporaries. 
*/ - int mcnt; - unsigned char *p1; - - /* Just past the end of the corresponding string. */ - const char *end1, *end2; - - /* Pointers into string1 and string2, just past the last characters in - each to consider matching. */ - const char *end_match_1, *end_match_2; - - /* Where we are in the data, and the end of the current string. */ - const char *d, *dend; - - /* Where we are in the pattern, and the end of the pattern. */ - unsigned char *p = bufp->buffer; - register unsigned char *pend = p + bufp->used; - - /* Mark the opcode just after a start_memory, so we can test for an - empty subpattern when we get to the stop_memory. */ - unsigned char *just_past_start_mem = 0; - - /* We use this to map every character in the string. */ - char *translate = bufp->translate; - - /* Failure point stack. Each place that can handle a failure further - down the line pushes a failure point on this stack. It consists of - restart, regend, and reg_info for all registers corresponding to - the subexpressions we're currently inside, plus the number of such - registers, and, finally, two char *'s. The first char * is where - to resume scanning the pattern; the second one is where to resume - scanning the strings. If the latter is zero, the failure point is - a ``dummy''; if a failure happens and the failure point is a dummy, - it gets discarded and the next next one is tried. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ - fail_stack_type fail_stack; -#endif -#ifdef DEBUG - static unsigned failure_id = 0; - unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; -#endif - - /* We fill all the registers internally, independent of what we - return, for use in backreferences. The number here includes - an element for register zero. */ - unsigned num_regs = bufp->re_nsub + 1; - - /* The currently active registers. 
*/ - unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; - unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; - - /* Information on the contents of registers. These are pointers into - the input strings; they record just what was matched (on this - attempt) by a subexpression part of the pattern, that is, the - regnum-th regstart pointer points to where in the pattern we began - matching and the regnum-th regend points to right after where we - stopped matching the regnum-th subexpression. (The zeroth register - keeps track of what the whole pattern matches.) */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **regstart, **regend; -#endif - - /* If a group that's operated upon by a repetition operator fails to - match anything, then the register for its start will need to be - restored because it will have been set to wherever in the string we - are when we last see its open-group operator. Similarly for a - register's end. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **old_regstart, **old_regend; -#endif - - /* The is_active field of reg_info helps us keep track of which (possibly - nested) subexpressions we are currently in. The matched_something - field of reg_info[reg_num] helps us tell whether or not we have - matched any of the pattern so far this time through the reg_num-th - subexpression. These two fields get reset each time through any - loop their register is in. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ - register_info_type *reg_info; -#endif - - /* The following record the register info as found in the above - variables when we find a match better than any we've seen before. - This happens as we backtrack through the failure points, which in - turn happens only if we have not yet matched the entire string. */ - unsigned best_regs_set = false; -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. 
*/ - const char **best_regstart, **best_regend; -#endif - - /* Logically, this is `best_regend[0]'. But we don't want to have to - allocate space for that if we're not allocating space for anything - else (see below). Also, we never need info about register 0 for - any of the other register vectors, and it seems rather a kludge to - treat `best_regend' differently than the rest. So we keep track of - the end of the best match so far in a separate variable. We - initialize this to NULL so that when we backtrack the first time - and need to test it, it's not garbage. */ - const char *match_end = NULL; - - /* Used when we pop values we don't care about. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **reg_dummy; - register_info_type *reg_info_dummy; -#endif - -#ifdef DEBUG - /* Counts the total number of registers pushed. */ - unsigned num_regs_pushed = 0; -#endif - - DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); - - INIT_FAIL_STACK (); - -#ifdef MATCH_MAY_ALLOCATE - /* Do not bother to initialize all the register variables if there are - no groups in the pattern, as it takes a fair amount of time. If - there are groups, we include space for register 0 (the whole - pattern), even though we never use it, since it simplifies the - array indexing. We should fix this. 
*/ - if (bufp->re_nsub) - { - regstart = REGEX_TALLOC (num_regs, const char *); - regend = REGEX_TALLOC (num_regs, const char *); - old_regstart = REGEX_TALLOC (num_regs, const char *); - old_regend = REGEX_TALLOC (num_regs, const char *); - best_regstart = REGEX_TALLOC (num_regs, const char *); - best_regend = REGEX_TALLOC (num_regs, const char *); - reg_info = REGEX_TALLOC (num_regs, register_info_type); - reg_dummy = REGEX_TALLOC (num_regs, const char *); - reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); - - if (!(regstart && regend && old_regstart && old_regend && reg_info - && best_regstart && best_regend && reg_dummy && reg_info_dummy)) - { - FREE_VARIABLES (); - return -2; - } - } -#if defined (REGEX_MALLOC) - else - { - /* We must initialize all our variables to NULL, so that - `FREE_VARIABLES' doesn't try to free them. */ - regstart = regend = old_regstart = old_regend = best_regstart - = best_regend = reg_dummy = NULL; - reg_info = reg_info_dummy = (register_info_type *) NULL; - } -#endif /* REGEX_MALLOC */ -#endif /* MATCH_MAY_ALLOCATE */ - - /* The starting position is bogus. */ - if (pos < 0 || pos > size1 + size2) - { - FREE_VARIABLES (); - return -1; - } - - /* Initialize subexpression text positions to -1 to mark ones that no - start_memory/stop_memory has been seen for. Also initialize the - register information struct. */ - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = regend[mcnt] - = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; - - REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; - IS_ACTIVE (reg_info[mcnt]) = 0; - MATCHED_SOMETHING (reg_info[mcnt]) = 0; - EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; - } - - /* We move `string1' into `string2' if the latter's empty -- but not if - `string1' is null. 
*/ - if (size2 == 0 && string1 != NULL) - { - string2 = string1; - size2 = size1; - string1 = 0; - size1 = 0; - } - end1 = string1 + size1; - end2 = string2 + size2; - - /* Compute where to stop matching, within the two strings. */ - if (stop <= size1) - { - end_match_1 = string1 + stop; - end_match_2 = string2; - } - else - { - end_match_1 = end1; - end_match_2 = string2 + stop - size1; - } - - /* `p' scans through the pattern as `d' scans through the data. - `dend' is the end of the input string that `d' points within. `d' - is advanced into the following input string whenever necessary, but - this happens before fetching; therefore, at the beginning of the - loop, `d' can be pointing at the end of a string, but it cannot - equal `string2'. */ - if (size1 > 0 && pos <= size1) - { - d = string1 + pos; - dend = end_match_1; - } - else - { - d = string2 + pos - size1; - dend = end_match_2; - } - - DEBUG_PRINT1 ("The compiled pattern is: "); - DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); - DEBUG_PRINT1 ("The string to match is: `"); - DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); - DEBUG_PRINT1 ("'\n"); - - /* This loops over pattern commands. It exits by returning from the - function if the match is complete, or it drops through if the match - fails at this starting point in the input data. */ - for (;;) - { - DEBUG_PRINT2 ("\n0x%x: ", p); - - if (p == pend) - { /* End of pattern means we might have succeeded. */ - DEBUG_PRINT1 ("end of pattern ... "); - - /* If we haven't matched the entire string, and we want the - longest match, try backtracking. */ - if (d != end_match_2) - { - /* 1 if this match ends in the same string (string1 or string2) - as the best previous match. */ - boolean same_str_p = (FIRST_STRING_P (match_end) - == MATCHING_IN_FIRST_STRING); - /* 1 if this match is the best seen so far. */ - boolean best_match_p; - - /* AIX compiler got confused when this was combined - with the previous declaration. 
*/ - if (same_str_p) - best_match_p = d > match_end; - else - best_match_p = !MATCHING_IN_FIRST_STRING; - - DEBUG_PRINT1 ("backtracking.\n"); - - if (!FAIL_STACK_EMPTY ()) - { /* More failure points to try. */ - - /* If exceeds best match so far, save it. */ - if (!best_regs_set || best_match_p) - { - best_regs_set = true; - match_end = d; - - DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - best_regstart[mcnt] = regstart[mcnt]; - best_regend[mcnt] = regend[mcnt]; - } - } - goto fail; - } - - /* If no failure points, don't restore garbage. And if - last match is real best match, don't restore second - best one. */ - else if (best_regs_set && !best_match_p) - { - restore_best_regs: - /* Restore best match. It may happen that `dend == - end_match_1' while the restored d is in string2. - For example, the pattern `x.*y.*z' against the - strings `x-' and `y-z-', if the two strings are - not consecutive in memory. */ - DEBUG_PRINT1 ("Restoring best registers.\n"); - - d = match_end; - dend = ((d >= string1 && d <= end1) - ? end_match_1 : end_match_2); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = best_regstart[mcnt]; - regend[mcnt] = best_regend[mcnt]; - } - } - } /* d != end_match_2 */ - - DEBUG_PRINT1 ("Accepting match.\n"); - - /* If caller wants register contents data back, do it. */ - if (regs && !bufp->no_sub) - { - /* Have the register data arrays been allocated? */ - if (bufp->regs_allocated == REGS_UNALLOCATED) - { /* No. So allocate them with malloc. We need one - extra element beyond `num_regs' for the `-1' marker - GNU code uses. */ - regs->num_regs = MAX (RE_NREGS, num_regs + 1); - regs->start = TALLOC (regs->num_regs, regoff_t); - regs->end = TALLOC (regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - bufp->regs_allocated = REGS_REALLOCATE; - } - else if (bufp->regs_allocated == REGS_REALLOCATE) - { /* Yes. 
If we need more elements than were already - allocated, reallocate them. If we need fewer, just - leave it alone. */ - if (regs->num_regs < num_regs + 1) - { - regs->num_regs = num_regs + 1; - RETALLOC (regs->start, regs->num_regs, regoff_t); - RETALLOC (regs->end, regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - } - } - else - { - /* These braces fend off a "empty body in an else-statement" - warning under GCC when assert expands to nothing. */ - assert (bufp->regs_allocated == REGS_FIXED); - } - - /* Convert the pointer data in `regstart' and `regend' to - indices. Register zero has to be set differently, - since we haven't kept track of any info for it. */ - if (regs->num_regs > 0) - { - regs->start[0] = pos; - regs->end[0] = (MATCHING_IN_FIRST_STRING - ? ((regoff_t) (d - string1)) - : ((regoff_t) (d - string2 + size1))); - } - - /* Go through the first `min (num_regs, regs->num_regs)' - registers, since that is all we initialized. */ - for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) - { - if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) - regs->start[mcnt] = regs->end[mcnt] = -1; - else - { - regs->start[mcnt] - = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); - regs->end[mcnt] - = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); - } - } - - /* If the regs structure we return has more elements than - were in the pattern, set the extra elements to -1. If - we (re)allocated the registers, this is the case, - because we always allocate enough to have at least one - -1 at the end. 
*/ - for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; - } /* regs && !bufp->no_sub */ - - FREE_VARIABLES (); - DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", - nfailure_points_pushed, nfailure_points_popped, - nfailure_points_pushed - nfailure_points_popped); - DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); - - mcnt = d - pos - (MATCHING_IN_FIRST_STRING - ? string1 - : string2 - size1); - - DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); - - return mcnt; - } - - /* Otherwise match next pattern command. */ -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - /* Ignore these. Used to ignore the n of succeed_n's which - currently have n == 0. */ - case no_op: - DEBUG_PRINT1 ("EXECUTING no_op.\n"); - break; - - - /* Match the next n pattern characters exactly. The following - byte in the pattern defines n, and the n bytes after that - are the characters to match. */ - case exactn: - mcnt = *p++; - DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); - - /* This is written out as an if-else so we don't waste time - testing `translate' inside the loop. */ - if (translate) - { - do - { - PREFETCH (); - if (translate[(unsigned char) *d++] != (char) *p++) - goto fail; - } - while (--mcnt); - } - else - { - do - { - PREFETCH (); - if (*d++ != (char) *p++) goto fail; - } - while (--mcnt); - } - SET_REGS_MATCHED (); - break; - - - /* Match any character except possibly a newline or a null. 
*/ - case anychar: - DEBUG_PRINT1 ("EXECUTING anychar.\n"); - - PREFETCH (); - - if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') - || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) - goto fail; - - SET_REGS_MATCHED (); - DEBUG_PRINT2 (" Matched `%d'.\n", *d); - d++; - break; - - - case charset: - case charset_not: - { - register unsigned char c; - boolean not = (re_opcode_t) *(p - 1) == charset_not; - - DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); - - PREFETCH (); - c = TRANSLATE (*d); /* The character to match. */ - - /* Cast to `unsigned' instead of `unsigned char' in case the - bit list is a full 32 bytes long. */ - if (c < (unsigned) (*p * BYTEWIDTH) - && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - p += 1 + *p; - - if (!not) goto fail; - - SET_REGS_MATCHED (); - d++; - break; - } - - - /* The beginning of a group is represented by start_memory. - The arguments are the register number in the next byte, and the - number of groups inner to this one in the next. The text - matched within the group is recorded (in the internal - registers data structure) under the register number. */ - case start_memory: - DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); - - /* Find out if this group can match the empty string. */ - p1 = p; /* To send to group_match_null_string_p. */ - - if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[*p]) - = group_match_null_string_p (&p1, pend, reg_info); - - /* Save the position in the string where we were the last time - we were at this open-group operator in case the group is - operated upon by a repetition operator, e.g., with `(a*)*b' - against `ab'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regstart[*p]) ? 
d : regstart[*p] - : regstart[*p]; - DEBUG_PRINT2 (" old_regstart: %d\n", - POINTER_TO_OFFSET (old_regstart[*p])); - - regstart[*p] = d; - DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); - - IS_ACTIVE (reg_info[*p]) = 1; - MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* This is the new highest active register. */ - highest_active_reg = *p; - - /* If nothing was active before, this is the new lowest active - register. */ - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *p; - - /* Move past the register number and inner group count. */ - p += 2; - just_past_start_mem = p; - break; - - - /* The stop_memory opcode represents the end of a group. Its - arguments are the same as start_memory's: the register - number, and the number of inner groups. */ - case stop_memory: - DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); - - /* We need to save the string position the last time we were at - this close-group operator in case the group is operated - upon by a repetition operator, e.g., with `((a*)*(b*)*)*' - against `aba'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regend[*p]) ? d : regend[*p] - : regend[*p]; - DEBUG_PRINT2 (" old_regend: %d\n", - POINTER_TO_OFFSET (old_regend[*p])); - - regend[*p] = d; - DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); - - /* This register isn't active anymore. */ - IS_ACTIVE (reg_info[*p]) = 0; - - /* If this was the only register active, nothing is active - anymore. */ - if (lowest_active_reg == highest_active_reg) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - { /* We must scan for the new highest active register, since - it isn't necessarily one less than now: consider - (a(b)c(d(e)f)g). When group 3 ends, after the f), the - new highest active register is 1. 
*/ - unsigned char r = *p - 1; - while (r > 0 && !IS_ACTIVE (reg_info[r])) - r--; - - /* If we end up at register zero, that means that we saved - the registers as the result of an `on_failure_jump', not - a `start_memory', and we jumped to past the innermost - `stop_memory'. For example, in ((.)*) we save - registers 1 and 2 as a result of the *, but when we pop - back to the second ), we are at the stop_memory 1. - Thus, nothing is active. */ - if (r == 0) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - highest_active_reg = r; - } - - /* If just failed to match something this time around with a - group that's operated on by a repetition operator, try to - force exit from the ``loop'', and restore the register - information for this group that we had before trying this - last match. */ - if ((!MATCHED_SOMETHING (reg_info[*p]) - || just_past_start_mem == p - 1) - && (p + 2) < pend) - { - boolean is_a_jump_n = false; - - p1 = p + 2; - mcnt = 0; - switch ((re_opcode_t) *p1++) - { - case jump_n: - is_a_jump_n = true; - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (is_a_jump_n) - p1 += 2; - break; - - default: - /* do nothing */ ; - } - p1 += mcnt; - - /* If the next operation is a jump backwards in the pattern - to an on_failure_jump right before the start_memory - corresponding to this stop_memory, exit from the loop - by forcing a failure after pushing on the stack the - on_failure_jump's jump in the pattern, and d. */ - if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump - && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) - { - /* If this group ever matched anything, then restore - what its registers were before trying this last - failed match, e.g., with `(a*)*b' against `ab' for - regstart[1], and, e.g., with `((a*)*(b*)*)*' - against `aba' for regend[3]. 
- - Also restore the registers for inner groups for, - e.g., `((a*)(b*))*' against `aba' (register 3 would - otherwise get trashed). */ - - if (EVER_MATCHED_SOMETHING (reg_info[*p])) - { - unsigned r; - - EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* Restore this and inner groups' (if any) registers. */ - for (r = *p; r < *p + *(p + 1); r++) - { - regstart[r] = old_regstart[r]; - - /* xx why this test? */ - if ((int) old_regend[r] >= (int) regstart[r]) - regend[r] = old_regend[r]; - } - } - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - PUSH_FAILURE_POINT (p1 + mcnt, d, -2); - - goto fail; - } - } - - /* Move past the register number and the inner group count. */ - p += 2; - break; - - - /* \<digit> has been turned into a `duplicate' command which is - followed by the numeric value of <digit> as the register number. */ - case duplicate: - { - register const char *d2, *dend2; - int regno = *p++; /* Get which register to match against. */ - DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); - - /* Can't back reference a group which we've never matched. */ - if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) - goto fail; - - /* Where in input to try to start matching. */ - d2 = regstart[regno]; - - /* Where to stop matching; if both the place to start and - the place to stop matching are in the same string, then - set to the place to stop, otherwise, for now have to use - the end of the first string. */ - - dend2 = ((FIRST_STRING_P (regstart[regno]) - == FIRST_STRING_P (regend[regno])) - ? regend[regno] : end_match_1); - for (;;) - { - /* If necessary, advance to next segment in register - contents. */ - while (d2 == dend2) - { - if (dend2 == end_match_2) break; - if (dend2 == regend[regno]) break; - - /* End of string1 => advance to string2. */ - d2 = string2; - dend2 = regend[regno]; - } - /* At end of register contents => success */ - if (d2 == dend2) break; - - /* If necessary, advance to next segment in data. 
*/ - PREFETCH (); - - /* How many characters left in this segment to match. */ - mcnt = dend - d; - - /* Want how many consecutive characters we can match in - one shot, so, if necessary, adjust the count. */ - if (mcnt > dend2 - d2) - mcnt = dend2 - d2; - - /* Compare that many; failure if mismatch, else move - past them. */ - if (translate - ? bcmp_translate (d, d2, mcnt, translate) - : bcmp (d, d2, mcnt)) - goto fail; - d += mcnt, d2 += mcnt; - } - } - break; - - - /* begline matches the empty string at the beginning of the string - (unless `not_bol' is set in `bufp'), and, if - `newline_anchor' is set, after newlines. */ - case begline: - DEBUG_PRINT1 ("EXECUTING begline.\n"); - - if (AT_STRINGS_BEG (d)) - { - if (!bufp->not_bol) break; - } - else if (d[-1] == '\n' && bufp->newline_anchor) - { - break; - } - /* In all other cases, we fail. */ - goto fail; - - - /* endline is the dual of begline. */ - case endline: - DEBUG_PRINT1 ("EXECUTING endline.\n"); - - if (AT_STRINGS_END (d)) - { - if (!bufp->not_eol) break; - } - - /* We have to ``prefetch'' the next character. */ - else if ((d == end1 ? *string2 : *d) == '\n' - && bufp->newline_anchor) - { - break; - } - goto fail; - - - /* Match at the very beginning of the data. */ - case begbuf: - DEBUG_PRINT1 ("EXECUTING begbuf.\n"); - if (AT_STRINGS_BEG (d)) - break; - goto fail; - - - /* Match at the very end of the data. */ - case endbuf: - DEBUG_PRINT1 ("EXECUTING endbuf.\n"); - if (AT_STRINGS_END (d)) - break; - goto fail; - - - /* on_failure_keep_string_jump is used to optimize `.*\n'. It - pushes NULL as the value for the string on the stack. Then - `pop_failure_point' will keep the current value for the - string, instead of restoring it. To see why, consider - matching `foo\nbar' against `.*\n'. The .* matches the foo; - then the . fails against the \n. But the next thing we want - to do is match the \n against the \n; if we restored the - string value, we would be back at the foo. 
- - Because this is used only in specific cases, we don't need to - check all the things that `on_failure_jump' does, to make - sure the right things get saved on the stack. Hence we don't - share its code. The only reason to push anything on the - stack at all is that otherwise we would have to change - `anychar's code to do something besides goto fail in this - case; that seems worse than this. */ - case on_failure_keep_string_jump: - DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); - - PUSH_FAILURE_POINT (p + mcnt, NULL, -2); - break; - - - /* Uses of on_failure_jump: - - Each alternative starts with an on_failure_jump that points - to the beginning of the next alternative. Each alternative - except the last ends with a jump that in effect jumps past - the rest of the alternatives. (They really jump to the - ending jump of the following alternative, because tensioning - these jumps is a hassle.) - - Repeats start with an on_failure_jump that points past both - the repetition text and either the following jump or - pop_failure_jump back to this on_failure_jump. */ - case on_failure_jump: - on_failure: - DEBUG_PRINT1 ("EXECUTING on_failure_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); - - /* If this on_failure_jump comes right before a group (i.e., - the original * applied to a group), save the information - for that group and all inner ones, so that if we fail back - to this point, the group's information will be correct. - For example, in \(a*\)*\1, we need the preceding group, - and in \(\(a*\)b*\)\2, we need the inner group. */ - - /* We can't use `p' to check ahead because we push - a failure point to `p + mcnt' after we do this. 
*/ - p1 = p; - - /* We need to skip no_op's before we look for the - start_memory in case this on_failure_jump is happening as - the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 - against aba. */ - while (p1 < pend && (re_opcode_t) *p1 == no_op) - p1++; - - if (p1 < pend && (re_opcode_t) *p1 == start_memory) - { - /* We have a new highest active register now. This will - get reset at the start_memory we are about to get to, - but we will have saved all the registers relevant to - this repetition op, as described above. */ - highest_active_reg = *(p1 + 1) + *(p1 + 2); - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *(p1 + 1); - } - - DEBUG_PRINT1 (":\n"); - PUSH_FAILURE_POINT (p + mcnt, d, -2); - break; - - - /* A smart repeat ends with `maybe_pop_jump'. - We change it to either `pop_failure_jump' or `jump'. */ - case maybe_pop_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); - { - register unsigned char *p2 = p; - - /* Compare the beginning of the repeat with what in the - pattern follows its end. If we can establish that there - is nothing that they would both match, i.e., that we - would have to backtrack because of (as in, e.g., `a*a') - then we can change to pop_failure_jump, because we'll - never have to backtrack. - - This is not true in the case of alternatives: in - `(a|ab)*' we do need to backtrack to the `ab' alternative - (e.g., if the string was `ab'). But instead of trying to - detect that here, the alternative has put on a dummy - failure point which is what we will end up popping. */ - - /* Skip over open/close-group commands. - If what follows this loop is a ...+ construct, - look at what begins its body, since we will have to - match at least one of that. 
*/ - while (1) - { - if (p2 + 2 < pend - && ((re_opcode_t) *p2 == stop_memory - || (re_opcode_t) *p2 == start_memory)) - p2 += 3; - else if (p2 + 6 < pend - && (re_opcode_t) *p2 == dummy_failure_jump) - p2 += 6; - else - break; - } - - p1 = p + mcnt; - /* p1[0] ... p1[2] are the `on_failure_jump' corresponding - to the `maybe_finalize_jump' of this case. Examine what - follows. */ - - /* If we're at the end of the pattern, we can change. */ - if (p2 == pend) - { - /* Consider what happens when matching ":\(.*\)" - against ":/". I don't really understand this code - yet. */ - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 - (" End of pattern: change to `pop_failure_jump'.\n"); - } - - else if ((re_opcode_t) *p2 == exactn - || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) - { - register unsigned char c - = *p2 == (unsigned char) endline ? '\n' : p2[2]; - - if ((re_opcode_t) p1[3] == exactn && p1[5] != c) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", - c, p1[5]); - } - - else if ((re_opcode_t) p1[3] == charset - || (re_opcode_t) p1[3] == charset_not) - { - int not = (re_opcode_t) p1[3] == charset_not; - - if (c < (unsigned char) (p1[4] * BYTEWIDTH) - && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - /* `not' is equal to 1 if c would match, which means - that we can't change to pop_failure_jump. */ - if (!not) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - } - else if ((re_opcode_t) *p2 == charset) - { -#ifdef DEBUG - register unsigned char c - = *p2 == (unsigned char) endline ? '\n' : p2[2]; -#endif - - if ((re_opcode_t) p1[3] == exactn - && ! 
((int) p2[1] * BYTEWIDTH > (int) p1[4] - && (p2[1 + p1[4] / BYTEWIDTH] - & (1 << (p1[4] % BYTEWIDTH))))) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", - c, p1[5]); - } - - else if ((re_opcode_t) p1[3] == charset_not) - { - int idx; - /* We win if the charset_not inside the loop - lists every character listed in the charset after. */ - for (idx = 0; idx < (int) p2[1]; idx++) - if (! (p2[2 + idx] == 0 - || (idx < (int) p1[4] - && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) - break; - - if (idx == p2[1]) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - else if ((re_opcode_t) p1[3] == charset) - { - int idx; - /* We win if the charset inside the loop - has no overlap with the one after the loop. */ - for (idx = 0; - idx < (int) p2[1] && idx < (int) p1[4]; - idx++) - if ((p2[2 + idx] & p1[5 + idx]) != 0) - break; - - if (idx == p2[1] || idx == p1[4]) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - } - } - p -= 2; /* Point at relative address again. */ - if ((re_opcode_t) p[-1] != pop_failure_jump) - { - p[-1] = (unsigned char) jump; - DEBUG_PRINT1 (" Match => jump.\n"); - goto unconditional_jump; - } - /* Note fall through. */ - - - /* The end of a simple repeat has a pop_failure_jump back to - its matching on_failure_jump, where the latter will push a - failure point. The pop_failure_jump takes off failure - points put on by this pop_failure_jump's matching - on_failure_jump; we got through the pattern to here from the - matching on_failure_jump, so didn't fail. */ - case pop_failure_jump: - { - /* We need to pass separate storage for the lowest and - highest registers, even though we don't care about the - actual values. Otherwise, we will restore only one - register from the stack, since lowest will == highest in - `pop_failure_point'. 
*/ - unsigned dummy_low_reg, dummy_high_reg; - unsigned char *pdummy; - const char *sdummy; - - DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); - POP_FAILURE_POINT (sdummy, pdummy, - dummy_low_reg, dummy_high_reg, - reg_dummy, reg_dummy, reg_info_dummy); - } - /* Note fall through. */ - - - /* Unconditionally jump (without popping any failure points). */ - case jump: - unconditional_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ - DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); - p += mcnt; /* Do the jump. */ - DEBUG_PRINT2 ("(to 0x%x).\n", p); - break; - - - /* We need this opcode so we can detect where alternatives end - in `group_match_null_string_p' et al. */ - case jump_past_alt: - DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); - goto unconditional_jump; - - - /* Normally, the on_failure_jump pushes a failure point, which - then gets popped at pop_failure_jump. We will end up at - pop_failure_jump, also, and with a pattern of, say, `a+', we - are skipping over the on_failure_jump, so we have to push - something meaningless for pop_failure_jump to pop. */ - case dummy_failure_jump: - DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); - /* It doesn't matter what we push for the string here. What - the code at `fail' tests is the value for the pattern. */ - PUSH_FAILURE_POINT (0, 0, -2); - goto unconditional_jump; - - - /* At the end of an alternative, we need to push a dummy failure - point in case we are followed by a `pop_failure_jump', because - we don't want the failure point for the alternative to be - popped. For example, matching `(a|ab)*' against `aab' - requires that we match the `ab' alternative. */ - case push_dummy_failure: - DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); - /* See comments just above at `dummy_failure_jump' about the - two zeroes. */ - PUSH_FAILURE_POINT (0, 0, -2); - break; - - /* Have to succeed matching what follows at least n times. - After that, handle like `on_failure_jump'. 
*/ - case succeed_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); - - assert (mcnt >= 0); - /* Originally, this is how many times we HAVE to succeed. */ - if (mcnt > 0) - { - mcnt--; - p += 2; - STORE_NUMBER_AND_INCR (p, mcnt); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); - } - else if (mcnt == 0) - { - DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); - p[2] = (unsigned char) no_op; - p[3] = (unsigned char) no_op; - goto on_failure; - } - break; - - case jump_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); - - /* Originally, this is how many times we CAN jump. */ - if (mcnt) - { - mcnt--; - STORE_NUMBER (p + 2, mcnt); - goto unconditional_jump; - } - /* If don't have to jump any more, skip over the rest of command. */ - else - p += 4; - break; - - case set_number_at: - { - DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - p1 = p + mcnt; - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); - STORE_NUMBER (p1, mcnt); - break; - } - - case wordbound: - DEBUG_PRINT1 ("EXECUTING wordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - break; - goto fail; - - case notwordbound: - DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - goto fail; - break; - - case wordbeg: - DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); - if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) - break; - goto fail; - - case wordend: - DEBUG_PRINT1 ("EXECUTING wordend.\n"); - if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) - && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) - break; - goto fail; - -#ifdef emacs - case before_dot: - DEBUG_PRINT1 ("EXECUTING before_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) >= point) - goto fail; - break; - - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) != point) - goto fail; - break; - - case after_dot: - DEBUG_PRINT1 ("EXECUTING 
after_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) <= point) - goto fail; - break; -#if 0 /* not emacs19 */ - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point) - goto fail; - break; -#endif /* not emacs19 */ - - case syntaxspec: - DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchsyntax; - - case wordchar: - DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); - mcnt = (int) Sword; - matchsyntax: - PREFETCH (); - /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ - d++; - if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - - case notsyntaxspec: - DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchnotsyntax; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); - mcnt = (int) Sword; - matchnotsyntax: - PREFETCH (); - /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ - d++; - if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - -#else /* not emacs */ - case wordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); - PREFETCH (); - if (!WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); - PREFETCH (); - if (WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; -#endif /* not emacs */ - - default: - abort (); - } - continue; /* Successfully executed one pattern command; keep going. */ - - - /* We goto here if a matching operation fails. */ - fail: - if (!FAIL_STACK_EMPTY ()) - { /* A restart point is known. Restore to that state. */ - DEBUG_PRINT1 ("\nFAIL:\n"); - POP_FAILURE_POINT (d, p, - lowest_active_reg, highest_active_reg, - regstart, regend, reg_info); - - /* If this failure point is a dummy, try the next one. */ - if (!p) - goto fail; - - /* If we failed to the end of the pattern, don't examine *p. 
*/ - assert (p <= pend); - if (p < pend) - { - boolean is_a_jump_n = false; - - /* If failed to a backwards jump that's part of a repetition - loop, need to pop this failure point and use the next one. */ - switch ((re_opcode_t) *p) - { - case jump_n: - is_a_jump_n = true; - case maybe_pop_jump: - case pop_failure_jump: - case jump: - p1 = p + 1; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - - if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) - || (!is_a_jump_n - && (re_opcode_t) *p1 == on_failure_jump)) - goto fail; - break; - default: - /* do nothing */ ; - } - } - - if (d >= string1 && d <= end1) - dend = end_match_1; - } - else - break; /* Matching at this starting point really fails. */ - } /* for (;;) */ - - if (best_regs_set) - goto restore_best_regs; - - FREE_VARIABLES (); - - return -1; /* Failure to match. */ -} /* re_match_2 */ - -/* Subroutine definitions for re_match_2. */ - - -/* We are passed P pointing to a register number after a start_memory. - - Return true if the pattern up to the corresponding stop_memory can - match the empty string, and false otherwise. - - If we find the matching stop_memory, sets P to point to one past its number. - Otherwise, sets P to an undefined byte less than or equal to END. - - We don't handle duplicates properly (yet). */ - -static boolean -group_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - /* Point to after the args to the start_memory. */ - unsigned char *p1 = *p + 2; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and return true or - false, as appropriate, when we get to one that can't, or to the - matching stop_memory. */ - - switch ((re_opcode_t) *p1) - { - /* Could be either a loop or a series of alternatives. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - /* If the next operation is not a jump backwards in the - pattern. 
*/ - - if (mcnt >= 0) - { - /* Go through the on_failure_jumps of the alternatives, - seeing if any of the alternatives cannot match nothing. - The last alternative starts with only a jump, - whereas the rest start with on_failure_jump and end - with a jump, e.g., here is the pattern for `a|b|c': - - /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 - /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 - /exactn/1/c - - So, we have to first go through the first (n-1) - alternatives and then deal with the last one separately. */ - - - /* Deal with the first (n-1) alternatives, which start - with an on_failure_jump (see above) that jumps to right - past a jump_past_alt. */ - - while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) - { - /* `mcnt' holds how many bytes long the alternative - is, including the ending `jump_past_alt' and - its number. */ - - if (!alt_match_null_string_p (p1, p1 + mcnt - 3, - reg_info)) - return false; - - /* Move to right after this alternative, including the - jump_past_alt. */ - p1 += mcnt; - - /* Break if it's the beginning of an n-th alternative - that doesn't begin with an on_failure_jump. */ - if ((re_opcode_t) *p1 != on_failure_jump) - break; - - /* Still have to check that it's not an n-th - alternative that starts with an on_failure_jump. */ - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) - { - /* Get to the beginning of the n-th alternative. */ - p1 -= 3; - break; - } - } - - /* Deal with the last alternative: go back and get number - of the `jump_past_alt' just before it. `mcnt' contains - the length of the alternative. */ - EXTRACT_NUMBER (mcnt, p1 - 2); - - if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) - return false; - - p1 += mcnt; /* Get past the n-th alternative. 
*/ - } /* if mcnt > 0 */ - break; - - - case stop_memory: - assert (p1[1] == **p); - *p = p1 + 2; - return true; - - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return false; -} /* group_match_null_string_p */ - - -/* Similar to group_match_null_string_p, but doesn't deal with alternatives: - It expects P to be the first byte of a single alternative and END one - byte past the last. The alternative can contain groups. */ - -static boolean -alt_match_null_string_p (p, end, reg_info) - unsigned char *p, *end; - register_info_type *reg_info; -{ - int mcnt; - unsigned char *p1 = p; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and break when we get - to one that can't. */ - - switch ((re_opcode_t) *p1) - { - /* It's a loop. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - break; - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return true; -} /* alt_match_null_string_p */ - - -/* Deals with the ops common to group_match_null_string_p and - alt_match_null_string_p. - - Sets P to one after the op and its arguments, if any. */ - -static boolean -common_op_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - boolean ret; - int reg_no; - unsigned char *p1 = *p; - - switch ((re_opcode_t) *p1++) - { - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbeg: - case wordend: - case wordbound: - case notwordbound: -#ifdef emacs - case before_dot: - case at_dot: - case after_dot: -#endif - break; - - case start_memory: - reg_no = *p1; - assert (reg_no > 0 && reg_no <= MAX_REGNUM); - ret = group_match_null_string_p (&p1, end, reg_info); - - /* Have to set this here in case we're checking a group which - contains a group and a back reference to it. 
*/ - - if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; - - if (!ret) - return false; - break; - - /* If this is an optimized succeed_n for zero times, make the jump. */ - case jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (mcnt >= 0) - p1 += mcnt; - else - return false; - break; - - case succeed_n: - /* Get to the number of times to succeed. */ - p1 += 2; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - if (mcnt == 0) - { - p1 -= 4; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - } - else - return false; - break; - - case duplicate: - if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) - return false; - break; - - case set_number_at: - p1 += 4; - - default: - /* All other opcodes mean we cannot match the empty string. */ - return false; - } - - *p = p1; - return true; -} /* common_op_match_null_string_p */ - - -/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN - bytes; nonzero otherwise. */ - -static int -bcmp_translate (s1, s2, len, translate) - unsigned char *s1, *s2; - register int len; - char *translate; -{ - register unsigned char *p1 = s1, *p2 = s2; - while (len) - { - if (translate[*p1++] != translate[*p2++]) return 1; - len--; - } - return 0; -} - -/* Entry points for GNU code. */ - -/* re_compile_pattern is the GNU regular expression compiler: it - compiles PATTERN (of length SIZE) and puts the result in BUFP. - Returns 0 if the pattern was valid, otherwise an error string. - - Assumes the `allocated' (and perhaps `buffer') and `translate' fields - are set in BUFP on entry. - - We call regex_compile to do the actual compilation. */ - -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - int length; - struct re_pattern_buffer *bufp; -{ - reg_errcode_t ret; - - /* GNU code is written to assume at least RE_NREGS registers will be set - (and at least one extra will be -1). 
*/ - bufp->regs_allocated = REGS_UNALLOCATED; - - /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by - setting no_sub. */ - bufp->no_sub = 0; - - /* Match anchors at newline. */ - bufp->newline_anchor = 1; - - ret = regex_compile (pattern, length, re_syntax_options, bufp); - - return re_error_msg[(int) ret]; -} - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them unless specifically requested. */ - -#ifdef _REGEX_RE_COMP - -/* BSD has one and only one pattern buffer. */ -static struct re_pattern_buffer re_comp_buf; - -char * -re_comp (s) - const char *s; -{ - reg_errcode_t ret; - - if (!s) - { - if (!re_comp_buf.buffer) - return "No previous regular expression"; - return 0; - } - - if (!re_comp_buf.buffer) - { - re_comp_buf.buffer = (unsigned char *) malloc (200); - if (re_comp_buf.buffer == NULL) - return "Memory exhausted"; - re_comp_buf.allocated = 200; - - re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); - if (re_comp_buf.fastmap == NULL) - return "Memory exhausted"; - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - /* Match anchors at newlines. */ - re_comp_buf.newline_anchor = 1; - - ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); - - /* Yes, we're discarding `const' here. */ - return (char *) re_error_msg[(int) ret]; -} - - -int -re_exec (s) - const char *s; -{ - const int len = strlen (s); - return - 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); -} -#endif /* _REGEX_RE_COMP */ - -/* POSIX.2 functions. Don't define these for Emacs. */ - -#ifndef emacs - -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. 
Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' and `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. - - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. - - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for - the return codes and their meanings.) */ - -int -regcomp (preg, pattern, cflags) - regex_t *preg; - const char *pattern; - int cflags; -{ - reg_errcode_t ret; - unsigned syntax - = (cflags & REG_EXTENDED) ? - RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; - - /* regex_compile will allocate the space for the compiled pattern. */ - preg->buffer = 0; - preg->allocated = 0; - preg->used = 0; - - /* Don't bother to use a fastmap when searching. This simplifies the - REG_NEWLINE case: if we used a fastmap, we'd have to put all the - characters after newlines into the fastmap. This way, we just try - every character. */ - preg->fastmap = 0; - - if (cflags & REG_ICASE) - { - unsigned i; - - preg->translate = (char *) malloc (CHAR_SET_SIZE); - if (preg->translate == NULL) - return (int) REG_ESPACE; - - /* Map uppercase characters to corresponding lowercase ones. */ - for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = ISUPPER (i) ? 
tolower (i) : i; - } - else - preg->translate = NULL; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - /* It also changes the matching behavior. */ - preg->newline_anchor = 1; - } - else - preg->newline_anchor = 0; - - preg->no_sub = !!(cflags & REG_NOSUB); - - /* POSIX says a null character in the pattern terminates it, so we - can use strlen here in compiling the pattern. */ - ret = regex_compile (pattern, strlen (pattern), syntax, preg); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) ret = REG_EPAREN; - - return (int) ret; -} - - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *preg; - const char *string; - size_t nmatch; - regmatch_t pmatch[]; - int eflags; -{ - int ret; - struct re_registers regs; - regex_t private_preg; - int len = strlen (string); - boolean want_reg_info = !preg->no_sub && nmatch > 0; - - private_preg = *preg; - - private_preg.not_bol = !!(eflags & REG_NOTBOL); - private_preg.not_eol = !!(eflags & REG_NOTEOL); - - /* The user has told us exactly how many registers to return - information about, via `nmatch'. We have to pass that on to the - matching routines. 
*/ - private_preg.regs_allocated = REGS_FIXED; - - if (want_reg_info) - { - regs.num_regs = nmatch; - regs.start = TALLOC (nmatch, regoff_t); - regs.end = TALLOC (nmatch, regoff_t); - if (regs.start == NULL || regs.end == NULL) - return (int) REG_NOMATCH; - } - - /* Perform the searching operation. */ - ret = re_search (&private_preg, string, len, - /* start: */ 0, /* range: */ len, - want_reg_info ? &regs : (struct re_registers *) 0); - - /* Copy the register information to the POSIX structure. */ - if (want_reg_info) - { - if (ret >= 0) - { - unsigned r; - - for (r = 0; r < nmatch; r++) - { - pmatch[r].rm_so = regs.start[r]; - pmatch[r].rm_eo = regs.end[r]; - } - } - - /* If we needed the temporary register info, free the space now. */ - free (regs.start); - free (regs.end); - } - - /* We want zero return to mean success, unlike `re_search'. */ - return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; -} - - -/* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. We don't use PREG here. */ - -size_t -regerror (errcode, preg, errbuf, errbuf_size) - int errcode; - const regex_t *preg; - char *errbuf; - size_t errbuf_size; -{ - const char *msg; - size_t msg_size; - - if (errcode < 0 - || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) - /* Only error codes returned by the rest of the code should be passed - to this routine. If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = re_error_msg[errcode]; - - /* POSIX doesn't require that we do anything in this case, but why - not be nice. */ - if (! msg) - msg = "Success"; - - msg_size = strlen (msg) + 1; /* Includes the null. 
*/ - - if (errbuf_size != 0) - { - if (msg_size > errbuf_size) - { - strncpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; - } - else - strcpy (errbuf, msg); - } - - return msg_size; -} - - -/* Free dynamically allocated space used by PREG. */ - -void -regfree (preg) - regex_t *preg; -{ - if (preg->buffer != NULL) - free (preg->buffer); - preg->buffer = NULL; - - preg->allocated = 0; - preg->used = 0; - - if (preg->fastmap != NULL) - free (preg->fastmap); - preg->fastmap = NULL; - preg->fastmap_accurate = 0; - - if (preg->translate != NULL) - free (preg->translate); - preg->translate = NULL; -} - -#endif /* not emacs */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/regex.h b/regex.h deleted file mode 100644 index c6076c9..0000000 --- a/regex.h +++ /dev/null @@ -1,489 +0,0 @@ -#define _REGEX_RE_COMP - -/* Definitions for data structures and routines for the regular - expression library, version 0.12. - - Copyright (C) 1985, 89, 90, 91, 92, 1993 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#ifndef __REGEXP_LIBRARY_H__ -#define __REGEXP_LIBRARY_H__ - -/* POSIX says that <sys/types.h> must be included (by the caller) before - <regex.h>. 
*/ - -#ifdef VMS -/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it - should be there. */ -#include <stddef.h> -#endif - - -/* The following bits are used to determine the regexp syntax we - recognize. The set/not-set meanings are chosen so that Emacs syntax - remains the value 0. The bits are given in alphabetical order, and - the definitions shifted by one from the previous bit; thus, when we - add or remove a bit, only one other definition need change. */ -typedef unsigned reg_syntax_t; - -/* If this bit is not set, then \ inside a bracket expression is literal. - If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) - -/* If this bit is not set, then + and ? are operators, and \+ and \? are - literals. - If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) - -/* If this bit is set, then character classes are supported. They are: - [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], - [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. - If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) - -/* If this bit is set, then ^ and $ are always anchors (outside bracket - expressions, of course). - If this bit is not set, then it depends: - ^ is an anchor if it is at the beginning of a regular - expression or after an open-group or an alternation operator; - $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. - - This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because - POSIX draft 11.2 says that * etc. in leading positions is undefined. - We already implemented a previous draft which made those constructs - invalid, though, so we haven't changed the code back. 
*/ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) - -/* If this bit is set, then special characters are always special - regardless of where they are in the pattern. - If this bit is not set, then special characters are special only in - some contexts; otherwise they are ordinary. Specifically, - * + ? and intervals are only special when not after the beginning, - open-group, or alternation operator. */ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) - -/* If this bit is set, then *, +, ?, and { cannot be first in an re or - immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) - -/* If this bit is set, then . matches newline. - If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) - -/* If this bit is set, then . doesn't match NUL. - If not set, then it does. */ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) - -/* If this bit is set, nonmatching lists [^...] do not match newline. - If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) - -/* If this bit is set, either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. - If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -/* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) - -/* If this bit is set, newline is an alternation operator. - If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) - -/* If this bit is set, then `{...}' defines an interval, and \{ and \} - are literals. - If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) - -/* If this bit is set, (...) defines a group, and \( and \) are literals. - If not set, \(...\) defines a group, and ( and ) are literals. 
*/ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) - -/* If this bit is set, then \<digit> matches <digit>. - If not set, then \<digit> is a back-reference. */ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) - -/* If this bit is set, then | is an alternation operator, and \| is literal. - If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) - -/* If this bit is set, then an ending range point collating higher - than the starting range point, as in [z-a], is invalid. - If not set, then when ending range point collates higher than the - starting range point, the range is ignored. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) - -/* If this bit is set, then an unmatched ) is ordinary. - If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) - -/* This global variable defines the particular regexp syntax to use (for - some interfaces). When a regexp is compiled, the syntax used is - stored in the pattern buffer, so changing this does not affect - already-compiled regexps. */ -extern reg_syntax_t re_syntax_options; - -/* Define combinations of the above bits for the standard possibilities. - (The [[[ comments delimit what gets put into the Texinfo file, so - don't delete them!) 
*/ -/* [[[begin syntaxes]]] */ -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -#define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) - -/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - -/* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. 
*/ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) -/* [[[end syntaxes]]] */ - -/* Maximum number of duplicates an interval can allow. Some systems - (erroneously) define this in other header files, but we want our - value, so remove any previous define. */ -#ifdef RE_DUP_MAX -#undef RE_DUP_MAX -#endif -#define RE_DUP_MAX ((1 << 15) - 1) - - -/* POSIX `cflags' bits (i.e., information for `regcomp'). */ - -/* If this bit is set, then use extended regular expression syntax. - If not set, then use basic regular expression syntax. */ -#define REG_EXTENDED 1 - -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -#define REG_ICASE (REG_EXTENDED << 1) - -/* If this bit is set, then anchors do not match at newline - characters in the string. - If not set, then anchors do match at newlines. */ -#define REG_NEWLINE (REG_ICASE << 1) - -/* If this bit is set, then report only success or fail in regexec. - If not set, then returns differ between not matching and errors. */ -#define REG_NOSUB (REG_NEWLINE << 1) - - -/* POSIX `eflags' bits (i.e., information for regexec). */ - -/* If this bit is set, then the beginning-of-line operator doesn't match - the beginning of the string (presumably because it's not the - beginning of a line). - If not set, then the beginning-of-line operator does match the - beginning of the string. */ -#define REG_NOTBOL 1 - -/* Like REG_NOTBOL, except for the end-of-line. */ -#define REG_NOTEOL (1 << 1) - - -/* If any error codes are removed, changed, or added, update the - `re_error_msg' table in regex.c. */ -typedef enum -{ - REG_NOERROR = 0, /* Success. */ - REG_NOMATCH, /* Didn't find a match (for regexec). */ - - /* POSIX regcomp return error codes. (In the order listed in the - standard.) 
*/ - REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Not implemented. */ - REG_ECTYPE, /* Invalid character class name. */ - REG_EESCAPE, /* Trailing backslash. */ - REG_ESUBREG, /* Invalid back reference. */ - REG_EBRACK, /* Unmatched left bracket. */ - REG_EPAREN, /* Parenthesis imbalance. */ - REG_EBRACE, /* Unmatched \{. */ - REG_BADBR, /* Invalid contents of \{\}. */ - REG_ERANGE, /* Invalid range end. */ - REG_ESPACE, /* Ran out of memory. */ - REG_BADRPT, /* No preceding re for repetition op. */ - - /* Error codes we've added. */ - REG_EEND, /* Premature end. */ - REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ - REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ -} reg_errcode_t; - -/* This data structure represents a compiled pattern. Before calling - the pattern compiler, the fields `buffer', `allocated', `fastmap', - `translate', and `no_sub' can be set. After the pattern has been - compiled, the `re_nsub' field is available. All other fields are - private to the regex routines. */ - -struct re_pattern_buffer -{ -/* [[[begin pattern_buffer]]] */ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - char *translate; - - /* Number of subexpressions found by the compiler. 
*/ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - -/* [[[end pattern_buffer]]] */ -}; - -typedef struct re_pattern_buffer regex_t; - -/* Type for byte offsets within the string. POSIX mandates this. */ -typedef int regoff_t; - - -/* This is the structure we store register match data in. See - regex.texinfo for a full description of what registers match. */ -struct re_registers -{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -}; - - -/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, - `re_match_2' returns information about at least this many registers - the first time a `regs' structure is passed. */ -#ifndef RE_NREGS -#define RE_NREGS 30 -#endif - - -/* POSIX specification for registers. Aside from the different names than - `re_registers', POSIX uses an array of structures, instead of a - structure of arrays. 
*/ -typedef struct -{ - regoff_t rm_so; /* Byte offset from string's start to substring's start. */ - regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ -} regmatch_t; - -/* Declarations for routines. */ - -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -#define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -#define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - -/* Sets the current default syntax to SYNTAX, and return the old syntax. - You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); - -/* Compile the regular expression PATTERN, with length LENGTH - and syntax given by the global `re_syntax_options', into the buffer - BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, int length, - struct re_pattern_buffer *buffer)); - - -/* Compile a fastmap for the compiled pattern in BUFFER; used to - accelerate searches. Return 0 if successful and -2 if was an - internal error. */ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); - - -/* Search in the string STRING (with length LENGTH) for the pattern - compiled into BUFFER. Start searching at position START, for RANGE - characters. Return the starting position of the match, -1 for no - match, or -2 for an internal error. Also return register - information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); - - -/* Like `re_search', but search in the concatenation of STRING1 and - STRING2. 
Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); - - -/* Like `re_search', but return how many characters in STRING the regexp - in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); - - -/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); - - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using BUFFER and REGS will use this memory - for recording register information. STARTS and ENDS must be - allocated with malloc, and must each be at least `NUM_REGS * sizeof - (regoff_t)' bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); - -#ifdef _REGEX_RE_COMP -/* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); -#endif - -/* POSIX compatibility. 
*/ -extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); -extern int regexec - _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags)); -extern size_t regerror - _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, - size_t errbuf_size)); -extern void regfree _RE_ARGS ((regex_t *preg)); - -#endif /* not __REGEXP_LIBRARY_H__ */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/scanners.c b/scanners.c deleted file mode 100644 index f2a5d44..0000000 --- a/scanners.c +++ /dev/null @@ -1,1216 +0,0 @@ -/* scanners.c -- file & directory name manipulations - Copyright (C) 1986, 1995 Greg McGary - VHIL portions Copyright (C) 1988 Tom Horsley - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include <stdio.h> -#include <string.h> -#include <ctype.h> - -#include <config.h> -#include "strxtra.h" -#include "token.h" -#include "alloc.h" -#include "scanners.h" - -extern char const *program_name; - -static char const *get_token_VHIL __P((FILE *input_FILE, int *flags)); -static char const *get_token_c __P((FILE *input_FILE, int *flags)); -static void set_args_c __P((char const *lang_name, int op, char const *arg)); -static void set_ctype_c __P((char const *chars, int type)); -static void clear_ctype_c __P((char const *chars, int type)); -static void usage_c __P((char const *lang_name)); - -static char const *get_token_asm __P((FILE *input_FILE, int *flags)); -static void set_ctype_asm __P((char const *chars, int type)); -static void clear_ctype_asm __P((char const *chars, int type)); -static void usage_asm __P((char const *lang_name)); -static void set_args_asm __P((char const *lang_name, int op, char const *arg)); - -static char const *get_token_text __P((FILE *input_FILE, int *flags)); -static void set_ctype_text __P((char const *chars, int type)); -static void clear_ctype_text __P((char const *chars, int type)); -static void usage_text __P((char const *lang_name)); -static void set_args_text __P((char const *lang_name, int op, char const *arg)); - -/****************************************************************************/ - -typedef void (*set_args_t) __P((char const *lang_name, int op, char const *arg)); - -struct language -{ - char const *lang_name; - get_token_t lang_get_token; - set_args_t lang_set_args; - char const *lang_filter; - struct language *lang_next; -}; - -struct suffix -{ - char const *suff_suffix; - char const *suff_lang_name; - struct language *suff_language; - struct suffix *suff_next; -}; - -static struct suffix *get_suffix_entry (char const *suffix); -static struct language *get_lang_entry (char const *lang_name); -static void usage_scan (void); - -struct language languages_0[] = -{ - { "C", get_token_c, set_args_c, NULL }, 
- { "TeX", get_token_text, set_args_text, NULL }, - { "VHIL", get_token_VHIL, set_args_c, NULL }, - { "asm", get_token_asm, set_args_asm, NULL }, -/*{ "elisp", get_token_elisp, set_args_elisp, NULL },*/ - { "gzip", NULL, NULL, "zcat %s" }, - { "roff", get_token_text, set_args_text, "sed '/^\\.so/d' < %s | deroff" }, - { "text", get_token_text, set_args_text, NULL }, -}; -struct language *languages = languages_0; - -/* - This is a rather incomplete list of default associations - between suffixes and languages. You may add more to the - default list, or you may define them dynamically with the - `-S<suff>=<lang>' argument to mkid(1) and idx(1). e.g. to - associate a `.ada' suffix with the Ada language, use - `-S.ada=ada' -*/ -struct suffix suffixes_0[] = -{ - { "", "text" }, - { ".1", "roff" }, - { ".2", "roff" }, - { ".3", "roff" }, - { ".4", "roff" }, - { ".5", "roff" }, - { ".6", "roff" }, - { ".7", "roff" }, - { ".8", "roff" }, - { ".C", "C" }, - { ".H", "C" }, - { ".Z", "gzip" }, - { ".c", "C" }, - { ".cc", "C" }, - { ".cpp", "C" }, - { ".cxx", "C" }, - { ".doc", "text" }, -/*{ ".el", "elisp" },*/ - { ".gz", "gzip" }, - { ".h", "C" }, - { ".hh", "C" }, - { ".hpp", "C" }, - { ".hxx", "C" }, - { ".l", "C" }, - { ".lex", "C" }, - { ".ltx", "TeX" }, - { ".p", "pas" }, - { ".pas", "pas" }, - { ".s", "asm" }, - { ".S", "asm" }, - { ".tex", "TeX" }, - { ".x", "VHIL" }, - { ".y", "C" }, - { ".yacc", "C" }, - { ".z", "gzip" }, -}; -struct suffix *suffixes = suffixes_0; - -void -init_scanners (void) -{ - struct language *lang; - struct language *lang_N = &languages_0[(sizeof (languages_0) / sizeof (languages_0[0])) - 1]; - struct suffix *suff; - struct suffix *suff_N = &suffixes_0[(sizeof (suffixes_0) / sizeof (suffixes_0[0])) - 1]; - - for (lang = languages; lang <= lang_N; ++lang) - lang->lang_next = lang + 1; - lang_N->lang_next = NULL; - - for (suff = suffixes; suff <= suff_N; ++suff) { - lang = get_lang_entry (suff->suff_lang_name); - if (lang) - suff->suff_language 
= lang; - suff->suff_next = suff + 1; - } - suff_N->suff_next = NULL; -} - -/* Return a suffix table entry for the given suffix. */ -static struct suffix * -get_suffix_entry (char const *suffix) -{ - struct suffix *stp; - - if (suffix == NULL) - suffix = ""; - - for (stp = suffixes; stp; stp = stp->suff_next) - if (strequ (stp->suff_suffix, suffix)) - return stp; - return NULL; -} - -static struct language * -get_lang_entry (char const *lang_name) -{ - struct language *ltp; - - if (lang_name == NULL) - lang_name = ""; - - for (ltp = languages; ltp; ltp = ltp->lang_next) - if (ltp->lang_name == lang_name || strequ (ltp->lang_name, lang_name)) - return ltp; - return ltp; -} - -char const * -get_lang_name (char const *suffix) -{ - struct suffix *stp; - - stp = get_suffix_entry (suffix); - if (stp == NULL) - return NULL; - return stp->suff_language->lang_name; -} - -char const * -get_filter (char const *suffix) -{ - struct suffix *stp; - - stp = get_suffix_entry (suffix); - if (stp == NULL) - return NULL; - return stp->suff_language->lang_filter; -} - -get_token_t -get_scanner (char const *lang) -{ - struct language *ltp; - - ltp = get_lang_entry (lang); - if (ltp == NULL) - return NULL; - return ltp->lang_get_token; -} - -void -set_scan_args (int op, char *arg) -{ - struct language *ltp, *ltp2; - struct suffix *stp; - char *lhs; - char *lhs2; - int count = 0; - - lhs = arg; - while (isalnum (*arg) || *arg == '.') - arg++; - - if (strequ (lhs, "?=?")) - { - for (stp = suffixes; stp->suff_next; stp = stp->suff_next) - { - printf ("%s%s=%s", (count++ > 0) ? 
", " : "", stp->suff_suffix, stp->suff_language->lang_name); - if (stp->suff_language->lang_filter) - printf (" (%s)", stp->suff_language->lang_filter); - } - if (count) - putchar ('\n'); - return; - } - - if (strnequ (lhs, "?=", 2)) - { - lhs += 2; - ltp = get_lang_entry (lhs); - if (ltp == NULL) - { - printf ("No scanner for language `%s'\n", lhs); - return; - } - for (stp = suffixes; stp->suff_next; stp = stp->suff_next) - if (stp->suff_language == ltp) - { - printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, ltp->lang_name); - if (stp->suff_language->lang_filter) - printf (" (%s)", stp->suff_language->lang_filter); - } - if (count) - putchar ('\n'); - return; - } - - if (strequ (arg, "=?")) - { - lhs[strlen (lhs) - 2] = '\0'; - stp = get_suffix_entry (lhs); - if (stp == NULL) - { - printf ("No scanner assigned to suffix `%s'\n", lhs); - return; - } - printf ("%s=%s", stp->suff_suffix, stp->suff_language->lang_name); - if (stp->suff_language->lang_filter) - printf (" (%s)", stp->suff_language->lang_filter); - printf ("\n"); - return; - } - - if (*arg == '=') - { - *arg++ = '\0'; - - ltp = get_lang_entry (arg); - if (ltp == NULL) - { - fprintf (stderr, "%s: Language undefined: %s\n", program_name, arg); - return; - } - stp = get_suffix_entry (lhs); - if (stp == NULL) - { - stp = CALLOC (struct suffix, 1); - stp->suff_suffix = lhs; - stp->suff_language = ltp; - stp->suff_next = suffixes; - suffixes = stp; - } - else if (!strequ (arg, stp->suff_language->lang_name)) - { - fprintf (stderr, "%s: Note: `%s=%s' overrides `%s=%s'\n", program_name, lhs, arg, lhs, stp->suff_language->lang_name); - stp->suff_language = ltp; - } - return; - } - else if (*arg == '/') - { - *arg++ = '\0'; - ltp = get_lang_entry (lhs); - if (ltp->lang_next == NULL) - { - ltp = CALLOC (struct language, 1); - ltp->lang_name = lhs; - ltp->lang_get_token = get_token_text; - ltp->lang_set_args = set_args_text; - ltp->lang_filter = NULL; - ltp->lang_next = languages; - languages = ltp; 
- } - lhs2 = arg; - arg = strchr (arg, '/'); - if (arg == NULL) - ltp2 = ltp; - else - { - *arg++ = '\0'; - ltp2 = get_lang_entry (lhs2); - if (ltp2 == NULL) - { - fprintf (stderr, "%s: language %s not defined.\n", program_name, lhs2); - ltp2 = ltp; - } - } - ltp->lang_get_token = ltp2->lang_get_token; - ltp->lang_set_args = ltp2->lang_set_args; - if (ltp->lang_filter && (!strequ (arg, ltp->lang_filter))) - fprintf (stderr, "%s: Note: `%s/%s' overrides `%s/%s'\n", program_name, lhs, arg, lhs, ltp->lang_filter); - ltp->lang_filter = arg; - return; - } - - if (op == '+') - { - switch (op = *arg++) - { - case '+': - case '-': - case '?': - break; - default: - usage_scan (); - } - for (ltp = languages; ltp->lang_next; ltp = ltp->lang_next) - (*ltp->lang_set_args) (NULL, op, arg); - return; - } - - if (*arg == '-' || *arg == '+' || *arg == '?') - { - op = *arg; - *arg++ = '\0'; - - ltp = get_lang_entry (lhs); - if (ltp == NULL) - { - fprintf (stderr, "%s: Language undefined: %s\n", program_name, lhs); - return; - } - (*ltp->lang_set_args) (lhs, op, arg); - return; - } - - usage_scan (); -} - -static void -usage_scan (void) -{ - fprintf (stderr, "Usage: %s [-S<suffix>=<lang>] [+S(+|-)<arg>] [-S<lang>(+|-)<arg>] [-S<lang>/<lang>/<filter>]\n", program_name); - exit (1); -} - -/*************** C & C++ ****************************************************/ - -#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */ -#define DG 0x0002 /* decimal digit [0-9] */ -#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */ -#define C1 0x0008 /* C comment introduction char: / */ -#define C2 0x0010 /* C comment termination char: * */ -#define Q1 0x0020 /* single quote: ' */ -#define Q2 0x0040 /* double quote: " */ -#define ES 0x0080 /* escape char: \ */ -#define NL 0x0100 /* newline: \n */ -#define EF 0x0200 /* EOF */ -#define SK 0x0400 /* Make these chars valid for names within strings */ -#define VH 0x0800 /* VHIL comment introduction char: # */ -#define WS 0x1000 
/* White space characters */ - -/* - character class membership macros: -*/ -#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */ -#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */ -#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */ -#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */ -#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */ -#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */ -#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */ -/* - The `BORING' classes should be skipped over - until something interesting comes along... -*/ -#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */ -#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */ -#define ISVBORING(c) (!((rct)[c] & (EF|NL))) /* vhil comment fluff */ -#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */ -#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */ - -static unsigned short ctype_c[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, NL, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1, -/*050*/ 0, 0, C2, 0, 0, 0, 0, C1, -/*060*/ DG, DG, DG, DG, DG, DG, DG, DG, -/*070*/ DG, DG, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, -}; - -static int eat_underscore = 1; -static int scan_VHIL = 0; - -static char const * -get_token_VHIL (FILE *input_FILE, int *flags) -{ - if (!scan_VHIL) - set_args_c ("vhil", '+', "v"); - return get_token_c (input_FILE, flags); -} - -/* - 
Grab the next identifier from the C source - file opened with the handle `input_FILE'. - This state machine is built for speed, not elegance. -*/ -static char const * -get_token_c (FILE *input_FILE, int *flags) -{ - static char input_buffer[BUFSIZ]; - static int new_line = 1; - unsigned short *rct = &ctype_c[1]; - int c; - char *id = input_buffer; - -top: - c = getc (input_FILE); - if (new_line) - { - new_line = 0; - if (c == '.') - { - /* Auto-recognize vhil code when you see a '.' in column 1. - also ignore lines that start with a '.' */ - if (!scan_VHIL) - set_args_c ("vhil", '+', "v"); - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - if (c != '#') - goto next; - c = getc (input_FILE); - if (scan_VHIL && ISSPACE (c)) - { - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - while (ISBORING (c)) - c = getc (input_FILE); - if (!ISID1ST (c)) - goto next; - id = input_buffer; - *id++ = c; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - *id = '\0'; - if (strequ (input_buffer, "include")) - { - while (c == ' ' || c == '\t') - c = getc (input_FILE); - if (c == '\n') - { - new_line = 1; - goto top; - } - id = input_buffer; - if (c == '"') - { - c = getc (input_FILE); - while (c != '\n' && c != EOF && c != '"') - { - *id++ = c; - c = getc (input_FILE); - } - *flags = TOK_STRING; - } - else if (c == '<') - { - c = getc (input_FILE); - while (c != '\n' && c != EOF && c != '>') - { - *id++ = c; - c = getc (input_FILE); - } - *flags = TOK_STRING; - } - else if (ISID1ST (c)) - { - *id++ = c; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - *flags = TOK_NAME; - } - else - { - while (c != '\n' && c != EOF) - c = getc (input_FILE); - new_line = 1; - goto top; - } - while (c != '\n' && c != EOF) - c = getc (input_FILE); - new_line = 1; - *id = '\0'; - return input_buffer; - } - if (strnequ (input_buffer, "if", 2) - || strequ (input_buffer, "define") - || strequ (input_buffer, "elif") /* ansi C */ - 
|| (scan_VHIL && strequ (input_buffer, "elsif")) - || strequ (input_buffer, "undef")) - goto next; - while ((c != '\n') && (c != EOF)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - -next: - while (ISBORING (c)) - c = getc (input_FILE); - - switch (c) - { - case '"': - id = input_buffer; - *id++ = c = getc (input_FILE); - for (;;) - { - while (ISQ2BORING (c)) - *id++ = c = getc (input_FILE); - if (c == '\\') - { - *id++ = c = getc (input_FILE); - continue; - } - else if (c != '"') - goto next; - break; - } - *--id = '\0'; - id = input_buffer; - while (ISSTRKEEP (*id)) - id++; - if (*id || id == input_buffer) - { - c = getc (input_FILE); - goto next; - } - *flags = TOK_STRING; - if (eat_underscore && input_buffer[0] == '_' && input_buffer[1]) - return &input_buffer[1]; - else - return input_buffer; - - case '\'': - c = getc (input_FILE); - for (;;) - { - while (ISQ1BORING (c)) - c = getc (input_FILE); - if (c == '\\') - { - c = getc (input_FILE); - continue; - } - else if (c == '\'') - c = getc (input_FILE); - goto next; - } - - case '/': - c = getc (input_FILE); - if (c == '/') - { /* Cope with C++ comment */ - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - else if (c != '*') - goto next; - c = getc (input_FILE); - for (;;) - { - while (ISCBORING (c)) - c = getc (input_FILE); - c = getc (input_FILE); - if (c == '/') - { - c = getc (input_FILE); - goto next; - } - else if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - } - - case '\n': - new_line = 1; - goto top; - - case '#': - if (!scan_VHIL) - { - /* Auto-recognize vhil when find a # in the middle of a line. 
*/ - set_args_c ("vhil", '+', "v"); - } - c = getc (input_FILE); - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - default: - if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - id = input_buffer; - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - } - else if (ISDIGIT (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (input_FILE))) - *id++ = c; - } - else - fprintf (stderr, "junk: `\\%3o'", c); - ungetc (c, input_FILE); - *id = '\0'; - *flags |= TOK_LITERAL; - return input_buffer; - } -} - -static void -set_ctype_c (char const *chars, int type) -{ - unsigned short *rct = &ctype_c[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ctype_c (char const *chars, int type) -{ - unsigned short *rct = &ctype_c[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -usage_c (char const *lang_name) -{ - fprintf (stderr, "Usage: %s does not accept %s scanner arguments\n", program_name, lang_name); - exit (1); -} - -static char document_c[] = "\ -The C scanner arguments take the form -Sc<arg>, where <arg>\n\ -is one of the following: (<cc> denotes one or more characters)\n\ - (+|-)u . . . . (Do|Don't) strip a leading `_' from ids in strings.\n\ - (+|-)s<cc> . . Allow <cc> in string ids, and (keep|ignore) those ids.\n\ - -v . . . . . . 
Skip vhil comments."; - -static void -set_args_c (char const *lang_name, int op, char const *arg) -{ - if (op == '?') - { - puts (document_c); - return; - } - switch (*arg++) - { - case 'u': - eat_underscore = (op == '+'); - break; - case 's': - if (op == '+') - set_ctype_c (arg, SK); - else - clear_ctype_c (arg, SK); - break; - case 'v': - set_ctype_c ("$", I1); - set_ctype_c ("#", VH); - set_ctype_c (" \t", WS); - scan_VHIL = 1; - break; - default: - if (lang_name) - usage_c (lang_name); - break; - } -} - -#undef I1 -#undef DG -#undef NM -#undef C1 -#undef C2 -#undef Q1 -#undef Q2 -#undef ES -#undef NL -#undef EF -#undef SK -#undef VH -#undef WS -#undef ISDIGIT -#undef ISNUMBER -#undef ISEOF -#undef ISID1ST -#undef ISIDREST -#undef ISSTRKEEP -#undef ISSPACE -#undef ISBORING -#undef ISCBORING -#undef ISVBORING -#undef ISQ1BORING -#undef ISQ2BORING - -/*************** Assembly ***************************************************/ - -#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ -#define NM 0x02 /* digit [0-9a-fA-FxX] */ -#define NL 0x04 /* newline: \n */ -#define CM 0x08 /* assembler comment char: usually # or | */ -#define IG 0x10 /* ignore `identifiers' with these chars in them */ -#define C1 0x20 /* C comment introduction char: / */ -#define C2 0x40 /* C comment termination char: * */ -#define EF 0x80 /* EOF */ - -/* Assembly Language character classes */ -#define ISID1ST(c) ((rct)[c] & (I1)) -#define ISIDREST(c) ((rct)[c] & (I1|NM)) -#define ISNUMBER(c) ((rct)[c] & (NM)) -#define ISEOF(c) ((rct)[c] & (EF)) -#define ISCOMMENT(c) ((rct)[c] & (CM)) -#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1))) -#define ISCBORING(c) (!((rct)[c] & (EF|NL))) -#define ISCCBORING(c) (!((rct)[c] & (EF|C2))) -#define ISIGNORE(c) ((rct)[c] & (IG)) - -static unsigned char ctype_asm[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, NL, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 
0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*050*/ 0, 0, C2, 0, 0, 0, 0, C1, -/*060*/ NM, NM, NM, NM, NM, NM, NM, NM, -/*070*/ NM, NM, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, - -}; - -static int cpp_on_asm = 1; - -/* - Grab the next identifier the assembly language - source file opened with the handle `input_FILE'. - This state machine is built for speed, not elegance. -*/ -static char const * -get_token_asm (FILE *input_FILE, int *flags) -{ - static char input_buffer[BUFSIZ]; - unsigned char *rct = &ctype_asm[1]; - int c; - char *id = input_buffer; - static int new_line = 1; - -top: - c = getc (input_FILE); - if (cpp_on_asm > 0 && new_line) - { - new_line = 0; - if (c != '#') - goto next; - while (ISBORING (c)) - c = getc (input_FILE); - if (!ISID1ST (c)) - goto next; - id = input_buffer; - *id++ = c; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - *id = '\0'; - if (strequ (input_buffer, "include")) - { - while (c != '"' && c != '<') - c = getc (input_FILE); - id = input_buffer; - *id++ = c = getc (input_FILE); - while ((c = getc (input_FILE)) != '"' && c != '>') - *id++ = c; - *id = '\0'; - *flags = TOK_STRING; - return input_buffer; - } - if (strnequ (input_buffer, "if", 2) - || strequ (input_buffer, "define") - || strequ (input_buffer, "undef")) - goto next; - while (c != '\n') - c = getc (input_FILE); - new_line = 1; - goto top; - } - -next: - while (ISBORING (c)) - c = getc (input_FILE); - - if (ISCOMMENT (c)) - { - while (ISCBORING (c)) - c = getc (input_FILE); - new_line = 1; - } - - if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - - if (c == '\n') - { - new_line = 1; - goto 
top; - } - - if (c == '/') - { - if ((c = getc (input_FILE)) != '*') - goto next; - c = getc (input_FILE); - for (;;) - { - while (ISCCBORING (c)) - c = getc (input_FILE); - c = getc (input_FILE); - if (c == '/') - { - c = getc (input_FILE); - break; - } - else if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - } - goto next; - } - - id = input_buffer; - if (eat_underscore && c == '_' && !ISID1ST (c = getc (input_FILE))) - { - ungetc (c, input_FILE); - return "_"; - } - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - } - else if (ISNUMBER (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (input_FILE))) - *id++ = c; - } - else - { - if (isprint (c)) - fprintf (stderr, "junk: `%c'", c); - else - fprintf (stderr, "junk: `\\%03o'", c); - goto next; - } - - *id = '\0'; - for (id = input_buffer; *id; id++) - if (ISIGNORE (*id)) - goto next; - ungetc (c, input_FILE); - *flags |= TOK_LITERAL; - return input_buffer; -} - -static void -set_ctype_asm (char const *chars, int type) -{ - unsigned char *rct = &ctype_asm[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ctype_asm (char const *chars, int type) -{ - unsigned char *rct = &ctype_asm[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -usage_asm (char const *lang_name) -{ - fprintf (stderr, "Usage: %s -S%s([-c<cc>] [-u] [(+|-)a<cc>] [(+|-)p] [(+|-)C])\n", program_name, lang_name); - exit (1); -} - -static char document_asm[] = "\ -The Assembler scanner arguments take the form -Sasm<arg>, where\n\ -<arg> is one of the following: (<cc> denotes one or more characters)\n\ - -c<cc> . . . . <cc> introduce(s) a comment until end-of-line.\n\ - (+|-)u . . . . (Do|Don't) strip a leading `_' from ids.\n\ - (+|-)a<cc> . . Allow <cc> in ids, and (keep|ignore) those ids.\n\ - (+|-)p . . . . (Do|Don't) handle C-preprocessor directives.\n\ - (+|-)C . . . . (Do|Don't) handle C-style comments. 
(/* */)"; - -static void -set_args_asm (char const *lang_name, int op, char const *arg) -{ - if (op == '?') - { - puts (document_asm); - return; - } - switch (*arg++) - { - case 'a': - set_ctype_asm (arg, I1 | ((op == '-') ? IG : 0)); - break; - case 'c': - set_ctype_asm (arg, CM); - break; - case 'u': - eat_underscore = (op == '+'); - break; - case 'p': - cpp_on_asm = (op == '+'); - break; - case 'C': - if (op == '+') - { - set_ctype_asm ("/", C1); - set_ctype_asm ("*", C2); - } - else - { - clear_ctype_asm ("/", C1); - clear_ctype_asm ("*", C2); - } - break; - default: - if (lang_name) - usage_asm (lang_name); - break; - } -} - -#undef I1 -#undef NM -#undef NL -#undef CM -#undef IG -#undef C1 -#undef C2 -#undef EF -#undef ISID1ST -#undef ISIDREST -#undef ISNUMBER -#undef ISEOF -#undef ISCOMMENT -#undef ISBORING -#undef ISCBORING -#undef ISCCBORING -#undef ISIGNORE - -/*************** Text *******************************************************/ - -#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ -#define NM 0x02 /* digit [0-9a-fA-FxX] */ -#define SQ 0x04 /* squeeze these out (.,',-) */ -#define EF 0x80 /* EOF */ - -/* Text character classes */ -#define ISID1ST(c) ((rct)[c] & (I1)) -#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ)) -#define ISNUMBER(c) ((rct)[c] & (NM)) -#define ISEOF(c) ((rct)[c] & (EF)) -#define ISBORING(c) (!((rct)[c] & (I1|NM|EF))) -#define ISIDSQUEEZE(c) ((rct)[c] & (SQ)) - -static unsigned char ctype_text[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*050*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*060*/ NM, NM, NM, NM, NM, NM, NM, NM, -/*070*/ NM, NM, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, 0, 0, 
0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, -}; - -/* - Grab the next identifier the text source file opened with the - handle `input_FILE'. This state machine is built for speed, not - elegance. -*/ -static char const * -get_token_text (FILE *input_FILE, int *flags) -{ - static char input_buffer[BUFSIZ]; - unsigned char *rct = &ctype_text[1]; - int c; - char *id = input_buffer; - -top: - c = getc (input_FILE); - while (ISBORING (c)) - c = getc (input_FILE); - if (ISEOF (c)) - return NULL; - id = input_buffer; - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (input_FILE))) - if (!ISIDSQUEEZE (c)) - *id++ = c; - } - else if (ISNUMBER (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (input_FILE))) - *id++ = c; - } - else - { - if (isprint (c)) - fprintf (stderr, "junk: `%c'", c); - else - fprintf (stderr, "junk: `\\%03o'", c); - goto top; - } - - *id = '\0'; - ungetc (c, input_FILE); - *flags |= TOK_LITERAL; - return input_buffer; -} - -static void -set_ctype_text (char const *chars, int type) -{ - unsigned char *rct = &ctype_text[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ctype_text (char const *chars, int type) -{ - unsigned char *rct = &ctype_text[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -usage_text (char const *lang_name) -{ - fprintf (stderr, "Usage: %s -S%s([(+|-)a<cc>] [(+|-)s<cc>]\n", program_name, lang_name); - exit (1); -} - -static char document_text[] = "\ -The Text scanner arguments take the form -Stext<arg>, where\n\ -<arg> is one of the following: (<cc> denotes one or more characters)\n\ - (+|-)a<cc> . . Include (or exculde) <cc> in ids.\n\ - (+|-)s<cc> . . 
Squeeze (or don't squeeze) <cc> out of ids."; - -static void -set_args_text (char const *lang_name, int op, char const *arg) -{ - if (op == '?') - { - puts (document_text); - return; - } - switch (*arg++) - { - case 'a': - if (op == '+') - set_ctype_text (arg, I1); - else - clear_ctype_text (arg, I1); - break; - case 's': - if (op == '+') - set_ctype_text (arg, SQ); - else - clear_ctype_text (arg, SQ); - break; - default: - if (lang_name) - usage_text (lang_name); - break; - } -} - -#undef I1 -#undef NM -#undef SQ -#undef EF -#undef ISID1ST -#undef ISIDREST -#undef ISNUMBER -#undef ISEOF -#undef ISBORING -#undef ISIDSQUEEZE diff --git a/scanners.h b/scanners.h deleted file mode 100644 index 7f6eb30..0000000 --- a/scanners.h +++ /dev/null @@ -1,30 +0,0 @@ -/* scanners.h -- defs for interface to scanners.c - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _scanners_h_ -#define _scanners_h_ - -typedef char const *(*get_token_t) __P((FILE*, int*)); - -char const *get_lang_name __P((char const *suffix)); -char const *get_filter __P((char const *suffix)); -get_token_t get_scanner __P((char const *lang_name)); -void set_scan_args __P((int op, char *arg)); -void init_scanners __P((void)); - -#endif /* not _scanners_h_ */ diff --git a/stamp-vti b/stamp-vti deleted file mode 100644 index 9788f70..0000000 --- a/stamp-vti +++ /dev/null @@ -1 +0,0 @@ -timestamp diff --git a/strcasecmp.c b/strcasecmp.c deleted file mode 100644 index cd038e3..0000000 --- a/strcasecmp.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 1987 Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms are permitted - * provided that this notice is preserved and that due credit is given - * to the University of California at Berkeley. The name of the University - * may not be used to endorse or promote products derived from this - * software without specific written prior permission. This software - * is provided ``as is'' without express or implied warranty. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)strcasecmp.c 5.5 (Berkeley) 11/24/87"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/types.h> -#include <string.h> - -/* - * This array is designed for mapping upper and lower case letter - * together for a case independent comparison. The mappings are -p * based upon ascii character sequences. 
- */ -static unsigned char charmap[] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', 
'\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', -}; - -int -strcasecmp(char const *s1, char const *s2) -{ - unsigned char u1, u2; - - for (;;) { - u1 = (unsigned char) *s1++; - u2 = (unsigned char) *s2++; - if (charmap[u1] != charmap[u2]) { - return charmap[u1] - charmap[u2]; - } - if (u1 == '\0') { - return 0; - } - } -} - diff --git a/strxtra.h b/strxtra.h deleted file mode 100644 index d992c03..0000000 --- a/strxtra.h +++ /dev/null @@ -1,41 +0,0 @@ -/* strxtra.c -- convenient string manipulation macros - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _strxtra_h_ -#define _strxtra_h_ - -#if HAVE_STDLIB_H -#include <stdlib.h> -#else /* not HAVE_STDLIB_H */ -#if HAVE_MALLOC_H -#include <malloc.h> -#endif /* HAVE_MALLOC_H */ -#endif /* not HAVE_STDLIB_H */ - -#define strequ(s1, s2) (strcmp ((s1), (s2)) == 0) -#define strnequ(s1, s2, n) (strncmp ((s1), (s2), (n)) == 0) -#define strcaseequ(s1, s2) (strcasecmp ((s1), (s2)) == 0) -#define strncaseequ(s1, s2, n) (strncasecmp ((s1), (s2), (n)) == 0) -#ifndef HAVE_STRDUP -#define strdup(s) (strcpy (calloc (1, strlen (s) + 1), (s))) -#else -char *strdup (); -#endif -#define strndup(s, n) (strncpy (calloc (1, (n)+1), (s), (n))) - -#endif /* not _strxtra_h_ */ diff --git a/texinfo.tex b/texinfo.tex deleted file mode 100644 index dfd57a9..0000000 --- a/texinfo.tex +++ /dev/null @@ -1,4421 +0,0 @@ -%% TeX macros to handle texinfo files - -% Copyright (C) 1985, 86, 88, 90, 91, 92, 93, 1994 Free Software Foundation, Inc. - -%This texinfo.tex file is free software; you can redistribute it and/or -%modify it under the terms of the GNU General Public License as -%published by the Free Software Foundation; either version 2, or (at -%your option) any later version. - -%This texinfo.tex file is distributed in the hope that it will be -%useful, but WITHOUT ANY WARRANTY; without even the implied warranty -%of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -%General Public License for more details. - -%You should have received a copy of the GNU General Public License -%along with this texinfo.tex file; see the file COPYING. If not, write -%to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, -%USA. - - -%In other words, you are welcome to use, share and improve this program. -%You are forbidden to forbid anyone else to use, share and improve -%what you give them. Help stamp out software-hoarding! - - -% Send bug reports to bug-texinfo@prep.ai.mit.edu. -% Please include a *precise* test case in each bug report. 
- - -% Make it possible to create a .fmt file just by loading this file: -% if the underlying format is not loaded, start by loading it now. -% Added by gildea November 1993. -\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi - -% This automatically updates the version number based on RCS. -\def\deftexinfoversion$#1: #2 ${\def\texinfoversion{#2}} -\deftexinfoversion$Revision$ -\message{Loading texinfo package [Version \texinfoversion]:} - -% If in a .fmt file, print the version number -% and turn on active characters that we couldn't do earlier because -% they might have appeared in the input file name. -\everyjob{\message{[Texinfo version \texinfoversion]}\message{} - \catcode`+=\active \catcode`\_=\active} - -% Save some parts of plain tex whose names we will redefine. - -\let\ptextilde=\~ -\let\ptexlbrace=\{ -\let\ptexrbrace=\} -\let\ptexdots=\dots -\let\ptexdot=\. -\let\ptexstar=\* -\let\ptexend=\end -\let\ptexbullet=\bullet -\let\ptexb=\b -\let\ptexc=\c -\let\ptexi=\i -\let\ptext=\t -\let\ptexl=\l -\let\ptexL=\L - -% Be sure we're in horizontal mode when doing a tie, since we make space -% equivalent to this in @example-like environments. Otherwise, a space -% at the beginning of a line will start with \penalty -- and -% since \penalty is valid in vertical mode, we'd end up putting the -% penalty on the vertical list instead of in the new paragraph. -{\catcode`@ = 11 - \gdef\tie{\leavevmode\penalty\@M\ } -} -\let\~ = \tie % And make it available as @~. - -\message{Basics,} -\chardef\other=12 - -% If this character appears in an error message or help string, it -% starts a new line in the output. -\newlinechar = `^^J - -% Set up fixed words for English. 
-\ifx\putwordChapter\undefined{\gdef\putwordChapter{Chapter}}\fi% -\def\putwordInfo{Info}% -\ifx\putwordSee\undefined{\gdef\putwordSee{See}}\fi% -\ifx\putwordsee\undefined{\gdef\putwordsee{see}}\fi% -\ifx\putwordfile\undefined{\gdef\putwordfile{file}}\fi% -\ifx\putwordpage\undefined{\gdef\putwordpage{page}}\fi% -\ifx\putwordsection\undefined{\gdef\putwordsection{section}}\fi% -\ifx\putwordSection\undefined{\gdef\putwordSection{Section}}\fi% -\ifx\putwordTableofContents\undefined{\gdef\putwordTableofContents{Table of Contents}}\fi% -\ifx\putwordShortContents\undefined{\gdef\putwordShortContents{Short Contents}}\fi% -\ifx\putwordAppendix\undefined{\gdef\putwordAppendix{Appendix}}\fi% - -% Ignore a token. -% -\def\gobble#1{} - -\hyphenation{ap-pen-dix} -\hyphenation{mini-buf-fer mini-buf-fers} -\hyphenation{eshell} - -% Margin to add to right of even pages, to left of odd pages. -\newdimen \bindingoffset \bindingoffset=0pt -\newdimen \normaloffset \normaloffset=\hoffset -\newdimen\pagewidth \newdimen\pageheight -\pagewidth=\hsize \pageheight=\vsize - -% Sometimes it is convenient to have everything in the transcript file -% and nothing on the terminal. We don't just call \tracingall here, -% since that produces some useless output on the terminal. -% -\def\gloggingall{\begingroup \globaldefs = 1 \loggingall \endgroup}% -\def\loggingall{\tracingcommands2 \tracingstats2 - \tracingpages1 \tracingoutput1 \tracinglostchars1 - \tracingmacros2 \tracingparagraphs1 \tracingrestores1 - \showboxbreadth\maxdimen\showboxdepth\maxdimen -}% - -%---------------------Begin change----------------------- -% -%%%% For @cropmarks command. -% Dimensions to add cropmarks at corners Added by P. A. MacKay, 12 Nov. 
1986 -% -\newdimen\cornerlong \newdimen\cornerthick -\newdimen \topandbottommargin -\newdimen \outerhsize \newdimen \outervsize -\cornerlong=1pc\cornerthick=.3pt % These set size of cropmarks -\outerhsize=7in -%\outervsize=9.5in -% Alternative @smallbook page size is 9.25in -\outervsize=9.25in -\topandbottommargin=.75in -% -%---------------------End change----------------------- - -% \onepageout takes a vbox as an argument. Note that \pagecontents -% does insertions itself, but you have to call it yourself. -\chardef\PAGE=255 \output={\onepageout{\pagecontents\PAGE}} -\def\onepageout#1{\hoffset=\normaloffset -\ifodd\pageno \advance\hoffset by \bindingoffset -\else \advance\hoffset by -\bindingoffset\fi -{\escapechar=`\\\relax % makes sure backslash is used in output files. -\shipout\vbox{{\let\hsize=\pagewidth \makeheadline} \pagebody{#1}% -{\let\hsize=\pagewidth \makefootline}}}% -\advancepageno \ifnum\outputpenalty>-20000 \else\dosupereject\fi} - -%%%% For @cropmarks command %%%% - -% Here is a modification of the main output routine for Near East Publications -% This provides right-angle cropmarks at all four corners. -% The contents of the page are centerlined into the cropmarks, -% and any desired binding offset is added as an \hskip on either -% site of the centerlined box. (P. A. MacKay, 12 November, 1986) -% -\def\croppageout#1{\hoffset=0pt % make sure this doesn't mess things up -{\escapechar=`\\\relax % makes sure backslash is used in output files. 
- \shipout - \vbox to \outervsize{\hsize=\outerhsize - \vbox{\line{\ewtop\hfill\ewtop}} - \nointerlineskip - \line{\vbox{\moveleft\cornerthick\nstop} - \hfill - \vbox{\moveright\cornerthick\nstop}} - \vskip \topandbottommargin - \centerline{\ifodd\pageno\hskip\bindingoffset\fi - \vbox{ - {\let\hsize=\pagewidth \makeheadline} - \pagebody{#1} - {\let\hsize=\pagewidth \makefootline}} - \ifodd\pageno\else\hskip\bindingoffset\fi} - \vskip \topandbottommargin plus1fill minus1fill - \boxmaxdepth\cornerthick - \line{\vbox{\moveleft\cornerthick\nsbot} - \hfill - \vbox{\moveright\cornerthick\nsbot}} - \nointerlineskip - \vbox{\line{\ewbot\hfill\ewbot}} - }} - \advancepageno - \ifnum\outputpenalty>-20000 \else\dosupereject\fi} -% -% Do @cropmarks to get crop marks -\def\cropmarks{\let\onepageout=\croppageout } - -\newinsert\margin \dimen\margin=\maxdimen - -\def\pagebody#1{\vbox to\pageheight{\boxmaxdepth=\maxdepth #1}} -{\catcode`\@ =11 -\gdef\pagecontents#1{\ifvoid\topins\else\unvbox\topins\fi -% marginal hacks, juha@viisa.uucp (Juha Takala) -\ifvoid\margin\else % marginal info is present - \rlap{\kern\hsize\vbox to\z@{\kern1pt\box\margin \vss}}\fi -\dimen@=\dp#1 \unvbox#1 -\ifvoid\footins\else\vskip\skip\footins\footnoterule \unvbox\footins\fi -\ifr@ggedbottom \kern-\dimen@ \vfil \fi} -} - -% -% Here are the rules for the cropmarks. Note that they are -% offset so that the space between them is truly \outerhsize or \outervsize -% (P. A. MacKay, 12 November, 1986) -% -\def\ewtop{\vrule height\cornerthick depth0pt width\cornerlong} -\def\nstop{\vbox - {\hrule height\cornerthick depth\cornerlong width\cornerthick}} -\def\ewbot{\vrule height0pt depth\cornerthick width\cornerlong} -\def\nsbot{\vbox - {\hrule height\cornerlong depth\cornerthick width\cornerthick}} - -% Parse an argument, then pass it to #1. The argument is the rest of -% the input line (except we remove a trailing comment). #1 should be a -% macro which expects an ordinary undelimited TeX argument. 
-% -\def\parsearg#1{% - \let\next = #1% - \begingroup - \obeylines - \futurelet\temp\parseargx -} - -% If the next token is an obeyed space (from an @example environment or -% the like), remove it and recurse. Otherwise, we're done. -\def\parseargx{% - % \obeyedspace is defined far below, after the definition of \sepspaces. - \ifx\obeyedspace\temp - \expandafter\parseargdiscardspace - \else - \expandafter\parseargline - \fi -} - -% Remove a single space (as the delimiter token to the macro call). -{\obeyspaces % - \gdef\parseargdiscardspace {\futurelet\temp\parseargx}} - -{\obeylines % - \gdef\parseargline#1^^M{% - \endgroup % End of the group started in \parsearg. - % - % First remove any @c comment, then any @comment. - % Result of each macro is put in \toks0. - \argremovec #1\c\relax % - \expandafter\argremovecomment \the\toks0 \comment\relax % - % - % Call the caller's macro, saved as \next in \parsearg. - \expandafter\next\expandafter{\the\toks0}% - }% -} - -% Since all \c{,omment} does is throw away the argument, we can let TeX -% do that for us. The \relax here is matched by the \relax in the call -% in \parseargline; it could be more or less anything, its purpose is -% just to delimit the argument to the \c. -\def\argremovec#1\c#2\relax{\toks0 = {#1}} -\def\argremovecomment#1\comment#2\relax{\toks0 = {#1}} - -% \argremovec{,omment} might leave us with trailing spaces, though; e.g., -% @end itemize @c foo -% will have two active spaces as part of the argument with the -% `itemize'. Here we remove all active spaces from #1, and assign the -% result to \toks0. -% -% This loses if there are any *other* active characters besides spaces -% in the argument -- _ ^ +, for example -- since they get expanded. -% Fortunately, Texinfo does not define any such commands. (If it ever -% does, the catcode of the characters in questionwill have to be changed -% here.) 
But this means we cannot call \removeactivespaces as part of -% \argremovec{,omment}, since @c uses \parsearg, and thus the argument -% that \parsearg gets might well have any character at all in it. -% -\def\removeactivespaces#1{% - \begingroup - \ignoreactivespaces - \edef\temp{#1}% - \global\toks0 = \expandafter{\temp}% - \endgroup -} - -% Change the active space to expand to nothing. -% -\begingroup - \obeyspaces - \gdef\ignoreactivespaces{\obeyspaces\let =\empty} -\endgroup - - -\def\flushcr{\ifx\par\lisppar \def\next##1{}\else \let\next=\relax \fi \next} - -%% These are used to keep @begin/@end levels from running away -%% Call \inENV within environments (after a \begingroup) -\newif\ifENV \ENVfalse \def\inENV{\ifENV\relax\else\ENVtrue\fi} -\def\ENVcheck{% -\ifENV\errmessage{Still within an environment. Type Return to continue.} -\endgroup\fi} % This is not perfect, but it should reduce lossage - -% @begin foo is the same as @foo, for now. -\newhelp\EMsimple{Type <Return> to continue.} - -\outer\def\begin{\parsearg\beginxxx} - -\def\beginxxx #1{% -\expandafter\ifx\csname #1\endcsname\relax -{\errhelp=\EMsimple \errmessage{Undefined command @begin #1}}\else -\csname #1\endcsname\fi} - -% @end foo executes the definition of \Efoo. -% -\def\end{\parsearg\endxxx} -\def\endxxx #1{% - \removeactivespaces{#1}% - \edef\endthing{\the\toks0}% - % - \expandafter\ifx\csname E\endthing\endcsname\relax - \expandafter\ifx\csname \endthing\endcsname\relax - % There's no \foo, i.e., no ``environment'' foo. - \errhelp = \EMsimple - \errmessage{Undefined command `@end \endthing'}% - \else - \unmatchedenderror\endthing - \fi - \else - % Everything's ok; the right environment has been started. - \csname E\endthing\endcsname - \fi -} - -% There is an environment #1, but it hasn't been started. Give an error. 
-% -\def\unmatchedenderror#1{% - \errhelp = \EMsimple - \errmessage{This `@end #1' doesn't have a matching `@#1'}% -} - -% Define the control sequence \E#1 to give an unmatched @end error. -% -\def\defineunmatchedend#1{% - \expandafter\def\csname E#1\endcsname{\unmatchedenderror{#1}}% -} - - -% Single-spacing is done by various environments (specifically, in -% \nonfillstart and \quotations). -\newskip\singlespaceskip \singlespaceskip = 12.5pt -\def\singlespace{% - % Why was this kern here? It messes up equalizing space above and below - % environments. --karl, 6may93 - %{\advance \baselineskip by -\singlespaceskip - %\kern \baselineskip}% - \setleading \singlespaceskip -} - -%% Simple single-character @ commands - -% @@ prints an @ -% Kludge this until the fonts are right (grr). -\def\@{{\tt \char '100}} - -% This is turned off because it was never documented -% and you can use @w{...} around a quote to suppress ligatures. -%% Define @` and @' to be the same as ` and ' -%% but suppressing ligatures. -%\def\`{{`}} -%\def\'{{'}} - -% Used to generate quoted braces. - -\def\mylbrace {{\tt \char '173}} -\def\myrbrace {{\tt \char '175}} -\let\{=\mylbrace -\let\}=\myrbrace - -% @: forces normal size whitespace following. -\def\:{\spacefactor=1000 } - -% @* forces a line break. -\def\*{\hfil\break\hbox{}\ignorespaces} - -% @. is an end-of-sentence period. -\def\.{.\spacefactor=3000 } - -% @enddots{} is an end-of-sentence ellipsis. -\gdef\enddots{$\mathinner{\ldotp\ldotp\ldotp\ldotp}$\spacefactor=3000} - -% @! is an end-of-sentence bang. -\gdef\!{!\spacefactor=3000 } - -% @? is an end-of-sentence query. -\gdef\?{?\spacefactor=3000 } - -% @w prevents a word break. Without the \leavevmode, @w at the -% beginning of a paragraph, when TeX is still in vertical mode, would -% produce a whole line of output instead of starting the paragraph. -\def\w#1{\leavevmode\hbox{#1}} - -% @group ... @end group forces ... to be all on one page, by enclosing -% it in a TeX vbox. 
We use \vtop instead of \vbox to construct the box -% to keep its height that of a normal line. According to the rules for -% \topskip (p.114 of the TeXbook), the glue inserted is -% max (\topskip - \ht (first item), 0). If that height is large, -% therefore, no glue is inserted, and the space between the headline and -% the text is small, which looks bad. -% -\def\group{\begingroup - \ifnum\catcode13=\active \else - \errhelp = \groupinvalidhelp - \errmessage{@group invalid in context where filling is enabled}% - \fi - % - % The \vtop we start below produces a box with normal height and large - % depth; thus, TeX puts \baselineskip glue before it, and (when the - % next line of text is done) \lineskip glue after it. (See p.82 of - % the TeXbook.) Thus, space below is not quite equal to space - % above. But it's pretty close. - \def\Egroup{% - \egroup % End the \vtop. - \endgroup % End the \group. - }% - % - \vtop\bgroup - % We have to put a strut on the last line in case the @group is in - % the midst of an example, rather than completely enclosing it. - % Otherwise, the interline space between the last line of the group - % and the first line afterwards is too small. But we can't put the - % strut in \Egroup, since there it would be on a line by itself. - % Hence this just inserts a strut at the beginning of each line. - \everypar = {\strut}% - % - % Since we have a strut on every line, we don't need any of TeX's - % normal interline spacing. - \offinterlineskip - % - % OK, but now we have to do something about blank - % lines in the input in @example-like environments, which normally - % just turn into \lisppar, which will insert no space now that we've - % turned off the interline space. Simplest is to make them be an - % empty paragraph. - \ifx\par\lisppar - \edef\par{\leavevmode \par}% - % - % Reset ^^M's definition to new definition of \par. 
- \obeylines - \fi - % - % Do @comment since we are called inside an environment such as - % @example, where each end-of-line in the input causes an - % end-of-line in the output. We don't want the end-of-line after - % the `@group' to put extra space in the output. Since @group - % should appear on a line by itself (according to the Texinfo - % manual), we don't worry about eating any user text. - \comment -} -% -% TeX puts in an \escapechar (i.e., `@') at the beginning of the help -% message, so this ends up printing `@group can only ...'. -% -\newhelp\groupinvalidhelp{% -group can only be used in environments such as @example,^^J% -where each line of input produces a line of output.} - -% @need space-in-mils -% forces a page break if there is not space-in-mils remaining. - -\newdimen\mil \mil=0.001in - -\def\need{\parsearg\needx} - -% Old definition--didn't work. -%\def\needx #1{\par % -%% This method tries to make TeX break the page naturally -%% if the depth of the box does not fit. -%{\baselineskip=0pt% -%\vtop to #1\mil{\vfil}\kern -#1\mil\penalty 10000 -%\prevdepth=-1000pt -%}} - -\def\needx#1{% - % Go into vertical mode, so we don't make a big box in the middle of a - % paragraph. - \par - % - % Don't add any leading before our big empty box, but allow a page - % break, since the best break might be right here. - \allowbreak - \nointerlineskip - \vtop to #1\mil{\vfil}% - % - % TeX does not even consider page breaks if a penalty added to the - % main vertical list is 10000 or more. But in order to see if the - % empty box we just added fits on the page, we must make it consider - % page breaks. On the other hand, we don't want to actually break the - % page after the empty box. So we use a penalty of 9999. - % - % There is an extremely small chance that TeX will actually break the - % page at this \penalty, if there are no other feasible breakpoints in - % sight. 
(If the user is using lots of big @group commands, which - % almost-but-not-quite fill up a page, TeX will have a hard time doing - % good page breaking, for example.) However, I could not construct an - % example where a page broke at this \penalty; if it happens in a real - % document, then we can reconsider our strategy. - \penalty9999 - % - % Back up by the size of the box, whether we did a page break or not. - \kern -#1\mil - % - % Do not allow a page break right after this kern. - \nobreak -} - -% @br forces paragraph break - -\let\br = \par - -% @dots{} output some dots - -\def\dots{$\ldots$} - -% @page forces the start of a new page - -\def\page{\par\vfill\supereject} - -% @exdent text.... -% outputs text on separate line in roman font, starting at standard page margin - -% This records the amount of indent in the innermost environment. -% That's how much \exdent should take out. -\newskip\exdentamount - -% This defn is used inside fill environments such as @defun. -\def\exdent{\parsearg\exdentyyy} -\def\exdentyyy #1{{\hfil\break\hbox{\kern -\exdentamount{\rm#1}}\hfil\break}} - -% This defn is used inside nofill environments such as @example. -\def\nofillexdent{\parsearg\nofillexdentyyy} -\def\nofillexdentyyy #1{{\advance \leftskip by -\exdentamount -\leftline{\hskip\leftskip{\rm#1}}}} - -%\hbox{{\rm#1}}\hfil\break}} - -% @include file insert text of that file as input. - -\def\include{\parsearg\includezzz} -%Use \input\thisfile to avoid blank after \input, which may be an active -%char (in which case the blank would become the \input argument). -%The grouping keeps the value of \thisfile correct even when @include -%is nested. 
-\def\includezzz #1{\begingroup -\def\thisfile{#1}\input\thisfile -\endgroup} - -\def\thisfile{} - -% @center line outputs that line, centered - -\def\center{\parsearg\centerzzz} -\def\centerzzz #1{{\advance\hsize by -\leftskip -\advance\hsize by -\rightskip -\centerline{#1}}} - -% @sp n outputs n lines of vertical space - -\def\sp{\parsearg\spxxx} -\def\spxxx #1{\par \vskip #1\baselineskip} - -% @comment ...line which is ignored... -% @c is the same as @comment -% @ignore ... @end ignore is another way to write a comment - -\def\comment{\catcode 64=\other \catcode 123=\other \catcode 125=\other% -\parsearg \commentxxx} - -\def\commentxxx #1{\catcode 64=0 \catcode 123=1 \catcode 125=2 } - -\let\c=\comment - -% Prevent errors for section commands. -% Used in @ignore and in failing conditionals. -\def\ignoresections{% -\let\chapter=\relax -\let\unnumbered=\relax -\let\top=\relax -\let\unnumberedsec=\relax -\let\unnumberedsection=\relax -\let\unnumberedsubsec=\relax -\let\unnumberedsubsection=\relax -\let\unnumberedsubsubsec=\relax -\let\unnumberedsubsubsection=\relax -\let\section=\relax -\let\subsec=\relax -\let\subsubsec=\relax -\let\subsection=\relax -\let\subsubsection=\relax -\let\appendix=\relax -\let\appendixsec=\relax -\let\appendixsection=\relax -\let\appendixsubsec=\relax -\let\appendixsubsection=\relax -\let\appendixsubsubsec=\relax -\let\appendixsubsubsection=\relax -\let\contents=\relax -\let\smallbook=\relax -\let\titlepage=\relax -} - -% Used in nested conditionals, where we have to parse the Texinfo source -% and so want to turn off most commands, in case they are used -% incorrectly. 
-% -\def\ignoremorecommands{% - \let\defcv = \relax - \let\deffn = \relax - \let\deffnx = \relax - \let\defindex = \relax - \let\defivar = \relax - \let\defmac = \relax - \let\defmethod = \relax - \let\defop = \relax - \let\defopt = \relax - \let\defspec = \relax - \let\deftp = \relax - \let\deftypefn = \relax - \let\deftypefun = \relax - \let\deftypevar = \relax - \let\deftypevr = \relax - \let\defun = \relax - \let\defvar = \relax - \let\defvr = \relax - \let\ref = \relax - \let\xref = \relax - \let\printindex = \relax - \let\pxref = \relax - \let\settitle = \relax - \let\include = \relax - \let\lowersections = \relax - \let\down = \relax - \let\raisesections = \relax - \let\up = \relax - \let\set = \relax - \let\clear = \relax - \let\item = \relax - \let\message = \relax -} - -% Ignore @ignore ... @end ignore. -% -\def\ignore{\doignore{ignore}} - -% Also ignore @ifinfo, @ifhtml, @html, @menu, and @direntry text. -% -\def\ifinfo{\doignore{ifinfo}} -\def\ifhtml{\doignore{ifhtml}} -\def\html{\doignore{html}} -\def\menu{\doignore{menu}} -\def\direntry{\doignore{direntry}} - -% Ignore text until a line `@end #1'. -% -\def\doignore#1{\begingroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define a command to swallow text until we reach `@end #1'. - \long\def\doignoretext##1\end #1{\enddoignore}% - % - % Make sure that spaces turn into tokens that match what \doignoretext wants. - \catcode32 = 10 - % - % And now expand that command. - \doignoretext -} - -% What we do to finish off ignored text. -% -\def\enddoignore{\endgroup\ignorespaces}% - -\newif\ifwarnedobs\warnedobsfalse -\def\obstexwarn{% - \ifwarnedobs\relax\else - % We need to warn folks that they may have trouble with TeX 3.0. - % This uses \immediate\write16 rather than \message to get newlines. 
- \immediate\write16{} - \immediate\write16{***WARNING*** for users of Unix TeX 3.0!} - \immediate\write16{This manual trips a bug in TeX version 3.0 (tex hangs).} - \immediate\write16{If you are running another version of TeX, relax.} - \immediate\write16{If you are running Unix TeX 3.0, kill this TeX process.} - \immediate\write16{ Then upgrade your TeX installation if you can.} - \immediate\write16{If you are stuck with version 3.0, run the} - \immediate\write16{ script ``tex3patch'' from the Texinfo distribution} - \immediate\write16{ to use a workaround.} - \immediate\write16{} - \warnedobstrue - \fi -} - -% **In TeX 3.0, setting text in \nullfont hangs tex. For a -% workaround (which requires the file ``dummy.tfm'' to be installed), -% uncomment the following line: -%%%%%\font\nullfont=dummy\let\obstexwarn=\relax - -% Ignore text, except that we keep track of conditional commands for -% purposes of nesting, up to an `@end #1' command. -% -\def\nestedignore#1{% - \obstexwarn - % We must actually expand the ignored text to look for the @end - % command, so that nested ignore constructs work. Thus, we put the - % text into a \vbox and then do nothing with the result. To minimize - % the chance of memory overflow, we follow the approach outlined on - % page 401 of the TeXbook: make the current font be a dummy font. - % - \setbox0 = \vbox\bgroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define `@end #1' to end the box, which will in turn undefine the - % @end command again. - \expandafter\def\csname E#1\endcsname{\egroup\ignorespaces}% - % - % We are going to be parsing Texinfo commands. Most cause no - % trouble when they are used incorrectly, but some commands do - % complicated argument parsing or otherwise get confused, so we - % undefine them. - % - % We can't do anything about stray @-signs, unfortunately; - % they'll produce `undefined control sequence' errors.
- \ignoremorecommands - % - % Set the current font to be \nullfont, a TeX primitive, and define - % all the font commands to also use \nullfont. We don't use - % dummy.tfm, as suggested in the TeXbook, because not all sites - % might have that installed. Therefore, math mode will still - % produce output, but that should be an extremely small amount of - % stuff compared to the main input. - % - \nullfont - \let\tenrm = \nullfont \let\tenit = \nullfont \let\tensl = \nullfont - \let\tenbf = \nullfont \let\tentt = \nullfont \let\smallcaps = \nullfont - \let\tensf = \nullfont - % Similarly for index fonts (mostly for their use in - % smallexample) - \let\indrm = \nullfont \let\indit = \nullfont \let\indsl = \nullfont - \let\indbf = \nullfont \let\indtt = \nullfont \let\indsc = \nullfont - \let\indsf = \nullfont - % - % Don't complain when characters are missing from the fonts. - \tracinglostchars = 0 - % - % Don't bother to do space factor calculations. - \frenchspacing - % - % Don't report underfull hboxes. - \hbadness = 10000 - % - % Do minimal line-breaking. - \pretolerance = 10000 - % - % Do not execute instructions in @tex - \def\tex{\doignore{tex}} -} - -% @set VAR sets the variable VAR to an empty value. -% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE. -% -% Since we want to separate VAR from REST-OF-LINE (which might be -% empty), we can't just use \parsearg; we have to insert a space of our -% own to delimit the rest of the line, and then take it out again if we -% didn't need it. -% -\def\set{\parsearg\setxxx} -\def\setxxx#1{\setyyy#1 \endsetyyy} -\def\setyyy#1 #2\endsetyyy{% - \def\temp{#2}% - \ifx\temp\empty \global\expandafter\let\csname SET#1\endcsname = \empty - \else \setzzz{#1}#2\endsetzzz % Remove the trailing space \setxxx inserted. - \fi -} -% Can't use \xdef to pre-expand #2 and save some time, since \temp or -% \next or other control sequences that we've defined might get us into -% an infinite loop. Consider `@set foo @cite{bar}'. 
-\def\setzzz#1#2 \endsetzzz{\expandafter\gdef\csname SET#1\endcsname{#2}} - -% @clear VAR clears (i.e., unsets) the variable VAR. -% -\def\clear{\parsearg\clearxxx} -\def\clearxxx#1{\global\expandafter\let\csname SET#1\endcsname=\relax} - -% @value{foo} gets the text saved in variable foo. -% -\def\value#1{\expandafter - \ifx\csname SET#1\endcsname\relax - {\{No value for ``#1''\}} - \else \csname SET#1\endcsname \fi} - -% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined -% with @set. -% -\def\ifset{\parsearg\ifsetxxx} -\def\ifsetxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifsetfail - \else - \expandafter\ifsetsucceed - \fi -} -\def\ifsetsucceed{\conditionalsucceed{ifset}} -\def\ifsetfail{\nestedignore{ifset}} -\defineunmatchedend{ifset} - -% @ifclear VAR ... @end ifclear reads the `...' iff VAR has never been -% defined with @set, or has been undefined with @clear. -% -\def\ifclear{\parsearg\ifclearxxx} -\def\ifclearxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifclearsucceed - \else - \expandafter\ifclearfail - \fi -} -\def\ifclearsucceed{\conditionalsucceed{ifclear}} -\def\ifclearfail{\nestedignore{ifclear}} -\defineunmatchedend{ifclear} - -% @iftex always succeeds (we read the text following, through @end -% iftex). But `@end iftex' should be valid only after an @iftex. -% -\def\iftex{\conditionalsucceed{iftex}} -\defineunmatchedend{iftex} - -% We don't want to start a group at @iftex (for example) and end it -% at @end iftex, since then @set commands inside the conditional have no -% effect (they'd get reverted at the end of the group). So we must -% define \Eiftex to redefine itself to be its previous value. (We can't -% just define it to fail again with an ``unmatched end'' error, since -% the @ifset might be nested.) -% -\def\conditionalsucceed#1{% - \edef\temp{% - % Remember the current value of \E#1.
- \let\nece{prevE#1} = \nece{E#1}% - % - % At the `@end #1', redefine \E#1 to be its previous value. - \def\nece{E#1}{\let\nece{E#1} = \nece{prevE#1}}% - }% - \temp -} - -% We need to expand lots of \csname's, but we don't want to expand the -% control sequences after we've constructed them. -% -\def\nece#1{\expandafter\noexpand\csname#1\endcsname} - -% @asis just yields its argument. Used with @table, for example. -% -\def\asis#1{#1} - -% @math means output in math mode. -% We don't use $'s directly in the definition of \math because control -% sequences like \math are expanded when the toc file is written. Then, -% when we read the toc file back, the $'s will be normal characters (as they -% should be, according to the definition of Texinfo). So we must use a -% control sequence to switch into and out of math mode. -% -% This isn't quite enough for @math to work properly in indices, but it -% seems unlikely it will ever be needed there. -% -\let\implicitmath = $ -\def\math#1{\implicitmath #1\implicitmath} - -% @bullet and @minus need the same treatment as @math, just above. -\def\bullet{\implicitmath\ptexbullet\implicitmath} -\def\minus{\implicitmath-\implicitmath} - -\def\node{\ENVcheck\parsearg\nodezzz} -\def\nodezzz#1{\nodexxx [#1,]} -\def\nodexxx[#1,#2]{\gdef\lastnode{#1}} -\let\nwnode=\node -\let\lastnode=\relax - -\def\donoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\setref{\lastnode}\fi -\global\let\lastnode=\relax} - -\def\unnumbnoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\unnumbsetref{\lastnode}\fi -\global\let\lastnode=\relax} - -\def\appendixnoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\appendixsetref{\lastnode}\fi -\global\let\lastnode=\relax} - -\let\refill=\relax - -% @setfilename is done at the beginning of every texinfo file. -% So open here the files we need to have open while reading the input. -% This makes it possible to make a .fmt file for texinfo.
-\def\setfilename{% - \readauxfile - \opencontents - \openindices - \fixbackslash % Turn off hack to swallow `\input texinfo'. - \global\let\setfilename=\comment % Ignore extra @setfilename cmds. - \comment % Ignore the actual filename. -} - -\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend} - -\def\inforef #1{\inforefzzz #1,,,,**} -\def\inforefzzz #1,#2,#3,#4**{\putwordSee{} \putwordInfo{} \putwordfile{} \file{\ignorespaces #3{}}, - node \samp{\ignorespaces#1{}}} - -\message{fonts,} - -% Font-change commands. - -% Texinfo supports the sans serif font style, which plain TeX does not. -% So we set up a \sf analogous to plain's \rm, etc. -\newfam\sffam -\def\sf{\fam=\sffam \tensf} -\let\li = \sf % Sometimes we call it \li, not \sf. - -%% Try out Computer Modern fonts at \magstephalf -\let\mainmagstep=\magstephalf - -% Set the font macro #1 to the font named #2, adding on the -% specified font prefix (normally `cm'). -\def\setfont#1#2{\font#1=\fontprefix#2} - -% Use cm as the default font prefix. -% To specify the font prefix, you must define \fontprefix -% before you read in texinfo.tex. -\ifx\fontprefix\undefined -\def\fontprefix{cm} -\fi - -\ifx\bigger\relax -\let\mainmagstep=\magstep1 -\setfont\textrm{r12} -\setfont\texttt{tt12} -\else -\setfont\textrm{r10 scaled \mainmagstep} -\setfont\texttt{tt10 scaled \mainmagstep} -\fi -% Instead of cmb10, you may want to use cmbx10. -% cmbx10 is a prettier font on its own, but cmb10 -% looks better when embedded in a line with cmr10. -\setfont\textbf{b10 scaled \mainmagstep} -\setfont\textit{ti10 scaled \mainmagstep} -\setfont\textsl{sl10 scaled \mainmagstep} -\setfont\textsf{ss10 scaled \mainmagstep} -\setfont\textsc{csc10 scaled \mainmagstep} -\font\texti=cmmi10 scaled \mainmagstep -\font\textsy=cmsy10 scaled \mainmagstep - -% A few fonts for @defun, etc.
-\setfont\defbf{bx10 scaled \magstep1} %was 1314 -\setfont\deftt{tt10 scaled \magstep1} -\def\df{\let\tentt=\deftt \let\tenbf = \defbf \bf} - -% Fonts for indices and small examples. -% We actually use the slanted font rather than the italic, -% because texinfo normally uses the slanted fonts for that. -% Do not make many font distinctions in general in the index, since they -% aren't very useful. -\setfont\ninett{tt9} -\setfont\indrm{r9} -\setfont\indit{sl9} -\let\indsl=\indit -\let\indtt=\ninett -\let\indsf=\indrm -\let\indbf=\indrm -\setfont\indsc{csc10 at 9pt} -\font\indi=cmmi9 -\font\indsy=cmsy9 - -% Fonts for headings -\setfont\chaprm{bx12 scaled \magstep2} -\setfont\chapit{ti12 scaled \magstep2} -\setfont\chapsl{sl12 scaled \magstep2} -\setfont\chaptt{tt12 scaled \magstep2} -\setfont\chapsf{ss12 scaled \magstep2} -\let\chapbf=\chaprm -\setfont\chapsc{csc10 scaled\magstep3} -\font\chapi=cmmi12 scaled \magstep2 -\font\chapsy=cmsy10 scaled \magstep3 - -\setfont\secrm{bx12 scaled \magstep1} -\setfont\secit{ti12 scaled \magstep1} -\setfont\secsl{sl12 scaled \magstep1} -\setfont\sectt{tt12 scaled \magstep1} -\setfont\secsf{ss12 scaled \magstep1} -\setfont\secbf{bx12 scaled \magstep1} -\setfont\secsc{csc10 scaled\magstep2} -\font\seci=cmmi12 scaled \magstep1 -\font\secsy=cmsy10 scaled \magstep2 - -% \setfont\ssecrm{bx10 scaled \magstep1} % This size an font looked bad. -% \setfont\ssecit{cmti10 scaled \magstep1} % The letters were too crowded. -% \setfont\ssecsl{sl10 scaled \magstep1} -% \setfont\ssectt{tt10 scaled \magstep1} -% \setfont\ssecsf{ss10 scaled \magstep1} - -%\setfont\ssecrm{b10 scaled 1315} % Note the use of cmb rather than cmbx. -%\setfont\ssecit{ti10 scaled 1315} % Also, the size is a little larger than -%\setfont\ssecsl{sl10 scaled 1315} % being scaled magstep1. 
-%\setfont\ssectt{tt10 scaled 1315} -%\setfont\ssecsf{ss10 scaled 1315} - -%\let\ssecbf=\ssecrm - -\setfont\ssecrm{bx12 scaled \magstephalf} -\setfont\ssecit{ti12 scaled \magstephalf} -\setfont\ssecsl{sl12 scaled \magstephalf} -\setfont\ssectt{tt12 scaled \magstephalf} -\setfont\ssecsf{ss12 scaled \magstephalf} -\setfont\ssecbf{bx12 scaled \magstephalf} -\setfont\ssecsc{csc10 scaled \magstep1} -\font\sseci=cmmi12 scaled \magstephalf -\font\ssecsy=cmsy10 scaled \magstep1 -% The smallcaps and symbol fonts should actually be scaled \magstep1.5, -% but that is not a standard magnification. - -% Fonts for title page: -\setfont\titlerm{bx12 scaled \magstep3} -\let\authorrm = \secrm - -% In order for the font changes to affect most math symbols and letters, -% we have to define the \textfont of the standard families. Since -% texinfo doesn't allow for producing subscripts and superscripts, we -% don't bother to reset \scriptfont and \scriptscriptfont (which would -% also require loading a lot more fonts). -% -\def\resetmathfonts{% - \textfont0 = \tenrm \textfont1 = \teni \textfont2 = \tensy - \textfont\itfam = \tenit \textfont\slfam = \tensl \textfont\bffam = \tenbf - \textfont\ttfam = \tentt \textfont\sffam = \tensf -} - - -% The font-changing commands redefine the meanings of \tenSTYLE, instead -% of just \STYLE. We do this so that font changes will continue to work -% in math mode, where it is the current \fam that is relevant in most -% cases, not the current font. Plain TeX does, for example, -% \def\bf{\fam=\bffam \tenbf}. By redefining \tenbf, we obviate the need -% to redefine \bf itself.
-\def\textfonts{% - \let\tenrm=\textrm \let\tenit=\textit \let\tensl=\textsl - \let\tenbf=\textbf \let\tentt=\texttt \let\smallcaps=\textsc - \let\tensf=\textsf \let\teni=\texti \let\tensy=\textsy - \resetmathfonts} -\def\chapfonts{% - \let\tenrm=\chaprm \let\tenit=\chapit \let\tensl=\chapsl - \let\tenbf=\chapbf \let\tentt=\chaptt \let\smallcaps=\chapsc - \let\tensf=\chapsf \let\teni=\chapi \let\tensy=\chapsy - \resetmathfonts} -\def\secfonts{% - \let\tenrm=\secrm \let\tenit=\secit \let\tensl=\secsl - \let\tenbf=\secbf \let\tentt=\sectt \let\smallcaps=\secsc - \let\tensf=\secsf \let\teni=\seci \let\tensy=\secsy - \resetmathfonts} -\def\subsecfonts{% - \let\tenrm=\ssecrm \let\tenit=\ssecit \let\tensl=\ssecsl - \let\tenbf=\ssecbf \let\tentt=\ssectt \let\smallcaps=\ssecsc - \let\tensf=\ssecsf \let\teni=\sseci \let\tensy=\ssecsy - \resetmathfonts} -\def\indexfonts{% - \let\tenrm=\indrm \let\tenit=\indit \let\tensl=\indsl - \let\tenbf=\indbf \let\tentt=\indtt \let\smallcaps=\indsc - \let\tensf=\indsf \let\teni=\indi \let\tensy=\indsy - \resetmathfonts} - -% Set up the default fonts, so we can use them for creating boxes. -% -\textfonts - -% Count depth in font-changes, for error checks -\newcount\fontdepth \fontdepth=0 - -% Fonts for short table of contents. -\setfont\shortcontrm{r12} -\setfont\shortcontbf{bx12} -\setfont\shortcontsl{sl12} - -%% Add scribe-like font environments, plus @l for inline lisp (usually sans -%% serif) and @ii for TeX italic - -% \smartitalic{ARG} outputs arg in italics, followed by an italic correction -% unless the following character is such as not to need one. 
-\def\smartitalicx{\ifx\next,\else\ifx\next-\else\ifx\next.\else\/\fi\fi\fi} -\def\smartitalic#1{{\sl #1}\futurelet\next\smartitalicx} - -\let\i=\smartitalic -\let\var=\smartitalic -\let\dfn=\smartitalic -\let\emph=\smartitalic -\let\cite=\smartitalic - -\def\b#1{{\bf #1}} -\let\strong=\b - -% We can't just use \exhyphenpenalty, because that only has effect at -% the end of a paragraph. Restore normal hyphenation at the end of the -% group within which \nohyphenation is presumably called. -% -\def\nohyphenation{\hyphenchar\font = -1 \aftergroup\restorehyphenation} -\def\restorehyphenation{\hyphenchar\font = `- } - -\def\t#1{% - {\tt \nohyphenation \rawbackslash \frenchspacing #1}% - \null -} -\let\ttfont = \t -%\def\samp #1{`{\tt \rawbackslash \frenchspacing #1}'\null} -\def\samp #1{`\tclose{#1}'\null} -\def\key #1{{\tt \nohyphenation \uppercase{#1}}\null} -\def\ctrl #1{{\tt \rawbackslash \hat}#1} - -\let\file=\samp - -% @code is a modification of @t, -% which makes spaces the same size as normal in the surrounding text. -\def\tclose#1{% - {% - % Change normal interword space to be same as for the current font. - \spaceskip = \fontdimen2\font - % - % Switch to typewriter. - \tt - % - % But `\ ' produces the large typewriter interword space. - \def\ {{\spaceskip = 0pt{} }}% - % - % Turn off hyphenation. - \nohyphenation - % - \rawbackslash - \frenchspacing - #1% - }% - \null -} - -% We *must* turn on hyphenation at `-' and `_' in \code. -% Otherwise, it is too hard to avoid overfull hboxes -% in the Emacs manual, the Library manual, etc. - -% Unfortunately, TeX uses one parameter (\hyphenchar) to control -% both hyphenation at - and hyphenation within words. -% We must therefore turn them both off (\tclose does that) -% and arrange explicitly to hyphenate at a dash. -% -- rms.
-{ -\catcode`\-=\active -\catcode`\_=\active -\global\def\code{\begingroup \catcode`\-=\active \let-\codedash \catcode`\_=\active \let_\codeunder \codex} -% The following is used by \doprintindex to insure that long function names -% wrap around. It is necessary for - and _ to be active before the index is -% read from the file, as \entry parses the arguments long before \code is -% ever called. -- mycroft -\global\def\indexbreaks{\catcode`\-=\active \let-\realdash \catcode`\_=\active \let_\realunder} -} -\def\realdash{-} -\def\realunder{_} -\def\codedash{-\discretionary{}{}{}} -\def\codeunder{\normalunderscore\discretionary{}{}{}} -\def\codex #1{\tclose{#1}\endgroup} - -%\let\exp=\tclose %Was temporary - -% @kbd is like @code, except that if the argument is just one @key command, -% then @kbd has no effect. - -\def\xkey{\key} -\def\kbdfoo#1#2#3\par{\def\one{#1}\def\three{#3}\def\threex{??}% -\ifx\one\xkey\ifx\threex\three \key{#2}% -\else\tclose{\look}\fi -\else\tclose{\look}\fi} - -% Typeset a dimension, e.g., `in' or `pt'. The only reason for the -% argument is to make the input look right: @dmn{pt} instead of -% @dmn{}pt. -% -\def\dmn#1{\thinspace #1} - -\def\kbd#1{\def\look{#1}\expandafter\kbdfoo\look??\par} - -\def\l#1{{\li #1}\null} % - -\def\r#1{{\rm #1}} % roman font -% Use of \lowercase was suggested. -\def\sc#1{{\smallcaps#1}} % smallcaps font -\def\ii#1{{\it #1}} % italic font - -\message{page headings,} - -\newskip\titlepagetopglue \titlepagetopglue = 1.5in -\newskip\titlepagebottomglue \titlepagebottomglue = 2pc - -% First the title page. Must do @settitle before @titlepage. 
-\def\titlefont#1{{\titlerm #1}} - -\newif\ifseenauthor -\newif\iffinishedtitlepage - -\def\shorttitlepage{\parsearg\shorttitlepagezzz} -\def\shorttitlepagezzz #1{\begingroup\hbox{}\vskip 1.5in \chaprm \centerline{#1}% - \endgroup\page\hbox{}\page} - -\def\titlepage{\begingroup \parindent=0pt \textfonts - \let\subtitlerm=\tenrm -% I deinstalled the following change because \cmr12 is undefined. -% This change was not in the ChangeLog anyway. --rms. -% \let\subtitlerm=\cmr12 - \def\subtitlefont{\subtitlerm \normalbaselineskip = 13pt \normalbaselines}% - % - \def\authorfont{\authorrm \normalbaselineskip = 16pt \normalbaselines}% - % - % Leave some space at the very top of the page. - \vglue\titlepagetopglue - % - % Now you can print the title using @title. - \def\title{\parsearg\titlezzz}% - \def\titlezzz##1{\leftline{\titlefont{##1}} - % print a rule at the page bottom also. - \finishedtitlepagefalse - \vskip4pt \hrule height 4pt width \hsize \vskip4pt}% - % No rule at page bottom unless we print one at the top with @title. - \finishedtitlepagetrue - % - % Now you can put text using @subtitle. - \def\subtitle{\parsearg\subtitlezzz}% - \def\subtitlezzz##1{{\subtitlefont \rightline{##1}}}% - % - % @author should come last, but may come many times. - \def\author{\parsearg\authorzzz}% - \def\authorzzz##1{\ifseenauthor\else\vskip 0pt plus 1filll\seenauthortrue\fi - {\authorfont \leftline{##1}}}% - % - % Most title ``pages'' are actually two pages long, with space - % at the top of the second. We don't want the ragged left on the second. - \let\oldpage = \page - \def\page{% - \iffinishedtitlepage\else - \finishtitlepage - \fi - \oldpage - \let\page = \oldpage - \hbox{}}% -% \def\page{\oldpage \hbox{}} -} - -\def\Etitlepage{% - \iffinishedtitlepage\else - \finishtitlepage - \fi - % It is important to do the page break before ending the group, - % because the headline and footline are only empty inside the group. 
- % If we use the new definition of \page, we always get a blank page - % after the title page, which we certainly don't want. - \oldpage - \endgroup - \HEADINGSon -} - -\def\finishtitlepage{% - \vskip4pt \hrule height 2pt width \hsize - \vskip\titlepagebottomglue - \finishedtitlepagetrue -} - -%%% Set up page headings and footings. - -\let\thispage=\folio - -\newtoks \evenheadline % Token sequence for heading line of even pages -\newtoks \oddheadline % Token sequence for heading line of odd pages -\newtoks \evenfootline % Token sequence for footing line of even pages -\newtoks \oddfootline % Token sequence for footing line of odd pages - -% Now make Tex use those variables -\headline={{\textfonts\rm \ifodd\pageno \the\oddheadline - \else \the\evenheadline \fi}} -\footline={{\textfonts\rm \ifodd\pageno \the\oddfootline - \else \the\evenfootline \fi}\HEADINGShook} -\let\HEADINGShook=\relax - -% Commands to set those variables. -% For example, this is what @headings on does -% @evenheading @thistitle|@thispage|@thischapter -% @oddheading @thischapter|@thispage|@thistitle -% @evenfooting @thisfile|| -% @oddfooting ||@thisfile - -\def\evenheading{\parsearg\evenheadingxxx} -\def\oddheading{\parsearg\oddheadingxxx} -\def\everyheading{\parsearg\everyheadingxxx} - -\def\evenfooting{\parsearg\evenfootingxxx} -\def\oddfooting{\parsearg\oddfootingxxx} -\def\everyfooting{\parsearg\everyfootingxxx} - -{\catcode`\@=0 % - -\gdef\evenheadingxxx #1{\evenheadingyyy #1@|@|@|@|\finish} -\gdef\evenheadingyyy #1@|#2@|#3@|#4\finish{% -\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\oddheadingxxx #1{\oddheadingyyy #1@|@|@|@|\finish} -\gdef\oddheadingyyy #1@|#2@|#3@|#4\finish{% -\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\everyheadingxxx #1{\everyheadingyyy #1@|@|@|@|\finish} -\gdef\everyheadingyyy #1@|#2@|#3@|#4\finish{% -\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}} 
-\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\evenfootingxxx #1{\evenfootingyyy #1@|@|@|@|\finish} -\gdef\evenfootingyyy #1@|#2@|#3@|#4\finish{% -\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\oddfootingxxx #1{\oddfootingyyy #1@|@|@|@|\finish} -\gdef\oddfootingyyy #1@|#2@|#3@|#4\finish{% -\global\oddfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\everyfootingxxx #1{\everyfootingyyy #1@|@|@|@|\finish} -\gdef\everyfootingyyy #1@|#2@|#3@|#4\finish{% -\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}} -\global\oddfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} -% -}% unbind the catcode of @. - -% @headings double turns headings on for double-sided printing. -% @headings single turns headings on for single-sided printing. -% @headings off turns them off. -% @headings on same as @headings double, retained for compatibility. -% @headings after turns on double-sided headings after this page. -% @headings doubleafter turns on double-sided headings after this page. -% @headings singleafter turns on single-sided headings after this page. -% By default, they are off. - -\def\headings #1 {\csname HEADINGS#1\endcsname} - -\def\HEADINGSoff{ -\global\evenheadline={\hfil} \global\evenfootline={\hfil} -\global\oddheadline={\hfil} \global\oddfootline={\hfil}} -\HEADINGSoff -% When we turn headings on, set the page number to 1. -% For double-sided printing, put current file name in lower left corner, -% chapter name on inside top of right hand pages, document -% title on inside top of left hand pages, and page numbers on outside top -% edge of all pages. -\def\HEADINGSdouble{ -%\pagealignmacro -\global\pageno=1 -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\folio\hfil\thistitle}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} -% For single-sided printing, chapter title goes across top left of page, -% page number on top right. 
-\def\HEADINGSsingle{ -%\pagealignmacro -\global\pageno=1 -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\thischapter\hfil\folio}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} -\def\HEADINGSon{\HEADINGSdouble} - -\def\HEADINGSafter{\let\HEADINGShook=\HEADINGSdoublex} -\let\HEADINGSdoubleafter=\HEADINGSafter -\def\HEADINGSdoublex{% -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\folio\hfil\thistitle}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} - -\def\HEADINGSsingleafter{\let\HEADINGShook=\HEADINGSsinglex} -\def\HEADINGSsinglex{% -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\thischapter\hfil\folio}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} - -% Subroutines used in generating headings -% Produces Day Month Year style of output. -\def\today{\number\day\space -\ifcase\month\or -January\or February\or March\or April\or May\or June\or -July\or August\or September\or October\or November\or December\fi -\space\number\year} - -% Use this if you want the Month Day, Year style of output. -%\def\today{\ifcase\month\or -%January\or February\or March\or April\or May\or June\or -%July\or August\or September\or October\or November\or December\fi -%\space\number\day, \number\year} - -% @settitle line... specifies the title of the document, for headings -% It generates no output of its own - -\def\thistitle{No Title} -\def\settitle{\parsearg\settitlezzz} -\def\settitlezzz #1{\gdef\thistitle{#1}} - -\message{tables,} - -% @tabs -- simple alignment - -% These don't work. For one thing, \+ is defined as outer. -% So these macros cannot even be defined. - -%\def\tabs{\parsearg\tabszzz} -%\def\tabszzz #1{\settabs\+#1\cr} -%\def\tabline{\parsearg\tablinezzz} -%\def\tablinezzz #1{\+#1\cr} -%\def\&{&} - -% Tables -- @table, @ftable, @vtable, @item(x), @kitem(x), @xitem(x). 
- -% default indentation of table text -\newdimen\tableindent \tableindent=.8in -% default indentation of @itemize and @enumerate text -\newdimen\itemindent \itemindent=.3in -% margin between end of table item and start of table text. -\newdimen\itemmargin \itemmargin=.1in - -% used internally for \itemindent minus \itemmargin -\newdimen\itemmax - -% Note @table, @vtable, and @vtable define @item, @itemx, etc., with -% these defs. -% They also define \itemindex -% to index the item name in whatever manner is desired (perhaps none). - -\newif\ifitemxneedsnegativevskip - -\def\itemxpar{\par\ifitemxneedsnegativevskip\vskip-\parskip\nobreak\fi} - -\def\internalBitem{\smallbreak \parsearg\itemzzz} -\def\internalBitemx{\itemxpar \parsearg\itemzzz} - -\def\internalBxitem "#1"{\def\xitemsubtopix{#1} \smallbreak \parsearg\xitemzzz} -\def\internalBxitemx "#1"{\def\xitemsubtopix{#1} \itemxpar \parsearg\xitemzzz} - -\def\internalBkitem{\smallbreak \parsearg\kitemzzz} -\def\internalBkitemx{\itemxpar \parsearg\kitemzzz} - -\def\kitemzzz #1{\dosubind {kw}{\code{#1}}{for {\bf \lastfunction}}% - \itemzzz {#1}} - -\def\xitemzzz #1{\dosubind {kw}{\code{#1}}{for {\bf \xitemsubtopic}}% - \itemzzz {#1}} - -\def\itemzzz #1{\begingroup % - \advance\hsize by -\rightskip - \advance\hsize by -\tableindent - \setbox0=\hbox{\itemfont{#1}}% - \itemindex{#1}% - \nobreak % This prevents a break before @itemx. - % - % Be sure we are not still in the middle of a paragraph. - %{\parskip = 0in - %\par - %}% - % - % If the item text does not fit in the space we have, put it on a line - % by itself, and do not allow a page break either before or after that - % line. We do not start a paragraph here because then if the next - % command is, e.g., @kindex, the whatsit would get put into the - % horizontal list on a line by itself, resulting in extra blank space. - \ifdim \wd0>\itemmax - % - % Make this a paragraph so we get the \parskip glue and wrapping, - % but leave it ragged-right. 
- \begingroup - \advance\leftskip by-\tableindent - \advance\hsize by\tableindent - \advance\rightskip by0pt plus1fil - \leavevmode\unhbox0\par - \endgroup - % - % We're going to be starting a paragraph, but we don't want the - % \parskip glue -- logically it's part of the @item we just started. - \nobreak \vskip-\parskip - % - % Stop a page break at the \parskip glue coming up. Unfortunately - % we can't prevent a possible page break at the following - % \baselineskip glue. - \nobreak - \endgroup - \itemxneedsnegativevskipfalse - \else - % The item text fits into the space. Start a paragraph, so that the - % following text (if any) will end up on the same line. Since that - % text will be indented by \tableindent, we make the item text be in - % a zero-width box. - \noindent - \rlap{\hskip -\tableindent\box0}\ignorespaces% - \endgroup% - \itemxneedsnegativevskiptrue% - \fi -} - -\def\item{\errmessage{@item while not in a table}} -\def\itemx{\errmessage{@itemx while not in a table}} -\def\kitem{\errmessage{@kitem while not in a table}} -\def\kitemx{\errmessage{@kitemx while not in a table}} -\def\xitem{\errmessage{@xitem while not in a table}} -\def\xitemx{\errmessage{@xitemx while not in a table}} - -%% Contains a kludge to get @end[description] to work -\def\description{\tablez{\dontindex}{1}{}{}{}{}} - -\def\table{\begingroup\inENV\obeylines\obeyspaces\tablex} -{\obeylines\obeyspaces% -\gdef\tablex #1^^M{% -\tabley\dontindex#1 \endtabley}} - -\def\ftable{\begingroup\inENV\obeylines\obeyspaces\ftablex} -{\obeylines\obeyspaces% -\gdef\ftablex #1^^M{% -\tabley\fnitemindex#1 \endtabley -\def\Eftable{\endgraf\afterenvbreak\endgroup}% -\let\Etable=\relax}} - -\def\vtable{\begingroup\inENV\obeylines\obeyspaces\vtablex} -{\obeylines\obeyspaces% -\gdef\vtablex #1^^M{% -\tabley\vritemindex#1 \endtabley -\def\Evtable{\endgraf\afterenvbreak\endgroup}% -\let\Etable=\relax}} - -\def\dontindex #1{} -\def\fnitemindex #1{\doind {fn}{\code{#1}}}% -\def\vritemindex #1{\doind 
{vr}{\code{#1}}}% - -{\obeyspaces % -\gdef\tabley#1#2 #3 #4 #5 #6 #7\endtabley{\endgroup% -\tablez{#1}{#2}{#3}{#4}{#5}{#6}}} - -\def\tablez #1#2#3#4#5#6{% -\aboveenvbreak % -\begingroup % -\def\Edescription{\Etable}% Neccessary kludge. -\let\itemindex=#1% -\ifnum 0#3>0 \advance \leftskip by #3\mil \fi % -\ifnum 0#4>0 \tableindent=#4\mil \fi % -\ifnum 0#5>0 \advance \rightskip by #5\mil \fi % -\def\itemfont{#2}% -\itemmax=\tableindent % -\advance \itemmax by -\itemmargin % -\advance \leftskip by \tableindent % -\exdentamount=\tableindent -\parindent = 0pt -\parskip = \smallskipamount -\ifdim \parskip=0pt \parskip=2pt \fi% -\def\Etable{\endgraf\afterenvbreak\endgroup}% -\let\item = \internalBitem % -\let\itemx = \internalBitemx % -\let\kitem = \internalBkitem % -\let\kitemx = \internalBkitemx % -\let\xitem = \internalBxitem % -\let\xitemx = \internalBxitemx % -} - -% This is the counter used by @enumerate, which is really @itemize - -\newcount \itemno - -\def\itemize{\parsearg\itemizezzz} - -\def\itemizezzz #1{% - \begingroup % ended by the @end itemsize - \itemizey {#1}{\Eitemize} -} - -\def\itemizey #1#2{% -\aboveenvbreak % -\itemmax=\itemindent % -\advance \itemmax by -\itemmargin % -\advance \leftskip by \itemindent % -\exdentamount=\itemindent -\parindent = 0pt % -\parskip = \smallskipamount % -\ifdim \parskip=0pt \parskip=2pt \fi% -\def#2{\endgraf\afterenvbreak\endgroup}% -\def\itemcontents{#1}% -\let\item=\itemizeitem} - -% Set sfcode to normal for the chars that usually have another value. -% These are `.?!:;,' -\def\frenchspacing{\sfcode46=1000 \sfcode63=1000 \sfcode33=1000 - \sfcode58=1000 \sfcode59=1000 \sfcode44=1000 } - -% \splitoff TOKENS\endmark defines \first to be the first token in -% TOKENS, and \rest to be the remainder. -% -\def\splitoff#1#2\endmark{\def\first{#1}\def\rest{#2}}% - -% Allow an optional argument of an uppercase letter, lowercase letter, -% or number, to specify the first label in the enumerated list. 
No -% argument is the same as `1'. -% -\def\enumerate{\parsearg\enumeratezzz} -\def\enumeratezzz #1{\enumeratey #1 \endenumeratey} -\def\enumeratey #1 #2\endenumeratey{% - \begingroup % ended by the @end enumerate - % - % If we were given no argument, pretend we were given `1'. - \def\thearg{#1}% - \ifx\thearg\empty \def\thearg{1}\fi - % - % Detect if the argument is a single token. If so, it might be a - % letter. Otherwise, the only valid thing it can be is a number. - % (We will always have one token, because of the test we just made. - % This is a good thing, since \splitoff doesn't work given nothing at - % all -- the first parameter is undelimited.) - \expandafter\splitoff\thearg\endmark - \ifx\rest\empty - % Only one token in the argument. It could still be anything. - % A ``lowercase letter'' is one whose \lccode is nonzero. - % An ``uppercase letter'' is one whose \lccode is both nonzero, and - % not equal to itself. - % Otherwise, we assume it's a number. - % - % We need the \relax at the end of the \ifnum lines to stop TeX from - % continuing to look for a <number>. - % - \ifnum\lccode\expandafter`\thearg=0\relax - \numericenumerate % a number (we hope) - \else - % It's a letter. - \ifnum\lccode\expandafter`\thearg=\expandafter`\thearg\relax - \lowercaseenumerate % lowercase letter - \else - \uppercaseenumerate % uppercase letter - \fi - \fi - \else - % Multiple tokens in the argument. We hope it's a number. - \numericenumerate - \fi -} - -% An @enumerate whose labels are integers. The starting integer is -% given in \thearg. -% -\def\numericenumerate{% - \itemno = \thearg - \startenumeration{\the\itemno}% -} - -% The starting (lowercase) letter is in \thearg. -\def\lowercaseenumerate{% - \itemno = \expandafter`\thearg - \startenumeration{% - % Be sure we're not beyond the end of the alphabet. 
- \ifnum\itemno=0 - \errmessage{No more lowercase letters in @enumerate; get a bigger - alphabet}% - \fi - \char\lccode\itemno - }% -} - -% The starting (uppercase) letter is in \thearg. -\def\uppercaseenumerate{% - \itemno = \expandafter`\thearg - \startenumeration{% - % Be sure we're not beyond the end of the alphabet. - \ifnum\itemno=0 - \errmessage{No more uppercase letters in @enumerate; get a bigger - alphabet} - \fi - \char\uccode\itemno - }% -} - -% Call itemizey, adding a period to the first argument and supplying the -% common last two arguments. Also subtract one from the initial value in -% \itemno, since @item increments \itemno. -% -\def\startenumeration#1{% - \advance\itemno by -1 - \itemizey{#1.}\Eenumerate\flushcr -} - -% @alphaenumerate and @capsenumerate are abbreviations for giving an arg -% to @enumerate. -% -\def\alphaenumerate{\enumerate{a}} -\def\capsenumerate{\enumerate{A}} -\def\Ealphaenumerate{\Eenumerate} -\def\Ecapsenumerate{\Eenumerate} - -% Definition of @item while inside @itemize. - -\def\itemizeitem{% -\advance\itemno by 1 -{\let\par=\endgraf \smallbreak}% -\ifhmode \errmessage{\in hmode at itemizeitem}\fi -{\parskip=0in \hskip 0pt -\hbox to 0pt{\hss \itemcontents\hskip \itemmargin}% -\vadjust{\penalty 1200}}% -\flushcr} - -% @multitable macros -% Amy Hendrickson, 8/18/94 -% -% @multitable ... @endmultitable will make as many columns as desired. -% Contents of each column will wrap at width given in preamble. Width -% can be specified either with sample text given in a template line, -% or in percent of \hsize, the current width of text on page. - -% Table can continue over pages but will only break between lines. - -% To make preamble: -% -% Either define widths of columns in terms of percent of \hsize: -% @multitable @percentofhsize .2 .3 .5 -% @item ... -% -% Numbers following @percentofhsize are the percent of the total -% current hsize to be used for each column. You may use as many -% columns as desired. 
- -% Or use a template: -% @multitable {Column 1 template} {Column 2 template} {Column 3 template} -% @item ... -% using the widest term desired in each column. - - -% Each new table line starts with @item, each subsequent new column -% starts with @tab. Empty columns may be produced by supplying @tab's -% with nothing between them for as many times as empty columns are needed, -% ie, @tab@tab@tab will produce two empty columns. - -% @item, @tab, @multicolumn or @endmulticolumn do not need to be on their -% own lines, but it will not hurt if they are. - -% Sample multitable: - -% @multitable {Column 1 template} {Column 2 template} {Column 3 template} -% @item first col stuff @tab second col stuff @tab third col -% @item -% first col stuff -% @tab -% second col stuff -% @tab -% third col -% @item first col stuff @tab second col stuff -% @tab Many paragraphs of text may be used in any column. -% -% They will wrap at the width determined by the template. -% @item@tab@tab This will be in third column. -% @endmultitable - -% Default dimensions may be reset by user. -% @intableparskip will set vertical space between paragraphs in table. -% @intableparindent will set paragraph indent in table. -% @spacebetweencols will set horizontal space to be left between columns. -% @spacebetweenlines will set vertical space to be left between lines. 
- -%%%% -% Dimensions - -\newdimen\intableparskip -\newdimen\intableparindent -\newdimen\spacebetweencols -\newdimen\spacebetweenlines -\intableparskip=0pt -\intableparindent=6pt -\spacebetweencols=12pt -\spacebetweenlines=12pt - -%%%% -% Macros used to set up halign preamble: -\let\endsetuptable\relax -\def\xendsetuptable{\endsetuptable} -\let\percentofhsize\relax -\def\xpercentofhsize{\percentofhsize} -\newif\ifsetpercent - -\newcount\colcount -\def\setuptable#1{\def\firstarg{#1}% -\ifx\firstarg\xendsetuptable\let\go\relax% -\else - \ifx\firstarg\xpercentofhsize\global\setpercenttrue% - \else - \ifsetpercent - \if#1.\else% - \global\advance\colcount by1 % - \expandafter\xdef\csname col\the\colcount\endcsname{.#1\hsize}% - \fi - \else - \global\advance\colcount by1 - \setbox0=\hbox{#1}% - \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}% - \fi% - \fi% - \let\go\setuptable% -\fi\go} -%%%% -% multitable syntax -\def\tab{&} - -%%%% -% @multitable ... @endmultitable definitions: - -\def\multitable#1\item{\bgroup -\let\item\cr -\tolerance=9500 -\hbadness=9500 -\parskip=\intableparskip -\parindent=\intableparindent -\overfullrule=0pt -\global\colcount=0\relax% -\def\Emultitable{\global\setpercentfalse\global\everycr{}\cr\egroup\egroup}% - % To parse everything between @multitable and @item : -\def\one{#1}\expandafter\setuptable\one\endsetuptable - % Need to reset this to 0 after \setuptable. -\global\colcount=0\relax% - % - % This preamble sets up a generic column definition, which will - % be used as many times as user calls for columns. - % \vtop will set a single line and will also let text wrap and - % continue for many paragraphs if desired. -\halign\bgroup&\global\advance\colcount by 1\relax% -\vtop{\hsize=\expandafter\csname col\the\colcount\endcsname - % In order to keep entries from bumping into each other - % we will add a \leftskip of \spacebetweencols to all columns after - % the first one. 
- % If a template has been used, we will add \spacebetweencols - % to the width of each template entry. - % If user has set preamble in terms of percent of \hsize - % we will use that dimension as the width of the column, and - % the \leftskip will keep entries from bumping into each other. - % Table will start at left margin and final column will justify at - % right margin. -\ifnum\colcount=1 -\else - \ifsetpercent - \else - % If user has <not> set preamble in terms of percent of \hsize - % we will advance \hsize by \spacebetweencols - \advance\hsize by \spacebetweencols - \fi - % In either case we will make \leftskip=\spacebetweencols: -\leftskip=\spacebetweencols -\fi -\noindent##}\cr% - % \everycr will reset column counter, \colcount, at the end of - % each line. Every column entry will cause \colcount to advance by one. - % The table preamble - % looks at the current \colcount to find the correct column width. -\global\everycr{\noalign{\nointerlineskip\vskip\spacebetweenlines -\filbreak%% keeps underfull box messages off when table breaks over pages. -\global\colcount=0\relax}}} - -\message{indexing,} -% Index generation facilities - -% Define \newwrite to be identical to plain tex's \newwrite -% except not \outer, so it can be used within \newindex. -{\catcode`\@=11 -\gdef\newwrite{\alloc@7\write\chardef\sixt@@n}} - -% \newindex {foo} defines an index named foo. -% It automatically defines \fooindex such that -% \fooindex ...rest of line... puts an entry in the index foo. -% It also defines \fooindfile to be the number of the output channel for -% the file that accumulates this index. The file's extension is foo. -% The name of an index should be no more than 2 characters long -% for the sake of vms. 
- -\def\newindex #1{ -\expandafter\newwrite \csname#1indfile\endcsname% Define number for output file -\openout \csname#1indfile\endcsname \jobname.#1 % Open the file -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\doindex {#1}} -} - -% @defindex foo == \newindex{foo} - -\def\defindex{\parsearg\newindex} - -% Define @defcodeindex, like @defindex except put all entries in @code. - -\def\newcodeindex #1{ -\expandafter\newwrite \csname#1indfile\endcsname% Define number for output file -\openout \csname#1indfile\endcsname \jobname.#1 % Open the file -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\docodeindex {#1}} -} - -\def\defcodeindex{\parsearg\newcodeindex} - -% @synindex foo bar makes index foo feed into index bar. -% Do this instead of @defindex foo if you don't want it as a separate index. -\def\synindex #1 #2 {% -\expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname -\expandafter\let\csname#1indfile\endcsname=\synindexfoo -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\doindex {#2}}% -} - -% @syncodeindex foo bar similar, but put all entries made for index foo -% inside @code. -\def\syncodeindex #1 #2 {% -\expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname -\expandafter\let\csname#1indfile\endcsname=\synindexfoo -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\docodeindex {#2}}% -} - -% Define \doindex, the driver for all \fooindex macros. -% Argument #1 is generated by the calling \fooindex macro, -% and it is "foo", the name of the index. - -% \doindex just uses \parsearg; it calls \doind for the actual work. -% This is because \doind is more useful to call from other macros. - -% There is also \dosubind {index}{topic}{subtopic} -% which makes an entry in a two-level index such as the operation index. 
- -\def\doindex#1{\edef\indexname{#1}\parsearg\singleindexer} -\def\singleindexer #1{\doind{\indexname}{#1}} - -% like the previous two, but they put @code around the argument. -\def\docodeindex#1{\edef\indexname{#1}\parsearg\singlecodeindexer} -\def\singlecodeindexer #1{\doind{\indexname}{\code{#1}}} - -\def\indexdummies{% -% Take care of the plain tex accent commands. -\def\"{\realbackslash "}% -\def\`{\realbackslash `}% -\def\'{\realbackslash '}% -\def\^{\realbackslash ^}% -\def\~{\realbackslash ~}% -\def\={\realbackslash =}% -\def\b{\realbackslash b}% -\def\c{\realbackslash c}% -\def\d{\realbackslash d}% -\def\u{\realbackslash u}% -\def\v{\realbackslash v}% -\def\H{\realbackslash H}% -% Take care of the plain tex special European modified letters. -\def\oe{\realbackslash oe}% -\def\ae{\realbackslash ae}% -\def\aa{\realbackslash aa}% -\def\OE{\realbackslash OE}% -\def\AE{\realbackslash AE}% -\def\AA{\realbackslash AA}% -\def\o{\realbackslash o}% -\def\O{\realbackslash O}% -\def\l{\realbackslash l}% -\def\L{\realbackslash L}% -\def\ss{\realbackslash ss}% -% Take care of texinfo commands likely to appear in an index entry. 
-\def\_{{\realbackslash _}}% -\def\w{\realbackslash w }% -\def\bf{\realbackslash bf }% -\def\rm{\realbackslash rm }% -\def\sl{\realbackslash sl }% -\def\sf{\realbackslash sf}% -\def\tt{\realbackslash tt}% -\def\gtr{\realbackslash gtr}% -\def\less{\realbackslash less}% -\def\hat{\realbackslash hat}% -\def\char{\realbackslash char}% -\def\TeX{\realbackslash TeX}% -\def\dots{\realbackslash dots }% -\def\copyright{\realbackslash copyright }% -\def\tclose##1{\realbackslash tclose {##1}}% -\def\code##1{\realbackslash code {##1}}% -\def\samp##1{\realbackslash samp {##1}}% -\def\t##1{\realbackslash r {##1}}% -\def\r##1{\realbackslash r {##1}}% -\def\i##1{\realbackslash i {##1}}% -\def\b##1{\realbackslash b {##1}}% -\def\cite##1{\realbackslash cite {##1}}% -\def\key##1{\realbackslash key {##1}}% -\def\file##1{\realbackslash file {##1}}% -\def\var##1{\realbackslash var {##1}}% -\def\kbd##1{\realbackslash kbd {##1}}% -\def\dfn##1{\realbackslash dfn {##1}}% -\def\emph##1{\realbackslash emph {##1}}% -} - -% \indexnofonts no-ops all font-change commands. -% This is used when outputting the strings to sort the index by. -\def\indexdummyfont#1{#1} -\def\indexdummytex{TeX} -\def\indexdummydots{...} - -\def\indexnofonts{% -% Just ignore accents. -\let\"=\indexdummyfont -\let\`=\indexdummyfont -\let\'=\indexdummyfont -\let\^=\indexdummyfont -\let\~=\indexdummyfont -\let\==\indexdummyfont -\let\b=\indexdummyfont -\let\c=\indexdummyfont -\let\d=\indexdummyfont -\let\u=\indexdummyfont -\let\v=\indexdummyfont -\let\H=\indexdummyfont -% Take care of the plain tex special European modified letters. 
-\def\oe{oe}% -\def\ae{ae}% -\def\aa{aa}% -\def\OE{OE}% -\def\AE{AE}% -\def\AA{AA}% -\def\o{o}% -\def\O{O}% -\def\l{l}% -\def\L{L}% -\def\ss{ss}% -\let\w=\indexdummyfont -\let\t=\indexdummyfont -\let\r=\indexdummyfont -\let\i=\indexdummyfont -\let\b=\indexdummyfont -\let\emph=\indexdummyfont -\let\strong=\indexdummyfont -\let\cite=\indexdummyfont -\let\sc=\indexdummyfont -%Don't no-op \tt, since it isn't a user-level command -% and is used in the definitions of the active chars like <, >, |... -%\let\tt=\indexdummyfont -\let\tclose=\indexdummyfont -\let\code=\indexdummyfont -\let\file=\indexdummyfont -\let\samp=\indexdummyfont -\let\kbd=\indexdummyfont -\let\key=\indexdummyfont -\let\var=\indexdummyfont -\let\TeX=\indexdummytex -\let\dots=\indexdummydots -} - -% To define \realbackslash, we must make \ not be an escape. -% We must first make another character (@) an escape -% so we do not become unable to do a definition. - -{\catcode`\@=0 \catcode`\\=\other -@gdef@realbackslash{\}} - -\let\indexbackslash=0 %overridden during \printindex. - -\let\SETmarginindex=\relax %initialize! -% workhorse for all \fooindexes -% #1 is name of index, #2 is stuff to put there -\def\doind #1#2{% -% Put the index entry in the margin if desired. -\ifx\SETmarginindex\relax\else% -\insert\margin{\hbox{\vrule height8pt depth3pt width0pt #2}}% -\fi% -{\count10=\lastpenalty % -{\indexdummies % Must do this here, since \bf, etc expand at this stage -\escapechar=`\\% -{\let\folio=0% Expand all macros now EXCEPT \folio -\def\rawbackslashxx{\indexbackslash}% \indexbackslash isn't defined now -% so it will be output as is; and it will print as backslash in the indx. -% -% Now process the index-string once, with all font commands turned off, -% to get the string to sort the index by. -{\indexnofonts -\xdef\temp1{#2}% -}% -% Now produce the complete index entry. We process the index-string again, -% this time with font commands expanded, to get what to print in the index. 
-\edef\temp{% -\write \csname#1indfile\endcsname{% -\realbackslash entry {\temp1}{\folio}{#2}}}% -\temp }% -}\penalty\count10}} - -\def\dosubind #1#2#3{% -{\count10=\lastpenalty % -{\indexdummies % Must do this here, since \bf, etc expand at this stage -\escapechar=`\\% -{\let\folio=0% -\def\rawbackslashxx{\indexbackslash}% -% -% Now process the index-string once, with all font commands turned off, -% to get the string to sort the index by. -{\indexnofonts -\xdef\temp1{#2 #3}% -}% -% Now produce the complete index entry. We process the index-string again, -% this time with font commands expanded, to get what to print in the index. -\edef\temp{% -\write \csname#1indfile\endcsname{% -\realbackslash entry {\temp1}{\folio}{#2}{#3}}}% -\temp }% -}\penalty\count10}} - -% The index entry written in the file actually looks like -% \entry {sortstring}{page}{topic} -% or -% \entry {sortstring}{page}{topic}{subtopic} -% The texindex program reads in these files and writes files -% containing these kinds of lines: -% \initial {c} -% before the first topic whose initial is c -% \entry {topic}{pagelist} -% for a topic that is used without subtopics -% \primary {topic} -% for the beginning of a topic that is used with subtopics -% \secondary {subtopic}{pagelist} -% for each subtopic. - -% Define the user-accessible indexing commands -% @findex, @vindex, @kindex, @cindex. - -\def\findex {\fnindex} -\def\kindex {\kyindex} -\def\cindex {\cpindex} -\def\vindex {\vrindex} -\def\tindex {\tpindex} -\def\pindex {\pgindex} - -\def\cindexsub {\begingroup\obeylines\cindexsub} -{\obeylines % -\gdef\cindexsub "#1" #2^^M{\endgroup % -\dosubind{cp}{#2}{#1}}} - -% Define the macros used in formatting output of the sorted index material. - -% This is what you call to cause a particular index to get printed. 
-% Write -% @unnumbered Function Index -% @printindex fn - -\def\printindex{\parsearg\doprintindex} - -\def\doprintindex#1{% - \tex - \dobreak \chapheadingskip {10000} - \catcode`\%=\other\catcode`\&=\other\catcode`\#=\other - \catcode`\$=\other - \catcode`\~=\other - \indexbreaks - % - % The following don't help, since the chars were translated - % when the raw index was written, and their fonts were discarded - % due to \indexnofonts. - %\catcode`\"=\active - %\catcode`\^=\active - %\catcode`\_=\active - %\catcode`\|=\active - %\catcode`\<=\active - %\catcode`\>=\active - % % - \def\indexbackslash{\rawbackslashxx} - \indexfonts\rm \tolerance=9500 \advance\baselineskip -1pt - \begindoublecolumns - % - % See if the index file exists and is nonempty. - \openin 1 \jobname.#1s - \ifeof 1 - % \enddoublecolumns gets confused if there is no text in the index, - % and it loses the chapter title and the aux file entries for the - % index. The easiest way to prevent this problem is to make sure - % there is some text. - (Index is nonexistent) - \else - % - % If the index file exists but is empty, then \openin leaves \ifeof - % false. We have to make TeX try to read something from the file, so - % it can discover if there is anything in it. - \read 1 to \temp - \ifeof 1 - (Index is empty) - \else - \input \jobname.#1s - \fi - \fi - \closein 1 - \enddoublecolumns - \Etex -} - -% These macros are used by the sorted index file itself. -% Change them to control the appearance of the index. - -% Same as \bigskipamount except no shrink. -% \balancecolumns gets confused if there is any shrink. -\newskip\initialskipamount \initialskipamount 12pt plus4pt - -\def\initial #1{% -{\let\tentt=\sectt \let\tt=\sectt \let\sf=\sectt -\ifdim\lastskip<\initialskipamount -\removelastskip \penalty-200 \vskip \initialskipamount\fi -\line{\secbf#1\hfill}\kern 2pt\penalty10000}} - -% This typesets a paragraph consisting of #1, dot leaders, and then #2 -% flush to the right margin. 
It is used for index and table of contents -% entries. The paragraph is indented by \leftskip. -% -\def\entry #1#2{\begingroup - % - % Start a new paragraph if necessary, so our assignments below can't - % affect previous text. - \par - % - % Do not fill out the last line with white space. - \parfillskip = 0in - % - % No extra space above this paragraph. - \parskip = 0in - % - % Do not prefer a separate line ending with a hyphen to fewer lines. - \finalhyphendemerits = 0 - % - % \hangindent is only relevant when the entry text and page number - % don't both fit on one line. In that case, bob suggests starting the - % dots pretty far over on the line. Unfortunately, a large - % indentation looks wrong when the entry text itself is broken across - % lines. So we use a small indentation and put up with long leaders. - % - % \hangafter is reset to 1 (which is the value we want) at the start - % of each paragraph, so we need not do anything with that. - \hangindent=2em - % - % When the entry text needs to be broken, just fill out the first line - % with blank space. - \rightskip = 0pt plus1fil - % - % Start a ``paragraph'' for the index entry so the line breaking - % parameters we've set above will have an effect. - \noindent - % - % Insert the text of the index entry. TeX will do line-breaking on it. - #1% - % The following is kluged to not output a line of dots in the index if - % there are no page numbers. The next person who breaks this will be - % cursed by a Unix daemon. - \def\tempa{{\rm }}% - \def\tempb{#2}% - \edef\tempc{\tempa}% - \edef\tempd{\tempb}% - \ifx\tempc\tempd\ \else% - % - % If we must, put the page number on a line of its own, and fill out - % this line with blank space. (The \hfil is overwhelmed with the - % fill leaders glue in \indexdotfill if the page number does fit.) - \hfil\penalty50 - \null\nobreak\indexdotfill % Have leaders before the page number. 
- % - % The `\ ' here is removed by the implicit \unskip that TeX does as - % part of (the primitive) \par. Without it, a spurious underfull - % \hbox ensues. - \ #2% The page number ends the paragraph. - \fi% - \par -\endgroup} - -% Like \dotfill except takes at least 1 em. -\def\indexdotfill{\cleaders - \hbox{$\mathsurround=0pt \mkern1.5mu ${\it .}$ \mkern1.5mu$}\hskip 1em plus 1fill} - -\def\primary #1{\line{#1\hfil}} - -\newskip\secondaryindent \secondaryindent=0.5cm - -\def\secondary #1#2{ -{\parfillskip=0in \parskip=0in -\hangindent =1in \hangafter=1 -\noindent\hskip\secondaryindent\hbox{#1}\indexdotfill #2\par -}} - -%% Define two-column mode, which is used in indexes. -%% Adapted from the TeXbook, page 416. -\catcode `\@=11 - -\newbox\partialpage - -\newdimen\doublecolumnhsize - -\def\begindoublecolumns{\begingroup - % Grab any single-column material above us. - \output = {\global\setbox\partialpage - =\vbox{\unvbox255\kern -\topskip \kern \baselineskip}}% - \eject - % - % Now switch to the double-column output routine. - \output={\doublecolumnout}% - % - % Change the page size parameters. We could do this once outside this - % routine, in each of @smallbook, @afourpaper, and the default 8.5x11 - % format, but then we repeat the same computation. Repeating a couple - % of assignments once per index is clearly meaningless for the - % execution time, so we may as well do it once. - % - % First we halve the line length, less a little for the gutter between - % the columns. We compute the gutter based on the line length, so it - % changes automatically with the paper format. The magic constant - % below is chosen so that the gutter has the same value (well, +- < - % 1pt) as it did when we hard-coded it. - % - % We put the result in a separate register, \doublecolumhsize, so we - % can restore it in \pagesofar, after \hsize itself has (potentially) - % been clobbered. 
- % - \doublecolumnhsize = \hsize - \advance\doublecolumnhsize by -.04154\hsize - \divide\doublecolumnhsize by 2 - \hsize = \doublecolumnhsize - % - % Double the \vsize as well. (We don't need a separate register here, - % since nobody clobbers \vsize.) - \vsize = 2\vsize - \doublecolumnpagegoal -} - -\def\enddoublecolumns{\eject \endgroup \pagegoal=\vsize \unvbox\partialpage} - -\def\doublecolumnsplit{\splittopskip=\topskip \splitmaxdepth=\maxdepth - \global\dimen@=\pageheight \global\advance\dimen@ by-\ht\partialpage - \global\setbox1=\vsplit255 to\dimen@ \global\setbox0=\vbox{\unvbox1} - \global\setbox3=\vsplit255 to\dimen@ \global\setbox2=\vbox{\unvbox3} - \ifdim\ht0>\dimen@ \setbox255=\vbox{\unvbox0\unvbox2} \global\setbox255=\copy5 \fi - \ifdim\ht2>\dimen@ \setbox255=\vbox{\unvbox0\unvbox2} \global\setbox255=\copy5 \fi -} -\def\doublecolumnpagegoal{% - \dimen@=\vsize \advance\dimen@ by-2\ht\partialpage \global\pagegoal=\dimen@ -} -\def\pagesofar{\unvbox\partialpage % - \hsize=\doublecolumnhsize % have to restore this since output routine - \wd0=\hsize \wd2=\hsize \hbox to\pagewidth{\box0\hfil\box2}} -\def\doublecolumnout{% - \setbox5=\copy255 - {\vbadness=10000 \doublecolumnsplit} - \ifvbox255 - \setbox0=\vtop to\dimen@{\unvbox0} - \setbox2=\vtop to\dimen@{\unvbox2} - \onepageout\pagesofar \unvbox255 \penalty\outputpenalty - \else - \setbox0=\vbox{\unvbox5} - \ifvbox0 - \dimen@=\ht0 \advance\dimen@ by\topskip \advance\dimen@ by-\baselineskip - \divide\dimen@ by2 \splittopskip=\topskip \splitmaxdepth=\maxdepth - {\vbadness=10000 - \loop \global\setbox5=\copy0 - \setbox1=\vsplit5 to\dimen@ - \setbox3=\vsplit5 to\dimen@ - \ifvbox5 \global\advance\dimen@ by1pt \repeat - \setbox0=\vbox to\dimen@{\unvbox1} - \setbox2=\vbox to\dimen@{\unvbox3} - \global\setbox\partialpage=\vbox{\pagesofar} - \doublecolumnpagegoal - } - \fi - \fi -} - -\catcode `\@=\other -\message{sectioning,} -% Define chapters, sections, etc. 
% Counters for the current chapter, section, subsection and subsubsection.
\newcount \chapno
\newcount \secno \secno=0
\newcount \subsecno \subsecno=0
\newcount \subsubsecno \subsubsecno=0

% This counter is funny since it counts through charcodes of letters A, B, ...
% It starts at the code of `@'; \appendixzzz advances it before first use.
\newcount \appendixno \appendixno = `\@
\def\appendixletter{\char\the\appendixno}

\newwrite \contentsfile
% This is called from \setfilename.
\def\opencontents{\openout \contentsfile = \jobname.toc}

% Each @chapter defines this as the name of the chapter.
% page headings and footings can use it. @section does likewise

\def\thischapter{} \def\thissection{}
% \seccheck{NAME}: complain if the sectioning command @NAME is used while
% \pageno is negative (\startcontents makes it negative when the table of
% contents is generated).
% This must be \ifnum: \if would compare the two tokens `\pageno' and `<'
% instead of testing the value of \pageno, so the error could never fire.
\def\seccheck#1{\ifnum \pageno<0 %
\errmessage{@#1 not allowed after generating table of contents}\fi
%
}

% Redefine the font and markup commands so that, inside an \edef, they
% expand to their own names (written with \realbackslash) rather than to
% their definitions. Used inside a group while building table-of-contents
% entries.
\def\chapternofonts{%
\let\rawbackslash=\relax%
\let\frenchspacing=\relax%
\def\result{\realbackslash result}
\def\equiv{\realbackslash equiv}
\def\expansion{\realbackslash expansion}
\def\print{\realbackslash print}
\def\TeX{\realbackslash TeX}
\def\dots{\realbackslash dots}
\def\copyright{\realbackslash copyright}
\def\tt{\realbackslash tt}
\def\bf{\realbackslash bf }
\def\w{\realbackslash w}
\def\less{\realbackslash less}
\def\gtr{\realbackslash gtr}
\def\hat{\realbackslash hat}
\def\char{\realbackslash char}
\def\tclose##1{\realbackslash tclose {##1}}
\def\code##1{\realbackslash code {##1}}
\def\samp##1{\realbackslash samp {##1}}
\def\r##1{\realbackslash r {##1}}
\def\b##1{\realbackslash b {##1}}
\def\key##1{\realbackslash key {##1}}
\def\file##1{\realbackslash file {##1}}
\def\kbd##1{\realbackslash kbd {##1}}
% These are redefined because @smartitalic wouldn't work inside xdef.
\def\i##1{\realbackslash i {##1}}
\def\cite##1{\realbackslash cite {##1}}
\def\var##1{\realbackslash var {##1}}
\def\emph##1{\realbackslash emph {##1}}
\def\dfn##1{\realbackslash dfn {##1}}
}

\newcount\absseclevel % used to calculate proper heading level
\newcount\secbase\secbase=0 % @raise/lowersections modify this count

% @raisesections: treat @section as chapter, @subsection as section, etc.
\def\raisesections{\global\advance\secbase by -1}
\let\up=\raisesections % original BFox name

% @lowersections: treat @chapter as section, @section as subsection, etc.
\def\lowersections{\global\advance\secbase by 1}
\let\down=\lowersections % original BFox name

% Choose a numbered-heading macro
% #1 is heading level if unmodified by @raisesections or @lowersections
% #2 is text for heading
% Levels below 0 are clamped to chapter, levels above 3 to subsubsection.
\def\numhead#1#2{\absseclevel=\secbase\advance\absseclevel by #1
\ifcase\absseclevel
  \chapterzzz{#2}
\or
  \seczzz{#2}
\or
  \numberedsubseczzz{#2}
\or
  \numberedsubsubseczzz{#2}
\else
  \ifnum \absseclevel<0
    \chapterzzz{#2}
  \else
    \numberedsubsubseczzz{#2}
  \fi
\fi
}

% like \numhead, but chooses appendix heading levels
\def\apphead#1#2{\absseclevel=\secbase\advance\absseclevel by #1
\ifcase\absseclevel
  \appendixzzz{#2}
\or
  \appendixsectionzzz{#2}
\or
  \appendixsubseczzz{#2}
\or
  \appendixsubsubseczzz{#2}
\else
  \ifnum \absseclevel<0
    \appendixzzz{#2}
  \else
    \appendixsubsubseczzz{#2}
  \fi
\fi
}

% like \numhead, but chooses numberless heading levels
\def\unnmhead#1#2{\absseclevel=\secbase\advance\absseclevel by #1
\ifcase\absseclevel
  \unnumberedzzz{#2}
\or
  \unnumberedseczzz{#2}
\or
  \unnumberedsubseczzz{#2}
\or
  \unnumberedsubsubseczzz{#2}
\else
  \ifnum \absseclevel<0
    \unnumberedzzz{#2}
  \else
    \unnumberedsubsubseczzz{#2}
  \fi
\fi
}


\def\thischaptername{No Chapter Title}
\outer\def\chapter{\parsearg\chapteryyy}
\def\chapteryyy #1{\numhead0{#1}} % normally numhead0 calls chapterzzz
% \chapterzzz{TITLE}: start a numbered chapter. Resets the lower-level
% counters, typesets the heading through \chapmacro, records the running
% heads, writes a \chapentry line to the contents file, and makes
% @section, @subsection and @subsubsection mean the numbered variants.
\def\chapterzzz #1{\seccheck{chapter}%
\secno=0 \subsecno=0 \subsubsecno=0
% \space is needed here: a plain space after \putwordChapter would be
% skipped by TeX, running the word and the number together in the log.
\global\advance \chapno by 1 \message{\putwordChapter\space \the\chapno}%
\chapmacro {#1}{\the\chapno}%
\gdef\thissection{#1}%
\gdef\thischaptername{#1}%
% We don't substitute the actual chapter name into \thischapter
% because we don't want its macros evaluated now.
\xdef\thischapter{\putwordChapter{} \the\chapno: \noexpand\thischaptername}%
{\chapternofonts%
\edef\temp{{\realbackslash chapentry {#1}{\the\chapno}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\donoderef %
\global\let\section = \numberedsec
\global\let\subsection = \numberedsubsec
\global\let\subsubsection = \numberedsubsubsec
}}

\outer\def\appendix{\parsearg\appendixyyy}
\def\appendixyyy #1{\apphead0{#1}} % normally apphead0 calls appendixzzz
% \appendixzzz{TITLE}: like \chapterzzz, but labelled with
% \putwordAppendix and \appendixletter, and selecting the appendix
% variants of the section commands.
\def\appendixzzz #1{\seccheck{appendix}%
\secno=0 \subsecno=0 \subsubsecno=0
% Use \putwordAppendix in the progress message, as the heading and the
% contents entry below already do, instead of a hard-coded word.
\global\advance \appendixno by 1 \message{\putwordAppendix\space \appendixletter}%
\chapmacro {#1}{\putwordAppendix{} \appendixletter}%
\gdef\thissection{#1}%
\gdef\thischaptername{#1}%
\xdef\thischapter{\putwordAppendix{} \appendixletter: \noexpand\thischaptername}%
{\chapternofonts%
\edef\temp{{\realbackslash chapentry
  {#1}{\putwordAppendix{} \appendixletter}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\appendixnoderef %
\global\let\section = \appendixsec
\global\let\subsection = \appendixsubsec
\global\let\subsubsection = \appendixsubsubsec
}}

\outer\def\top{\parsearg\unnumberedyyy}
\outer\def\unnumbered{\parsearg\unnumberedyyy}
\def\unnumberedyyy #1{\unnmhead0{#1}} % normally unnmhead0 calls unnumberedzzz
% \unnumberedzzz{TITLE}: start an unnumbered chapter.
\def\unnumberedzzz #1{\seccheck{unnumbered}%
\secno=0 \subsecno=0 \subsubsecno=0
%
% This used to be simply \message{#1}, but TeX fully expands the
% argument to \message. Therefore, if #1 contained @-commands, TeX
% expanded them. For example, in `@unnumbered The @cite{Book}', TeX
% expanded @cite (which turns out to cause errors because \cite is meant
% to be executed, not expanded).
%
% Anyway, we don't want the fully-expanded definition of @cite to appear
% as a result of the \message, we just want `@cite' itself. We use
% \the<toks register> to achieve this: TeX expands \the<toks> only once,
% simply yielding the contents of the <toks register>.
\toks0 = {#1}\message{(\the\toks0)}%
%
\unnumbchapmacro {#1}%
\gdef\thischapter{#1}\gdef\thissection{#1}%
{\chapternofonts%
\edef\temp{{\realbackslash unnumbchapentry {#1}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\unnumbnoderef %
\global\let\section = \unnumberedsec
\global\let\subsection = \unnumberedsubsec
\global\let\subsubsection = \unnumberedsubsubsec
}}

\outer\def\numberedsec{\parsearg\secyyy}
\def\secyyy #1{\numhead1{#1}} % normally calls seczzz
% \seczzz{TITLE}: numbered section inside a numbered chapter.
\def\seczzz #1{\seccheck{section}%
\subsecno=0 \subsubsecno=0 \global\advance \secno by 1 %
\gdef\thissection{#1}\secheading {#1}{\the\chapno}{\the\secno}%
{\chapternofonts%
\edef\temp{{\realbackslash secentry %
{#1}{\the\chapno}{\the\secno}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\donoderef %
\penalty 10000 %
}}

% @appendixsection and @appendixsec are synonyms. The command used to be
% defined only under the misspelled name \appenixsection, so the correctly
% spelled @appendixsection was undefined; the misspelling is kept so that
% any existing use of it keeps working.
\outer\def\appendixsection{\parsearg\appendixsecyyy}
\outer\def\appenixsection{\parsearg\appendixsecyyy}
\outer\def\appendixsec{\parsearg\appendixsecyyy}
\def\appendixsecyyy #1{\apphead1{#1}} % normally calls appendixsectionzzz
% \appendixsectionzzz{TITLE}: section inside an appendix; numbered with
% \appendixletter in place of the chapter number.
\def\appendixsectionzzz #1{\seccheck{appendixsection}%
\subsecno=0 \subsubsecno=0 \global\advance \secno by 1 %
\gdef\thissection{#1}\secheading {#1}{\appendixletter}{\the\secno}%
{\chapternofonts%
\edef\temp{{\realbackslash secentry %
{#1}{\appendixletter}{\the\secno}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\appendixnoderef %
\penalty 10000 %
}}

\outer\def\unnumberedsec{\parsearg\unnumberedsecyyy}
\def\unnumberedsecyyy #1{\unnmhead1{#1}} % normally calls unnumberedseczzz
% \unnumberedseczzz{TITLE}: unnumbered section. Typesets the heading with
% \plainsecheading and writes an \unnumbsecentry line to the contents file.
\def\unnumberedseczzz #1{\seccheck{unnumberedsec}%
\plainsecheading {#1}\gdef\thissection{#1}%
{\chapternofonts%
\edef\temp{{\realbackslash unnumbsecentry{#1}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\unnumbnoderef %
\penalty 10000 %
}}

\outer\def\numberedsubsec{\parsearg\numberedsubsecyyy}
\def\numberedsubsecyyy #1{\numhead2{#1}} % normally calls numberedsubseczzz
% \numberedsubseczzz{TITLE}: numbered subsection. Resets \subsubsecno,
% advances \subsecno, and writes a \subsecentry line.
\def\numberedsubseczzz #1{\seccheck{subsection}%
\gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 %
\subsecheading {#1}{\the\chapno}{\the\secno}{\the\subsecno}%
{\chapternofonts%
\edef\temp{{\realbackslash subsecentry %
{#1}{\the\chapno}{\the\secno}{\the\subsecno}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\donoderef %
\penalty 10000 %
}}

\outer\def\appendixsubsec{\parsearg\appendixsubsecyyy}
\def\appendixsubsecyyy #1{\apphead2{#1}} % normally calls appendixsubseczzz
% \appendixsubseczzz{TITLE}: as \numberedsubseczzz, with \appendixletter
% in place of the chapter number.
\def\appendixsubseczzz #1{\seccheck{appendixsubsec}%
\gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 %
\subsecheading {#1}{\appendixletter}{\the\secno}{\the\subsecno}%
{\chapternofonts%
\edef\temp{{\realbackslash subsecentry %
{#1}{\appendixletter}{\the\secno}{\the\subsecno}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\appendixnoderef %
\penalty 10000 %
}}

\outer\def\unnumberedsubsec{\parsearg\unnumberedsubsecyyy}
\def\unnumberedsubsecyyy #1{\unnmhead2{#1}} %normally calls unnumberedsubseczzz
% \unnumberedsubseczzz{TITLE}: unnumbered subsection.
% NOTE(review): the heading is set with \plainsecheading, i.e. at section
% size, not with a subsection-size heading -- confirm this is intended.
\def\unnumberedsubseczzz #1{\seccheck{unnumberedsubsec}%
\plainsecheading {#1}\gdef\thissection{#1}%
{\chapternofonts%
\edef\temp{{\realbackslash unnumbsubsecentry{#1}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\unnumbnoderef %
\penalty 10000 %
}}

\outer\def\numberedsubsubsec{\parsearg\numberedsubsubsecyyy}
\def\numberedsubsubsecyyy #1{\numhead3{#1}} % normally numberedsubsubseczzz
% \numberedsubsubseczzz{TITLE}: numbered subsubsection. Advances
% \subsubsecno and writes a \subsubsecentry line.
\def\numberedsubsubseczzz #1{\seccheck{subsubsection}%
\gdef\thissection{#1}\global\advance \subsubsecno by 1 %
\subsubsecheading {#1}
  {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno}%
{\chapternofonts%
\edef\temp{{\realbackslash subsubsecentry %
  {#1}
  {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno}
  {\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\donoderef %
\penalty 10000 %
}}

\outer\def\appendixsubsubsec{\parsearg\appendixsubsubsecyyy}
\def\appendixsubsubsecyyy #1{\apphead3{#1}} % normally appendixsubsubseczzz
% \appendixsubsubseczzz{TITLE}: as \numberedsubsubseczzz, with
% \appendixletter in place of the chapter number.
\def\appendixsubsubseczzz #1{\seccheck{appendixsubsubsec}%
\gdef\thissection{#1}\global\advance \subsubsecno by 1 %
\subsubsecheading {#1}
  {\appendixletter}{\the\secno}{\the\subsecno}{\the\subsubsecno}%
{\chapternofonts%
\edef\temp{{\realbackslash subsubsecentry{#1}%
  {\appendixletter}
  {\the\secno}{\the\subsecno}{\the\subsubsecno}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\appendixnoderef %
\penalty 10000 %
}}

\outer\def\unnumberedsubsubsec{\parsearg\unnumberedsubsubsecyyy}
\def\unnumberedsubsubsecyyy #1{\unnmhead3{#1}} %normally unnumberedsubsubseczzz
% \unnumberedsubsubseczzz{TITLE}: unnumbered subsubsection.
% NOTE(review): also set with \plainsecheading (section size) -- confirm.
\def\unnumberedsubsubseczzz #1{\seccheck{unnumberedsubsubsec}%
\plainsecheading {#1}\gdef\thissection{#1}%
{\chapternofonts%
\edef\temp{{\realbackslash unnumbsubsubsecentry{#1}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\unnumbnoderef %
\penalty 10000 %
}}

% These are variants which are not "outer", so they can appear in @ifinfo.
% Actually, they should now be obsolete; ordinary section commands should work.
% Non-\outer entry points for the sectioning commands. Each one hands its
% line argument (via \parsearg) straight to the corresponding ...zzz
% worker macro, bypassing the @raisesections/@lowersections level
% selection done by \numhead, \apphead and \unnmhead.
\def\infotop{\parsearg\unnumberedzzz}
\def\infounnumbered{\parsearg\unnumberedzzz}
\def\infounnumberedsec{\parsearg\unnumberedseczzz}
\def\infounnumberedsubsec{\parsearg\unnumberedsubseczzz}
\def\infounnumberedsubsubsec{\parsearg\unnumberedsubsubseczzz}

% The appendix-section worker is named \appendixsectionzzz; there is no
% \appendixseczzz, so pointing at that name gave "Undefined control
% sequence" as soon as \infoappendixsec was used.
\def\infoappendix{\parsearg\appendixzzz}
\def\infoappendixsec{\parsearg\appendixsectionzzz}
\def\infoappendixsubsec{\parsearg\appendixsubseczzz}
\def\infoappendixsubsubsec{\parsearg\appendixsubsubseczzz}

% Likewise the numbered workers are \seczzz, \numberedsubseczzz and
% \numberedsubsubseczzz; \sectionzzz, \subsectionzzz and
% \subsubsectionzzz are never defined.
\def\infochapter{\parsearg\chapterzzz}
\def\infosection{\parsearg\seczzz}
\def\infosubsection{\parsearg\numberedsubseczzz}
\def\infosubsubsection{\parsearg\numberedsubsubseczzz}

% These macros control what the section commands do, according
% to what kind of chapter we are in (ordinary, appendix, or unnumbered).
% Define them by default for a numbered chapter.
\global\let\section = \numberedsec
\global\let\subsection = \numberedsubsec
\global\let\subsubsection = \numberedsubsubsec

% Define @majorheading, @heading and @subheading

% NOTE on use of \vbox for chapter headings, section headings, and
% such:
% 1) We use \vbox rather than the earlier \line to permit
% overlong headings to fold.
% 2) \hyphenpenalty is set to 10000 because hyphenation in a
% heading is obnoxious; this forbids it.
% 3) Likewise, headings look best if no \parindent is used, and
% if justification is not attempted. Hence \raggedright.
% @majorheading: like @chapheading below, but with 10pt added to
% \chapheadingskip (locally) for the break before the heading.
\def\majorheading{\parsearg\majorheadingzzz}
\def\majorheadingzzz #1{%
{\advance\chapheadingskip by 10pt \chapbreak }%
{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000
  \parindent=0pt\raggedright
  \rm #1\hfill}}\bigskip \par\penalty 200}

% @chapheading: a chapter-size heading; nothing is written to the
% contents file.
\def\chapheading{\parsearg\chapheadingzzz}
\def\chapheadingzzz #1{\chapbreak %
{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000
  \parindent=0pt\raggedright
  \rm #1\hfill}}\bigskip \par\penalty 200}

% @heading, @subheading and @subsubheading hand their line argument
% directly to the heading-typesetting macros.
\def\heading{\parsearg\secheadingi}

\def\subheading{\parsearg\subsecheadingi}

\def\subsubheading{\parsearg\subsubsecheadingi}

% These macros generate a chapter, section, etc. heading only
% (including whitespace, linebreaking, etc. around it),
% given all the information in convenient, parsed form.

%%% Args are the skip and penalty (usually negative)
% The penalty and skip are inserted only when the last skip on the
% current list is smaller than #1; that skip is removed first.
\def\dobreak#1#2{\par\ifdim\lastskip<#1\removelastskip\penalty#2\vskip#1\fi}

% @setchapterstyle NAME runs \CHAPFNAME.
\def\setchapterstyle #1 {\csname CHAPF#1\endcsname}

%%% Define plain chapter starts, and page on/off switching for it
% Parameter controlling skip before chapter headings (if needed)

\newskip \chapheadingskip \chapheadingskip = 30pt plus 8pt minus 4pt

% \chapbreak: break within the page; \chappager: start a new page;
% \chapoddpage: start a new page, emitting an extra page holding only an
% empty box when \pageno would otherwise be even.
\def\chapbreak{\dobreak \chapheadingskip {-4000}}
\def\chappager{\par\vfill\supereject}
\def\chapoddpage{\chappager \ifodd\pageno \else \hbox to 0pt{} \chappager\fi}

% @setchapternewpage NAME runs \CHAPPAGNAME (off, on or odd).
\def\setchapternewpage #1 {\csname CHAPPAG#1\endcsname}

\def\CHAPPAGoff{
\global\let\pchapsepmacro=\chapbreak
\global\let\pagealignmacro=\chappager}

\def\CHAPPAGon{
\global\let\pchapsepmacro=\chappager
\global\let\pagealignmacro=\chappager
\global\def\HEADINGSon{\HEADINGSsingle}}

\def\CHAPPAGodd{
\global\let\pchapsepmacro=\chapoddpage
\global\let\pagealignmacro=\chapoddpage
\global\def\HEADINGSon{\HEADINGSdouble}}

% Default: every chapter starts a new page.
\CHAPPAGon

% `plain' chapter style.
\def\CHAPFplain{
\global\let\chapmacro=\chfplain
\global\let\unnumbchapmacro=\unnchfplain}

% \chfplain{TITLE}{LABEL}: chapter heading with LABEL, an en space, then
% TITLE, in the chapter fonts.
\def\chfplain #1#2{%
  \pchapsepmacro
  {%
    \chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000
      \parindent=0pt\raggedright
      \rm #2\enspace #1}%
  }%
  \bigskip
  \penalty5000
}

% \unnchfplain{TITLE}: the same for an unnumbered chapter.
\def\unnchfplain #1{%
\pchapsepmacro %
{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000
  \parindent=0pt\raggedright
  \rm #1\hfill}}\bigskip \par\penalty 10000 %
}
\CHAPFplain % The default

% `open' chapter style: chapters always start on an odd page; a numbered
% chapter heading is set flush right in a box 3in high.
\def\unnchfopen #1{%
\chapoddpage {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000
  \parindent=0pt\raggedright
  \rm #1\hfill}}\bigskip \par\penalty 10000 %
}

\def\chfopen #1#2{\chapoddpage {\chapfonts
\vbox to 3in{\vfil \hbox to\hsize{\hfil #2} \hbox to\hsize{\hfil #1} \vfil}}%
\par\penalty 5000 %
}

\def\CHAPFopen{
\global\let\chapmacro=\chfopen
\global\let\unnumbchapmacro=\unnchfopen}

% Parameter controlling skip before section headings.

\newskip \subsecheadingskip \subsecheadingskip = 17pt plus 8pt minus 4pt
\def\subsecheadingbreak{\dobreak \subsecheadingskip {-500}}

\newskip \secheadingskip \secheadingskip = 21pt plus 8pt minus 4pt
\def\secheadingbreak{\dobreak \secheadingskip {-1000}}

% @paragraphindent is defined for the Info formatting commands only.
\let\paragraphindent=\comment

% Section fonts are the base font at magstep2, which produces
% a size a bit more than 14 points in the default situation.

% \secheading{TITLE}{CHAP}{SEC} typesets `CHAP.SEC TITLE';
% \plainsecheading{TITLE} typesets TITLE alone.
\def\secheading #1#2#3{\secheadingi {#2.#3\enspace #1}}
\def\plainsecheading #1{\secheadingi {#1}}
% \secheadingi{TEXT}: the break before the heading uses \secheadingskip
% plus \parskip; after the heading, space is kerned in so that at least
% 10pt separates it from the following text when \parskip is smaller.
\def\secheadingi #1{{\advance \secheadingskip by \parskip %
\secheadingbreak}%
{\secfonts \vbox{\hyphenpenalty=10000\tolerance=5000
  \parindent=0pt\raggedright
  \rm #1\hfill}}%
\ifdim \parskip<10pt \kern 10pt\kern -\parskip\fi \penalty 10000 }


% Subsection fonts are the base font at magstep1,
% which produces a size of 12 points.
% \subsecheading{TITLE}{CHAP}{SEC}{SUBSEC} typesets `CHAP.SEC.SUBSEC TITLE'.
\def\subsecheading #1#2#3#4{\subsecheadingi {#2.#3.#4\enspace #1}}
% \subsecheadingi{TEXT}: same layout as \secheadingi, using
% \subsecheadingskip and the subsection fonts.
\def\subsecheadingi #1{{\advance \subsecheadingskip by \parskip %
\subsecheadingbreak}%
{\subsecfonts \vbox{\hyphenpenalty=10000\tolerance=5000
  \parindent=0pt\raggedright
  \rm #1\hfill}}%
\ifdim \parskip<10pt \kern 10pt\kern -\parskip\fi \penalty 10000 }

\def\subsubsecfonts{\subsecfonts} % Maybe this should change:
  % Perhaps make sssec fonts scaled
  % magstep half
\def\subsubsecheading #1#2#3#4#5{\subsubsecheadingi {#2.#3.#4.#5\enspace #1}}
% Subsubsection headings reuse the subsection skip and break.
\def\subsubsecheadingi #1{{\advance \subsecheadingskip by \parskip %
\subsecheadingbreak}%
{\subsubsecfonts \vbox{\hyphenpenalty=10000\tolerance=5000
  \parindent=0pt\raggedright
  \rm #1\hfill}}%
\ifdim \parskip<10pt \kern 10pt\kern -\parskip\fi \penalty 10000}


\message{toc printing,}

% Finish up the main text and prepare to read what we've written
% to \contentsfile.

\newskip\contentsrightmargin \contentsrightmargin=1in
% \startcontents{TITLE}: common start of @contents and @summarycontents.
% Leaves a group open; the caller closes it with \endgroup after reading
% the .toc file.
\def\startcontents#1{%
  \pagealignmacro
  \immediate\closeout \contentsfile
  \ifnum \pageno>0
    \pageno = -1 % Request roman numbered pages.
  \fi
  % Don't need to put `Contents' or `Short Contents' in the headline.
  % It is abundantly clear what they are.
  \unnumbchapmacro{#1}\def\thischapter{}%
  \begingroup % Set up to handle contents files properly.
    \catcode`\\=0 \catcode`\{=1 \catcode`\}=2 \catcode`\@=11
    \catcode`\^=7 % to see ^^e4 as \"a etc. juha@piuha.ydi.vtt.fi
    \raggedbottom % Worry more about breakpoints than the bottom.
    \advance\hsize by -\contentsrightmargin % Don't use the full line length.
}


% Normal (long) toc.
\outer\def\contents{%
  \startcontents{\putwordTableofContents}%
    \input \jobname.toc
  \endgroup
  \vfill \eject
}

% And just the chapters.
% The section-level entry macros are redefined to discard their
% arguments, so only chapter-level lines of the .toc file print.
\outer\def\summarycontents{%
  \startcontents{\putwordShortContents}%
    %
    \let\chapentry = \shortchapentry
    \let\unnumbchapentry = \shortunnumberedentry
    % We want a true roman here for the page numbers.
    \secfonts
    \let\rm=\shortcontrm \let\bf=\shortcontbf \let\sl=\shortcontsl
    \rm
    \advance\baselineskip by 1pt % Open it up a little.
    \def\secentry ##1##2##3##4{}
    \def\unnumbsecentry ##1##2{}
    \def\subsecentry ##1##2##3##4##5{}
    \def\unnumbsubsecentry ##1##2{}
    \def\subsubsecentry ##1##2##3##4##5##6{}
    \def\unnumbsubsubsecentry ##1##2{}
    \input \jobname.toc
  \endgroup
  \vfill \eject
}
\let\shortcontents = \summarycontents

% These macros generate individual entries in the table of contents.
% The first argument is the chapter or section name.
% The last argument is the page number.
% The arguments in between are the chapter number, section number, ...

% Chapter-level things, for both the long and short contents.
\def\chapentry#1#2#3{\dochapentry{#2\labelspace#1}{#3}}

% See comments in \dochapentry re vbox and related settings
\def\shortchapentry#1#2#3{%
  \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno{#3}}%
}

% Typeset the label for a chapter or appendix for the short contents.
% The arg is, e.g. `Appendix A' for an appendix, or `3' for a chapter.
% We could simplify the code here by writing out an \appendixentry
% command in the toc file for appendices, instead of using \chapentry
% for both, but it doesn't seem worth it.
\setbox0 = \hbox{\shortcontrm \putwordAppendix }
\newdimen\shortappendixwidth \shortappendixwidth = \wd0

\def\shortchaplabel#1{%
  % We typeset #1 in a box of constant width, regardless of the text of
  % #1, so the chapter titles will come out aligned.
  \setbox0 = \hbox{#1}%
  \dimen0 = \ifdim\wd0 > \shortappendixwidth \shortappendixwidth \else 0pt \fi
  %
  % This space should be plenty, since a single number is .5em, and the
  % widest letter (M) is 1em, at least in the Computer Modern fonts.
  % (This space doesn't include the extra space that gets added after
  % the label; that gets put in in \shortchapentry above.)
  \advance\dimen0 by 1.1em
  \hbox to \dimen0{#1\hfil}%
}

\def\unnumbchapentry#1#2{\dochapentry{#1}{#2}}
\def\shortunnumberedentry#1#2{\tocentry{#1}{\doshortpageno{#2}}}

% Sections.
\def\secentry#1#2#3#4{\dosecentry{#2.#3\labelspace#1}{#4}}
\def\unnumbsecentry#1#2{\dosecentry{#1}{#2}}

% Subsections.
\def\subsecentry#1#2#3#4#5{\dosubsecentry{#2.#3.#4\labelspace#1}{#5}}
\def\unnumbsubsecentry#1#2{\dosubsecentry{#1}{#2}}

% And subsubsections.
\def\subsubsecentry#1#2#3#4#5#6{%
  \dosubsubsecentry{#2.#3.#4.#5\labelspace#1}{#6}}
\def\unnumbsubsubsecentry#1#2{\dosubsubsecentry{#1}{#2}}


% This parameter controls the indentation of the various levels.
\newdimen\tocindent \tocindent = 3pc

% Now for the actual typesetting. In all these, #1 is the text and #2 is the
% page number.
%
% If the toc has to be broken over pages, we would want to be at chapters
% if at all possible; hence the \penalty.
\def\dochapentry#1#2{%
  \penalty-300 \vskip\baselineskip
  \begingroup
    \chapentryfonts
    \tocentry{#1}{\dopageno{#2}}%
  \endgroup
  \nobreak\vskip .25\baselineskip
}

% Section, subsection and subsubsection entries are indented by one, two
% and three times \tocindent respectively.
\def\dosecentry#1#2{\begingroup
  \secentryfonts \leftskip=\tocindent
  \tocentry{#1}{\dopageno{#2}}%
\endgroup}

\def\dosubsecentry#1#2{\begingroup
  \subsecentryfonts \leftskip=2\tocindent
  \tocentry{#1}{\dopageno{#2}}%
\endgroup}

\def\dosubsubsecentry#1#2{\begingroup
  \subsubsecentryfonts \leftskip=3\tocindent
  \tocentry{#1}{\dopageno{#2}}%
\endgroup}

% Final typesetting of a toc entry; we use the same \entry macro as for
% the index entries, but we want to suppress hyphenation here. (We
% can't do that in the \entry macro, since index entries might consist
% of hyphenated-identifiers-that-do-not-fit-on-a-line-and-nothing-else.)
%
\def\tocentry#1#2{\begingroup
  \hyphenpenalty = 10000
  \entry{#1}{#2}%
\endgroup}

% Space between chapter (or whatever) number and the title.
\def\labelspace{\hskip1em \relax}

% Page numbers in the long and the short contents are set in \rm.
\def\dopageno#1{{\rm #1}}
\def\doshortpageno#1{{\rm #1}}

% Fonts for the entries at each level of the contents.
\def\chapentryfonts{\secfonts \rm}
\def\secentryfonts{\textfonts}
\let\subsecentryfonts = \textfonts
\let\subsubsecentryfonts = \textfonts


\message{environments,}

% Since these characters are used in examples, it should be an even number of
% \tt widths. Each \tt character is 1en, so two makes it 1em.
% Furthermore, these definitions must come after we define our fonts.
\newbox\dblarrowbox \newbox\longdblarrowbox
\newbox\pushcharbox \newbox\bullbox
\newbox\equivbox \newbox\errorbox

% Save the existing meaning of \equiv before it is redefined below.
\let\ptexequiv = \equiv

%{\tentt
%\global\setbox\dblarrowbox = \hbox to 1em{\hfil$\Rightarrow$\hfil}
%\global\setbox\longdblarrowbox = \hbox to 1em{\hfil$\mapsto$\hfil}
%\global\setbox\pushcharbox = \hbox to 1em{\hfil$\dashv$\hfil}
%\global\setbox\equivbox = \hbox to 1em{\hfil$\ptexequiv$\hfil}
% Adapted from the manmac format (p.420 of TeXbook)
%\global\setbox\bullbox = \hbox to 1em{\kern.15em\vrule height .75ex width .85ex
% depth .1ex\hfil}
%}

% Glyph commands used in examples. Each of the boxed ones is centered in
% a box 1em wide and raised or lowered slightly.
\def\point{$\star$}

\def\result{\leavevmode\raise.15ex\hbox to 1em{\hfil$\Rightarrow$\hfil}}
\def\expansion{\leavevmode\raise.1ex\hbox to 1em{\hfil$\mapsto$\hfil}}
\def\print{\leavevmode\lower.1ex\hbox to 1em{\hfil$\dashv$\hfil}}

\def\equiv{\leavevmode\lower.1ex\hbox to 1em{\hfil$\ptexequiv$\hfil}}

% Adapted from the TeXbook's \boxit.
{\tentt \global\dimen0 = 3em}% Width of the box.
\dimen2 = .55pt % Thickness of rules
% The text. (`r' is open on the right, `e' somewhat less so on the left.)
\setbox0 = \hbox{\kern-.75pt \tensf error\kern-1.5pt}

% Build the framed word once, here; \error only copies the box.
\global\setbox\errorbox=\hbox to \dimen0{\hfil
  \hsize = \dimen0 \advance\hsize by -5.8pt % Space to left+right.
  \advance\hsize by -2\dimen2 % Rules.
  \vbox{
    \hrule height\dimen2
    \hbox{\vrule width\dimen2 \kern3pt % Space to left of text.
      \vtop{\kern2.4pt \box0 \kern2.4pt}% Space above/below.
      \kern3pt\vrule width\dimen2}% Space to right.
    \hrule height\dimen2}
  \hfil}

% The @error{} command.
\def\error{\leavevmode\lower.7ex\copy\errorbox}

% @tex ... @end tex escapes into raw Tex temporarily.
% One exception: @ is still an escape character, so that @end tex works.
% But \@ or @@ will get a plain tex @ character.

% The category-code assignments set the listed characters to the codes
% given here for the duration of the group; the \let assignments
% re-install the meanings saved under the \ptex... names.
% \Etex (what `@end tex' runs) closes the group.
\def\tex{\begingroup
\catcode `\\=0 \catcode `\{=1 \catcode `\}=2
\catcode `\$=3 \catcode `\&=4 \catcode `\#=6
\catcode `\^=7 \catcode `\_=8 \catcode `\~=13 \let~=\tie
\catcode `\%=14
\catcode 43=12
\catcode`\"=12
\catcode`\==12
\catcode`\|=12
\catcode`\<=12
\catcode`\>=12
\escapechar=`\\
%
\let\~=\ptextilde
\let\{=\ptexlbrace
\let\}=\ptexrbrace
\let\.=\ptexdot
\let\*=\ptexstar
\let\dots=\ptexdots
\def\@{@}%
\let\bullet=\ptexbullet
\let\b=\ptexb \let\c=\ptexc \let\i=\ptexi \let\t=\ptext \let\l=\ptexl
\let\L=\ptexL
%
\let\Etex=\endgroup}

% Define @lisp ... @endlisp.
% @lisp does a \begingroup so it can rebind things,
% including the definition of @endlisp (which normally is erroneous).

% Amount to narrow the margins by for @lisp.
\newskip\lispnarrowing \lispnarrowing=0.4in

% This is the definition that ^^M gets inside @lisp, @example, and other
% such environments. \null is better than a space, since it doesn't
% have any width.
\def\lisppar{\null\endgraf}

% Make each space character in the input produce a normal interword
% space in the output. Don't allow a line break at this space, as this
% is used only in environments like @example, where each line of input
% should produce a line of output anyway.
%
{\obeyspaces %
\gdef\sepspaces{\obeyspaces\let =\tie}}

% Define \obeyedspace to be our active space, whatever it is. This is
% for use in \parsearg.
{\sepspaces%
\global\let\obeyedspace= }

% This space is always present above and below environments.
\newskip\envskipamount \envskipamount = 0pt

% Make spacing above and below environments symmetrical. We use \parskip
% here to help in doing that, since in @example-like environments \parskip
% is reset to zero; thus the \afterenvbreak inserts no space -- but the
% start of the next paragraph will insert \parskip
%
\def\aboveenvbreak{{\advance\envskipamount by \parskip
\endgraf \ifdim\lastskip<\envskipamount
\removelastskip \penalty-50 \vskip\envskipamount \fi}}

\let\afterenvbreak = \aboveenvbreak

% \nonarrowing is a flag. If "set", @lisp etc don't narrow margins.
% "Not set" means equal to \relax, which is what \nonfillstart and
% \quotation test for.
\let\nonarrowing=\relax

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \cartouche: draw rectangle w/rounded corners around argument
\font\circle=lcircle10
\newdimen\circthick
\newdimen\cartouter\newdimen\cartinner
\newskip\normbskip\newskip\normpskip\newskip\normlskip
\circthick=\fontdimen8\circle
%
% Corner pieces: top-left, top-right, bottom-left, bottom-right.
\def\ctl{{\circle\char'013\hskip -6pt}}% 6pt from pl file: 1/2charwidth
\def\ctr{{\hskip 6pt\circle\char'010}}
\def\cbl{{\circle\char'012\hskip -6pt}}
\def\cbr{{\hskip 6pt\circle\char'011}}
% Top and bottom edges: two corners joined by a rule of thickness
% \circthick, inset by the saved left and right skips.
\def\carttop{\hbox to \cartouter{\hskip\lskip
  \ctl\leaders\hrule height\circthick\hfil\ctr
  \hskip\rskip}}
\def\cartbot{\hbox to \cartouter{\hskip\lskip
  \cbl\leaders\hrule height\circthick\hfil\cbr
  \hskip\rskip}}
%
\newskip\lskip\newskip\rskip

% @cartouche opens the boxes and groups; \Ecartouche, defined inside it,
% closes them again in reverse order.
\long\def\cartouche{%
\begingroup
  \lskip=\leftskip \rskip=\rightskip
  \leftskip=0pt\rightskip=0pt %we want these *outside*.
  \cartinner=\hsize \advance\cartinner by-\lskip
  \advance\cartinner by-\rskip
  \cartouter=\hsize
  \advance\cartouter by 18pt % allow for 3pt kerns on either
% side, and for 6pt waste from
% each corner char
  \normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip
  % Flag to tell @lisp, etc., not to narrow margin.
  \let\nonarrowing=\comment
  \vbox\bgroup
    \baselineskip=0pt\parskip=0pt\lineskip=0pt
    \carttop
    \hbox\bgroup
      \hskip\lskip
      \vrule\kern3pt
      \vbox\bgroup
        \hsize=\cartinner
        \kern3pt
        \begingroup
          \baselineskip=\normbskip
          \lineskip=\normlskip
          \parskip=\normpskip
          \vskip -\parskip
\def\Ecartouche{%
        \endgroup
        \kern3pt
      \egroup
      \kern3pt\vrule
      \hskip\rskip
    \egroup
    \cartbot
  \egroup
\endgroup
}}


% This macro is called at the beginning of all the @example variants,
% inside a group.
\def\nonfillstart{%
  \aboveenvbreak
  \inENV % This group ends at the end of the body
  \hfuzz = 12pt % Don't be fussy
  \sepspaces % Make spaces be word-separators rather than space tokens.
  \singlespace
  \let\par = \lisppar % don't ignore blank lines
  \obeylines % each line of input is a line of output
  \parskip = 0pt
  \parindent = 0pt
  \emergencystretch = 0pt % don't try to avoid overfull boxes
  % @cartouche defines \nonarrowing to inhibit narrowing
  % at next level down.
  \ifx\nonarrowing\relax
    \advance \leftskip by \lispnarrowing
    \exdentamount=\lispnarrowing
    \let\exdent=\nofillexdent
    \let\nonarrowing=\relax
  \fi
}

% To end an @example-like environment, we first end the paragraph
% (via \afterenvbreak's vertical glue), and then the group. That way we
% keep the zero \parskip that the environments set -- \parskip glue
% will be inserted at the beginning of the next paragraph in the
% document, after the environment.
%
\def\nonfillfinish{\afterenvbreak\endgroup}%

% This macro is @lisp: \nonfillstart plus the typewriter font, with
% \Elisp defined to finish the environment.
\def\lisp{\begingroup
  \nonfillstart
  \let\Elisp = \nonfillfinish
  \tt
  \rawbackslash % have \ input char produce \ char from current font
  \gobble
}

% Define the \E... control sequence only if we are inside the
% environment, so the error checking in \end will work.
%
% We must call \lisp last in the definition, since it reads the
% return following the @example (or whatever) command.
%
% @example, @smallexample and @smalllisp: each opens an extra group and
% then runs \lisp, so the \E... macro has two groups to close (the one
% \nonfillfinish ends and the one opened here).
\def\example{\begingroup \def\Eexample{\nonfillfinish\endgroup}\lisp}
\def\smallexample{\begingroup \def\Esmallexample{\nonfillfinish\endgroup}\lisp}
\def\smalllisp{\begingroup \def\Esmalllisp{\nonfillfinish\endgroup}\lisp}

% @smallexample and @smalllisp. This is not used unless the @smallbook
% command is given. Originally contributed by Pavel@xerox.
%
\def\smalllispx{\begingroup
  \nonfillstart
  \let\Esmalllisp = \nonfillfinish
  \let\Esmallexample = \nonfillfinish
  %
  % Smaller interline space and fonts for small examples.
  \setleading{10pt}%
  \indexfonts \tt
  \rawbackslash % make \ output the \ character from the current font (tt)
  \gobble
}

% This is @display; same as @lisp except use roman font.
%
\def\display{\begingroup
  \nonfillstart
  \let\Edisplay = \nonfillfinish
  \gobble
}

% This is @format; same as @display except don't narrow margins.
% (\nonarrowing is made different from \relax, which is what
% \nonfillstart tests for before narrowing.)
%
\def\format{\begingroup
  \let\nonarrowing = t
  \nonfillstart
  \let\Eformat = \nonfillfinish
  \gobble
}

% @flushleft (same as @format) and @flushright.
%
\def\flushleft{\begingroup
  \let\nonarrowing = t
  \nonfillstart
  \let\Eflushleft = \nonfillfinish
  \gobble
}
\def\flushright{\begingroup
  \let\nonarrowing = t
  \nonfillstart
  \let\Eflushright = \nonfillfinish
  \advance\leftskip by 0pt plus 1fill
  \gobble}

% @quotation does normal linebreaking (hence we can't use \nonfillstart)
% and narrows the margins.
%
\def\quotation{%
  \begingroup\inENV %This group ends at the end of the @quotation body
  {\parskip=0pt \aboveenvbreak}% because \aboveenvbreak inserts \parskip
  \singlespace
  \parindent=0pt
  % We have retained a nonzero parskip for the environment, since we're
  % doing normal filling. So to avoid extra space below the environment...
  \def\Equotation{\parskip = 0pt \nonfillfinish}%
  %
  % @cartouche defines \nonarrowing to inhibit narrowing at next level down.
  \ifx\nonarrowing\relax
    \advance\leftskip by \lispnarrowing
    \advance\rightskip by \lispnarrowing
    \exdentamount = \lispnarrowing
    \let\nonarrowing = \relax
  \fi
}

\message{defuns,}
% Define formatter for defuns
% First, allow user to change definition object font (\df) internally
\def\setdeffont #1 {\csname DEF#1\endcsname}

\newskip\defbodyindent \defbodyindent=.4in
\newskip\defargsindent \defargsindent=50pt
\newskip\deftypemargin \deftypemargin=12pt
\newskip\deflastargmargin \deflastargmargin=18pt

\newcount\parencount
% define \functionparens, which makes ( and ) and & do special things.
% \functionparens affects the group it is contained in.
\def\activeparens{%
\catcode`\(=\active \catcode`\)=\active \catcode`\&=\active
\catcode`\[=\active \catcode`\]=\active}

% Make control sequences which act like normal parenthesis chars.
\let\lparen = ( \let\rparen = )

{\activeparens % Now, smart parens don't turn on until &foo (see \amprm)

% Be sure that we always have a definition for `(', etc. For example,
% if the fn name has parens in it, \boldbrax will not be in effect yet,
% so TeX would otherwise complain about undefined control sequence.
\global\let(=\lparen \global\let)=\rparen
\global\let[=\lbrack \global\let]=\rbrack

\gdef\functionparens{\boldbrax\let&=\amprm\parencount=0 }
\gdef\boldbrax{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb}
% This is used to turn on special parens
% but make & act ordinary (given that it's active).
\gdef\boldbraxnoamp{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb\let&=\ampnr}

% Definitions of (, ) and & used in args for functions.
% This is the definition of ( outside of all parentheses.
\gdef\oprm#1 {{\rm\char`\(}#1 \bf \let(=\opnested %
\global\advance\parencount by 1 }
%
% This is the definition of ( when already inside a level of parens.
\gdef\opnested{\char`\(\global\advance\parencount by 1 }
%
\gdef\clrm{% Print a paren in roman if it is taking us back to depth of 0.
% also in that case restore the outer-level definition of (.
\ifnum \parencount=1 {\rm \char `\)}\sl \let(=\oprm \else \char `\) \fi
\global\advance \parencount by -1 }
% If we encounter &foo, then turn on ()-hacking afterwards
% The body must print `&' followed by the keyword #1 in roman:
% `{\rm\&#1}'. The `&#1' had been lost from this line, leaving
% `{{\rm\}', whose braces do not balance (\} is a control symbol, not a
% closing brace), so the definition would swallow the text that follows.
\gdef\amprm#1 {{\rm\&#1}\let(=\oprm \let)=\clrm\ }
%
\gdef\normalparens{\boldbrax\let&=\ampnr}
} % End of definition inside \activeparens
%% These parens (in \boldbrax) actually are a little bolder than the
%% contained text. This is especially needed for [ and ]
\def\opnr{{\sf\char`\(}} \def\clnr{{\sf\char`\)}} \def\ampnr{\&}
\def\lbrb{{\bf\char`\[}} \def\rbrb{{\bf\char`\]}}

% First, defname, which formats the header line itself.
% #1 should be the function name.
% #2 should be the type of definition, such as "Function".

\def\defname #1#2{%
% Get the values of \leftskip and \rightskip as they were
% outside the @def...
\dimen2=\leftskip
\advance\dimen2 by -\defbodyindent
\dimen3=\rightskip
\advance\dimen3 by -\defbodyindent
\noindent %
\setbox0=\hbox{\hskip \deflastargmargin{\rm #2}\hskip \deftypemargin}%
\dimen0=\hsize \advance \dimen0 by -\wd0 % compute size for first line
\dimen1=\hsize \advance \dimen1 by -\defargsindent %size for continuations
\parshape 2 0in \dimen0 \defargsindent \dimen1 %
% Now output arg 2 ("Function" or some such)
% ending at \deftypemargin from the right margin,
% but stuck inside a box of width 0 so it does not interfere with linebreaking
{% Adjust \hsize to exclude the ambient margins,
% so that \rightline will obey them.
\advance \hsize by -\dimen2 \advance \hsize by -\dimen3
\rlap{\rightline{{\rm #2}\hskip \deftypemargin}}}%
% Make all lines underfull and no complaints:
\tolerance=10000 \hbadness=10000
\advance\leftskip by -\defbodyindent
\exdentamount=\defbodyindent
{\df #1}\enskip % Generate function name
}

% Actually process the body of a definition
% #1 should be the terminating control sequence, such as \Edefun.
-% #2 should be the "another name" control sequence, such as \defunx. -% #3 should be the control sequence that actually processes the header, -% such as \defunheader. - -\def\defparsebody #1#2#3{\begingroup\inENV% Environment for definitionbody -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2{\begingroup\obeylines\activeparens\spacesplit#3}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup % -\catcode 61=\active % 61 is `=' -\obeylines\activeparens\spacesplit#3} - -\def\defmethparsebody #1#2#3#4 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 {\begingroup\obeylines\activeparens\spacesplit{#3{##1}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\activeparens\spacesplit{#3{#4}}} - -\def\defopparsebody #1#2#3#4#5 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 ##2 {\def#4{##1}% -\begingroup\obeylines\activeparens\spacesplit{#3{##2}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\activeparens\spacesplit{#3{#5}}} - -% These parsing functions are similar to the preceding ones -% except that they do not make parens into active characters. -% These are used for "variables" since they have no arguments. - -\def\defvarparsebody #1#2#3{\begingroup\inENV% Environment for definitionbody -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. 
-\def#1{\endgraf\endgroup\medbreak}% -\def#2{\begingroup\obeylines\spacesplit#3}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup % -\catcode 61=\active % -\obeylines\spacesplit#3} - -% This is used for \def{tp,vr}parsebody. It could probably be used for -% some of the others, too, with some judicious conditionals. -% -\def\parsebodycommon#1#2#3{% - \begingroup\inENV % - \medbreak % - % Define the end token that this defining construct specifies - % so that it will exit this group. - \def#1{\endgraf\endgroup\medbreak}% - \def#2##1 {\begingroup\obeylines\spacesplit{#3{##1}}}% - \parindent=0in - \advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent - \exdentamount=\defbodyindent - \begingroup\obeylines -} - -\def\defvrparsebody#1#2#3#4 {% - \parsebodycommon{#1}{#2}{#3}% - \spacesplit{#3{#4}}% -} - -% This loses on `@deftp {Data Type} {struct termios}' -- it thinks the -% type is just `struct', because we lose the braces in `{struct -% termios}' when \spacesplit reads its undelimited argument. Sigh. -% \let\deftpparsebody=\defvrparsebody -% -% So, to get around this, we put \empty in with the type name. That -% way, TeX won't find exactly `{...}' as an undelimited argument, and -% won't strip off the braces. -% -\def\deftpparsebody #1#2#3#4 {% - \parsebodycommon{#1}{#2}{#3}% - \spacesplit{\parsetpheaderline{#3{#4}}}\empty -} - -% Fine, but then we have to eventually remove the \empty *and* the -% braces (if any). That's what this does, putting the result in \tptemp. -% -\def\removeemptybraces\empty#1\relax{\def\tptemp{#1}}% - -% After \spacesplit has done its work, this is called -- #1 is the final -% thing to call, #2 the type name (which starts with \empty), and #3 -% (which might be empty) the arguments. 
-% -\def\parsetpheaderline#1#2#3{% - \removeemptybraces#2\relax - #1{\tptemp}{#3}% -}% - -\def\defopvarparsebody #1#2#3#4#5 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 ##2 {\def#4{##1}% -\begingroup\obeylines\spacesplit{#3{##2}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\spacesplit{#3{#5}}} - -% Split up #2 at the first space token. -% call #1 with two arguments: -% the first is all of #2 before the space token, -% the second is all of #2 after that space token. -% If #2 contains no space token, all of it is passed as the first arg -% and the second is passed as empty. - -{\obeylines -\gdef\spacesplit#1#2^^M{\endgroup\spacesplitfoo{#1}#2 \relax\spacesplitfoo}% -\long\gdef\spacesplitfoo#1#2 #3#4\spacesplitfoo{% -\ifx\relax #3% -#1{#2}{}\else #1{#2}{#3#4}\fi}} - -% So much for the things common to all kinds of definitions. - -% Define @defun. - -% First, define the processing that is wanted for arguments of \defun -% Use this to expand the args and terminate the paragraph they make up - -\def\defunargs #1{\functionparens \sl -% Expand, preventing hyphenation at `-' chars. -% Note that groups don't affect changes in \hyphenchar. -\hyphenchar\tensl=0 -#1% -\hyphenchar\tensl=45 -\ifnum\parencount=0 \else \errmessage{unbalanced parens in @def arguments}\fi% -\interlinepenalty=10000 -\advance\rightskip by 0pt plus 1fil -\endgraf\penalty 10000\vskip -\parskip\penalty 10000% -} - -\def\deftypefunargs #1{% -% Expand, preventing hyphenation at `-' chars. -% Note that groups don't affect changes in \hyphenchar. -% Use \boldbraxnoamp, not \functionparens, so that & is not special. 
-\boldbraxnoamp -\tclose{#1}% avoid \code because of side effects on active chars -\interlinepenalty=10000 -\advance\rightskip by 0pt plus 1fil -\endgraf\penalty 10000\vskip -\parskip\penalty 10000% -} - -% Do complete processing of one @defun or @defunx line already parsed. - -% @deffn Command forward-char nchars - -\def\deffn{\defmethparsebody\Edeffn\deffnx\deffnheader} - -\def\deffnheader #1#2#3{\doind {fn}{\code{#2}}% -\begingroup\defname {#2}{#1}\defunargs{#3}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defun == @deffn Function - -\def\defun{\defparsebody\Edefun\defunx\defunheader} - -\def\defunheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Function}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @deftypefun int foobar (int @var{foo}, float @var{bar}) - -\def\deftypefun{\defparsebody\Edeftypefun\deftypefunx\deftypefunheader} - -% #1 is the data type. #2 is the name and args. -\def\deftypefunheader #1#2{\deftypefunheaderx{#1}#2 \relax} -% #1 is the data type, #2 the name, #3 the args. -\def\deftypefunheaderx #1#2 #3\relax{% -\doind {fn}{\code{#2}}% Make entry in function index -\begingroup\defname {\defheaderxcond#1\relax$$$#2}{Function}% -\deftypefunargs {#3}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @deftypefn {Library Function} int foobar (int @var{foo}, float @var{bar}) - -\def\deftypefn{\defmethparsebody\Edeftypefn\deftypefnx\deftypefnheader} - -% \defheaderxcond#1\relax$$$ -% puts #1 in @code, followed by a space, but does nothing if #1 is null. -\def\defheaderxcond#1#2$$${\ifx#1\relax\else\code{#1#2} \fi} - -% #1 is the classification. #2 is the data type. #3 is the name and args. -\def\deftypefnheader #1#2#3{\deftypefnheaderx{#1}{#2}#3 \relax} -% #1 is the classification, #2 the data type, #3 the name, #4 the args. 
-\def\deftypefnheaderx #1#2#3 #4\relax{% -\doind {fn}{\code{#3}}% Make entry in function index -\begingroup -\normalparens % notably, turn off `&' magic, which prevents -% at least some C++ text from working -\defname {\defheaderxcond#2\relax$$$#3}{#1}% -\deftypefunargs {#4}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defmac == @deffn Macro - -\def\defmac{\defparsebody\Edefmac\defmacx\defmacheader} - -\def\defmacheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Macro}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defspec == @deffn Special Form - -\def\defspec{\defparsebody\Edefspec\defspecx\defspecheader} - -\def\defspecheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Special Form}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% This definition is run if you use @defunx -% anywhere other than immediately after a @defun or @defunx. 
- -\def\deffnx #1 {\errmessage{@deffnx in invalid context}} -\def\defunx #1 {\errmessage{@defunx in invalid context}} -\def\defmacx #1 {\errmessage{@defmacx in invalid context}} -\def\defspecx #1 {\errmessage{@defspecx in invalid context}} -\def\deftypefnx #1 {\errmessage{@deftypefnx in invalid context}} -\def\deftypeunx #1 {\errmessage{@deftypeunx in invalid context}} - -% @defmethod, and so on - -% @defop {Funny Method} foo-class frobnicate argument - -\def\defop #1 {\def\defoptype{#1}% -\defopparsebody\Edefop\defopx\defopheader\defoptype} - -\def\defopheader #1#2#3{% -\dosubind {fn}{\code{#2}}{on #1}% Make entry in function index -\begingroup\defname {#2}{\defoptype{} on #1}% -\defunargs {#3}\endgroup % -} - -% @defmethod == @defop Method - -\def\defmethod{\defmethparsebody\Edefmethod\defmethodx\defmethodheader} - -\def\defmethodheader #1#2#3{% -\dosubind {fn}{\code{#2}}{on #1}% entry in function index -\begingroup\defname {#2}{Method on #1}% -\defunargs {#3}\endgroup % -} - -% @defcv {Class Option} foo-class foo-flag - -\def\defcv #1 {\def\defcvtype{#1}% -\defopvarparsebody\Edefcv\defcvx\defcvarheader\defcvtype} - -\def\defcvarheader #1#2#3{% -\dosubind {vr}{\code{#2}}{of #1}% Make entry in var index -\begingroup\defname {#2}{\defcvtype{} of #1}% -\defvarargs {#3}\endgroup % -} - -% @defivar == @defcv {Instance Variable} - -\def\defivar{\defvrparsebody\Edefivar\defivarx\defivarheader} - -\def\defivarheader #1#2#3{% -\dosubind {vr}{\code{#2}}{of #1}% Make entry in var index -\begingroup\defname {#2}{Instance Variable of #1}% -\defvarargs {#3}\endgroup % -} - -% These definitions are run if you use @defmethodx, etc., -% anywhere other than immediately after a @defmethod, etc. 
- -\def\defopx #1 {\errmessage{@defopx in invalid context}} -\def\defmethodx #1 {\errmessage{@defmethodx in invalid context}} -\def\defcvx #1 {\errmessage{@defcvx in invalid context}} -\def\defivarx #1 {\errmessage{@defivarx in invalid context}} - -% Now @defvar - -% First, define the processing that is wanted for arguments of @defvar. -% This is actually simple: just print them in roman. -% This must expand the args and terminate the paragraph they make up -\def\defvarargs #1{\normalparens #1% -\interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000} - -% @defvr Counter foo-count - -\def\defvr{\defvrparsebody\Edefvr\defvrx\defvrheader} - -\def\defvrheader #1#2#3{\doind {vr}{\code{#2}}% -\begingroup\defname {#2}{#1}\defvarargs{#3}\endgroup} - -% @defvar == @defvr Variable - -\def\defvar{\defvarparsebody\Edefvar\defvarx\defvarheader} - -\def\defvarheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{Variable}% -\defvarargs {#2}\endgroup % -} - -% @defopt == @defvr {User Option} - -\def\defopt{\defvarparsebody\Edefopt\defoptx\defoptheader} - -\def\defoptheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{User Option}% -\defvarargs {#2}\endgroup % -} - -% @deftypevar int foobar - -\def\deftypevar{\defvarparsebody\Edeftypevar\deftypevarx\deftypevarheader} - -% #1 is the data type. #2 is the name. 
-\def\deftypevarheader #1#2{% -\doind {vr}{\code{#2}}% Make entry in variables index -\begingroup\defname {\defheaderxcond#1\relax$$$#2}{Variable}% -\interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000 -\endgroup} - -% @deftypevr {Global Flag} int enable - -\def\deftypevr{\defvrparsebody\Edeftypevr\deftypevrx\deftypevrheader} - -\def\deftypevrheader #1#2#3{\doind {vr}{\code{#3}}% -\begingroup\defname {\defheaderxcond#2\relax$$$#3}{#1} -\interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000 -\endgroup} - -% This definition is run if you use @defvarx -% anywhere other than immediately after a @defvar or @defvarx. - -\def\defvrx #1 {\errmessage{@defvrx in invalid context}} -\def\defvarx #1 {\errmessage{@defvarx in invalid context}} -\def\defoptx #1 {\errmessage{@defoptx in invalid context}} -\def\deftypevarx #1 {\errmessage{@deftypevarx in invalid context}} -\def\deftypevrx #1 {\errmessage{@deftypevrx in invalid context}} - -% Now define @deftp -% Args are printed in bold, a slight difference from @defvar. - -\def\deftpargs #1{\bf \defvarargs{#1}} - -% @deftp Class window height width ... - -\def\deftp{\deftpparsebody\Edeftp\deftpx\deftpheader} - -\def\deftpheader #1#2#3{\doind {tp}{\code{#2}}% -\begingroup\defname {#2}{#1}\deftpargs{#3}\endgroup} - -% This definition is run if you use @deftpx, etc -% anywhere other than immediately after a @deftp, etc. - -\def\deftpx #1 {\errmessage{@deftpx in invalid context}} - -\message{cross reference,} -% Define cross-reference macros -\newwrite \auxfile - -\newif\ifhavexrefs % True if xref values are known. -\newif\ifwarnedxrefs % True if we warned once that they aren't known. - -% \setref{foo} defines a cross-reference point named foo. 
- -\def\setref#1{% -\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Ysectionnumberandtype}} - -\def\unnumbsetref#1{% -\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Ynothing}} - -\def\appendixsetref#1{% -\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Yappendixletterandtype}} - -% \xref, \pxref, and \ref generate cross-references to specified points. -% For \xrefX, #1 is the node name, #2 the name of the Info -% cross-reference, #3 the printed node name, #4 the name of the Info -% file, #5 the name of the printed manual. All but the node name can be -% omitted. -% -\def\pxref#1{\putwordsee{} \xrefX[#1,,,,,,,]} -\def\xref#1{\putwordSee{} \xrefX[#1,,,,,,,]} -\def\ref#1{\xrefX[#1,,,,,,,]} -\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup - \def\printedmanual{\ignorespaces #5}% - \def\printednodename{\ignorespaces #3}% - \setbox1=\hbox{\printedmanual}% - \setbox0=\hbox{\printednodename}% - \ifdim \wd0 = 0pt - % No printed node name was explicitly given. - \ifx\SETxref-automatic-section-title\relax % - % Use the actual chapter/section title appear inside - % the square brackets. Use the real section title if we have it. - \ifdim \wd1>0pt% - % It is in another manual, so we don't have it. - \def\printednodename{\ignorespaces #1}% - \else - \ifhavexrefs - % We know the real title if we have the xref values. - \def\printednodename{\refx{#1-title}}% - \else - % Otherwise just copy the Info node name. - \def\printednodename{\ignorespaces #1}% - \fi% - \fi - \def\printednodename{#1-title}% - \else - % Use the node name inside the square brackets. - \def\printednodename{\ignorespaces #1}% - \fi - \fi - % - % If we use \unhbox0 and \unhbox1 to print the node names, TeX does not - % insert empty discretionaries after hyphens, which means that it will - % not find a line break at a hyphen in a node names. 
Since some manuals - % are best written with fairly long node names, containing hyphens, this - % is a loss. Therefore, we give the text of the node name again, so it - % is as if TeX is seeing it for the first time. - \ifdim \wd1 > 0pt - \putwordsection{} ``\printednodename'' in \cite{\printedmanual}% - \else - % _ (for example) has to be the character _ for the purposes of the - % control sequence corresponding to the node, but it has to expand - % into the usual \leavevmode...\vrule stuff for purposes of - % printing. So we \turnoffactive for the \refx-snt, back on for the - % printing, back off for the \refx-pg. - {\turnoffactive \refx{#1-snt}{}}% - \space [\printednodename],\space - \turnoffactive \putwordpage\tie\refx{#1-pg}{}% - \fi -\endgroup} - -% \dosetq is the interface for calls from other macros - -% Use \turnoffactive so that punctuation chars such as underscore -% work in node names. -\def\dosetq #1#2{{\let\folio=0 \turnoffactive \auxhat% -\edef\next{\write\auxfile{\internalsetq {#1}{#2}}}% -\next}} - -% \internalsetq {foo}{page} expands into -% CHARACTERS 'xrdef {foo}{...expansion of \Ypage...} -% When the aux file is read, ' is the escape character - -\def\internalsetq #1#2{'xrdef {#1}{\csname #2\endcsname}} - -% Things to be expanded by \internalsetq - -\def\Ypagenumber{\folio} - -\def\Ytitle{\thissection} - -\def\Ynothing{} - -\def\Ysectionnumberandtype{% -\ifnum\secno=0 \putwordChapter\xreftie\the\chapno % -\else \ifnum \subsecno=0 \putwordSection\xreftie\the\chapno.\the\secno % -\else \ifnum \subsubsecno=0 % -\putwordSection\xreftie\the\chapno.\the\secno.\the\subsecno % -\else % -\putwordSection\xreftie\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno % -\fi \fi \fi } - -\def\Yappendixletterandtype{% -\ifnum\secno=0 \putwordAppendix\xreftie'char\the\appendixno{}% -\else \ifnum \subsecno=0 \putwordSection\xreftie'char\the\appendixno.\the\secno % -\else \ifnum \subsubsecno=0 % 
-\putwordSection\xreftie'char\the\appendixno.\the\secno.\the\subsecno % -\else % -\putwordSection\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno % -\fi \fi \fi } - -\gdef\xreftie{'tie} - -% Use TeX 3.0's \inputlineno to get the line number, for better error -% messages, but if we're using an old version of TeX, don't do anything. -% -\ifx\inputlineno\thisisundefined - \let\linenumber = \empty % Non-3.0. -\else - \def\linenumber{\the\inputlineno:\space} -\fi - -% Define \refx{NAME}{SUFFIX} to reference a cross-reference string named NAME. -% If its value is nonempty, SUFFIX is output afterward. - -\def\refx#1#2{% - \expandafter\ifx\csname X#1\endcsname\relax - % If not defined, say something at least. - $\langle$un\-de\-fined$\rangle$% - \ifhavexrefs - \message{\linenumber Undefined cross reference `#1'.}% - \else - \ifwarnedxrefs\else - \global\warnedxrefstrue - \message{Cross reference values unknown; you must run TeX again.}% - \fi - \fi - \else - % It's defined, so just use it. - \csname X#1\endcsname - \fi - #2% Output the suffix in any case. -} - -% Read the last existing aux file, if any. No error if none exists. - -% This is the macro invoked by entries in the aux file. 
-\def\xrdef #1#2{ -{\catcode`\'=\other\expandafter \gdef \csname X#1\endcsname {#2}}} - -\def\readauxfile{% -\begingroup -\catcode `\^^@=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\^^C=\other -\catcode `\^^D=\other -\catcode `\^^E=\other -\catcode `\^^F=\other -\catcode `\^^G=\other -\catcode `\^^H=\other -\catcode `\=\other -\catcode `\^^L=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode 26=\other -\catcode `\^^[=\other -\catcode `\^^\=\other -\catcode `\^^]=\other -\catcode `\^^^=\other -\catcode `\^^_=\other -\catcode `\@=\other -\catcode `\^=\other -\catcode `\~=\other -\catcode `\[=\other -\catcode `\]=\other -\catcode`\"=\other -\catcode`\_=\other -\catcode`\|=\other -\catcode`\<=\other -\catcode`\>=\other -\catcode `\$=\other -\catcode `\#=\other -\catcode `\&=\other -% `\+ does not work, so use 43. -\catcode 43=\other -% Make the characters 128-255 be printing characters -{% - \count 1=128 - \def\loop{% - \catcode\count 1=\other - \advance\count 1 by 1 - \ifnum \count 1<256 \loop \fi - }% -}% -% the aux file uses ' as the escape. -% Turn off \ as an escape so we do not lose on -% entries which were dumped with control sequences in their names. -% For example, 'xrdef {$\leq $-fun}{page ...} made by @defun ^^ -% Reference to such entries still does not work the way one would wish, -% but at least they do not bomb out when the aux file is read in. -\catcode `\{=1 \catcode `\}=2 -\catcode `\%=\other -\catcode `\'=0 -\catcode`\^=7 % to make ^^e4 etc usable in xref tags -\catcode `\\=\other -\openin 1 \jobname.aux -\ifeof 1 \else \closein 1 \input \jobname.aux \global\havexrefstrue -\global\warnedobstrue -\fi -% Open the new aux file. Tex will close it automatically at exit. -\openout \auxfile=\jobname.aux -\endgroup} - - -% Footnotes. 
- -\newcount \footnoteno - -% The trailing space in the following definition for supereject is -% vital for proper filling; pages come out unaligned when you do a -% pagealignmacro call if that space before the closing brace is -% removed. -\def\supereject{\par\penalty -20000\footnoteno =0 } - -% @footnotestyle is meaningful for info output only.. -\let\footnotestyle=\comment - -\let\ptexfootnote=\footnote - -{\catcode `\@=11 -% -% Auto-number footnotes. Otherwise like plain. -\gdef\footnote{% - \global\advance\footnoteno by \@ne - \edef\thisfootno{$^{\the\footnoteno}$}% - % - % In case the footnote comes at the end of a sentence, preserve the - % extra spacing after we do the footnote number. - \let\@sf\empty - \ifhmode\edef\@sf{\spacefactor\the\spacefactor}\/\fi - % - % Remove inadvertent blank space before typesetting the footnote number. - \unskip - \thisfootno\@sf - \footnotezzz -}% - -% Don't bother with the trickery in plain.tex to not require the -% footnote text as a parameter. Our footnotes don't need to be so general. -% -\long\gdef\footnotezzz#1{\insert\footins{% - % We want to typeset this text as a normal paragraph, even if the - % footnote reference occurs in (for example) a display environment. - % So reset some parameters. - \interlinepenalty\interfootnotelinepenalty - \splittopskip\ht\strutbox % top baseline for broken footnotes - \splitmaxdepth\dp\strutbox - \floatingpenalty\@MM - \leftskip\z@skip - \rightskip\z@skip - \spaceskip\z@skip - \xspaceskip\z@skip - \parindent\defaultparindent - % - % Hang the footnote text off the number. - \hang - \textindent{\thisfootno}% - % - % Don't crash into the line above the footnote text. Since this - % expands into a box, it must come within the paragraph, lest it - % provide a place where TeX can split the footnote. - \footstrut - #1\strut}% -} - -}%end \catcode `\@=11 - -% Set the baselineskip to #1, and the lineskip and strut size -% correspondingly. 
There is no deep meaning behind these magic numbers -% used as factors; they just match (closely enough) what Knuth defined. -% -\def\lineskipfactor{.08333} -\def\strutheightpercent{.70833} -\def\strutdepthpercent {.29167} -% -\def\setleading#1{% - \normalbaselineskip = #1\relax - \normallineskip = \lineskipfactor\normalbaselineskip - \normalbaselines - \setbox\strutbox =\hbox{% - \vrule width0pt height\strutheightpercent\baselineskip - depth \strutdepthpercent \baselineskip - }% -} - -% @| inserts a changebar to the left of the current line. It should -% surround any changed text. This approach does *not* work if the -% change spans more than two lines of output. To handle that, we would -% have adopt a much more difficult approach (putting marks into the main -% vertical list for the beginning and end of each change). -% -\def\|{% - % \vadjust can only be used in horizontal mode. - \leavevmode - % - % Append this vertical mode material after the current line in the output. - \vadjust{% - % We want to insert a rule with the height and depth of the current - % leading; that is exactly what \strutbox is supposed to record. - \vskip-\baselineskip - % - % \vadjust-items are inserted at the left edge of the type. So - % the \llap here moves out into the left-hand margin. - \llap{% - % - % For a thicker or thinner bar, change the `1pt'. - \vrule height\baselineskip width1pt - % - % This is the space between the bar and the text. - \hskip 12pt - }% - }% -} - -% For a final copy, take out the rectangles -% that mark overfull boxes (in case you have decided -% that the text looks ok even though it passes the margin). -% -\def\finalout{\overfullrule=0pt} - - -% End of control word definitions. - -\message{and turning on texinfo input format.} - -\def\openindices{% - \newindex{cp}% - \newcodeindex{fn}% - \newcodeindex{vr}% - \newcodeindex{tp}% - \newcodeindex{ky}% - \newcodeindex{pg}% -} - -% Set some numeric style parameters, for 8.5 x 11 format. 
- -%\hsize = 6.5in -\newdimen\defaultparindent \defaultparindent = 15pt -\parindent = \defaultparindent -\parskip 18pt plus 1pt -\setleading{15pt} -\advance\topskip by 1.2cm - -% Prevent underfull vbox error messages. -\vbadness=10000 - -% Following George Bush, just get rid of widows and orphans. -\widowpenalty=10000 -\clubpenalty=10000 - -% Use TeX 3.0's \emergencystretch to help line breaking, but if we're -% using an old version of TeX, don't do anything. We want the amount of -% stretch added to depend on the line length, hence the dependence on -% \hsize. This makes it come to about 9pt for the 8.5x11 format. -% -\ifx\emergencystretch\thisisundefined - % Allow us to assign to \emergencystretch anyway. - \def\emergencystretch{\dimen0}% -\else - \emergencystretch = \hsize - \divide\emergencystretch by 45 -\fi - -% Use @smallbook to reset parameters for 7x9.5 format (or else 7x9.25) -\def\smallbook{ - -% These values for secheadingskip and subsecheadingskip are -% experiments. RJC 7 Aug 1992 -\global\secheadingskip = 17pt plus 6pt minus 3pt -\global\subsecheadingskip = 14pt plus 6pt minus 3pt - -\global\lispnarrowing = 0.3in -\setleading{12pt} -\advance\topskip by -1cm -\global\parskip 3pt plus 1pt -\global\hsize = 5in -\global\vsize=7.5in -\global\tolerance=700 -\global\hfuzz=1pt -\global\contentsrightmargin=0pt -\global\deftypemargin=0pt -\global\defbodyindent=.5cm - -\global\pagewidth=\hsize -\global\pageheight=\vsize - -\global\let\smalllisp=\smalllispx -\global\let\smallexample=\smalllispx -\global\def\Esmallexample{\Esmalllisp} -} - -% Use @afourpaper to print on European A4 paper. 
-\def\afourpaper{ -\global\tolerance=700 -\global\hfuzz=1pt -\setleading{12pt} -\global\parskip 15pt plus 1pt - -\global\vsize= 53\baselineskip -\advance\vsize by \topskip -%\global\hsize= 5.85in % A4 wide 10pt -\global\hsize= 6.5in -\global\outerhsize=\hsize -\global\advance\outerhsize by 0.5in -\global\outervsize=\vsize -\global\advance\outervsize by 0.6in - -\global\pagewidth=\hsize -\global\pageheight=\vsize -} - -% Allow control of the text dimensions. Parameters in order: textheight; -% textwidth; \voffset; \hoffset (!); binding offset. All require a dimension; -% header is additional; added length extends the bottom of the page. - -\def\changepagesizes#1#2#3#4#5{ - \global\vsize= #1 - \advance\vsize by \topskip - \global\voffset= #3 - \global\hsize= #2 - \global\outerhsize=\hsize - \global\advance\outerhsize by 0.5in - \global\outervsize=\vsize - \global\advance\outervsize by 0.6in - \global\pagewidth=\hsize - \global\pageheight=\vsize - \global\normaloffset= #4 - \global\bindingoffset= #5} - -% This layout is compatible with Latex on A4 paper. - -\def\afourlatex{\changepagesizes{22cm}{15cm}{7mm}{4.6mm}{5mm}} - -% Use @afourwide to print on European A4 paper in wide format. -\def\afourwide{\afourpaper -\changepagesizes{9.5in}{6.5in}{\hoffset}{\normaloffset}{\bindingoffset}} - -% Define macros to output various characters with catcode for normal text. -\catcode`\"=\other -\catcode`\~=\other -\catcode`\^=\other -\catcode`\_=\other -\catcode`\|=\other -\catcode`\<=\other -\catcode`\>=\other -\catcode`\+=\other -\def\normaldoublequote{"} -\def\normaltilde{~} -\def\normalcaret{^} -\def\normalunderscore{_} -\def\normalverticalbar{|} -\def\normalless{<} -\def\normalgreater{>} -\def\normalplus{+} - -% This macro is used to make a character print one way in ttfont -% where it can probably just be output, and another way in other fonts, -% where something hairier probably needs to be done. 
-% -% #1 is what to print if we are indeed using \tt; #2 is what to print -% otherwise. Since all the Computer Modern typewriter fonts have zero -% interword stretch (and shrink), and it is reasonable to expect all -% typewriter fonts to have this, we can check that font parameter. -% -\def\ifusingtt#1#2{\ifdim \fontdimen3\the\font=0pt #1\else #2\fi} - -% Turn off all special characters except @ -% (and those which the user can use as if they were ordinary). -% Most of these we simply print from the \tt font, but for some, we can -% use math or other variants that look better in normal text. - -\catcode`\"=\active -\def\activedoublequote{{\tt \char '042}} -\let"=\activedoublequote -\catcode`\~=\active -\def~{{\tt \char '176}} -\chardef\hat=`\^ -\catcode`\^=\active -\def\auxhat{\def^{'hat}} -\def^{{\tt \hat}} - -\catcode`\_=\active -\def_{\ifusingtt\normalunderscore\_} -% Subroutine for the previous macro. -\def\_{\lvvmode \kern.06em \vbox{\hrule width.3em height.1ex}} - -% \lvvmode is equivalent in function to \leavevmode. -% Using \leavevmode runs into trouble when written out to -% an index file due to the expansion of \leavevmode into ``\unhbox -% \voidb@x'' ---which looks to TeX like ``\unhbox \voidb\x'' due to our -% magic tricks with @. -\def\lvvmode{\vbox to 0pt{}} - -\catcode`\|=\active -\def|{{\tt \char '174}} -\chardef \less=`\< -\catcode`\<=\active -\def<{{\tt \less}} -\chardef \gtr=`\> -\catcode`\>=\active -\def>{{\tt \gtr}} -\catcode`\+=\active -\def+{{\tt \char 43}} -%\catcode 27=\active -%\def^^[{$\diamondsuit$} - -% Set up an active definition for =, but don't enable it most of the time. -{\catcode`\==\active -\global\def={{\tt \char 61}}} - -\catcode`+=\active -\catcode`\_=\active - -% If a .fmt file is being used, characters that might appear in a file -% name cannot be active until we have parsed the command line. -% So turn them off again, and have \everyjob (or @setfilename) turn them on. -% \otherifyactive is called near the end of this file. 
-\def\otherifyactive{\catcode`+=\other \catcode`\_=\other} - -\catcode`\@=0 - -% \rawbackslashxx output one backslash character in current font -\global\chardef\rawbackslashxx=`\\ -%{\catcode`\\=\other -%@gdef@rawbackslashxx{\}} - -% \rawbackslash redefines \ as input to do \rawbackslashxx. -{\catcode`\\=\active -@gdef@rawbackslash{@let\=@rawbackslashxx }} - -% \normalbackslash outputs one backslash in fixed width font. -\def\normalbackslash{{\tt\rawbackslashxx}} - -% Say @foo, not \foo, in error messages. -\escapechar=`\@ - -% \catcode 17=0 % Define control-q -\catcode`\\=\active - -% Used sometimes to turn off (effectively) the active characters -% even after parsing them. -@def@turnoffactive{@let"=@normaldoublequote -@let\=@realbackslash -@let~=@normaltilde -@let^=@normalcaret -@let_=@normalunderscore -@let|=@normalverticalbar -@let<=@normalless -@let>=@normalgreater -@let+=@normalplus} - -@def@normalturnoffactive{@let"=@normaldoublequote -@let\=@normalbackslash -@let~=@normaltilde -@let^=@normalcaret -@let_=@normalunderscore -@let|=@normalverticalbar -@let<=@normalless -@let>=@normalgreater -@let+=@normalplus} - -% Make _ and + \other characters, temporarily. -% This is canceled by @fixbackslash. -@otherifyactive - -% If a .fmt file is being used, we don't want the `\input texinfo' to show up. -% That is what \eatinput is for; after that, the `\' should revert to printing -% a backslash. -% -@gdef@eatinput input texinfo{@fixbackslash} -@global@let\ = @eatinput - -% On the other hand, perhaps the file did not have a `\input texinfo'. Then -% the first `\{ in the file would cause an error. This macro tries to fix -% that, assuming it is called before the first `\' could plausibly occur. -% Also back turn on active characters that might appear in the input -% file name, in case not using a pre-dumped format. 
-% -@gdef@fixbackslash{@ifx\@eatinput @let\ = @normalbackslash @fi - @catcode`+=@active @catcode`@_=@active} - -%% These look ok in all fonts, so just make them not special. The @rm below -%% makes sure that the current font starts out as the newly loaded cmr10 -@catcode`@$=@other @catcode`@%=@other @catcode`@&=@other @catcode`@#=@other - -@textfonts -@rm - -@c Local variables: -@c page-delimiter: "^\\\\message" -@c End: diff --git a/token.c b/token.c deleted file mode 100644 index abcadaa..0000000 --- a/token.c +++ /dev/null @@ -1,50 +0,0 @@ -/* token.c -- misc. access functions for mkid database tokens - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include <config.h> -#include "token.h" - -unsigned int -tok_flags (char const *buf) -{ - return *(unsigned char const *)&buf[strlen (buf) + 1]; -} - -#define TOK_COUNT_ADDR(buf) ((unsigned char const *)(TOK_FLAGS_ADDR (buf) + 1)) -#define TOK_HITS_ADDR(buf) ((unsigned char const *)(TOK_COUNT_ADDR (buf) + 2)) - -unsigned short -tok_count (char const *buf) -{ - unsigned char const *flags = (unsigned char const *)&buf[strlen (buf) + 1]; - unsigned char const *addr = flags + 1; - unsigned short count = *addr; - if (*flags & TOK_SHORT_COUNT) - count += (*++addr << 8); - return count; -} - -unsigned char const * -tok_hits_addr (char const *buf) -{ - unsigned char const *flags = (unsigned char const *)&buf[strlen (buf) + 1]; - unsigned char const *addr = flags + 2; - if (*flags & TOK_SHORT_COUNT) - addr++; - return addr; -} diff --git a/token.h b/token.h deleted file mode 100644 index f364de6..0000000 --- a/token.h +++ /dev/null @@ -1,40 +0,0 @@ -/* token.h -- defs for interface to token.c - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _token_h_ -#define _token_h_ - -/* token flags (struct token is in mkid.c) */ -#define TOK_VECTOR 0x01 /* 1 = hits are stored as a vector - 0 = hits are stored as a 8-way tree of bits - mkid chooses whichever is more compact. 
- vector is more compact for tokens with few hits */ -#define TOK_NUMBER 0x02 /* occurs as a number */ -#define TOK_NAME 0x04 /* occurs as a name */ -#define TOK_STRING 0x08 /* occurs in a string */ -#define TOK_LITERAL 0x10 /* occurs as a literal */ -#define TOK_COMMENT 0x20 /* occurs in a comment */ -#define TOK_UNUSED_1 0x40 -#define TOK_SHORT_COUNT 0x80 /* count is two bytes */ - -#define tok_string(buf) (buf) -unsigned int tok_flags __P((char const *buf)); -unsigned short tok_count __P((char const *buf)); -unsigned char const *tok_hits_addr __P((char const *buf)); - -#endif /* not _token_h_ */ diff --git a/version.texi b/version.texi deleted file mode 100644 index 65e414e..0000000 --- a/version.texi +++ /dev/null @@ -1 +0,0 @@ -@set VERSION 3.0.9 |