From 2f9b216275fea538dcb208c11f778ae48702c722 Mon Sep 17 00:00:00 2001 From: Greg McGary Date: Tue, 26 Jan 1999 17:56:25 +0000 Subject: Remove obsolete files --- alloc.h | 34 - alloca.c | 492 ------ ansi2knr.1 | 19 - ansi2knr.c | 474 ------ bitops.c | 116 -- bitops.h | 31 - fid.1 | 26 - fid.c | 186 --- filenames.c | 530 ------ filenames.h | 36 - getopt.c | 748 --------- getopt.h | 129 -- getopt1.c | 180 -- hash.h | 124 -- id.info | 1433 ---------------- id.texinfo | 1615 ------------------ idarg.h | 33 - idfile.c | 246 --- idfile.h | 102 -- idx.c | 95 -- iid.1 | 235 --- iid.c | 2329 -------------------------- iid.help | 92 -- iid.y | 1359 --------------- lid.1 | 211 --- lid.c | 1365 --------------- misc.c | 126 -- misc.h | 38 - mkid.1 | 187 --- mkid.c | 999 ----------- mkid.info | 1097 ------------ mkid.texinfo | 957 ----------- regex.c | 5244 ---------------------------------------------------------- regex.h | 489 ------ scanners.c | 1216 -------------- scanners.h | 30 - stamp-vti | 1 - strcasecmp.c | 76 - strxtra.h | 41 - texinfo.tex | 4421 ------------------------------------------------- token.c | 50 - token.h | 40 - version.texi | 1 - 43 files changed, 27253 deletions(-) delete mode 100644 alloc.h delete mode 100644 alloca.c delete mode 100644 ansi2knr.1 delete mode 100644 ansi2knr.c delete mode 100644 bitops.c delete mode 100644 bitops.h delete mode 100644 fid.1 delete mode 100644 fid.c delete mode 100644 filenames.c delete mode 100644 filenames.h delete mode 100644 getopt.c delete mode 100644 getopt.h delete mode 100644 getopt1.c delete mode 100644 hash.h delete mode 100644 id.info delete mode 100644 id.texinfo delete mode 100644 idarg.h delete mode 100644 idfile.c delete mode 100644 idfile.h delete mode 100644 idx.c delete mode 100644 iid.1 delete mode 100644 iid.c delete mode 100644 iid.help delete mode 100644 iid.y delete mode 100644 lid.1 delete mode 100644 lid.c delete mode 100644 misc.c delete mode 100644 misc.h delete mode 100644 mkid.1 delete mode 
100644 mkid.c delete mode 100644 mkid.info delete mode 100644 mkid.texinfo delete mode 100644 regex.c delete mode 100644 regex.h delete mode 100644 scanners.c delete mode 100644 scanners.h delete mode 100644 stamp-vti delete mode 100644 strcasecmp.c delete mode 100644 strxtra.h delete mode 100644 texinfo.tex delete mode 100644 token.c delete mode 100644 token.h delete mode 100644 version.texi diff --git a/alloc.h b/alloc.h deleted file mode 100644 index c430091..0000000 --- a/alloc.h +++ /dev/null @@ -1,34 +0,0 @@ -/* alloc.h -- convenient interface macros for malloc(3) & friends - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _alloc_h_ -#define _alloc_h_ - -#if HAVE_STDLIB_H -#include -#else /* not HAVE_STDLIB_H */ -#if HAVE_MALLOC_H -#include -#endif /* HAVE_MALLOC_H */ -#endif /* not HAVE_STDLIB_H */ - -#define CALLOC(type, n) ((type *) calloc (sizeof (type), (n))) -#define MALLOC(type, n) ((type *) malloc (sizeof (type) * (n))) -#define REALLOC(old, type, n) ((type *) realloc ((old), sizeof (type) * (n))) - -#endif /* not _alloc_h_ */ diff --git a/alloca.c b/alloca.c deleted file mode 100644 index 7020f32..0000000 --- a/alloca.c +++ /dev/null @@ -1,492 +0,0 @@ -/* alloca.c -- allocate automatically reclaimed memory - (Mostly) portable public-domain implementation -- D A Gwyn - - This implementation of the PWB library alloca function, - which is used to allocate space off the run-time stack so - that it is automatically reclaimed upon procedure exit, - was inspired by discussions with J. Q. Johnson of Cornell. - J.Otto Tennant contributed the Cray support. - - There are some preprocessor constants that can - be defined when compiling for your specific system, for - improved efficiency; however, the defaults should be okay. - - The general concept of this implementation is to keep - track of all alloca-allocated blocks, and reclaim any - that are found to be deeper in the stack than the current - invocation. This heuristic does not reclaim storage as - soon as it becomes invalid, but it will do so eventually. - - As a special case, alloca(0) reclaims storage without - allocating any. It is a good idea to use alloca(0) in - your main control loop, etc. to force garbage collection. */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#ifdef emacs -#include "blockinput.h" -#endif - -/* If compiling with GCC 2, this file's not needed. */ -#if !defined (__GNUC__) || __GNUC__ < 2 - -/* If someone has defined alloca as a macro, - there must be some other way alloca is supposed to work. 
*/ -#ifndef alloca - -#ifdef emacs -#ifdef static -/* actually, only want this if static is defined as "" - -- this is for usg, in which emacs must undefine static - in order to make unexec workable - */ -#ifndef STACK_DIRECTION -you -lose --- must know STACK_DIRECTION at compile-time -#endif /* STACK_DIRECTION undefined */ -#endif /* static */ -#endif /* emacs */ - -/* If your stack is a linked list of frames, you have to - provide an "address metric" ADDRESS_FUNCTION macro. */ - -#if defined (CRAY) && defined (CRAY_STACKSEG_END) -long i00afunc (); -#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg)) -#else -#define ADDRESS_FUNCTION(arg) &(arg) -#endif - -#if __STDC__ -typedef void *pointer; -#else -typedef char *pointer; -#endif - -#define NULL 0 - -/* Different portions of Emacs need to call different versions of - malloc. The Emacs executable needs alloca to call xmalloc, because - ordinary malloc isn't protected from input signals. On the other - hand, the utilities in lib-src need alloca to call malloc; some of - them are very simple, and don't have an xmalloc routine. - - Non-Emacs programs expect this to call use xmalloc. - - Callers below should use malloc. */ - -#ifndef emacs -#define malloc xmalloc -#endif -extern pointer malloc (); - -/* Define STACK_DIRECTION if you know the direction of stack - growth for your system; otherwise it will be automatically - deduced at run-time. - - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ - -#ifndef STACK_DIRECTION -#define STACK_DIRECTION 0 /* Direction unknown. */ -#endif - -#if STACK_DIRECTION != 0 - -#define STACK_DIR STACK_DIRECTION /* Known at compile-time. */ - -#else /* STACK_DIRECTION == 0; need run-time code. */ - -static int stack_dir; /* 1 or -1 once known. 
*/ -#define STACK_DIR stack_dir - -static void -find_stack_direction () -{ - static char *addr = NULL; /* Address of first `dummy', once known. */ - auto char dummy; /* To get stack address. */ - - if (addr == NULL) - { /* Initial entry. */ - addr = ADDRESS_FUNCTION (dummy); - - find_stack_direction (); /* Recurse once. */ - } - else - { - /* Second entry. */ - if (ADDRESS_FUNCTION (dummy) > addr) - stack_dir = 1; /* Stack grew upward. */ - else - stack_dir = -1; /* Stack grew downward. */ - } -} - -#endif /* STACK_DIRECTION == 0 */ - -/* An "alloca header" is used to: - (a) chain together all alloca'ed blocks; - (b) keep track of stack depth. - - It is very important that sizeof(header) agree with malloc - alignment chunk size. The following default should work okay. */ - -#ifndef ALIGN_SIZE -#define ALIGN_SIZE sizeof(double) -#endif - -typedef union hdr -{ - char align[ALIGN_SIZE]; /* To force sizeof(header). */ - struct - { - union hdr *next; /* For chaining headers. */ - char *deep; /* For stack depth measure. */ - } h; -} header; - -static header *last_alloca_header = NULL; /* -> last alloca header. */ - -/* Return a pointer to at least SIZE bytes of storage, - which will be automatically reclaimed upon exit from - the procedure that called alloca. Originally, this space - was supposed to be taken from the current stack frame of the - caller, but that method cannot be made to work for some - implementations of C, for example under Gould's UTX/32. */ - -pointer -alloca (size) - unsigned size; -{ - auto char probe; /* Probes stack depth: */ - register char *depth = ADDRESS_FUNCTION (probe); - -#if STACK_DIRECTION == 0 - if (STACK_DIR == 0) /* Unknown growth direction. */ - find_stack_direction (); -#endif - - /* Reclaim garbage, defined as all alloca'd storage that - was allocated from deeper in the stack than currently. */ - - { - register header *hp; /* Traverses linked list. 
*/ - -#ifdef emacs - BLOCK_INPUT; -#endif - - for (hp = last_alloca_header; hp != NULL;) - if ((STACK_DIR > 0 && hp->h.deep > depth) - || (STACK_DIR < 0 && hp->h.deep < depth)) - { - register header *np = hp->h.next; - - free ((pointer) hp); /* Collect garbage. */ - - hp = np; /* -> next header. */ - } - else - break; /* Rest are not deeper. */ - - last_alloca_header = hp; /* -> last valid storage. */ - -#ifdef emacs - UNBLOCK_INPUT; -#endif - } - - if (size == 0) - return NULL; /* No allocation required. */ - - /* Allocate combined header + user data storage. */ - - { - register pointer new = malloc (sizeof (header) + size); - /* Address of header. */ - - ((header *) new)->h.next = last_alloca_header; - ((header *) new)->h.deep = depth; - - last_alloca_header = (header *) new; - - /* User storage begins just after header. */ - - return (pointer) ((char *) new + sizeof (header)); - } -} - -#if defined (CRAY) && defined (CRAY_STACKSEG_END) - -#ifdef DEBUG_I00AFUNC -#include -#endif - -#ifndef CRAY_STACK -#define CRAY_STACK -#ifndef CRAY2 -/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */ -struct stack_control_header - { - long shgrow:32; /* Number of times stack has grown. */ - long shaseg:32; /* Size of increments to stack. */ - long shhwm:32; /* High water mark of stack. */ - long shsize:32; /* Current size of stack (all segments). */ - }; - -/* The stack segment linkage control information occurs at - the high-address end of a stack segment. (The stack - grows from low addresses to high addresses.) The initial - part of the stack segment linkage control information is - 0200 (octal) words. This provides for register storage - for the routine which overflows the stack. */ - -struct stack_segment_linkage - { - long ss[0200]; /* 0200 overflow words. */ - long sssize:32; /* Number of words in this segment. */ - long ssbase:32; /* Offset to stack base. */ - long:32; - long sspseg:32; /* Offset to linkage control of previous - segment of stack. 
*/ - long:32; - long sstcpt:32; /* Pointer to task common address block. */ - long sscsnm; /* Private control structure number for - microtasking. */ - long ssusr1; /* Reserved for user. */ - long ssusr2; /* Reserved for user. */ - long sstpid; /* Process ID for pid based multi-tasking. */ - long ssgvup; /* Pointer to multitasking thread giveup. */ - long sscray[7]; /* Reserved for Cray Research. */ - long ssa0; - long ssa1; - long ssa2; - long ssa3; - long ssa4; - long ssa5; - long ssa6; - long ssa7; - long sss0; - long sss1; - long sss2; - long sss3; - long sss4; - long sss5; - long sss6; - long sss7; - }; - -#else /* CRAY2 */ -/* The following structure defines the vector of words - returned by the STKSTAT library routine. */ -struct stk_stat - { - long now; /* Current total stack size. */ - long maxc; /* Amount of contiguous space which would - be required to satisfy the maximum - stack demand to date. */ - long high_water; /* Stack high-water mark. */ - long overflows; /* Number of stack overflow ($STKOFEN) calls. */ - long hits; /* Number of internal buffer hits. */ - long extends; /* Number of block extensions. */ - long stko_mallocs; /* Block allocations by $STKOFEN. */ - long underflows; /* Number of stack underflow calls ($STKRETN). */ - long stko_free; /* Number of deallocations by $STKRETN. */ - long stkm_free; /* Number of deallocations by $STKMRET. */ - long segments; /* Current number of stack segments. */ - long maxs; /* Maximum number of stack segments so far. */ - long pad_size; /* Stack pad size. */ - long current_address; /* Current stack segment address. */ - long current_size; /* Current stack segment size. This - number is actually corrupted by STKSTAT to - include the fifteen word trailer area. */ - long initial_address; /* Address of initial segment. */ - long initial_size; /* Size of initial segment. */ - }; - -/* The following structure describes the data structure which trails - any stack segment. 
I think that the description in 'asdef' is - out of date. I only describe the parts that I am sure about. */ - -struct stk_trailer - { - long this_address; /* Address of this block. */ - long this_size; /* Size of this block (does not include - this trailer). */ - long unknown2; - long unknown3; - long link; /* Address of trailer block of previous - segment. */ - long unknown5; - long unknown6; - long unknown7; - long unknown8; - long unknown9; - long unknown10; - long unknown11; - long unknown12; - long unknown13; - long unknown14; - }; - -#endif /* CRAY2 */ -#endif /* not CRAY_STACK */ - -#ifdef CRAY2 -/* Determine a "stack measure" for an arbitrary ADDRESS. - I doubt that "lint" will like this much. */ - -static long -i00afunc (long *address) -{ - struct stk_stat status; - struct stk_trailer *trailer; - long *block, size; - long result = 0; - - /* We want to iterate through all of the segments. The first - step is to get the stack status structure. We could do this - more quickly and more directly, perhaps, by referencing the - $LM00 common block, but I know that this works. */ - - STKSTAT (&status); - - /* Set up the iteration. */ - - trailer = (struct stk_trailer *) (status.current_address - + status.current_size - - 15); - - /* There must be at least one stack segment. Therefore it is - a fatal error if "trailer" is null. */ - - if (trailer == 0) - abort (); - - /* Discard segments that do not contain our argument address. */ - - while (trailer != 0) - { - block = (long *) trailer->this_address; - size = trailer->this_size; - if (block == 0 || size == 0) - abort (); - trailer = (struct stk_trailer *) trailer->link; - if ((block <= address) && (address < (block + size))) - break; - } - - /* Set the result to the offset in this segment and add the sizes - of all predecessor segments. 
*/ - - result = address - block; - - if (trailer == 0) - { - return result; - } - - do - { - if (trailer->this_size <= 0) - abort (); - result += trailer->this_size; - trailer = (struct stk_trailer *) trailer->link; - } - while (trailer != 0); - - /* We are done. Note that if you present a bogus address (one - not in any segment), you will get a different number back, formed - from subtracting the address of the first block. This is probably - not what you want. */ - - return (result); -} - -#else /* not CRAY2 */ -/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP. - Determine the number of the cell within the stack, - given the address of the cell. The purpose of this - routine is to linearize, in some sense, stack addresses - for alloca. */ - -static long -i00afunc (long address) -{ - long stkl = 0; - - long size, pseg, this_segment, stack; - long result = 0; - - struct stack_segment_linkage *ssptr; - - /* Register B67 contains the address of the end of the - current stack segment. If you (as a subprogram) store - your registers on the stack and find that you are past - the contents of B67, you have overflowed the segment. - - B67 also points to the stack segment linkage control - area, which is what we are really interested in. */ - - stkl = CRAY_STACKSEG_END (); - ssptr = (struct stack_segment_linkage *) stkl; - - /* If one subtracts 'size' from the end of the segment, - one has the address of the first word of the segment. - - If this is not the first segment, 'pseg' will be - nonzero. */ - - pseg = ssptr->sspseg; - size = ssptr->sssize; - - this_segment = stkl - size; - - /* It is possible that calling this routine itself caused - a stack overflow. Discard stack segments which do not - contain the target address. 
*/ - - while (!(this_segment <= address && address <= stkl)) - { -#ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl); -#endif - if (pseg == 0) - break; - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - this_segment = stkl - size; - } - - result = address - this_segment; - - /* If you subtract pseg from the current end of the stack, - you get the address of the previous stack segment's end. - This seems a little convoluted to me, but I'll bet you save - a cycle somewhere. */ - - while (pseg != 0) - { -#ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o\n", pseg, size); -#endif - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - result += size; - } - return (result); -} - -#endif /* not CRAY2 */ -#endif /* CRAY */ - -#endif /* no alloca */ -#endif /* not GCC version 2 */ diff --git a/ansi2knr.1 b/ansi2knr.1 deleted file mode 100644 index 434ce8f..0000000 --- a/ansi2knr.1 +++ /dev/null @@ -1,19 +0,0 @@ -.TH ANSI2KNR 1 "31 December 1990" -.SH NAME -ansi2knr \- convert ANSI C to Kernighan & Ritchie C -.SH SYNOPSIS -.I ansi2knr -input_file output_file -.SH DESCRIPTION -If no output_file is supplied, output goes to stdout. -.br -There are no error messages. -.sp -.I ansi2knr -recognizes functions by seeing a non-keyword identifier at the left margin, followed by a left parenthesis, with a right parenthesis as the last character on the line. It will recognize a multi-line header if the last character on each line but the last is a left parenthesis or comma. These algorithms ignore whitespace and comments, except that the function name must be the first thing on the line. -.sp -The following constructs will confuse it: -.br - - Any other construct that starts at the left margin and follows the above syntax (such as a macro or function call). 
-.br - - Macros that tinker with the syntax of the function header. diff --git a/ansi2knr.c b/ansi2knr.c deleted file mode 100644 index 8a7d4b5..0000000 --- a/ansi2knr.c +++ /dev/null @@ -1,474 +0,0 @@ -/* Copyright (C) 1989, 1991, 1993, 1994 Aladdin Enterprises. All rights reserved. */ - -/* ansi2knr.c */ -/* Convert ANSI C function definitions to K&R ("traditional C") syntax */ - -/* -ansi2knr is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY. No author or distributor accepts responsibility -to anyone for the consequences of using it or for whether it serves any -particular purpose or works at all, unless he says so in writing. Refer -to the GNU General Public License for full details. - -Everyone is granted permission to copy, modify and redistribute -ansi2knr, but only under the conditions described in the GNU -General Public License. A copy of this license is supposed to have been -given to you along with ansi2knr so you can know your rights and -responsibilities. It should be in a file named COPYLEFT. Among other -things, the copyright notice and this notice must be preserved on all -copies. -*/ - -/* - * Usage: - ansi2knr input_file [output_file] - * If no output_file is supplied, output goes to stdout. - * There are no error messages. - * - * ansi2knr recognizes function definitions by seeing a non-keyword - * identifier at the left margin, followed by a left parenthesis, - * with a right parenthesis as the last character on the line. - * It will recognize a multi-line header provided that the last character - * of the last line of the header is a right parenthesis, - * and no intervening line ends with a left or right brace or a semicolon. - * These algorithms ignore whitespace and comments, except that - * the function name must be the first thing on the line. - * The following constructs will confuse it: - * - Any other construct that starts at the left margin and - * follows the above syntax (such as a macro or function call). 
- * - Macros that tinker with the syntax of the function header. - */ - -/* - * The original and principal author of ansi2knr is L. Peter Deutsch - * . Other authors are noted in the change history - * that follows (in reverse chronological order): - lpd 94-12-18 added conditionals for systems where ctype macros - don't handle 8-bit characters properly, suggested by - Francois Pinard ; - removed --varargs switch (this is now the default) - lpd 94-10-10 removed CONFIG_BROKETS conditional - lpd 94-07-16 added some conditionals to help GNU `configure', - suggested by Francois Pinard ; - properly erase prototype args in function parameters, - contributed by Jim Avera ; - correct error in writeblanks (it shouldn't erase EOLs) - lpd 89-xx-xx original version - */ - -/* Most of the conditionals here are to make ansi2knr work with */ -/* the GNU configure machinery. */ - -#ifdef HAVE_CONFIG_H -# include -#endif - -#include -#include - -#ifdef HAVE_CONFIG_H - -/* - For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h). - This will define HAVE_CONFIG_H and so, activate the following lines. - */ - -# if STDC_HEADERS || HAVE_STRING_H -# include -# else -# include -# endif - -#else /* not HAVE_CONFIG_H */ - -/* - Without AC_CONFIG_HEADER, merely use as in the original - Ghostscript distribution. This loses on older BSD systems. - */ - -# include - -#endif /* not HAVE_CONFIG_H */ - -#ifdef STDC_HEADERS -# include -#else -/* - malloc and free should be declared in stdlib.h, - but if you've got a K&R compiler, they probably aren't. - */ -char *malloc(); -void free(); -#endif - -/* - * The ctype macros don't always handle 8-bit characters correctly. - * Compensate for this here. 
- */ -#ifndef STDC_HEADERS -# define STDC_HEADERS 0 -#endif -#ifdef isascii -# undef HAVE_ISASCII /* just in case */ -# define HAVE_ISASCII 1 -#else -# ifndef HAVE_ISASCII -# define HAVE_ISASCII 0 -# endif -#endif -#if STDC_HEADERS || !HAVE_ISASCII -# define is_ascii(c) 1 -#else -# define is_ascii(c) isascii(c) -#endif - -#define is_space(c) (is_ascii(c) && isspace(c)) -#define is_alpha(c) (is_ascii(c) && isalpha(c)) -#define is_alnum(c) (is_ascii(c) && isalnum(c)) - -/* Scanning macros */ -#define isidchar(ch) (is_alnum(ch) || (ch) == '_') -#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_') - -/* Forward references */ -char *skipspace(); -void writeblanks(); -int test1(); -int convert1(); - -/* The main program */ -int -main(argc, argv) - int argc; - char *argv[]; -{ FILE *in, *out; -#define bufsize 5000 /* arbitrary size */ - char *buf; - char *line; - /* - * In previous versions, ansi2knr recognized a --varargs switch. - * If this switch was supplied, ansi2knr would attempt to convert - * a ... argument to va_alist and va_dcl; if this switch was not - * supplied, ansi2knr would simply drop any such arguments. - * Now, ansi2knr always does this conversion, and we only - * check for this switch for backward compatibility. 
- */ - int convert_varargs = 1; - - if ( argc > 1 && argv[1][0] == '-' ) - { if ( !strcmp(argv[1], "--varargs") ) - { convert_varargs = 1; - argc--; - argv++; - } - else - { fprintf(stderr, "Unrecognized switch: %s\n", argv[1]); - exit(1); - } - } - switch ( argc ) - { - default: - printf("Usage: ansi2knr input_file [output_file]\n"); - exit(0); - case 2: - out = stdout; - break; - case 3: - out = fopen(argv[2], "w"); - if ( out == NULL ) - { fprintf(stderr, "Cannot open output file %s\n", argv[2]); - exit(1); - } - } - in = fopen(argv[1], "r"); - if ( in == NULL ) - { fprintf(stderr, "Cannot open input file %s\n", argv[1]); - exit(1); - } - fprintf(out, "#line 1 \"%s\"\n", argv[1]); - buf = malloc(bufsize); - line = buf; - while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL ) - { switch ( test1(buf) ) - { - case 2: /* a function header */ - convert1(buf, out, 1, convert_varargs); - break; - case 1: /* a function */ - convert1(buf, out, 0, convert_varargs); - break; - case -1: /* maybe the start of a function */ - line = buf + strlen(buf); - if ( line != buf + (bufsize - 1) ) /* overflow check */ - continue; - /* falls through */ - default: /* not a function */ - fputs(buf, out); - break; - } - line = buf; - } - if ( line != buf ) fputs(buf, out); - free(buf); - fclose(out); - fclose(in); - return 0; -} - -/* Skip over space and comments, in either direction. */ -char * -skipspace(p, dir) - register char *p; - register int dir; /* 1 for forward, -1 for backward */ -{ for ( ; ; ) - { while ( is_space(*p) ) p += dir; - if ( !(*p == '/' && p[dir] == '*') ) break; - p += dir; p += dir; - while ( !(*p == '*' && p[dir] == '/') ) - { if ( *p == 0 ) return p; /* multi-line comment?? */ - p += dir; - } - p += dir; p += dir; - } - return p; -} - -/* - * Write blanks over part of a string. - * Don't overwrite end-of-line characters. 
- */ -void -writeblanks(start, end) - char *start; - char *end; -{ char *p; - for ( p = start; p < end; p++ ) - if ( *p != '\r' && *p != '\n' ) *p = ' '; -} - -/* - * Test whether the string in buf is a function definition. - * The string may contain and/or end with a newline. - * Return as follows: - * 0 - definitely not a function definition; - * 1 - definitely a function definition; - * 2 - definitely a function prototype (NOT USED); - * -1 - may be the beginning of a function definition, - * append another line and look again. - * The reason we don't attempt to convert function prototypes is that - * Ghostscript's declaration-generating macros look too much like - * prototypes, and confuse the algorithms. - */ -int -test1(buf) - char *buf; -{ register char *p = buf; - char *bend; - char *endfn; - int contin; - if ( !isidfirstchar(*p) ) - return 0; /* no name at left margin */ - bend = skipspace(buf + strlen(buf) - 1, -1); - switch ( *bend ) - { - case ';': contin = 0 /*2*/; break; - case ')': contin = 1; break; - case '{': return 0; /* not a function */ - case '}': return 0; /* not a function */ - default: contin = -1; - } - while ( isidchar(*p) ) p++; - endfn = p; - p = skipspace(p, 1); - if ( *p++ != '(' ) - return 0; /* not a function */ - p = skipspace(p, 1); - if ( *p == ')' ) - return 0; /* no parameters */ - /* Check that the apparent function name isn't a keyword. */ - /* We only need to check for keywords that could be followed */ - /* by a left parenthesis (which, unfortunately, is most of them). 
*/ - { static char *words[] = - { "asm", "auto", "case", "char", "const", "double", - "extern", "float", "for", "if", "int", "long", - "register", "return", "short", "signed", "sizeof", - "static", "switch", "typedef", "unsigned", - "void", "volatile", "while", 0 - }; - char **key = words; - char *kp; - int len = endfn - buf; - while ( (kp = *key) != 0 ) - { if ( strlen(kp) == len && !strncmp(kp, buf, len) ) - return 0; /* name is a keyword */ - key++; - } - } - return contin; -} - -/* Convert a recognized function definition or header to K&R syntax. */ -int -convert1(buf, out, header, convert_varargs) - char *buf; - FILE *out; - int header; /* Boolean */ - int convert_varargs; /* Boolean */ -{ char *endfn; - register char *p; - char **breaks; - unsigned num_breaks = 2; /* for testing */ - char **btop; - char **bp; - char **ap; - char *vararg = 0; - /* Pre-ANSI implementations don't agree on whether strchr */ - /* is called strchr or index, so we open-code it here. */ - for ( endfn = buf; *(endfn++) != '('; ) ; -top: p = endfn; - breaks = (char **)malloc(sizeof(char *) * num_breaks * 2); - if ( breaks == 0 ) - { /* Couldn't allocate break table, give up */ - fprintf(stderr, "Unable to allocate break table!\n"); - fputs(buf, out); - return -1; - } - btop = breaks + num_breaks * 2 - 2; - bp = breaks; - /* Parse the argument list */ - do - { int level = 0; - char *lp = NULL; - char *rp; - char *end = NULL; - if ( bp >= btop ) - { /* Filled up break table. */ - /* Allocate a bigger one and start over. */ - free((char *)breaks); - num_breaks <<= 1; - goto top; - } - *bp++ = p; - /* Find the end of the argument */ - for ( ; end == NULL; p++ ) - { switch(*p) - { - case ',': - if ( !level ) end = p; - break; - case '(': - if ( !level ) lp = p; - level++; - break; - case ')': - if ( --level < 0 ) end = p; - else rp = p; - break; - case '/': - p = skipspace(p, 1) - 1; - break; - default: - ; - } - } - /* Erase any embedded prototype parameters. 
*/ - if ( lp ) - writeblanks(lp + 1, rp); - p--; /* back up over terminator */ - /* Find the name being declared. */ - /* This is complicated because of procedure and */ - /* array modifiers. */ - for ( ; ; ) - { p = skipspace(p - 1, -1); - switch ( *p ) - { - case ']': /* skip array dimension(s) */ - case ')': /* skip procedure args OR name */ - { int level = 1; - while ( level ) - switch ( *--p ) - { - case ']': case ')': level++; break; - case '[': case '(': level--; break; - case '/': p = skipspace(p, -1) + 1; break; - default: ; - } - } - if ( *p == '(' && *skipspace(p + 1, 1) == '*' ) - { /* We found the name being declared */ - while ( !isidfirstchar(*p) ) - p = skipspace(p, 1) + 1; - goto found; - } - break; - default: goto found; - } - } -found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' ) - { if ( convert_varargs ) - { *bp++ = "va_alist"; - vararg = p-2; - } - else - { p++; - if ( bp == breaks + 1 ) /* sole argument */ - writeblanks(breaks[0], p); - else - writeblanks(bp[-1] - 1, p); - bp--; - } - } - else - { while ( isidchar(*p) ) p--; - *bp++ = p+1; - } - p = end; - } - while ( *p++ == ',' ); - *bp = p; - /* Make a special check for 'void' arglist */ - if ( bp == breaks+2 ) - { p = skipspace(breaks[0], 1); - if ( !strncmp(p, "void", 4) ) - { p = skipspace(p+4, 1); - if ( p == breaks[2] - 1 ) - { bp = breaks; /* yup, pretend arglist is empty */ - writeblanks(breaks[0], p + 1); - } - } - } - /* Put out the function name and left parenthesis. */ - p = buf; - while ( p != endfn ) putc(*p, out), p++; - /* Put out the declaration. 
*/ - if ( header ) - { fputs(");", out); - for ( p = breaks[0]; *p; p++ ) - if ( *p == '\r' || *p == '\n' ) - putc(*p, out); - } - else - { for ( ap = breaks+1; ap < bp; ap += 2 ) - { p = *ap; - while ( isidchar(*p) ) - putc(*p, out), p++; - if ( ap < bp - 1 ) - fputs(", ", out); - } - fputs(") ", out); - /* Put out the argument declarations */ - for ( ap = breaks+2; ap <= bp; ap += 2 ) - (*ap)[-1] = ';'; - if ( vararg != 0 ) - { *vararg = 0; - fputs(breaks[0], out); /* any prior args */ - fputs("va_dcl", out); /* the final arg */ - fputs(bp[0], out); - } - else - fputs(breaks[0], out); - } - free((char *)breaks); - return 0; -} diff --git a/bitops.c b/bitops.c deleted file mode 100644 index 357aec6..0000000 --- a/bitops.c +++ /dev/null @@ -1,116 +0,0 @@ -/* bitops.c -- Bit-vector manipulation for mkid - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include -#include "bitops.h" - -static int str_to_int __P((char *bufp, int size)); -static char *int_to_str __P((int i, int size)); - -int -vec_to_bits (char *bit_array, char *vec, int size) -{ - int i; - int count; - - for (count = 0; (*vec & 0xff) != 0xff; count++) - { - i = str_to_int (vec, size); - BITSET (bit_array, i); - vec += size; - } - return count; -} - -int -bits_to_vec (char *vec, char *bit_array, int bit_count, int size) -{ - char *element; - int i; - int count; - - for (count = i = 0; i < bit_count; i++) - { - if (!BITTST (bit_array, i)) - continue; - element = int_to_str (i, size); - switch (size) - { - case 4: - *vec++ = *element++; - case 3: - *vec++ = *element++; - case 2: - *vec++ = *element++; - case 1: - *vec++ = *element++; - } - count++; - } - *vec++ = 0xff; - - return count; -} - -/* NEEDSWORK: ENDIAN */ - -static char * -int_to_str (int i, int size) -{ - static char buf0[4]; - char *bufp = &buf0[size]; - - switch (size) - { - case 4: - *--bufp = (i & 0xff); - i >>= 8; - case 3: - *--bufp = (i & 0xff); - i >>= 8; - case 2: - *--bufp = (i & 0xff); - i >>= 8; - case 1: - *--bufp = (i & 0xff); - } - return buf0; -} - -static int -str_to_int (char *bufp, int size) -{ - int i = 0; - - bufp--; - switch (size) - { - case 4: - i |= (*++bufp & 0xff); - i <<= 8; - case 3: - i |= (*++bufp & 0xff); - i <<= 8; - case 2: - i |= (*++bufp & 0xff); - i <<= 8; - case 1: - i |= (*++bufp & 0xff); - } - return i; -} diff --git a/bitops.h b/bitops.h deleted file mode 100644 index 7b9f15c..0000000 --- a/bitops.h +++ /dev/null @@ -1,31 +0,0 @@ -/* bitops.h -- defs for interface to bitops.c, plus bit-vector macros - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _bitops_h_ -#define _bitops_h_ - -#define BITTST(ba, bn) ((ba)[(bn) >> 3] & (1 << ((bn) & 0x07))) -#define BITSET(ba, bn) ((ba)[(bn) >> 3] |= (1 << ((bn) & 0x07))) -#define BITCLR(ba, bn) ((ba)[(bn) >> 3] &=~(1 << ((bn) & 0x07))) -#define BITAND(ba, bn) ((ba)[(bn) >> 3] &= (1 << ((bn) & 0x07))) -#define BITXOR(ba, bn) ((ba)[(bn) >> 3] ^= (1 << ((bn) & 0x07))) - -int vec_to_bits __P((char *bit_array, char *vec, int size)); -int bits_to_vec __P((char *vec, char *bit_array, int bit_count, int size)); - -#endif /* not _bitops_h_ */ diff --git a/fid.1 b/fid.1 deleted file mode 100644 index d504e80..0000000 --- a/fid.1 +++ /dev/null @@ -1,26 +0,0 @@ -.TH FID 1 -.SH NAME -fid \- query id database for specific files -.SH SYNOPSIS -.B fid -.RB [ \-f \^file] -file1 [ file2 ] -.SH DESCRIPTION -.I Fid -is a query tool for the id database. If you specify a single file -name as an argument, it prints a list of all the identifiers that -occur in that file. -.PP -When you give it two file names it takes the intersection. It prints -only the list of identifiers that occur in both files. -.PP -The following options are recognized: -.TP 10 -.BR \-f file\^ -Use -.I file\^ -as the database instead of the default -.BR ID . -.SH SEE ALSO -mkid(1), -lid(1). 
diff --git a/fid.c b/fid.c deleted file mode 100644 index 6db7eff..0000000 --- a/fid.c +++ /dev/null @@ -1,186 +0,0 @@ -/* fid.c -- list all tokens in the given file(s) - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include -#include -#include - -#include -#include "idfile.h" -#include "bitops.h" -#include "filenames.h" -#include "misc.h" -#include "strxtra.h" -#include "alloc.h" -#include "token.h" - -int get_idarg_index __P((char const *file_name)); -int is_hit __P((unsigned char const *hits, int file_number)); -int is_hit_1 __P((unsigned char const **hits, int level, int file_number)); -void skip_hits __P((unsigned char const **hits, int level)); - -FILE *id_FILE; -struct idhead idh; -struct idarg *idarg_0; -int tree8_levels; -char const *program_name; - -static void -usage (void) -{ - fprintf (stderr, "Usage: %s [-f] file1 file2\n", program_name); - exit (1); -} - -int -main (int argc, char **argv) -{ - char const *id_file_name = IDFILE; - char *buf; - int op; - int i; - int index_1 = -1; - int index_2 = -1; - - program_name = basename ((argc--, *argv++)); - - while (argc) - { - char const *arg = (argc--, *argv++); - switch (op = *arg++) - { - case '-': - case '+': - break; - default: - (argc++, --argv); - goto argsdone; - } - while (*arg) - switch (*arg++) - { - case 'f': - 
id_file_name = arg; - goto nextarg; - default: - usage (); - } - nextarg:; - } -argsdone: - - id_file_name = find_id_file (id_file_name); - if (id_file_name == NULL) - { - filerr ("open", id_file_name); - return 1; - } - id_FILE = init_id_file (id_file_name, &idh, &idarg_0); - switch (argc) - { - case 2: - index_2 = get_idarg_index (argv[1]); - /* fall through */ - case 1: - index_1 = get_idarg_index (argv[0]); - break; - default: - usage (); - } - - if (index_1 < 0) - return 1; - - buf = MALLOC (char, idh.idh_buf_size); - fseek (id_FILE, idh.idh_tokens_offset, 0); - tree8_levels = tree8_count_levels (idh.idh_files); - - for (i = 0; i < idh.idh_tokens; i++) - { - unsigned char const *hits; - - gets_past_00 (buf, id_FILE); - hits = tok_hits_addr (buf); - if (is_hit (hits, index_1) && (index_2 < 0 || is_hit (hits, index_2))) - printf ("%s\n", tok_string (buf)); - } - - return 0; -} - -int -get_idarg_index (char const *file_name) -{ - struct idarg *idarg; - int file_name_length = strlen (file_name); - struct idarg *end = &idarg_0[idh.idh_files]; - - for (idarg = idarg_0; idarg < end; ++idarg) - { - int arg_length = strlen (idarg->ida_arg); - int prefix_length = arg_length - file_name_length; - if (prefix_length < 0 - || (prefix_length > 0 && idarg->ida_arg[prefix_length - 1] != '/')) - continue; - if (strequ (&idarg->ida_arg[prefix_length], file_name)) - return idarg->ida_index; - } - fprintf (stderr, "%s: not found\n", file_name); - return -1; -} - -int -is_hit (unsigned char const *hits, int file_number) -{ - return is_hit_1 (&hits, tree8_levels, file_number); -} - -int -is_hit_1 (unsigned char const **hits, int level, int file_number) -{ - int file_hit = 1 << ((file_number >> (3 * --level)) & 7); - int hit = *(*hits)++; - int bit; - - if (!(file_hit & hit)) - return 0; - if (level == 0) - return 1; - - for (bit = 1; (bit < file_hit) && (bit & 0xff); bit <<= 1) - { - if (hit & bit) - skip_hits (hits, level); - } - return is_hit_1 (hits, level, file_number); -} - 
-void -skip_hits (unsigned char const **hits, int level) -{ - int hit = *(*hits)++; - int bit; - - if (--level == 0) - return; - for (bit = 1; bit & 0xff; bit <<= 1) - { - if (hit & bit) - skip_hits (hits, level); - } -} diff --git a/filenames.c b/filenames.c deleted file mode 100644 index ad6a23d..0000000 --- a/filenames.c +++ /dev/null @@ -1,530 +0,0 @@ -/* filenames.c -- file & directory name manipulations - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include -#include -#include -#include -#include -#include - -#include -#include "strxtra.h" -#include "filenames.h" -#include "misc.h" -#include "error.h" - -#ifdef S_IFLNK -static char const *unsymlink __P((char *n)); -#endif -static void canonical_name __P((char *n)); -static char const *lex_name __P((void)); -static int same_link __P((struct stat *x, struct stat *y)); - -FILE *popen (); - -/* relative_file_name takes two arguments: - 1) an absolute path name for a directory. (*must* have a trailing "/"). - 2) an absolute path name for a file. - - It looks for a common directory prefix and generates a name for the - given file that is relative to the given directory. The result - might begin with a long sequence of "../"s, if the given names are - long but have a short common prefix. 
- - (Note: If the the result of relative_file_name is appended to its - directory argument and passed to span_file_name, span_file_name's - result should match relative_file_name's file name argument.) - - Examples: - dir arg return value - /x/y/z/ /x/y/q/file ../q/file - /x/y/z/ /q/t/p/file ../../../q/t/p/file - /x/y/z/ /x/y/z/file file */ - -char const * -relative_file_name (char const *dir_name, char const *file_name) -{ - static char file_name_buffer[MAXPATHLEN]; - char *bp = file_name_buffer; - - while (*file_name && *file_name++ == *dir_name++) - ; - while (*--dir_name != '/') - ; - dir_name++; - while (*--file_name != '/') - ; - file_name++; - /* file_name and dir_name now point past their common directory prefix */ - - /* copy "../" into the buffer for each component of the directory - that remains. */ - - while (*dir_name) - { - if (*dir_name++ == '/') - { - strcpy (bp, "../"); - bp += 3; - } - } - - strcpy (bp, file_name); - return file_name_buffer; -} - -/* span_file_name accepts a canonical directory name and a file name - and returns a canonical path to the file name relative to the - directory. If the file name is absolute, then the directory is - ignored. */ - -char const * -span_file_name (char const *dir_name, char const *file_name) -{ - char *fnp; - static char file_name_buffer[MAXPATHLEN]; - - strcpy (file_name_buffer, dir_name); - fnp = file_name_buffer + strlen (file_name_buffer); - *fnp++ = '/'; - strcpy (fnp, file_name); - canonical_name (fnp); - /* If it is an absolute name, just return it */ - if (*fnp == '/') - return fnp; - /* otherwise, combine the names to canonical form */ - canonical_name (file_name_buffer); - return file_name_buffer; -} - -/* root_name strips off the directory prefix and one suffix. If there - is neither prefix nor suffix, (i.e., "/"), it returns the empty - string. 
*/ - -char const * -root_name (char const *path) -{ - static char file_name_buffer[MAXPATHLEN]; - char const *root; - char const *dot; - - root = strrchr (path, '/'); - if (root == NULL) - root = path; - else - root++; - - dot = strrchr (root, '.'); - if (dot == NULL) - strcpy (file_name_buffer, root); - else - { - strncpy (file_name_buffer, root, dot - root); - file_name_buffer[dot - root] = '\0'; - } - return file_name_buffer; -} - -/* suff_name returns the suffix (including the dot), or the - empty-string if there is none. */ - -char const * -suff_name (char const *path) -{ - char const *dot; - - dot = strrchr (path, '.'); - if (dot == NULL) - return ""; - return dot; -} - -/* Return non-zero if the two stat bufs refer to the same file or - directory */ - -static int -same_link (struct stat *x, struct stat *y) -{ - return ((x->st_ino == y->st_ino) && (x->st_dev == y->st_dev)); -} - -/* find_id_file adds "../"s to the beginning of a file name until it - finds the one that really exists. If the file name starts with - "/", just return it as is. If we fail for any reason, report the - error and exit. 
*/ - -char const * -find_id_file (char const *arg) -{ - static char file_name_buffer[MAXPATHLEN]; - char *name; - char *dir_end; - struct stat root_buf; - struct stat stat_buf; - - if (arg[0] == '/') - return arg; - if (stat (arg, &stat_buf) == 0) - return arg; - - name = &file_name_buffer[sizeof (file_name_buffer) - strlen (arg) - 1]; - strcpy (name, arg); - dir_end = name - 1; - - if (stat ("/", &root_buf) < 0) - { - error (1, errno, "Can't stat `/'"); - return NULL; - } - do - { - *--name = '/'; - *--name = '.'; - *--name = '.'; - if (stat (name, &stat_buf) == 0) - return name; - *dir_end = '\0'; - if (stat (name, &stat_buf) < 0) - return NULL; - *dir_end = '/'; - } - while (name >= &file_name_buffer[3] && !same_link(&stat_buf, &root_buf)); - error (1, errno, "Can't stat `%s' anywhere between here and `/'", arg); - return NULL; -} - -/* define special name components */ - -static char slash[] = "/"; -static char dot[] = "."; -static char dotdot[] = ".."; - -/* nextc points to the next character to look at in the string or is - * null if the end of string was reached. - * - * namep points to buffer that holds the components. - */ -static char const *nextc = NULL; -static char *namep; - -/* lex_name - Return next name component. Uses global variables initialized - * by canonical_name to figure out what it is scanning. - */ -static char const * -lex_name (void) -{ - char c; - char const *d; - - if (nextc == NULL) - return NULL; - - c = *nextc++; - if (c == '\0') - { - nextc = NULL; - return NULL; - } - if (c == '/') - return slash; - if (c == '.') - { - if ((*nextc == '/') || (*nextc == '\0')) - return dot; - if (*nextc == '.' && (*(nextc + 1) == '/' || *(nextc + 1) == '\0')) - { - ++nextc; - return dotdot; - } - } - d = namep; - *namep++ = c; - while ((c = *nextc) != '/') - { - *namep++ = c; - if (c == '\0') - { - nextc = NULL; - return d; - } - ++nextc; - } - *namep++ = '\0'; - return d; -} - -/* canonical_name puts a file name in canonical form. 
It looks for all - the whacky wonderful things a demented *ni* programmer might put in - a file name and reduces the name to canonical form. */ - -static void -canonical_name (char *file_name) -{ - char const *components[1024]; - char const **cap = components; - char const **cad; - char const *cp; - char name_buf[2048]; - char const *s; - - /* initialize scanner */ - nextc = file_name; - namep = name_buf; - - while ((cp = lex_name ())) - *cap++ = cp; - if (cap == components) - return; - *cap = NULL; - - /* remove all trailing slashes and dots */ - while ((--cap != components) && - ((*cap == slash) || (*cap == dot))) - *cap = NULL; - - /* squeeze out all "./" sequences */ - cad = cap = components; - while (*cap) - { - if ((*cap == dot) && (*(cap + 1) == slash)) - cap += 2; - else - *cad++ = *cap++; - } - *cad++ = NULL; - - /* find multiple // and use last slash as root, except on apollo which - apparently actually uses // in real file names (don't ask me why). */ -#ifndef apollo - s = NULL; - cad = cap = components; - while (*cap) - { - if ((s == slash) && (*cap == slash)) - cad = components; - s = *cap++; - *cad++ = s; - } - *cad = NULL; -#endif - - /* if this is absolute name get rid of any /.. at beginning */ - if ((components[0] == slash) && (components[1] == dotdot)) - { - cad = cap = &components[1]; - while (*cap == dotdot) - { - ++cap; - if (*cap == NULL) - break; - if (*cap == slash) - ++cap; - } - while (*cap) - *cad++ = *cap++; - *cad = NULL; - } - - /* squeeze out any name/.. sequences (but leave leading ../..) */ - cap = components; - cad = cap; - while (*cap) - { - if ((*cap == dotdot) && ((cad - 2) >= components) && (*(cad - 2) != dotdot)) - { - cad -= 2; - ++cap; - if (*cap) - ++cap; - } - else - *cad++ = *cap++; - } - /* squeezing out a trailing /.. can leave unsightly trailing /s */ - if ((cad >= &components[2]) && ((*(cad - 1)) == slash)) - --cad; - *cad = NULL; - /* if it was just name/.. it now becomes . 
*/ - if (components[0] == NULL) - { - components[0] = dot; - components[1] = NULL; - } - - /* re-assemble components */ - cap = components; - while ((s = *cap++)) - { - while (*s) - *file_name++ = *s++; - } - *file_name++ = '\0'; -} - -/* get_PWD is an optimized getwd(3) or getcwd(3) that takes advantage - of the shell's $PWD environment-variable, if present. This is - particularly worth doing on NFS mounted filesystems. */ - -char const * -get_PWD (char *pwd_buf) -{ - struct stat pwd_stat; - struct stat dot_stat; - char *pwd = getenv ("PWD"); - - if (pwd) - { - pwd = strcpy (pwd_buf, pwd); - if (pwd[0] != '/' - || stat (".", &dot_stat) < 0 - || stat (pwd, &pwd_stat) < 0 - || !same_link(&pwd_stat, &dot_stat) -#ifdef S_IFLNK - || !unsymlink (pwd) - || pwd[0] != '/' - || stat (pwd, &pwd_stat) < 0 - || !same_link(&pwd_stat, &dot_stat) -#endif - ) - pwd = 0; - } - - if (pwd == 0) - { - /* Oh well, something did not work out right, so do it the hard way... */ -#if HAVE_GETCWD - pwd = getcwd (pwd_buf, MAXPATHLEN); -#else -#if HAVE_GETWD - pwd = getwd (pwd_buf); -#endif -#endif - } - if (pwd) - strcat (pwd, "/"); - else - error (1, errno, "Can't determine current working directory!"); - - return pwd; -} - -#ifdef S_IFLNK - -/* unsymlink resolves all symbolic links in a file name into hard - links. If successful, it returns its argument and transforms - the file name in situ. If unsuccessful, it returns NULL, and leaves - the argument untouched. */ - -static char const * -unsymlink (char *file_name_buf) -{ - char new_buf[MAXPATHLEN]; - char part_buf[MAXPATHLEN]; - char link_buf[MAXPATHLEN]; - char const *s; - char *d; - char *lastcomp; - struct stat stat_buf; - - strcpy (new_buf, file_name_buf); - - /* Now loop, lstating each component to see if it is a symbolic - link. For symbolic link components, use readlink() to get the - real name, put the read link name in place of the last component, - and start again. 
*/ - - canonical_name (new_buf); - s = new_buf; - d = part_buf; - if (*s == '/') - *d++ = *s++; - lastcomp = d; - for (;;) - { - if ((*s == '/') || (*s == '\0')) - { - /* we have a complete component name in partname, check it out */ - *d = '\0'; - if (lstat (part_buf, &stat_buf) < 0) - return NULL; - if ((stat_buf.st_mode & S_IFMT) == S_IFLNK) - { - /* This much of name is a symbolic link, do a readlink - and tack the bits and pieces together */ - int link_size = readlink (part_buf, link_buf, MAXPATHLEN); - if (link_size < 0) - return NULL; - link_buf[link_size] = '\0'; - strcpy (lastcomp, link_buf); - lastcomp += link_size; - strcpy (lastcomp, s); - strcpy (new_buf, part_buf); - canonical_name (new_buf); - s = new_buf; - d = part_buf; - if (*s == '/') - *d++ = *s++; - lastcomp = d; - } - else - { - /* Not a symlink, just keep scanning to next component */ - if (*s == '\0') - break; - *d++ = *s++; - lastcomp = d; - } - } - else - { - *d++ = *s++; - } - } - strcpy (file_name_buf, new_buf); - return file_name_buf; -} - -#endif - -FILE * -open_source_FILE (char *file_name, char const *filter) -{ - FILE *source_FILE; - - if (filter) - { - char command[1024]; - sprintf (command, filter, file_name); - source_FILE = popen (command, "r"); - } - else - source_FILE = fopen (file_name, "r"); - if (source_FILE == NULL) - filerr ("open", file_name); - return source_FILE; -} - -void -close_source_FILE (FILE *fp, char const *filter) -{ - if (filter) - pclose (fp); - else - fclose (fp); -} diff --git a/filenames.h b/filenames.h deleted file mode 100644 index 6d5b9f2..0000000 --- a/filenames.h +++ /dev/null @@ -1,36 +0,0 @@ -/* filenames.h -- defs for interface to filenames.c - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _filenames_h_ -#define _filenames_h_ - -#include -#ifndef MAXPATHLEN -#define MAXPATHLEN 1024 -#endif - -char const *relative_file_name __P((char const *dir_name, char const *file_name)); -char const *span_file_name __P((char const *dir, char const *arg)); -char const *root_name __P((char const *path)); -char const *suff_name __P((char const *path)); -char const *find_id_file __P((char const *arg)); -char const *get_PWD __P((char *pathname)); -FILE *open_source_FILE __P((char *file_name, char const *filter)); -void close_source_FILE __P((FILE *fp, char const *filter)); - -#endif /* not _filenames_h_ */ diff --git a/getopt.c b/getopt.c deleted file mode 100644 index 43c0a6a..0000000 --- a/getopt.c +++ /dev/null @@ -1,748 +0,0 @@ -/* Getopt for GNU. - NOTE: getopt is now part of the C library, so if you don't know what - "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu - before changing it! - - Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* This tells Alpha OSF/1 not to define a getopt prototype in . - Ditto for AIX 3.2 and . */ -#ifndef _NO_PROTO -#define _NO_PROTO -#endif - -#ifdef HAVE_CONFIG_H -#include -#endif - -#if !defined (__STDC__) || !__STDC__ -/* This is a separate conditional since some stdc systems - reject `defined (const)'. */ -#ifndef const -#define const -#endif -#endif - -#include - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#if defined (_LIBC) || !defined (__GNU_LIBRARY__) - - -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. */ -#ifdef __GNU_LIBRARY__ -/* Don't include stdlib.h for non-GNU C libraries because some of them - contain conflicting prototypes for getopt. */ -#include -#endif /* GNU C library. */ - -/* This version of `getopt' appears to the caller like standard Unix `getopt' - but it behaves differently for the user, since it allows the user - to intersperse the options with the other arguments. - - As `getopt' works, it permutes the elements of ARGV so that, - when it is done, all the options precede everything else. Thus - all application programs are extended to handle flexible argument order. - - Setting the environment variable POSIXLY_CORRECT disables permutation. - Then the behavior is completely standard. 
- - GNU application programs can use a third alternative mode in which - they can distinguish the relative order of options and other arguments. */ - -#include "getopt.h" - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -char *optarg = NULL; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns EOF, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -/* XXX 1003.2 says this must be 1 before any call. */ -int optind = 0; - -/* The next char to be scanned in the option-element - in which the last option character we returned was found. - This allows us to pick up the scan where we left off. - - If this is zero, or a null string, it means resume the scan - by advancing to the next ARGV-element. */ - -static char *nextchar; - -/* Callers store zero here to inhibit the error message - for unrecognized options. */ - -int opterr = 1; - -/* Set to an option character which was unrecognized. - This must be initialized on some systems to avoid linking in the - system's own getopt implementation. */ - -int optopt = '?'; - -/* Describe how to deal with options that follow non-option ARGV-elements. - - If the caller did not specify anything, - the default is REQUIRE_ORDER if the environment variable - POSIXLY_CORRECT is defined, PERMUTE otherwise. - - REQUIRE_ORDER means don't recognize them as options; - stop option processing when the first non-option is seen. - This is what Unix does. 
- This mode of operation is selected by either setting the environment - variable POSIXLY_CORRECT, or using `+' as the first character - of the list of option characters. - - PERMUTE is the default. We permute the contents of ARGV as we scan, - so that eventually all the non-options are at the end. This allows options - to be given in any order, even with programs that were not written to - expect this. - - RETURN_IN_ORDER is an option available to programs that were written - to expect options and other ARGV-elements in any order and that care about - the ordering of the two. We describe each non-option ARGV-element - as if it were the argument of an option with character code 1. - Using `-' as the first character of the list of option characters - selects this mode of operation. - - The special argument `--' forces an end of option-scanning regardless - of the value of `ordering'. In the case of RETURN_IN_ORDER, only - `--' can cause `getopt' to return EOF with `optind' != ARGC. */ - -static enum -{ - REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER -} ordering; - -/* Value of POSIXLY_CORRECT environment variable. */ -static char *posixly_correct; - -#ifdef __GNU_LIBRARY__ -/* We want to avoid inclusion of string.h with non-GNU libraries - because there are many ways it can cause trouble. - On some systems, it contains special magic macros that don't work - in GCC. */ -#include -#define my_index strchr -#else - -/* Avoid depending on library functions or files - whose names are inconsistent. */ - -char *getenv (); - -static char * -my_index (str, chr) - const char *str; - int chr; -{ - while (*str) - { - if (*str == chr) - return (char *) str; - str++; - } - return 0; -} - -/* If using GCC, we can safely declare strlen this way. - If not using GCC, it is ok not to declare it. */ -#ifdef __GNUC__ -/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. - That was relevant to code that was here before. 
*/ -#if !defined (__STDC__) || !__STDC__ -/* gcc with -traditional declares the built-in strlen to return int, - and has done so at least since version 2.4.5. -- rms. */ -extern int strlen (const char *); -#endif /* not __STDC__ */ -#endif /* __GNUC__ */ - -#endif /* not __GNU_LIBRARY__ */ - -/* Handle permutation of arguments. */ - -/* Describe the part of ARGV that contains non-options that have - been skipped. `first_nonopt' is the index in ARGV of the first of them; - `last_nonopt' is the index after the last of them. */ - -static int first_nonopt; -static int last_nonopt; - -/* Exchange two adjacent subsequences of ARGV. - One subsequence is elements [first_nonopt,last_nonopt) - which contains all the non-options that have been skipped so far. - The other is elements [last_nonopt,optind), which contains all - the options processed since those non-options were skipped. - - `first_nonopt' and `last_nonopt' are relocated so that they describe - the new indices of the non-options in ARGV after they are moved. */ - -static void -exchange (argv) - char **argv; -{ - int bottom = first_nonopt; - int middle = last_nonopt; - int top = optind; - char *tem; - - /* Exchange the shorter segment with the far end of the longer segment. - That puts the shorter segment into the right place. - It leaves the longer segment in the right place overall, - but it consists of two parts that need to be swapped next. */ - - while (top > middle && middle > bottom) - { - if (top - middle > middle - bottom) - { - /* Bottom segment is the short one. */ - int len = middle - bottom; - register int i; - - /* Swap it with the top part of the top segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[top - (middle - bottom) + i]; - argv[top - (middle - bottom) + i] = tem; - } - /* Exclude the moved bottom segment from further swapping. */ - top -= len; - } - else - { - /* Top segment is the short one. 
*/ - int len = top - middle; - register int i; - - /* Swap it with the bottom part of the bottom segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[middle + i]; - argv[middle + i] = tem; - } - /* Exclude the moved top segment from further swapping. */ - bottom += len; - } - } - - /* Update records for the slots the non-options now occupy. */ - - first_nonopt += (optind - last_nonopt); - last_nonopt = optind; -} - -/* Initialize the internal data when the first call is made. */ - -static const char * -_getopt_initialize (optstring) - const char *optstring; -{ - /* Start processing options with ARGV-element 1 (since ARGV-element 0 - is the program name); the sequence of previously skipped - non-option ARGV-elements is empty. */ - - first_nonopt = last_nonopt = optind = 1; - - nextchar = NULL; - - posixly_correct = getenv ("POSIXLY_CORRECT"); - - /* Determine how to handle the ordering of options and nonoptions. */ - - if (optstring[0] == '-') - { - ordering = RETURN_IN_ORDER; - ++optstring; - } - else if (optstring[0] == '+') - { - ordering = REQUIRE_ORDER; - ++optstring; - } - else if (posixly_correct != NULL) - ordering = REQUIRE_ORDER; - else - ordering = PERMUTE; - - return optstring; -} - -/* Scan elements of ARGV (whose length is ARGC) for option characters - given in OPTSTRING. - - If an element of ARGV starts with '-', and is not exactly "-" or "--", - then it is an option element. The characters of this element - (aside from the initial '-') are option characters. If `getopt' - is called repeatedly, it returns successively each of the option characters - from each of the option elements. - - If `getopt' finds another option character, it returns that character, - updating `optind' and `nextchar' so that the next call to `getopt' can - resume the scan with the following option character or ARGV-element. - - If there are no more option characters, `getopt' returns `EOF'. 
- Then `optind' is the index in ARGV of the first ARGV-element - that is not an option. (The ARGV-elements have been permuted - so that those that are not options now come last.) - - OPTSTRING is a string containing the legitimate option characters. - If an option character is seen that is not listed in OPTSTRING, - return '?' after printing an error message. If you set `opterr' to - zero, the error message is suppressed but we still return '?'. - - If a char in OPTSTRING is followed by a colon, that means it wants an arg, - so the following text in the same ARGV-element, or the text of the following - ARGV-element, is returned in `optarg'. Two colons mean an option that - wants an optional arg; if there is text in the current ARGV-element, - it is returned in `optarg', otherwise `optarg' is set to zero. - - If OPTSTRING starts with `-' or `+', it requests different methods of - handling the non-option ARGV-elements. - See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. - - Long-named options begin with `--' instead of `-'. - Their names may be abbreviated as long as the abbreviation is unique - or is an exact match for some defined option. If they have an - argument, it follows the option name in the same ARGV-element, separated - from the option name by a `=', or else the in next ARGV-element. - When `getopt' finds a long-named option, it returns 0 if that option's - `flag' field is nonzero, the value of the option's `val' field - if the `flag' field is zero. - - The elements of ARGV aren't really const, because we permute them. - But we pretend they're const in the prototype to be compatible - with other systems. - - LONGOPTS is a vector of `struct option' terminated by an - element containing a name which is zero. - - LONGIND returns the index in LONGOPT of the long-named option found. - It is only valid when a long-named option has been found by the most - recent call. 
- - If LONG_ONLY is nonzero, '-' as well as '--' can introduce - long-named options. */ - -int -_getopt_internal (argc, argv, optstring, longopts, longind, long_only) - int argc; - char *const *argv; - const char *optstring; - const struct option *longopts; - int *longind; - int long_only; -{ - optarg = NULL; - - if (optind == 0) - optstring = _getopt_initialize (optstring); - - if (nextchar == NULL || *nextchar == '\0') - { - /* Advance to the next ARGV-element. */ - - if (ordering == PERMUTE) - { - /* If we have just processed some options following some non-options, - exchange them so that the options come first. */ - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (last_nonopt != optind) - first_nonopt = optind; - - /* Skip any additional non-options - and extend the range of non-options previously skipped. */ - - while (optind < argc - && (argv[optind][0] != '-' || argv[optind][1] == '\0')) - optind++; - last_nonopt = optind; - } - - /* The special ARGV-element `--' means premature end of options. - Skip it like a null option, - then exchange with previous non-options as if it were an option, - then skip everything else like a non-option. */ - - if (optind != argc && !strcmp (argv[optind], "--")) - { - optind++; - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (first_nonopt == last_nonopt) - first_nonopt = optind; - last_nonopt = argc; - - optind = argc; - } - - /* If we have done all the ARGV-elements, stop the scan - and back over any non-options that we skipped and permuted. */ - - if (optind == argc) - { - /* Set the next-arg-index to point at the non-options - that we previously skipped, so the caller will digest them. */ - if (first_nonopt != last_nonopt) - optind = first_nonopt; - return EOF; - } - - /* If we have come to a non-option and did not permute it, - either stop the scan or describe it to the caller and pass it by. 
*/ - - if ((argv[optind][0] != '-' || argv[optind][1] == '\0')) - { - if (ordering == REQUIRE_ORDER) - return EOF; - optarg = argv[optind++]; - return 1; - } - - /* We have found another option-ARGV-element. - Skip the initial punctuation. */ - - nextchar = (argv[optind] + 1 - + (longopts != NULL && argv[optind][1] == '-')); - } - - /* Decode the current option-ARGV-element. */ - - /* Check whether the ARGV-element is a long option. - - If long_only and the ARGV-element has the form "-f", where f is - a valid short option, don't consider it an abbreviated form of - a long option that starts with f. Otherwise there would be no - way to give the -f short option. - - On the other hand, if there's a long option "fubar" and - the ARGV-element is "-fu", do consider that an abbreviation of - the long option, just like "--fu", and not "-f" with arg "u". - - This distinction seems to be the most useful approach. */ - - if (longopts != NULL - && (argv[optind][1] == '-' - || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound; - int option_index; - - for (nameend = nextchar; *nameend && *nameend != '='; nameend++) - /* Do nothing. */ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, nextchar, nameend - nextchar)) - { - if (nameend - nextchar == strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else - /* Second or later nonexact match found. 
*/ - ambig = 1; - } - - if (ambig && !exact) - { - if (opterr) - fprintf (stderr, "%s: option `%s' is ambiguous\n", - argv[0], argv[optind]); - nextchar += strlen (nextchar); - optind++; - return '?'; - } - - if (pfound != NULL) - { - option_index = indfound; - optind++; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. */ - if (pfound->has_arg) - optarg = nameend + 1; - else - { - if (opterr) - { - if (argv[optind - 1][1] == '-') - /* --option */ - fprintf (stderr, - "%s: option `--%s' doesn't allow an argument\n", - argv[0], pfound->name); - else - /* +option or -option */ - fprintf (stderr, - "%s: option `%c%s' doesn't allow an argument\n", - argv[0], argv[optind - 1][0], pfound->name); - } - nextchar += strlen (nextchar); - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (optind < argc) - optarg = argv[optind++]; - else - { - if (opterr) - fprintf (stderr, "%s: option `%s' requires an argument\n", - argv[0], argv[optind - 1]); - nextchar += strlen (nextchar); - return optstring[0] == ':' ? ':' : '?'; - } - } - nextchar += strlen (nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - - /* Can't find it as a long option. If this is not getopt_long_only, - or the option starts with '--' or is not a valid short - option, then it's an error. - Otherwise interpret it as a short option. */ - if (!long_only || argv[optind][1] == '-' - || my_index (optstring, *nextchar) == NULL) - { - if (opterr) - { - if (argv[optind][1] == '-') - /* --option */ - fprintf (stderr, "%s: unrecognized option `--%s'\n", - argv[0], nextchar); - else - /* +option or -option */ - fprintf (stderr, "%s: unrecognized option `%c%s'\n", - argv[0], argv[optind][0], nextchar); - } - nextchar = (char *) ""; - optind++; - return '?'; - } - } - - /* Look at and handle the next short option-character. 
*/ - - { - char c = *nextchar++; - char *temp = my_index (optstring, c); - - /* Increment `optind' when we start to process its last character. */ - if (*nextchar == '\0') - ++optind; - - if (temp == NULL || c == ':') - { - if (opterr) - { - if (posixly_correct) - /* 1003.2 specifies the format of this message. */ - fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c); - else - fprintf (stderr, "%s: invalid option -- %c\n", argv[0], c); - } - optopt = c; - return '?'; - } - if (temp[1] == ':') - { - if (temp[2] == ':') - { - /* This is an option that accepts an argument optionally. */ - if (*nextchar != '\0') - { - optarg = nextchar; - optind++; - } - else - optarg = NULL; - nextchar = NULL; - } - else - { - /* This is an option that requires an argument. */ - if (*nextchar != '\0') - { - optarg = nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - optind++; - } - else if (optind == argc) - { - if (opterr) - { - /* 1003.2 specifies the format of this message. */ - fprintf (stderr, "%s: option requires an argument -- %c\n", - argv[0], c); - } - optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. */ - optarg = argv[optind++]; - nextchar = NULL; - } - } - return c; - } -} - -int -getopt (argc, argv, optstring) - int argc; - char *const *argv; - const char *optstring; -{ - return _getopt_internal (argc, argv, optstring, - (const struct option *) 0, - (int *) 0, - 0); -} - -#endif /* _LIBC or not __GNU_LIBRARY__. */ - -#ifdef TEST - -/* Compile with -DTEST to make an executable for use in testing - the above definition of `getopt'. */ - -int -main (argc, argv) - int argc; - char **argv; -{ - int c; - int digit_optind = 0; - - while (1) - { - int this_option_optind = optind ? 
optind : 1; - - c = getopt (argc, argv, "abc:d:0123456789"); - if (c == EOF) - break; - - switch (c) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value `%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } - } - - if (optind < argc) - { - printf ("non-option ARGV-elements: "); - while (optind < argc) - printf ("%s ", argv[optind++]); - printf ("\n"); - } - - exit (0); -} - -#endif /* TEST */ diff --git a/getopt.h b/getopt.h deleted file mode 100644 index 4ac33b7..0000000 --- a/getopt.h +++ /dev/null @@ -1,129 +0,0 @@ -/* Declarations for getopt. - Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#ifndef _GETOPT_H -#define _GETOPT_H 1 - -#ifdef __cplusplus -extern "C" { -#endif - -/* For communication from `getopt' to the caller. 
- When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -extern char *optarg; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns EOF, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -extern int optind; - -/* Callers store zero here to inhibit the error message `getopt' prints - for unrecognized options. */ - -extern int opterr; - -/* Set to an option character which was unrecognized. */ - -extern int optopt; - -/* Describe the long-named options requested by the application. - The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector - of `struct option' terminated by an element containing a name which is - zero. - - The field `has_arg' is: - no_argument (or 0) if the option does not take an argument, - required_argument (or 1) if the option requires an argument, - optional_argument (or 2) if the option takes an optional argument. - - If the field `flag' is not NULL, it points to a variable that is set - to the value given in the field `val' when the option is found, but - left unchanged if the option is not found. - - To have a long-named option do something other than set an `int' to - a compiled-in constant, such as set a value from `optarg', set the - option's `flag' field to zero and its `val' field to a nonzero - value (the equivalent single-letter option character, if there is - one). For long options that have a zero `flag' field, `getopt' - returns the contents of the `val' field. 
*/ - -struct option -{ -#if defined (__STDC__) && __STDC__ - const char *name; -#else - char *name; -#endif - /* has_arg can't be an enum because some compilers complain about - type mismatches in all the code that assumes it is an int. */ - int has_arg; - int *flag; - int val; -}; - -/* Names for the values of the `has_arg' field of `struct option'. */ - -#define no_argument 0 -#define required_argument 1 -#define optional_argument 2 - -#if defined (__STDC__) && __STDC__ -#ifdef __GNU_LIBRARY__ -/* Many other libraries have conflicting prototypes for getopt, with - differences in the consts, in stdlib.h. To avoid compilation - errors, only prototype getopt for the GNU C library. */ -extern int getopt (int argc, char *const *argv, const char *shortopts); -#else /* not __GNU_LIBRARY__ */ -extern int getopt (); -#endif /* __GNU_LIBRARY__ */ -extern int getopt_long (int argc, char *const *argv, const char *shortopts, - const struct option *longopts, int *longind); -extern int getopt_long_only (int argc, char *const *argv, - const char *shortopts, - const struct option *longopts, int *longind); - -/* Internal only. Users should not call this directly. */ -extern int _getopt_internal (int argc, char *const *argv, - const char *shortopts, - const struct option *longopts, int *longind, - int long_only); -#else /* not __STDC__ */ -extern int getopt (); -extern int getopt_long (); -extern int getopt_long_only (); - -extern int _getopt_internal (); -#endif /* __STDC__ */ - -#ifdef __cplusplus -} -#endif - -#endif /* _GETOPT_H */ diff --git a/getopt1.c b/getopt1.c deleted file mode 100644 index 4580211..0000000 --- a/getopt1.c +++ /dev/null @@ -1,180 +0,0 @@ -/* getopt_long and getopt_long_only entry points for GNU getopt. - Copyright (C) 1987, 88, 89, 90, 91, 92, 1993, 1994 - Free Software Foundation, Inc. 
- - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "getopt.h" - -#if !defined (__STDC__) || !__STDC__ -/* This is a separate conditional since some stdc systems - reject `defined (const)'. */ -#ifndef const -#define const -#endif -#endif - -#include - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#if defined (_LIBC) || !defined (__GNU_LIBRARY__) - - -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. 
*/ -#ifdef __GNU_LIBRARY__ -#include -#else -char *getenv (); -#endif - -#ifndef NULL -#define NULL 0 -#endif - -int -getopt_long (argc, argv, options, long_options, opt_index) - int argc; - char *const *argv; - const char *options; - const struct option *long_options; - int *opt_index; -{ - return _getopt_internal (argc, argv, options, long_options, opt_index, 0); -} - -/* Like getopt_long, but '-' as well as '--' can indicate a long option. - If an option that starts with '-' (not '--') doesn't match a long option, - but does match a short option, it is parsed as a short option - instead. */ - -int -getopt_long_only (argc, argv, options, long_options, opt_index) - int argc; - char *const *argv; - const char *options; - const struct option *long_options; - int *opt_index; -{ - return _getopt_internal (argc, argv, options, long_options, opt_index, 1); -} - - -#endif /* _LIBC or not __GNU_LIBRARY__. */ - -#ifdef TEST - -#include - -int -main (argc, argv) - int argc; - char **argv; -{ - int c; - int digit_optind = 0; - - while (1) - { - int this_option_optind = optind ? 
optind : 1; - int option_index = 0; - static struct option long_options[] = - { - {"add", 1, 0, 0}, - {"append", 0, 0, 0}, - {"delete", 1, 0, 0}, - {"verbose", 0, 0, 0}, - {"create", 0, 0, 0}, - {"file", 1, 0, 0}, - {0, 0, 0, 0} - }; - - c = getopt_long (argc, argv, "abc:d:0123456789", - long_options, &option_index); - if (c == EOF) - break; - - switch (c) - { - case 0: - printf ("option %s", long_options[option_index].name); - if (optarg) - printf (" with arg %s", optarg); - printf ("\n"); - break; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value `%s'\n", optarg); - break; - - case 'd': - printf ("option d with value `%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } - } - - if (optind < argc) - { - printf ("non-option ARGV-elements: "); - while (optind < argc) - printf ("%s ", argv[optind++]); - printf ("\n"); - } - - exit (0); -} - -#endif /* TEST */ diff --git a/hash.h b/hash.h deleted file mode 100644 index e71657c..0000000 --- a/hash.h +++ /dev/null @@ -1,124 +0,0 @@ -/* hash.h -- decls for hash table - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _hash_h_ -#define _hash_h_ - -typedef unsigned long (*hash_t) __P((void const *key)); -typedef int (*hash_cmp_t) __P((void const *x, void const *y)); - -struct hash_table -{ - void **ht_vec; - unsigned long ht_size; /* total number of slots (power of 2) */ - unsigned long ht_capacity; /* usable slots, limited by loading-factor */ - unsigned long ht_fill; /* items in table */ - unsigned long ht_probes; /* number of comparisons */ - unsigned long ht_lookups; /* number of queries */ - unsigned int ht_rehashes; /* number of times we've expanded table */ - hash_t ht_hash_1; /* primary hash function */ - hash_t ht_hash_2; /* secondary hash function */ - hash_cmp_t ht_compare; /* comparison function */ -}; - -void hash_init __P((struct hash_table* ht, long size, - hash_t hash_1, hash_t hash_2, hash_cmp_t hash_cmp)); -void rehash __P((struct hash_table* ht)); -void **hash_lookup __P((struct hash_table* ht, void const *key)); - - -/* hash and comparison macros for string keys. 
*/ - -#define STRING_HASH_1(_key_, _result_) { \ - unsigned char const *kk = (unsigned char const *) (_key_) - 1; \ - while (*++kk) \ - (_result_) += (*kk << (kk[1] & 0xf)); \ -} while (0) -#define return_STRING_HASH_1(_key_) { \ - unsigned long result = 0; \ - STRING_HASH_1 ((_key_), result); \ - return result; \ -} while (0) - -#define STRING_HASH_2(_key_, _result_) { \ - unsigned char const *kk = (unsigned char const *) (_key_) - 1; \ - while (*++kk) \ - (_result_) += (*kk << (kk[1] & 0x7)); \ -} while (0) -#define return_STRING_HASH_2(_key_) { \ - unsigned long result = 0; \ - STRING_HASH_2 ((_key_), result); \ - return result; \ -} while (0) - -#define STRING_COMPARE(_x_, _y_, _result_) { \ - unsigned char const *xx = (unsigned char const *) (_x_) - 1; \ - unsigned char const *yy = (unsigned char const *) (_y_) - 1; \ - do { \ - if (*++xx == '\0') { \ - yy++; \ - break; \ - } \ - } while (*xx == *++yy); \ - (_result_) = *xx - *yy; \ -} while (0) -#define return_STRING_COMPARE(_x_, _y_) { \ - int result; \ - STRING_COMPARE (_x_, _y_, result); \ - return result; \ -} while (0) - -/* hash and comparison macros for integer keys. */ - -#define INTEGER_HASH_1(_key_, _result_) { \ - (_result_) = ((unsigned long)(_key_)); \ -} while (0) -#define return_INTEGER_HASH_1(_key_) { \ - unsigned long result = 0; \ - INTEGER_HASH_1 ((_key_), result); \ - return result; \ -} while (0) - -#define INTEGER_HASH_2(_key_, _result_) { \ - (_result_) = ~((unsigned long)(_key_)); \ -} while (0) -#define return_INTEGER_HASH_2(_key_) { \ - unsigned long result = 0; \ - INTEGER_HASH_2 ((_key_), result); \ - return result; \ -} while (0) - -#define INTEGER_COMPARE(_x_, _y_, _result_) { \ - (_result_) = _x_ - _y_; \ -} while (0) -#define return_INTEGER_COMPARE(_x_, _y_) { \ - int result; \ - INTEGER_COMPARE (_x_, _y_, result); \ - return result; \ -} while (0) - -/* hash and comparison macros for address keys. 
*/ - -#define ADDRESS_HASH_1(_key_, _result_) INTEGER_HASH_1 (((unsigned long)(_key_)) >> 3, (_result_)) -#define ADDRESS_HASH_2(_key_, _result_) INTEGER_HASH_2 (((unsigned long)(_key_)) >> 3, (_result_)) -#define ADDRESS_COMPARE(_x_, _y_, _result_) INTEGER_COMPARE ((_x_), (_y_), (_result_)) -#define return_ADDRESS_HASH_1(_key_) return_INTEGER_HASH_1 (((unsigned long)(_key_)) >> 3) -#define return_ADDRESS_HASH_2(_key_) return_INTEGER_HASH_2 (((unsigned long)(_key_)) >> 3) -#define return_ADDRESS_COMPARE(_x_, _y_) return_INTEGER_COMPARE ((_x_), (_y_)) - -#endif /* not _hash_h_ */ diff --git a/id.info b/id.info deleted file mode 100644 index 08834b6..0000000 --- a/id.info +++ /dev/null @@ -1,1433 +0,0 @@ -This is Info file id.info, produced by Makeinfo-1.55 from the input -file id.texinfo. - -START-INFO-DIR-ENTRY -* ID database: (id). Identifier database utilities. -* aid: (id)aid invocation:: Matching strings. -* eid: (id)eid invocation:: Invoking an editor on matches. -* fid: (id)fid invocation:: Listing a file's identifiers. -* gid: (id)gid invocation:: Listing all matching lines. -* idx: (id)idx invocation:: Testing mkid scanners. -* iid: (id)iid invocation:: Interactive complex queries. -* lid: (id)lid invocation:: Matching patterns. -* mkid: (id)mkid invocation:: Creating an ID database. -* pid: (id)pid invocation:: Looking up filenames. -END-INFO-DIR-ENTRY - - This file documents the `mkid' identifier database utilities. - - Copyright (C) 1991, 1995 Tom Horsley. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - - Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. 
- - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation. - - -File: id.info, Node: Top, Next: Introduction, Prev: (DIR), Up: (DIR) - -ID database utilities -********************* - - This manual documents version 3.0.9 of the ID database utilities. - -* Menu: - -* Introduction:: Overview of the tools, and authors. -* mkid invocation:: Creating an ID database. -* Common query arguments:: Common lookup options and search patterns. -* gid invocation:: Listing all matching lines. -* Looking up identifiers:: lid, aid, eid, and fid. -* pid invocation:: Looking up filenames. -* iid invocation:: Interactive and complex queries. -* Index:: General index. - - -File: id.info, Node: Introduction, Next: mkid invocation, Prev: Top, Up: Top - -Introduction -************ - - An "ID database" is a binary file containing a list of filenames, a -list of identifiers, and a matrix indicating which identifiers appear in -which files. With this database and some tools to manipulate it -(described in this manual), a host of tasks become simpler and faster. -For example, you can list all files containing a particular `#include' -throughout a huge source hierarchy, search for all the memos containing -references to a project, or automatically invoke an editor on all files -containing references to some function. Anyone with a large software -project to maintain, or a large set of text files to organize, can -benefit from an ID database. - - Although the ID utilities are most commonly used with identifiers, -numeric constants are also stored in the database, and can be searched -for in the same way (independent of radix, if desired). - - There are a number of programs in the ID family: - -`mkid' - scans files for identifiers and numeric constants and builds the ID - database file. 
- -`gid' - lists all lines that match given patterns. - -`lid' - lists the filenames containing identifiers that match given - patterns. - -`aid' - lists the filenames containing identifiers that contain given - strings, independent of case. - -`eid' - invokes an editor on each file containing identifiers that match - given patterns. - -`fid' - lists all identifiers recorded in the database for given files, or - identifiers common to two files. - -`pid' - matches the filenames in the database, rather than the identifiers. - -`iid' - interactively supports more complex queries, such as intersection - and union. - -`idx' - helps with testing of new `mkid' scanners. - - Please report bugs to `gkm@magilla.cichlid.com'. Remember to -include the version number, machine architecture, input files, and any -other information needed to reproduce the bug: your input, what you -expected, what you got, and why it is wrong. Diffs are welcome, but -please include a description of the problem as well, since this is -sometimes difficult to infer. *Note Bugs: (gcc)Bugs. - -* Menu: - -* Past and future:: How the ID tools came about, and where they're going. - - -File: id.info, Node: Past and future, Up: Introduction - -Past and future -=============== - - Greg McGary conceived of the ideas behind mkid when he began hacking -the Unix kernel in 1984. He needed a navigation tool to help him find -his way the expansive, unfamiliar landscape. The first `mkid'-like -tools were shell scripts, and produced an ASCII database that looks much -like the output of `lid' with no arguments. It took over an hour on a -VAX 11/750 to build a database for a 4.1BSD-ish kernel. Lookups were -done with the system utility `look', modified to handle very long lines. - - In 1986, Greg rewrote `mkid', `lid', `fid' and `idx' in C to improve -performance. Database-build times were shortened by an order of -magnitude. The `mkid' tools were first posted to `comp.sources.unix' -in September 1987. 
- - Over the next few years, several versions diverged from the original -source. Tom Horsley at Harris Computer Systems Division stepped forward -to take over maintenance and integrated some of the fixes from divergent -versions. He also wrote the `iid' program. A first release of `mkid' -version 2 was posted to `alt.sources' near the end of 1990. At that -time, Tom wrote this Texinfo manual with the encouragement the net -community. (Tom especially thanks Doug Scofield and Bill Leonard whom -he dragooned into helping poorfraed and edit--they found several -problems in the initial version.) Karl Berry revamped the manual for -Texinfo style, indexing, and organization in 1995. - - In January 1995, Greg McGary reemerged as the primary maintaner and -launched development of `mkid' version 3, whose primary new feature is -an efficient algorithm for building databases that is linear in both -time and space over the size of the input text. (The old algorithm was -quadratic in space and therefore choked on very large source trees.) -The code is released under the GNU Public License, and might become a -part of the GNU system. `mkid' 3 is an interim release, since several -significant enhancements are still in the works: an optional coupling -with GNU `grep', so that `grep' can use an ID database for hints; a -`cscope' work-alike query interface; incremental update of the ID -database; and an automatic file-tree walker so you need not explicitly -supply every filename argument to the `mkid' program. - - -File: id.info, Node: mkid invocation, Next: Common query arguments, Prev: Introduction, Up: Top - -`mkid': Creating ID databases -***************************** - - The `mkid' program builds an ID database. To do this, it must scan -each file you tell it to include in the database. This takes some time, -but once the work is done the query programs run very rapidly. (You can -run `mkid' as a `cron' job to regularly update your databases.) 
- - The `mkid' program knows how to extract identifiers from various -types of files. For example, it can recognize and skip over comments -and string constants in a C program. - - Identifiers are not the only thing included in the database. Numbers -are also recognized and included in the database indexed by their binary -value. This feature allows you to find uses of constants without regard -to the radix used to specify them, since the same number can frequently -be written in many different ways (for instance, `47', `0x2f', `057' in -C). - - All the places in this document which mention identifiers should -really mention both identifiers and numbers, but that gets fairly -clumsy after a while, so you just need to keep in mind that numbers are -included in the database as well as identifiers. - - The ID files that `mkid' creates are architecture- and -byte-order-independent; you can share them at will across systems. - -* Menu: - -* mkid options:: Command-line options to mkid. -* Scanners:: Built-in and defining your own. -* mkid examples:: Examples of mkid usage. - - -File: id.info, Node: mkid options, Next: Scanners, Up: mkid invocation - -`mkid' options -============== - - By default, `mkid' scans the files you specify and writes the -database to a file named `ID' in the current directory. - - mkid [-v] [-SSCANARG] [-aARGFILE] [-] [-fIDFILE] FILES... - - The program accepts the following options. - -`-v' - Verbose. `mkid' tells you as it scans each file and indicates - which scanner it is using. It also summarizes some statistics - about the database at the end. - -`-SSCANARG' - Specify options regarding `mkid''s scanners. *Note Scanner option - formats::. - -`-aARGFILE' - Read additional command line arguments from ARGFILE. This is - typically used to specify lists of filenames longer than will fit - on a command line; some systems have severe limitations on the - total length of a command line. 
- -`-' - Read additional command line arguments from standard input. - -`-fIDFILE' - Write the database to the file IDFILE, instead of `ID'. The - database stores filenames relative to the directory containing the - database, so if you move the database to a different directory - after creating it, you may have trouble finding files. - - The remaining arguments FILES are the files to be scanned and -included in the database. If no files are given at all (either on -command line or via `-a' or `-'), `mkid' does nothing. - - -File: id.info, Node: Scanners, Next: mkid examples, Prev: mkid options, Up: mkid invocation - -Scanners -======== - - To determine which identifiers to extract from a file and store in -the database, `mkid' calls a "scanner"; we say a scanner "recognizes" a -particular language. Scanners for several languages are built-in to -`mkid'; you can add your own scanners as well, as explained in the -sections below. - - `mkid' determines which scanner to use for a particular file by -looking at the suffix of the filename. This "suffix" is everything -after and including the last `.' in a filename; for example, the suffix -of `foo.c' is `.c'. `mkid' has a built-in list of bindings from some -suffixes to corresponding scanners; for example, `.c' files are (not -surprisingly) scanned by the predefined C language scanner. - - If `mkid' cannot determine what scanner to use for a particular -file, either because the file has no suffix (e.g., `foo') or because -`mkid' has no binding for the file's suffix (e.g., `foo.bar'), it uses -the scanner bound to the `.default' suffix. By default, this is the -plain text scanner (*note Plain text scanner::.), but you can change -this with the `-S' option, as explained below. - -* Menu: - -* Scanner option formats:: Overview of the -S option. -* Predefined scanners:: The C, plain text, and assembler scanners. -* Defining new scanners:: Either in source code or at runtime with -S. -* idx invocation:: Testing mkid scanners. 
- - -File: id.info, Node: Scanner option formats, Next: Predefined scanners, Up: Scanners - -Scanner option formats ----------------------- - - With the `-S' option, you can change which language scanner to use -for which files, give language-specific options, and get some limited -online help about scanner options. - - Here are the different forms of the `-S' option: - -`-S.SUFFIX=SCANNER' - Use SCANNER for a file with the given `.SUFFIX'. For example, - `-S.yacc=c' tells `mkid' to use the `c' language scanner for all - files ending in `.yacc'. - -`-S.SUFFIX=?' - Display which scanner is used for the given `.SUFFIX'. - -`-S?=SCANNER' - Display which suffixes SCANNER is used for. - -`-S?=?' - Display the scanner binding for every known suffix. - -`-SSCANNER+ARG' -`-SSCANNER-ARG' - Each scanner accepts certain scanner-dependent arguments. These - options all have one of these forms. *Note Predefined scanners::. - -`-SSCANNER?' - Display the scanner-specific options accepted by SCANNER. - -`-SNEW-SCANNER/OLD-SCANNER/FILTER-COMMAND' - Define NEW-SCANNER in terms of OLD-SCANNER and FILTER-COMMAND. - *Note Defining scanners with options::. - - -File: id.info, Node: Predefined scanners, Next: Defining new scanners, Prev: Scanner option formats, Up: Scanners - -Predefined scanners -------------------- - - `mkid' has built-in scanners for several types of languages; you can -get the list by running `mkid -S?=?'. The supported languages are -documented below(1). - -* Menu: - -* C scanner:: For the C programming language. -* Plain text scanner:: For documents or other non-source code. -* Assembler scanner:: For assembly language. - - ---------- Footnotes ---------- - - (1) This is not strictly true: `vhil' is a supported language, but -it is an obsolete and arcane dialect of C and should be ignored. - - -File: id.info, Node: C scanner, Next: Plain text scanner, Up: Predefined scanners - -C scanner -......... - - The C scanner is the most commonly used. 
Files with the usual `.c' -and `.h' suffixes, and the `.y' (yacc) and `.l' (lex) suffixes, are -processed with this scanner (by default). - - Scanner-specific options: - -`-Sc-sCHARACTER' - Allow the specified CHARACTER in identifiers. For example, if you - use `$' in identifiers, you'll want to use `-Sc-s$'. - -`-Sc+u' - Strip leading underscores from identifiers. You might want to do this in - peculiar circumstances, such as trying to parse the output from - `nm' or some other system utility. - -`-Sc-u' - Don't strip leading underscores from identifiers; this is the - default. - - -File: id.info, Node: Plain text scanner, Next: Assembler scanner, Prev: C scanner, Up: Predefined scanners - -Plain text scanner -.................. - - The plain text scanner is intended for scanning most non-source-code -files. This is typically the scanner used when adding custom scanners -via `-S' (*note Defining scanners with options::.). - - Scanner-specific options: - -`-Stext+aCHARACTER' - Include CHARACTER in identifiers. By default, letters (a-z and - A-Z) and underscore are included. - -`-Stext-aCHARACTER' - Exclude CHARACTER from identifiers. - -`-Stext+sCHARACTER' - Squeeze CHARACTER from identifiers, i.e., do not terminate an - identifier when CHARACTER is seen. By default, the characters - `'', `-', and `.' are squeezed out of identifiers. For example, - the input `fred's' leads to the identifier `freds'. - -`-Stext-sCHARACTER' - Do not squeeze CHARACTER. - - -File: id.info, Node: Assembler scanner, Prev: Plain text scanner, Up: Predefined scanners - -Assembler scanner -................. - - Since assembly languages come in several flavors, this scanner has a -number of options: - -`-Sasm-cCHARACTER' - Define CHARACTER as starting a comment that extends to the end of - the input line; no default. In many assemblers this is `;' or `#'. - -`-Sasm+u' -`-Sasm-u' - Strip (`+u') or do not strip (`-u') leading underscores from - identifiers. The default is to strip them.
- -`-Sasm+aCHARACTER' - Allow CHARACTER in identifiers. - -`-Sasm-aCHARACTER' - Allow CHARACTER in identifiers, but if an identifier contains - CHARACTER, ignore it. This is useful to ignore temporary labels, - which can be generated in great profusion; these often contain `.' - or `@'. - -`-Sasm+p' -`-Sasm-p' - Recognize (`+p') or do not recognize (`-p') C preprocessor - directives in assembler source. The default is to recognize them. - -`-Sasm+C' -`-Sasm-C' - Skip over (`+C') or do not skip over (`-C') C style comments in - assembler source. The default is to skip them. - - -File: id.info, Node: Defining new scanners, Next: idx invocation, Prev: Predefined scanners, Up: Scanners - -Defining new scanners ---------------------- - - You can add new scanners to `mkid' in two ways: modify the source -code and recompile, or at runtime via the `-S' option. Each has its -advantages and disadvantages, as explained below. - - If you create a new scanner that would be of use to others, please -consider sending it back to the maintainer, `gkm@magilla.cichlid.com', -for inclusion in future releases of `mkid'. - -* Menu: - -* Defining scanners in source code:: -* Defining scanners with options:: - - -File: id.info, Node: Defining scanners in source code, Next: Defining scanners with options, Up: Defining new scanners - -Defining scanners in source code -................................ - - To add a new scanner in source code, you should add a new section to -the file `scanners.c'. Copy one of the existing scanners (most likely -either C or plain text), and modify as necessary. Also add the new -scanner to the `languages_0' and `suffixes_0' tables near the beginning -of the file. - - This is not a terribly difficult programming task, but it requires -recompiling and installing the new version of `mkid', which may be -inconvenient. - - This method leads to scanners which operate much more quickly than -ones that depend on external programs.
It is also likely the -easiest way to define scanners for new programming languages. - - -File: id.info, Node: Defining scanners with options, Prev: Defining scanners in source code, Up: Defining new scanners - -Defining scanners with options -.............................. - - You can use the `-S' option on the command line to define a new -language scanner: - - -SNEW-SCANNER/EXISTING-SCANNER/FILTER - -Here, NEW-SCANNER is the name of the new scanner being defined, -EXISTING-SCANNER is the name of an existing scanner, and FILTER is a -shell command or pipeline. - - The new scanner works by passing the input file to FILTER, and then -arranging for the result to be passed through EXISTING-SCANNER. -Typically, EXISTING-SCANNER is `text'. - - Somewhere within FILTER, the string `%s' should occur. This `%s' is -replaced by the name of the source file being scanned. - - For example, `mkid' has no built-in scanner for Texinfo files (like -this one). In indexing a Texinfo file, you most likely would want to -ignore the Texinfo @-commands. Here's one way to specify a new scanner -to do this: - - -Stexinfo/text/sed s,@[a-z]*,,g %s - - This defines a new language scanner (`texinfo') defined in terms of -a `sed' command to strip out Texinfo directives (an `@' character -followed by letters). Once the directives are stripped, the remaining -text is run through the plain text scanner. - - This is a minimal example; to do a complete job, you would need to -completely delete some lines, such as those beginning with `@end' or -`@node'. - - -File: id.info, Node: idx invocation, Prev: Defining new scanners, Up: Scanners - -`idx': Testing `mkid' scanners ------------------------------- - - `idx' prints the identifiers found in the files you specify to -standard output. This is useful in debugging new `mkid' scanners (*note -Scanners::.). Synopsis: - - idx [-SSCANARG] FILES... - - `idx' accepts the same `-S' options as `mkid'. *Note Scanner option -formats::.
- - The name "idx" stands for "ID eXtract". The name may change in -future releases, since this is such an infrequently used program. - - -File: id.info, Node: mkid examples, Prev: Scanners, Up: mkid invocation - -`mkid' examples -=============== - - The simplest example of `mkid' is something like: - - mkid *.[chy] - - This will build an ID database indexing identifiers and numbers in -all the `.c', `.h', and `.y' files in the current directory. -Because `mkid' already knows how to scan files with those suffixes, no -additional options are needed. - - Here's a more complex example. Suppose you want to build a database -indexing the contents of all the `man' pages, and further suppose that -your system is using `gzip' (*note Top: (gzip)Top.) to store compressed -`cat' versions of the `man' pages in the directory `/usr/catman'. The -`gzip' program creates files with a `.gz' suffix, so you must tell -`mkid' how to scan `.gz' files. Here are the commands to do the job: - - cd /usr/catman - find . -name \*.gz -print | mkid '-Sman/text/gzip <%s' -S.gz=man - - -Explanation: - - 1. We first `cd' to `/usr/catman' so the ID database will store the - correct relative filenames. - - 2. The `find' command prints the names of all `.gz' files under the - current directory. *Note find invocation: (sh-utils)find - invocation. - - 3. This list is piped to `mkid'; the `-' option (at the end of the - line) tells `mkid' to read arguments (in this case, as is typical, - the list of filenames) from standard input. *Note mkid options::. - - 4. The `-Sman/text/gzip ...' defines a new language `man' in terms of - the `gzip' program and `mkid''s existing text scanner. *Note - Defining scanners with options::. - - 5. The `-S.gz=man' tells `mkid' to treat all `.gz' files as this new - language `man'. *Note Scanner option formats::. - - - As a further complication, `cat' pages typically contain underlining -and backspace sequences, which will confuse `mkid'.
To handle this, -the `gzip' command becomes a pipeline, like this: - - mkid '-Sman/text/gzip <%s | col -b' -S.gz=man - - - -File: id.info, Node: Common query arguments, Next: gid invocation, Prev: mkid invocation, Up: Top - -Common query arguments -********************** - - Certain options, and regular expression syntax, are shared by the ID -query tools. So we describe those things in the sections below, instead -of repeating the description for each tool. - -* Menu: - -* Query options:: -f -r -c -ew -kg -n -doxa -m -F -u. -* Patterns:: Regular expression syntax for searches. -* Examples: Query examples. Some common uses. - - -File: id.info, Node: Query options, Next: Patterns, Up: Common query arguments - -Query options -============= - - The ID query tools (*not* `mkid') share certain command line -options. Not all of these options are recognized by all programs, but -if an option is used by more than one program, it is described below. -The description of each program gives the options that program uses. - -`-fIDFILE' - Read the database from IDFILE, in the current directory or in any - directory above the current directory. The default database name - is `ID'. Searching parent directories lets you have a single ID - database at the root of a large source tree and then use the query - tools from anywhere within that tree. - -`-rDIRECTORY' - Find files relative to DIRECTORY, instead of the directory in - which the ID database was found. This is useful if the ID - database was moved after its creation. - -`-c' - Equivalent to `-r`pwd`', i.e., find files relative to the current - directory, instead of the directory in which the ID database was - found. - -`-e' -`-w' - `-e' forces pattern arguments to be treated as regular expressions, - and `-w' forces pattern arguments to be treated as constant - strings. By default, the query tools guess whether a pattern is - regular expressions or constant strings by looking for special - characters. *Note Patterns::. 
- -`-k' -`-g' - `-k' suppresses use of shell brace notation in the output. By - default, the query tools that generate lists of filenames attempt - to compress the lists using the usual shell brace notation, e.g., - `{foo,bar}.c' to mean `foo.c' and `bar.c'. (This is useful if you - use `ksh' or the original (not GNU) `sh' and want to feed the list - of names to another command, since those shells do not support - this brace notation; the name of the `-k' option comes from the - `k' in `ksh'). - - `-g' turns on use of brace notation; this is only needed if the - query tools were compiled with `-k' as the default behavior. - -`-n' - Suppress the matching identifier before each list of filenames - that the query tools output by default. This is useful if you want - a list of just the names to feed to another command. - -`-d' -`-o' -`-x' -`-a' - These options may be used in any combination to specify the radix - of numeric matches. `-d' allows matching on decimal numbers, `-o' - on octal numbers, and `-x' on hexadecimal numbers. The `-a' - option is equivalent to specifying all three; this is the default. - Any combination of these options may be used. - -`-m' - Merge multiple lines of output into a single line. If your query - matches more than one identifier, the default is to generate a - separate line of output for each matching identifier. - -`-F-' -`-FN' -`-F-M' -`-FN-M' - Show identifiers matching at least N and at most M times. `-F-' - is equivalent to `-F1', i.e., find identifiers that appear only - once in the database. (This is useful to locate identifiers that - are defined but never used, or used once and never defined.) - -`-uNUMBER' - List identifiers that conflict in the first NUMBER characters. - This could be useful in porting programs to brain-dead computers - that refuse to support long identifiers, but your best long term - option is to set such computers on fire.
- - -File: id.info, Node: Patterns, Next: Query examples, Prev: Query options, Up: Common query arguments - -Patterns -======== - - "Patterns", also called "regular expressions", allow you to match -many different identifiers in a single query. - - The same regular expression syntax is recognized by all the query -tools that handle regular expressions. The exact syntax depends on how -the ID tools were compiled, but the following constructs should always -be supported: - -`.' - Match any single character. - -`[CHARS]' - Match any of the characters specified within the brackets. You can - match any characters *except* the ones in brackets by typing `^' - as the first character. A range of characters can be specified - using `-'. For example, `[abc]' and `[a-c]' both match `a', `b', - or `c', and `[^abc]' matches anything *except* `a', `b', or `c'. - -`*' - Match the previous construct zero or more times. - -`^' -`$' - `^' (`$') at the beginning (end) of a pattern anchors the match to - the first (last) character of the identifier. - - The query programs use either the `regex'/`regcmp' or -`re_comp'/`re_exec' functions, depending on which are available in the -library on your system. These do not always support the exact same -regular expression syntax, so consult your local `man' pages to find -out. - - -File: id.info, Node: Query examples, Prev: Patterns, Up: Common query arguments - -Query examples -============== - - Here are some examples of the options described in the previous -sections. - - To restrict searches to exact matches, use `^...$'. For example: - - prompt$ gid '^FILE$' - ansi2knr.c:144: { FILE *in, *out; - ansi2knr.c:315: FILE *out; - fid.c:38: FILE *id_FILE; - filenames.c:576: FILE * - ... - - To show identifiers not unique in the first 16 characters: - - prompt$ lid -u16 - RE_CONTEXT_INDEP_ANCHORS regex.c - RE_CONTEXT_INDEP_OPS regex.c - RE_SYNTAX_POSIX_BASIC regex.c - RE_SYNTAX_POSIX_EXTENDED regex.c - ... 
- - Numbers are searched for numerically rather than textually. For -example: - - prompt$ lid 0xff - 0377 {lid,regex}.c - 0xff {bitops,fid,lid,mkid}.c - 255 regex.c - - On the other hand, you can restrict a numeric search to a particular -radix if you want: - - prompt$ lid -x 0xff - 0xff {bitops,fid,lid,mkid}.c - - Filenames in the output are always adjusted to be correct for the -current working directory. For example: - - prompt$ lid bdevsw - bdevsw sys/conf.h cf/conf.c io/bio.c os/{fio,main,prf,sys3}.c - prompt$ cd io - prompt$ lid bdevsw - bdevsw ../sys/conf.h ../cf/conf.c bio.c ../os/{fio,main,prf,sys3}.c - - -File: id.info, Node: gid invocation, Next: Looking up identifiers, Prev: Common query arguments, Up: Top - -`gid': Listing matching lines -***************************** - - Synopsis: - - gid [-fFILE] [-uN] [-rDIR] [-doxasc] [PATTERN...] - - `gid' finds the identifiers in the database that match the specified -PATTERNs, then searches for all occurrences of those identifiers, in -only the files containing matches. In a large source tree, this saves -an enormous amount of time (compared to searching every source file). - - With no PATTERN arguments, `gid' prints every line of every source -file. - - The name "gid" stands for "grep for identifiers", `grep' being the -standard utility to search regular files. - - *Note Common query arguments::, for a description of the command-line -options and PATTERN arguments. - - `gid' uses the standard GNU output format for identifying source -lines: - - FILENAME:LINENUM: TEXT - - Here is an example: - - prompt$ gid FILE - ansi2knr.c:144: { FILE *in, *out; - ansi2knr.c:315: FILE *out; - fid.c:38: FILE *id_FILE; - ... - -* Menu: - -* GNU Emacs gid interface:: Using next-error with gid. - - -File: id.info, Node: GNU Emacs gid interface, Up: gid invocation - -GNU Emacs `gid' interface -========================= - - The `mkid' source distribution comes with a file `gid.el', which -defines a GNU Emacs interface to `gid'.
To install it, put `gid.el' -somewhere that Emacs will find it (i.e., in your `load-path') and put - - (autoload 'gid "gid" nil t) - -in one of Emacs' initialization files, e.g., `~/.emacs'. You will then -be able to use `M-x gid' to run the command. - - The `gid' function prompts you with the word around point. If you -want to search for something else, simply delete the line and type the -pattern of interest. - - The function then runs the `gid' program in a `*compilation*' -buffer, so the normal `next-error' function can be used to visit all -the places the identifier is found (*note Compilation: -(emacs)Compilation.). - - -File: id.info, Node: Looking up identifiers, Next: pid invocation, Prev: gid invocation, Up: Top - -Looking up identifiers -********************** - - These commands look up identifiers in the ID database and operate on -the files containing matches. - -* Menu: - -* lid invocation:: Matching patterns. -* aid invocation:: Matching strings. -* eid invocation:: Invoking an editor on matches. -* fid invocation:: Listing a file's identifiers. - - -File: id.info, Node: lid invocation, Next: aid invocation, Up: Looking up identifiers - -`lid': Matching patterns -======================== - - Synopsis: - - lid [-fFILE] [-uN] [-rDIR] [-mewdoxaskgnc] PATTERN... - - `lid' searches the database for identifiers matching the given -PATTERN arguments and prints the names of the files that match each -PATTERN. With no PATTERNs, `lid' lists every entry in the database. - - The name "lid" stands for "lookup identifier". - - *Note Common query arguments::, for a description of the command-line -options and PATTERN arguments. - - By default, each line of output consists of an identifier and all the -files containing that identifier. 
- - Here is an example showing a search for a single identifier (omitting -some output to keep lines short): - - prompt$ lid FILE - FILE extern.h {fid,gets0,getsFF,idx,init,lid,mkid,...}.c - - This example shows a regular expression search: - - prompt$ lid 'FILE$' - AF_FILE mkid.c - AF_IDFILE mkid.c - FILE extern.h {fid,gets0,getsFF,idx,init,lid,mkid,...}.c - IDFILE id.h {fid,lid,mkid}.c - IdFILE {fid,lid}.c - ... - -As you can see, when a regular expression is used, it is possible to -get more than one line of output. To merge multiple lines into one, -use `-m': - - prompt$ lid -m ^get - ^get extern.h {bitsvec,fid,gets0,getsFF,getscan,idx,lid,...}.c - - -File: id.info, Node: aid invocation, Next: eid invocation, Prev: lid invocation, Up: Looking up identifiers - -`aid': Matching strings -======================= - - Synopsis: - - aid [-fFILE] [-uN] [-rDIR] [-mewdoxaskgnc] STRING... - - `aid' searches the database for identifiers containing the given -STRING arguments. The search is case-insensitive. - - The name "aid" stands for "apropos identifier", `apropos' being a -command that does a similar search of the `whatis' database of `man' -descriptions. - - For example, `aid get' matches the identifiers `fgets', `GETLINE', -and `getchar'. - - The default output format is the same as `lid'; see the previous -section. - - *Note Common query arguments::, for a description of the command-line -options and PATTERN arguments. - - -File: id.info, Node: eid invocation, Next: fid invocation, Prev: aid invocation, Up: Looking up identifiers - -`eid': Invoking an editor on matches -==================================== - - Synopsis: - - eid [-fFILE] [-uN] [-rDIR] [-doxasc] [PATTERN]... - - `eid' runs the usual search (*note lid invocation::.) on the given -arguments, shows you the output, and then asks: - - Edit? [y1-9^S/nq] - -You can respond with: - -`y' - Edit all files listed. - -`1...9' - Start editing at the N + 1'st file.
- -`/STRING or `CTRL-S'STRING' - Start editing at the first filename containing STRING. - -`n' - Go on to the next PATTERN, i.e., edit nothing for this one. - -`q' - Quit `eid'. - - `eid' invokes the editor defined by the `EDITOR' environment -variable to edit a file. If this editor can accept an initial search -argument on the command line, `eid' can move automatically to the -location of the match, via the environment variables below. - - *Note Common query arguments::, for a description of the command-line -options and PATTERN arguments. - - Here are the environment variables relevant to `eid': - -`EDITOR' - The name of the editor program to invoke. - -`EIDARG' - The argument to pass to the editor to search for the matching - identifier. For `vi', this should be `+/%s/''. - -`EIDLDEL' - A regular expression to force a match at the beginning of a word - ("left delimiter"). `eid' inserts this in front of the matching - identifier when composing the search argument. For `vi', this - should be `\<'. - -`EIDRDEL' - The end-of-word regular expression. For `vi', this should be `\>'. - - For Emacs users, the interface in `gid.el' is probably preferable to -`eid'. *Note GNU Emacs gid interface::. - - Here is an example: - - prompt$ eid FILE \^print - FILE {ansi2knr,fid,filenames,idfile,idx,iid,lid,misc,...}.c - Edit? [y1-9^S/nq] n - ^print {ansi2knr,fid,getopt,getopt1,iid,lid,mkid,regex,scanners}.c - Edit? [y1-9^S/nq] 2 - -This will start editing at `getopt.c'. - - -File: id.info, Node: fid invocation, Prev: eid invocation, Up: Looking up identifiers - -`fid': Listing a file's identifiers -=================================== - - `fid' lists the identifiers found in a given file. Synopsis: - - fid [-fDBFILE] FILE1 [FILE2] - -`-fDBFILE' - Read the database from DBFILE instead of `ID'. - -`FILE1' - List all the identifiers contained in FILE1. - -`FILE2' - With a second file argument, list only the identifiers both files - have in common.
- - The output is simply one identifier (or number) per line. - - -File: id.info, Node: pid invocation, Next: iid invocation, Prev: Looking up identifiers, Up: Top - -`pid': Looking up filenames -*************************** - - `pid' matches the filenames stored in the ID database, rather than -the identifiers. Synopsis: - - pid [-fDBFILE] [-rDIR] [-ebkgnc] WILDCARD... - - By default, the WILDCARD patterns are treated as shell globbing -patterns, rather than the regular expressions the other utilities -accept. See the section below for details. - - Besides the standard options given in the synopsis (*note Query -options::.), `pid' accepts the following: - -`-e' - Do the usual regular expression matching (*note Patterns::.), - instead of shell wildcard matching. - -`-b' - Match the basenames of the files in the database. For example, - `pid -b foo' will match the stored filename `dir/foo', but not - `foo/file'. - - For example, the command: - - pid \*.c - -lists all the `.c' files in the database. (The `\' here protects the -`*' from being expanded by the shell.) - -* Menu: - -* Wildcard patterns:: Shell-style globbing patterns. - - -File: id.info, Node: Wildcard patterns, Up: pid invocation - -Wildcard patterns -================= - - `pid' does simplified shell wildcard matching (unless the `-e' -option is specified), rather than the regular expression matching done -by the other utilities. Here is a description of wildcard matching, -also called "globbing": - - * `*' matches zero or more characters. - - * `?' matches any single character. - - * `\' forces the next character to be taken literally. - - * `[CHARS]' matches any single character listed in CHARS. - - * `[!CHARS]' matches any character *not* listed in CHARS. - - Most shells treat `/' and leading `.' characters specially. `pid' -does not do this. It simply matches the filename in the database -against the wildcard pattern. 
- - -File: id.info, Node: iid invocation, Next: Index, Prev: pid invocation, Up: Top - -`iid': Complex interactive queries -********************************** - - `iid' is an interactive query utility for ID databases. It operates -by running another query program (`lid' by default, `aid' if `-a' is -specified) and manipulating the sets of filenames returned by these -queries. - -* Menu: - -* iid command line options:: Command-line options. -* iid query expressions:: Operands to the commands. -* iid commands:: Printing matching filenames, etc. - - -File: id.info, Node: iid command line options, Next: iid query expressions, Up: iid invocation - -`iid' command line options -========================== - - `iid' recognizes the following options (the standard query options -described in *Note Query options:: are inapplicable): - -`-a' - Use `aid' for searches, instead of `lid'. - -`-cCOMMAND' - Execute COMMAND and exit, instead of prompting for interactive - commands. - -`-H' - Print a usage message and exit successfully. The `help' command - inside `iid' gives more information. *Note iid commands::. - - -File: id.info, Node: iid query expressions, Next: iid commands, Prev: iid command line options, Up: iid invocation - -`iid' query expressions -======================= - - An `iid' "query expression" generates a set of filenames or -manipulates existing sets. These expressions are operands to some of -the `iid' commands (see the next section), not commands themselves. - - Here are the possible constructs, highest precedence first: - -`sSET-NUMBER' - Refer to a set previously created by a query operation. During - each `iid' session, every query generates a different set number, - so any previously generated set may be used as part of any new - query by reference to its set number. - -`PATTERN' - `iid' treats any non-keyword input (i.e., anything not in this - table) as an identifier to be searched for in the database. 
It is - passed to the search program (`lid' by default, `aid' if the `-a' - option was specified). The result of this operation is a set of - filenames, and it is assigned a unique set number. - -`lid IDENTIFIER-LIST' - Invoke the `lid' program on IDENTIFIER-LIST and construct a new - set from the result. - -`aid IDENTIFIER-LIST' - Like `lid', but use the `aid' program. - -`match WILDCARDS' - Invoke the `pid' program on WILDCARDS, therefore matching on the - filenames in the database instead of the identifiers. The - resulting set contains the filenames that match the specified - patterns. *Note pid invocation::. - -`not EXPR' - The result is those filenames in the database that are not in EXPR. - -`EXPR1 and EXPR2' - The result is the intersection of the sets EXPR1 and EXPR2, i.e., - only those filenames contained in both. - -`EXPR1 or EXPR2' - The result is the union of the sets EXPR1 and EXPR2, i.e., all the - filenames contained in either or both. - - Operator names are recognized independent of case, so `AND', `and', -and `aNd' are all the same as far as `iid' is concerned. - - To pass a keyword as an operand, you must enclose it in double -quotes: the command `lid "lid"' generates the set of all filenames -matching the string `lid'. - - Patterns containing shell metacharacters (such as `*' or `?') must -also be properly quoted, since the query commands are run by invoking -them with the shell. - - -File: id.info, Node: iid commands, Prev: iid query expressions, Up: iid invocation - -`iid' commands -============== - - This section describes the interactive commands that `iid' -recognizes. The database query expressions you can pass to the `ss' -and `files' commands are described in the previous section. - - Some commands output a "summary line" for sets. These lines show the -set number, the number of filenames in the set, and the command that -generated it. - -`ss QUERY' - Build the set(s) of filenames resulting from the query expression - QUERY. 
The output is a summary line for each set. - -`files QUERY' -`f QUERY' - Evaluate the query expression QUERY as in `ss', but output the - full list of matching filenames instead of a summary. - -`sets' - Output a summary line for each extant set. - -`show SET' -`p SET' - Pass the filenames in set number SET to the program named in - the `PAGER' environment variable. Typically, this is a - page-at-a-time display program like `less' or `more'. If you use - Emacs, you might want to set `PAGER' to `emacsclient' (*note Emacs - Server: (emacs)Emacs Server.). - -`anything else' - When `iid' does not recognize the first word on an input line as a - builtin `iid' command, it assumes the input is a shell command - which will write a list of filenames to standard output, which it - gathers into a set as usual. - - Any set numbers that appear in the input are expanded into the - lists of filenames they represent prior to running the command. - -`!SHELL-COMMAND' - Expand set numbers that appear in SHELL-COMMAND into the filenames they - represent, and pass the result to `/bin/sh'. The output is not - interpreted. - -`begin DIRECTORY' -`b DIRECTORY' - Begin a new `iid' session in a different directory (which - presumably contains a different database). It deletes all the sets - created so far and switches to the specified directory. It is - equivalent to exiting `iid', changing directories in the shell, and - running `iid' again. - -`help' -`h' -`?' - Display a short help file using the program named in `PAGER'. - -`quit' -`q' -`off' - Quit `iid'. An end-of-file character (usually `CTRL-D') also exits. - - -File: id.info, Node: Index, Prev: iid invocation, Up: Top - -Index -***** - -* Menu: - -* $ in identifiers: C scanner. -* * in globbing: Wildcard patterns. -* *scratch* Emacs buffer: GNU Emacs gid interface. -* -: mkid options. -* -a: iid command line options. -* -a: Query options. -* -aARGFILE: mkid options. -* -b: pid invocation. -* -c: iid command line options.
-* -c: Query options. -* -d: Query options. -* -e: pid invocation. -* -e: Query options. -* -F: Query options. -* -fIDFILE: Query options. -* -g: Query options. -* -H: iid command line options. -* -k: Query options. -* -m: Query options. -* -n: Query options. -* -o: Query options. -* -rDIRECTORY: Query options. -* -S scanner option: Scanner option formats. -* -S.: Scanner option formats. -* -S?: Scanner option formats. -* -SSCANARG: mkid options. -* -Sasm+a: Assembler scanner. -* -Sasm+C: Assembler scanner. -* -Sasm+p: Assembler scanner. -* -Sasm+u: Assembler scanner. -* -Sasm-c: Assembler scanner. -* -Sc+u: C scanner. -* -Sc-s: C scanner. -* -Sc-u: C scanner. -* -Stext+a: Plain text scanner. -* -Stext+s: Plain text scanner. -* -Stext-a: Plain text scanner. -* -u: Query options. -* -v: mkid options. -* -w: Query options. -* -x: Query options. -* .default scanner: Scanners. -* .[chly] files, scanning: C scanner. -* ? in globbing: Wildcard patterns. -* aid: aid invocation. -* aid used for iid searches: iid command line options. -* architecture-independence: mkid invocation. -* assembler scanner: Assembler scanner. -* basename match: pid invocation. -* beginning-of-word editor argument: eid invocation. -* Berry, Karl: Past and future. -* brace notation in filename lists: Query options. -* bugs, reporting: Introduction. -* C scanner, predefined: C scanner. -* case-insensitive searching: aid invocation. -* commands for iid: iid commands. -* comments in assembler: Assembler scanner. -* common query arguments: Common query arguments. -* common query options: Query options. -* complex queries: iid invocation. -* compressed files, building ID from: mkid examples. -* conflicting identifiers, finding: Query options. -* constant strings, forcing evaluation as: Query options. -* creating databases: mkid invocation. -* cron: mkid invocation. -* cscope: Past and future. -* database name, specifying: Query options. -* databases, creating: mkid invocation. 
-* EDITOR: eid invocation. -* eid: eid invocation. -* EIDARG: eid invocation. -* EIDLDEL: eid invocation. -* EIDRDEL: eid invocation. -* Emacs interface to gid: GNU Emacs gid interface. -* emacsclient: iid commands. -* end-of-word editor argument: eid invocation. -* examples of mkid: mkid examples. -* examples, queries: Query examples. -* fid: fid invocation. -* filenames, matching: pid invocation. -* future: Past and future. -* gid Emacs function: GNU Emacs gid interface. -* gid.el interface to Emacs: GNU Emacs gid interface. -* globbing patterns: Wildcard patterns. -* grep: Past and future. -* help for iid: iid command line options. -* history: Past and future. -* Horsley, Tom: Past and future. -* ID database, definition of: Introduction. -* ID file format: mkid invocation. -* identifiers in a file: fid invocation. -* iid: iid invocation. -* iid commands: iid commands. -* iid options: iid command line options. -* iid query expressions: iid query expressions. -* interactive queries: iid invocation. -* introduction: Introduction. -* languages_0: Defining scanners in source code. -* left delimiter editor argument: eid invocation. -* Leonard, Bill: Past and future. -* lid: lid invocation. -* load-path: GNU Emacs gid interface. -* look and mkid 1: Past and future. -* man pages, compressed: mkid examples. -* matching filenames: pid invocation. -* McGary, Greg: Past and future. -* mkid: mkid invocation. -* mkid options: mkid options. -* multiple lines, merging: Query options. -* numbers, in databases: mkid invocation. -* numeric matches, specifying radix of: Query options. -* numeric searches: Query examples. -* options for iid: iid command line options. -* options for mkid: mkid options. -* overview: Introduction. -* PAGER: iid commands. -* parent directories, searched for ID: Query options. -* patterns: Patterns. -* pid: pid invocation. -* plain text scanner: Plain text scanner. -* predefined scanners: Predefined scanners. -* queries for iid: iid query expressions. 
-* query examples: Query examples. -* query options, common: Query options. -* radix of numeric matches, specifying: Query options. -* regular expression syntax: Patterns. -* regular expressions, forcing evaluation as: Query options. -* right delimiter editor argument: eid invocation. -* scanner options: Scanner option formats. -* scanners: Scanners. -* scanners, adding new: Defining new scanners. -* scanners, defining in source code: Defining scanners in source code. -* scanners, defining with options: Defining scanners with options. -* scanners, predefined: Predefined scanners. -* scanners.c: Defining scanners in source code. -* Scofield, Doug: Past and future. -* search for identifier, initial: eid invocation. -* sharing ID files: mkid invocation. -* shell brace notation in filename lists: Query options. -* shell commands in iid: iid commands. -* shell escape: iid commands. -* shell wildcard patterns: Wildcard patterns. -* single matches, showing: Query options. -* squeezing characters from identifiers: Plain text scanner. -* statistics: mkid options. -* string searching: aid invocation. -* strings, forcing evaluation as: Query options. -* suffixes of filenames: Scanners. -* suffixes_0: Defining scanners in source code. -* suppressing matching identifier: Query options. -* Texinfo, scanning example of: Defining scanners with options. -* whatis: aid invocation. -* wildcard wildcard patterns: Wildcard patterns. -* [!...] in globbing: Wildcard patterns. -* [...] in globbing: Wildcard patterns. -* \ in globbing: Wildcard patterns. 
- - - -Tag Table: -Node: Top1418 -Node: Introduction2101 -Node: Past and future4406 -Node: mkid invocation6731 -Node: mkid options8295 -Node: Scanners9707 -Node: Scanner option formats11196 -Node: Predefined scanners12366 -Node: C scanner13063 -Node: Plain text scanner13812 -Node: Assembler scanner14717 -Node: Defining new scanners15840 -Node: Defining scanners in source code16457 -Node: Defining scanners with options17296 -Node: idx invocation18744 -Node: mkid examples19304 -Node: Common query arguments21277 -Node: Query options21819 -Node: Patterns25208 -Node: Query examples26542 -Node: gid invocation27924 -Node: GNU Emacs gid interface29080 -Node: Looking up identifiers29938 -Node: lid invocation30428 -Node: aid invocation31856 -Node: eid invocation32636 -Node: fid invocation34674 -Node: pid invocation35226 -Node: Wildcard patterns36327 -Node: iid invocation37091 -Node: iid command line options37642 -Node: iid query expressions38213 -Node: iid commands40515 -Node: Index42745 - -End Tag Table diff --git a/id.texinfo b/id.texinfo deleted file mode 100644 index cdd9e56..0000000 --- a/id.texinfo +++ /dev/null @@ -1,1615 +0,0 @@ -\input texinfo -@comment %**start of header -@setfilename id.info -@settitle ID database utilities -@comment %**end of header - -@include version.texi - -@c Define new indices for filenames, commands and options. -@defcodeindex fl -@defcodeindex cm -@defcodeindex op - -@c Put everything in one index (arbitrarily chosen to be the concept index). -@syncodeindex fl cp -@syncodeindex fn cp -@syncodeindex ky cp -@syncodeindex op cp -@syncodeindex pg cp -@syncodeindex vr cp - -@ifinfo -@set Francois Franc,ois -@end ifinfo -@tex -@set Francois Fran\noexpand\ptexc cois -@end tex - -@ifinfo -@format -START-INFO-DIR-ENTRY -* ID database: (id). Identifier database utilities. -* aid: (id)aid invocation:: Matching strings. -* eid: (id)eid invocation:: Invoking an editor on matches. -* fid: (id)fid invocation:: Listing a file's identifiers. 
-* gid: (id)gid invocation:: Listing all matching lines. -* idx: (id)idx invocation:: Testing mkid scanners. -* iid: (id)iid invocation:: Interactive complex queries. -* lid: (id)lid invocation:: Matching patterns. -* mkid: (id)mkid invocation:: Creating an ID database. -* pid: (id)pid invocation:: Looking up filenames. -END-INFO-DIR-ENTRY -@end format -@end ifinfo - -@ifinfo -This file documents the @code{mkid} identifier database utilities. - -Copyright (C) 1991, 1995 Tom Horsley. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). - -@end ignore -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation. -@end ifinfo - -@titlepage -@title ID database utilities -@subtitle Programs for simple, fast, high-capacity cross-referencing -@subtitle for version @value{VERSION} -@author Tom Horsley - -@page -@vskip 0pt plus 1filll -Copyright @copyright{} 1991, 1995 Tom Horsley. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. 
- -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation. -@end titlepage - - -@ifinfo -@node Top -@top ID database utilities - -This manual documents version @value{VERSION} of the ID database -utilities. - -@menu -* Introduction:: Overview of the tools, and authors. -* mkid invocation:: Creating an ID database. -* Common query arguments:: Common lookup options and search patterns. -* gid invocation:: Listing all matching lines. -* Looking up identifiers:: lid, aid, eid, and fid. -* pid invocation:: Looking up filenames. -* iid invocation:: Interactive and complex queries. -* Index:: General index. -@end menu -@end ifinfo - - -@node Introduction -@chapter Introduction - -@cindex overview -@cindex introduction - -@cindex ID database, definition of -An @dfn{ID database} is a binary file containing a list of filenames, a -list of identifiers, and a matrix indicating which identifiers appear in -which files. With this database and some tools to manipulate it -(described in this manual), a host of tasks become simpler and faster. -For example, you can list all files containing a particular -@code{#include} throughout a huge source hierarchy, search for all the -memos containing references to a project, or automatically invoke an -editor on all files containing references to some function. Anyone with -a large software project to maintain, or a large set of text files to -organize, can benefit from an ID database. 
- -Although the ID utilities are most commonly used with identifiers, -numeric constants are also stored in the database, and can be searched -for in the same way (independent of radix, if desired). - -There are a number of programs in the ID family: - -@table @code - -@item mkid -scans files for identifiers and numeric constants and builds the ID -database file. - -@item gid -lists all lines that match given patterns. - -@item lid -lists the filenames containing identifiers that match given patterns. - -@item aid -lists the filenames containing identifiers that contain given strings, -independent of case. - -@item eid -invokes an editor on each file containing identifiers that match given -patterns. - -@item fid -lists all identifiers recorded in the database for given files, or -identifiers common to two files. - -@item pid -matches the filenames in the database, rather than the identifiers. - -@item iid -interactively supports more complex queries, such as intersection and -union. - -@item idx -helps with testing of new @code{mkid} scanners. - -@end table - -@cindex bugs, reporting -Please report bugs to @samp{gkm@@magilla.cichlid.com}. Remember to -include the version number, machine architecture, input files, and any -other information needed to reproduce the bug: your input, what you -expected, what you got, and why it is wrong. Diffs are welcome, but -please include a description of the problem as well, since this is -sometimes difficult to infer. @xref{Bugs, , , gcc, GNU CC}. - -@menu -* Past and future:: How the ID tools came about, and where they're going. -@end menu - - -@node Past and future -@section Past and future - -@cindex history - -@pindex look @r{and @code{mkid} 1} -@cindex McGary, Greg -Greg McGary conceived of the ideas behind mkid when he began hacking the -Unix kernel in 1984. He needed a navigation tool to help him find his -way around the expansive, unfamiliar landscape.
The first @code{mkid}-like -tools were shell scripts, and produced an ASCII database that looks much -like the output of @code{lid} with no arguments. It took over an hour -on a VAX 11/750 to build a database for a 4.1BSD-ish kernel. Lookups -were done with the system utility @code{look}, modified to handle very -long lines. - -In 1986, Greg rewrote @code{mkid}, @code{lid}, @code{fid} and @code{idx} -in C to improve performance. Database-build times were shortened by an -order of magnitude. The @code{mkid} tools were first posted to -@samp{comp.sources.unix} in September 1987. - -@cindex Horsley, Tom -@cindex Scofield, Doug -@cindex Leonard, Bill -@cindex Berry, Karl -Over the next few years, several versions diverged from the original -source. Tom Horsley at Harris Computer Systems Division stepped forward -to take over maintenance and integrated some of the fixes from divergent -versions. He also wrote the @code{iid} program. A first release of -@code{mkid} @w{version 2} was posted to @file{alt.sources} near the end -of 1990. At that time, Tom wrote this Texinfo manual with the -encouragement of the net community. (Tom especially thanks Doug Scofield -and Bill Leonard whom he dragooned into helping poorfraed and -edit---they found several problems in the initial version.) Karl Berry -revamped the manual for Texinfo style, indexing, and organization in -1995. - -@pindex cscope -@pindex grep -@cindex future -In January 1995, Greg McGary reemerged as the primary maintainer and -launched development of @code{mkid} version 3, whose primary new feature -is an efficient algorithm for building databases that is linear in both -time and space over the size of the input text. (The old algorithm was -quadratic in space and therefore choked on very large source trees.) -The code is released under the GNU General Public License, and might become a -part of the GNU system.
@code{mkid} 3 is an interim release, since -several significant enhancements are still in the works: an optional -coupling with GNU @code{grep}, so that @code{grep} can use an ID -database for hints; a @code{cscope} work-alike query interface; -incremental update of the ID database; and an automatic file-tree walker -so you need not explicitly supply every filename argument to the -@code{mkid} program. - - -@node mkid invocation -@chapter @code{mkid}: Creating ID databases - -@pindex mkid -@cindex creating databases -@cindex databases, creating - -@pindex cron -The @code{mkid} program builds an ID database. To do this, it must scan -each file you tell it to include in the database. This takes some time, -but once the work is done the query programs run very rapidly. (You can -run @code{mkid} as a @code{cron} job to regularly update your -databases.) - -The @code{mkid} program knows how to extract identifiers from various -types of files. For example, it can recognize and skip over comments -and string constants in a C program. - -@cindex numbers, in databases -Identifiers are not the only thing included in the database. Numbers -are also recognized and included in the database indexed by their binary -value. This feature allows you to find uses of constants without regard -to the radix used to specify them, since the same number can frequently -be written in many different ways (for instance, @samp{47}, @samp{0x2f}, -@samp{057} in C). - -All the places in this document which mention identifiers should really -mention both identifiers and numbers, but that gets fairly clumsy after -a while, so you just need to keep in mind that numbers are included in -the database as well as identifiers. - -@cindex ID file format -@cindex architecture-independence -@cindex sharing ID files -The ID files that @code{mkid} creates are architecture- and -byte-order-independent; you can share them at will across systems. - -@menu -* mkid options:: Command-line options to mkid. 
-* Scanners:: Built-in and defining your own. -* mkid examples:: Examples of mkid usage. -@end menu - - -@node mkid options -@section @code{mkid} options - -@cindex options for @code{mkid} -@pindex mkid @r{options} - -By default, @code{mkid} scans the files you specify and writes the -database to a file named @file{ID} in the current directory. - -@example -mkid [-v] [-S@var{scanarg}] [-a@var{argfile}] [-] [-f@var{idfile}] @c -@var{files}@dots{} -@end example - -The program accepts the following options. - -@table @samp - -@item -v -@opindex -v -@cindex statistics -Verbose. @code{mkid} tells you as it scans each file and indicates -which scanner it is using. It also summarizes some statistics about the -database at the end. - -@item -S@var{scanarg} -@opindex -S@var{scanarg} -Specify options regarding @code{mkid}'s scanners. @xref{Scanner option -formats}. - -@item -a@var{argfile} -@opindex -a@var{argfile} -Read additional command line arguments from @var{argfile}. This is -typically used to specify lists of filenames longer than will fit on a -command line; some systems have severe limitations on the total length -of a command line. - -@item - -@opindex - -Read additional command line arguments from standard input. - -@item -f@var{idfile} -Write the database to the file @var{idfile}, instead of @file{ID}. The -database stores filenames relative to the directory containing the -database, so if you move the database to a different directory after -creating it, you may have trouble finding files. - -@c @item -u -@c @opindex -u -@c The @code{-u} option updates an existing database by rescanning any -@c files that have changed since the database was written. Unfortunately -@c you cannot incrementally add new files to a database. -@c Greg is reimplementing this ... - -@end table - -The remaining arguments @var{files} are the files to be scanned and -included in the database. 
If no files are given at all (either on -command line or via @samp{-a} or @samp{-}), @code{mkid} does nothing. - - -@node Scanners -@section Scanners - -@cindex scanners - -To determine which identifiers to extract from a file and store in the -database, @code{mkid} calls a @dfn{scanner}; we say a scanner -@dfn{recognizes} a particular language. Scanners for several languages -are built-in to @code{mkid}; you can add your own scanners as well, as -explained in the sections below. - -@cindex suffixes of filenames -@code{mkid} determines which scanner to use for a particular file by -looking at the suffix of the filename. This @dfn{suffix} is everything -after and including the last @samp{.} in a filename; for example, the -suffix of @file{foo.c} is @file{.c}. @code{mkid} has a built-in list of -bindings from some suffixes to corresponding scanners; for example, -@file{.c} files are (not surprisingly) scanned by the predefined C -language scanner. - -@findex .default @r{scanner} -If @code{mkid} cannot determine what scanner to use for a particular -file, either because the file has no suffix (e.g., @file{foo}) or -because @code{mkid} has no binding for the file's suffix (e.g., -@file{foo.bar}), it uses the scanner bound to the @samp{.default} -suffix. By default, this is the plain text scanner (@pxref{Plain text -scanner}), but you can change this with the @samp{-S} option, as -explained below. - -@menu -* Scanner option formats:: Overview of the -S option. -* Predefined scanners:: The C, plain text, and assembler scanners. -* Defining new scanners:: Either in source code or at runtime with -S. -* idx invocation:: Testing mkid scanners. -@end menu - - -@node Scanner option formats -@subsection Scanner option formats - -@cindex scanner options -@opindex -S @r{scanner option} - -With the @samp{-S} option, you can change which language scanner to use -for which files, give language-specific options, and get some limited -online help about scanner options. 
- -Here are the different forms of the @samp{-S} option: - -@table @samp - -@item -S.@var{suffix}=@var{scanner} -@opindex -S. -Use @var{scanner} for a file with the given @samp{.@var{suffix}}. For -example, @samp{-S.yacc=c} tells @code{mkid} to use the @samp{c} language -scanner for all files ending in @samp{.yacc}. - -@item -S.@var{suffix}=? -Display which scanner is used for the given @samp{.@var{suffix}}. - -@item -S?=@var{scanner} -@opindex -S? -Display which suffixes @var{scanner} is used for. - -@item -S?=? -Display the scanner binding for every known suffix. - -@item -S@var{scanner}+@var{arg} -@itemx -S@var{scanner}-@var{arg} -Each scanner accepts certain scanner-dependent arguments. These options -all have one of these forms. @xref{Predefined scanners}. - -@item -S@var{scanner}? -Display the scanner-specific options accepted by @var{scanner}. - -@item -S@var{new-scanner}/@var{old-scanner}/@var{filter-command} -Define @var{new-scanner} in terms of @var{old-scanner} and -@var{filter-command}. @xref{Defining scanners with options}. - -@end table - - -@node Predefined scanners -@subsection Predefined scanners - -@cindex predefined scanners -@cindex scanners, predefined - -@code{mkid} has built-in scanners for several types of languages; you -can get the list by running @code{mkid -S?=?}. -The supported languages are documented -below@footnote{This is not strictly true: @samp{vhil} is a supported -language, but it is an obsolete and arcane dialect of C and should be -ignored.}. - -@menu -* C scanner:: For the C programming language. -* Plain text scanner:: For documents or other non-source code. -* Assembler scanner:: For assembly language. -@end menu - - -@node C scanner -@subsubsection C scanner - -@cindex C scanner, predefined -@flindex .[chly] @r{files, scanning} - -The C scanner is the most commonly used. 
Files with the usual @file{.c} -and @file{.h} suffixes, and the @file{.y} (yacc) and @file{.l} (lex) -suffixes, are processed with this scanner (by default). - -Scanner-specific options: - -@table @samp - -@item -Sc-s@var{character} -@kindex $ @r{in identifiers} -@opindex -Sc-s -Allow the specified @var{character} in identifiers. For example, if you -use @samp{$} in identifiers, you'll want to use @samp{-Sc-s$}. - -@item -Sc+u -@opindex -Sc+u -Strip leading underscores from identifiers. You might want to do this in -peculiar circumstances, such as trying to parse the output from -@code{nm} or some other system utility. - -@item -Sc-u -@opindex -Sc-u -Don't strip leading underscores from identifiers; this is the default. - -@end table - - -@node Plain text scanner -@subsubsection Plain text scanner - -@cindex plain text scanner - -The plain text scanner is intended for scanning most non-source-code -files. This is typically the scanner used when adding custom scanners -via @samp{-S} (@pxref{Defining scanners with options}). - -@c @code{mkid} predefines a troff scanner in terms of the plain text -@c scanner and -@c the @code{deroff} utility. -@c A compressed man page -@c scanner runs @code{pcat} piped into @code{col -b}, and a @TeX{} scanner -@c runs @code{detex}. - -Scanner-specific options: - -@table @samp - -@item -Stext+a@var{character} -@opindex -Stext+a -Include @var{character} in identifiers. By default, letters (a--z and -A--Z) and underscore are included. - -@item -Stext-a@var{character} -@opindex -Stext-a -Exclude @var{character} from identifiers. - -@item -Stext+s@var{character} -@opindex -Stext+s -@cindex squeezing characters from identifiers -Squeeze @var{character} from identifiers, i.e., do not terminate an -identifier when @var{character} is seen. By default, the characters -@samp{'}, @samp{-}, and @samp{.} are squeezed out of identifiers. For -example, the input @samp{fred's} leads to the identifier @samp{freds}.
- -@item -Stext-s@var{character} -Do not squeeze @var{character}. - -@end table - - -@node Assembler scanner -@subsubsection Assembler scanner - -@cindex assembler scanner - -Since assembly languages come in several flavors, this scanner has a -number of options: - -@table @samp - -@item -Sasm-c@var{character} -@opindex -Sasm-c -@cindex comments in assembler -Define @var{character} as starting a comment that extends to the end of -the input line; no default. In many assemblers this is @samp{;} or -@samp{#}. - -@item -Sasm+u -@itemx -Sasm-u -@opindex -Sasm+u -Strip (@samp{+u}) or do not strip (@samp{-u}) leading underscores from -identifiers. The default is to strip them. - -@item -Sasm+a@var{character} -@opindex -Sasm+a -Allow @var{character} in identifiers. - -@item -Sasm-a@var{character} -Allow @var{character} in identifiers, but if an identifier contains -@var{character}, ignore it. This is useful to ignore temporary labels, -which can be generated in great profusion; these often contain @samp{.} -or @samp{@@}. - -@item -Sasm+p -@itemx -Sasm-p -@opindex -Sasm+p -Recognize (@samp{+p}) or do not recognize (@samp{-p}) C preprocessor -directives in assembler source. The default is to recognize them. - -@item -Sasm+C -@itemx -Sasm-C -@opindex -Sasm+C -Skip over (@samp{+C}) or do not skip over (@samp{-C}) C style comments -in assembler source. The default is to skip them. - -@end table - - -@node Defining new scanners -@subsection Defining new scanners - -@cindex scanners, adding new - -You can add new scanners to @code{mkid} in two ways: modify the source -code and recompile, or at runtime via the @samp{-S} option. Each has -their advantages and disadvantages, as explained below. - -If you create a new scanner that would be of use to others, please -consider sending it back to the maintainer, -@samp{gkm@@magilla.cichlid.com}, for inclusion in future releases of -@code{mkid}. 
- -@menu -* Defining scanners in source code:: -* Defining scanners with options:: -@end menu - - -@node Defining scanners in source code -@subsubsection Defining scanners in source code - -@flindex scanners.c -@cindex scanners, defining in source code - -@vindex languages_0 -@vindex suffixes_0 -To add a new scanner in source code, you should add a new section to the -file @file{scanners.c}. Copy one of the existing scanners (most likely -either C or plain text), and modify as necessary. Also add the new -scanner to the @code{languages_0} and @code{suffixes_0} tables near the -beginning of the file. - -This is not a terribly difficult programming task, but it requires -recompiling and installing the new version of @code{mkid}, which may be -inconvenient. - -This method leads to scanners which operate much more quickly than ones -that depend on external programs. It is also likely the easiest way -to define scanners for new programming languages. - - -@node Defining scanners with options -@subsubsection Defining scanners with options - -@cindex scanners, defining with options - -You can use the @samp{-S} option on the command line to define a new -language scanner: - -@example --S@var{new-scanner}/@var{existing-scanner}/@var{filter} -@end example - -@noindent -Here, @var{new-scanner} is the name of the new scanner being defined, -@var{existing-scanner} is the name of an existing scanner, and -@var{filter} is a shell command or pipeline. - -The new scanner works by passing the input file to @var{filter}, and -then arranging for the result to be passed through -@var{existing-scanner}. Typically, @var{existing-scanner} is @samp{text}. - -Somewhere within @var{filter}, the string @samp{%s} should occur. This -@samp{%s} is replaced by the name of the source file being scanned. - -@cindex Texinfo, scanning example of -For example, @code{mkid} has no built-in scanner for Texinfo files (like -this one).
In indexing a Texinfo file, you most likely would want -to ignore the Texinfo @@-commands. Here's one way to specify a new -scanner to do this: - -@example --Stexinfo/text/sed s,@@[a-z]*,,g %s -@end example - -This defines a new language scanner (@samp{texinfo}) defined in terms of -a @code{sed} command to strip out Texinfo directives (an @samp{@@} -character followed by letters). Once the directives are stripped, the -remaining text is run through the plain text scanner. - -This is a minimal example; to do a complete job, you would need to -completely delete some lines, such as those beginning with @code{@@end} -or @code{@@node}. - - -@node idx invocation -@subsection @code{idx}: Testing @code{mkid} scanners - -@code{idx} prints the identifiers found in the files you specify to -standard output. This is useful in debugging new @code{mkid} scanners -(@pxref{Scanners}). Synopsis: - -@example -idx [-S@var{scanarg}] @var{files}@dots{} -@end example - -@code{idx} accepts the same @samp{-S} options as @code{mkid}. -@xref{Scanner option formats}. - -The name ``idx'' stands for ``ID eXtract''. The name may change in -future releases, since this is such an infrequently used program. - - -@node mkid examples -@section @code{mkid} examples - -@cindex examples of @code{mkid} - -The simplest example of @code{mkid} is something like: - -@example -mkid *.[chy] -@end example - -This will build an ID database indexing identifiers and numbers in -all the @file{.c}, @file{.h}, and @file{.y} files in the current -directory. Because @code{mkid} already knows how to scan files with -those suffixes, no additional options are needed. - -@cindex man pages, compressed -@cindex compressed files, building ID from -Here's a more complex example.
Suppose you want to build a database -indexing the contents of all the @code{man} pages, and further suppose -that your system is using @code{gzip} (@pxref{Top, , , gzip, Gzip}) to -store compressed @code{cat} versions of the @code{man} pages in the -directory @file{/usr/catman}. The @code{gzip} program creates files -with a @code{.gz} suffix, so you must tell @code{mkid} how to scan -@file{.gz} files. Here are the commands to do the job: - -@example -cd /usr/catman -find . -name \*.gz -print | mkid '-Sman/text/gzip <%s' -S.gz=man - -@end example - -@noindent Explanation: - -@enumerate - -@item -We first @code{cd} to @file{/usr/catman} so the ID database -will store the correct relative filenames. - -@item -The @code{find} command prints the names of all @file{.gz} files under -the current directory. @xref{find invocation, , , sh-utils, GNU shell -utilities}. - -@item -This list is piped to @code{mkid}; the @code{-} option (at the end of -the line) tells @code{mkid} to read arguments (in this case, as is -typical, the list of filenames) from standard input. @xref{mkid options}. - -@item -The @samp{-Sman/text/gzip @dots{}} defines a new language @samp{man} in -terms of the @code{gzip} program and @code{mkid}'s existing text -scanner. @xref{Defining scanners with options}. - -@item -The @samp{-S.gz=man} tells @code{mkid} to treat all @file{.gz} files as -this new language @code{man}. @xref{Scanner option formats}. - -@end enumerate - -As a further complication, @code{cat} pages typically contain -underlining and backspace sequences, which will confuse @code{mkid}. To -handle this, the @code{gzip} command becomes a pipeline, like this: - -@example -mkid '-Sman/text/gzip <%s | col -b' -S.gz=man - -@end example - - -@node Common query arguments -@chapter Common query arguments - -@cindex common query arguments - -Certain options, and regular expression syntax, are shared by the ID -query tools.
So we describe those things in the sections below, instead -of repeating the description for each tool. - -@menu -* Query options:: -f -r -c -ew -kg -n -doxa -m -F -u. -* Patterns:: Regular expression syntax for searches. -* Examples: Query examples. Some common uses. -@end menu - - -@node Query options -@section Query options - -@cindex query options, common -@cindex common query options - -The ID query tools (@emph{not} @code{mkid}) share certain command line -options. Not all of these options are recognized by all programs, but -if an option is used by more than one program, it is described below. -The description of each program gives the options that program uses. - -@table @samp - -@item -f@var{idfile} -@opindex -f@var{idfile} -@cindex database name, specifying -@cindex parent directories, searched for ID -Read the database from @var{idfile}, in the current directory or in any -directory above the current directory. The default database name is -@file{ID}. Searching parent directories lets you have a single ID -database at the root of a large source tree and then use the query tools -from anywhere within that tree. - -@item -r@var{directory} -@opindex -r@var{directory} -Find files relative to @var{directory}, instead of the directory in -which the ID database was found. This is useful if the ID database was -moved after its creation. - -@item -c -@opindex -c -Equivalent to @code{-r`pwd`}, i.e., find files relative to the current -directory, instead of the directory in which the ID database was found. - -@item -e -@itemx -w -@opindex -e -@opindex -w -@cindex regular expressions, forcing evaluation as -@cindex strings, forcing evaluation as -@cindex constant strings, forcing evaluation as -@samp{-e} forces pattern arguments to be treated as regular expressions, -and @samp{-w} forces pattern arguments to be treated as constant -strings. 
By default, the query tools guess whether a pattern is regular -expressions or constant strings by looking for special characters. -@xref{Patterns}. - -@item -k -@itemx -g -@opindex -k -@opindex -g -@cindex brace notation in filename lists -@cindex shell brace notation in filename lists -@samp{-k} suppresses use of shell brace notation in the output. By -default, the query tools that generate lists of filenames attempt to -compress the lists using the usual shell brace notation, e.g., -@file{@{foo,bar@}.c} to mean @file{foo.c} and @file{bar.c}. (This is -useful if you use @code{ksh} or the original (not GNU) @code{sh} and -want to feed the list of names to another command, since those shells do -not support this brace notation; the name of the @code{-k} option comes -from the @code{k} in @code{ksh}). - -@samp{-g} turns on use of brace notation; this is only needed if the -query tools were compiled with @samp{-k} as the default behavior. - -@item -n -@opindex -n -@cindex suppressing matching identifier -Suppress the matching identifier before each list of filenames that the -query tools output by default. This is useful if you want a list of just -the names to feed to another command. - -@item -d -@itemx -o -@itemx -x -@itemx -a -@opindex -d -@opindex -o -@opindex -x -@opindex -a -@cindex radix of numeric matches, specifying -@cindex numeric matches, specifying radix of -These options may be used in any combination to specify the radix of -numeric matches. @samp{-d} allows matching on decimal numbers, -@samp{-o} on octal numbers, and @samp{-x} on hexadecimal numbers. The -@code{-a} option is equivalent to specifying all three; this is the -default. Any combination of these options may be used. - -@item -m -@opindex -m -@cindex multiple lines, merging -Merge multiple lines of output into a single line. If your query -matches more than one identifier, the default is to generate a separate -line of output for each matching identifier. 
- -@item -F- -@itemx -F@var{n} -@itemx -F-@var{m} -@itemx -F@var{n}-@var{m} -@opindex -F -@cindex single matches, showing -Show identifiers matching at least @var{n} and at most @var{m} times. -@samp{-F-} is equivalent to @samp{-F1}, i.e., find identifiers that -appear only once in the database. (This is useful to locate identifiers -that are defined but never used, or used once and never defined.) - -@item -u@var{number} -@opindex -u -@cindex conflicting identifiers, finding -List identifiers that conflict in the first @var{number} characters. -This could be useful in porting programs to brain-dead computers that -refuse to support long identifiers, but your best long term option is to -set such computers on fire. - -@end table - - -@node Patterns -@section Patterns - -@cindex patterns -@cindex regular expression syntax - -@dfn{Patterns}, also called @dfn{regular expressions}, allow you to -match many different identifiers in a single query. - -The same regular expression syntax is recognized by all the query tools -that handle regular expressions. The exact syntax depends on how the ID -tools were compiled, but the following constructs should always be -supported: - -@table @samp - -@item . -Match any single character. - -@item [@var{chars}] -Match any of the characters specified within the brackets. You can -match any characters @emph{except} the ones in brackets by typing -@samp{^} as the first character. A range of characters can be specified -using @samp{-}. For example, @samp{[abc]} and @samp{[a-c]} both match -@samp{a}, @samp{b}, or @samp{c}, and @samp{[^abc]} matches anything -@emph{except} @samp{a}, @samp{b}, or @samp{c}. - -@item * -Match the previous construct zero or more times. - -@item ^ -@itemx $ -@samp{^} (@samp{$}) at the beginning (end) of a pattern anchors the -match to the first (last) character of the identifier. 
- -@end table - -The query programs use either the @code{regex}/@code{regcmp} or -@code{re_comp}/@code{re_exec} functions, depending on which are -available in the library on your system. These do not always support -the exact same regular expression syntax, so consult your local -@code{man} pages to find out. - - -@node Query examples -@section Query examples - -@cindex examples, queries -@cindex query examples -Here are some examples of the options described in the previous -sections. - -To restrict searches to exact matches, use @samp{^@dots{}$}. For example: - -@example -prompt$ gid '^FILE$' -ansi2knr.c:144: @{ FILE *in, *out; -ansi2knr.c:315: FILE *out; -fid.c:38: FILE *id_FILE; -filenames.c:576: FILE * -@dots{} -@end example - -To show identifiers not unique in the first 16 characters: - -@example -prompt$ lid -u16 -RE_CONTEXT_INDEP_ANCHORS regex.c -RE_CONTEXT_INDEP_OPS regex.c -RE_SYNTAX_POSIX_BASIC regex.c -RE_SYNTAX_POSIX_EXTENDED regex.c -@dots{} -@end example - -@cindex numeric searches -Numbers are searched for numerically rather than textually. For example: - -@example -prompt$ lid 0xff -0377 @{lid,regex@}.c -0xff @{bitops,fid,lid,mkid@}.c -255 regex.c -@end example - -On the other hand, you can restrict a numeric search to a particular -radix if you want: - -@example -prompt$ lid -x 0xff -0xff @{bitops,fid,lid,mkid@}.c -@end example - -Filenames in the output are always adjusted to be correct for the -current working directory. 
For example: - -@example -prompt$ lid bdevsw -bdevsw sys/conf.h cf/conf.c io/bio.c os/@{fio,main,prf,sys3@}.c -prompt$ cd io -prompt$ lid bdevsw -bdevsw ../sys/conf.h ../cf/conf.c bio.c ../os/@{fio,main,prf,sys3@}.c -@end example - - -@node gid invocation -@chapter @code{gid}: Listing matching lines - -Synopsis: - -@example -gid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-doxasc] [@var{pattern}@dots{}] -@end example - -@code{gid} finds the identifiers in the database that match the -specified @var{pattern}s, then searches for all occurrences of those -identifiers, in only the files containing matches. In a large source -tree, this saves an enormous amount of time (compared to searching every -source file). - -With no @var{pattern} arguments, @code{gid} prints every line of every -source file. - -The name ``gid'' stands for ``grep for identifiers'', @code{grep} being -the standard utility to search regular files. - -@xref{Common query arguments}, for a description of the command-line -options and @var{pattern} arguments. - -@code{gid} uses the standard GNU output format for identifying source lines: - -@example -@var{filename}:@var{linenum}: @var{text} -@end example - -Here is an example: - -@example -prompt$ gid FILE -ansi2knr.c:144: @{ FILE *in, *out; -ansi2knr.c:315: FILE *out; -fid.c:38: FILE *id_FILE; -@dots{} -@end example - -@menu -* GNU Emacs gid interface:: Using next-error with gid. -@end menu - - -@node GNU Emacs gid interface -@section GNU Emacs @code{gid} interface - -@cindex Emacs interface to @code{gid} -@flindex gid.el @r{interface to Emacs} - -@vindex load-path -The @code{mkid} source distribution comes with a file @file{gid.el}, -which defines a GNU Emacs interface to @code{gid}. To install it, put -@file{gid.el} somewhere that Emacs will find it (i.e., in your -@code{load-path}) and put - -@example -(autoload 'gid "gid" nil t) -@end example - -@noindent in one of Emacs' initialization files, e.g., @file{~/.emacs}. 
-You will then be able to use @kbd{M-x gid} to run the command. - -@findex gid @r{Emacs function} -The @code{gid} function prompts you with the word around point. If you -want to search for something else, simply delete the line and type the -pattern of interest. - -@flindex *compilation* @r{Emacs buffer} -The function then runs the @code{gid} program in a @samp{*compilation*} -buffer, so the normal @code{next-error} function can be used to visit -all the places the identifier is found (@pxref{Compilation,,, emacs, The -GNU Emacs Manual}). - - -@node Looking up identifiers -@chapter Looking up identifiers - -These commands look up identifiers in the ID database and operate on the -files containing matches. - -@menu -* lid invocation:: Matching patterns. -* aid invocation:: Matching strings. -* eid invocation:: Invoking an editor on matches. -* fid invocation:: Listing a file's identifiers. -@end menu - - -@node lid invocation -@section @code{lid}: Matching patterns - -@pindex lid - -Synopsis: - -@example -lid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-mewdoxaskgnc] @c -@var{pattern}@dots{} -@end example - -@code{lid} searches the database for identifiers matching the given -@var{pattern} arguments and prints the names of the files that match -each @var{pattern}. With no @var{pattern}s, @code{lid} lists every -entry in the database. - -The name ``lid'' stands for ``lookup identifier''. - -@xref{Common query arguments}, for a description of the command-line -options and @var{pattern} arguments. - -By default, each line of output consists of an identifier and all the -files containing that identifier. 
- -Here is an example showing a search for a single identifier (omitting -some output to keep lines short): - -@example -prompt$ lid FILE -FILE extern.h @{fid,gets0,getsFF,idx,init,lid,mkid,@dots{}@}.c -@end example - -This example shows a regular expression search: - -@example -prompt$ lid 'FILE$' -AF_FILE mkid.c -AF_IDFILE mkid.c -FILE extern.h @{fid,gets0,getsFF,idx,init,lid,mkid,@dots{}@}.c -IDFILE id.h @{fid,lid,mkid@}.c -IdFILE @{fid,lid@}.c -@dots{} -@end example - -@noindent As you can see, when a regular expression is used, it is -possible to get more than one line of output. To merge multiple lines -into one, use @samp{-m}: - -@example -prompt$ lid -m ^get -^get extern.h @{bitsvec,fid,gets0,getsFF,getscan,idx,lid,@dots{}@}.c -@end example - - -@node aid invocation -@section @code{aid}: Matching strings - -@pindex aid - -Synopsis: - -@example -aid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-mewdoxaskgnc] @c -@var{string}@dots{} -@end example - -@cindex case-insensitive searching -@cindex string searching -@code{aid} searches the database for identifiers containing the given -@var{string} arguments. The search is case-insensitive. - -@flindex whatis -The name ``aid'' stands for ``apropos identifier'', @code{apropos} -being a command that does a similar search of the @code{whatis} database -of @code{man} descriptions. - -For example, @samp{aid get} matches the identifiers @code{fgets}, -@code{GETLINE}, and @code{getchar}. - -The default output format is the same as @code{lid}; see the previous -section. - -@xref{Common query arguments}, for a description of the command-line -options and @var{pattern} arguments. - - -@node eid invocation -@section @code{eid}: Invoking an editor on matches - -@pindex eid - -Synopsis: - -@example -eid [-f@var{file}] [-u@var{n}] [-r@var{dir}] [-doxasc] [@var{pattern}]@dots{} -@end example - -@code{eid} runs the usual search (@pxref{lid invocation}) on the given -arguments, shows you the output, and then asks: - -@example -Edit? 
[y1-9^S/nq] -@end example - -@noindent -You can respond with: - -@table @samp -@item y -Edit all files listed. - -@item 1@dots{}9 -Start editing at the @math{@var{n} + 1}'st file. - -@item /@var{string} @r{or} @kbd{CTRL-S}@var{string} -Start editing at the first filename containing @var{string}. - -@item n -Go on to the next @var{pattern}, i.e., edit nothing for this one. - -@item q -Quit @code{eid}. - -@end table - -@code{eid} invokes the editor defined by the @samp{EDITOR} environment -variable to edit a file. If this editor can accept an initial search -argument on the command line, @code{eid} can move automatically to the -location of the match, via the environment variables below. - -@xref{Common query arguments}, for a description of the command-line -options and @var{pattern} arguments. - -Here are the environment variables relevant to @code{eid}: - -@table @samp - -@item EDITOR -@vindex EDITOR -The name of the editor program to invoke. - -@item EIDARG -@vindex EIDARG -@cindex search for identifier, initial -The argument to pass to the editor to search for the matching -identifier. For @code{vi}, this should be @samp{+/%s/'}. - -@item EIDLDEL -@vindex EIDLDEL -@cindex left delimiter editor argument -@cindex beginning-of-word editor argument -A regular expression to force a match at the beginning of a word (``left -delimiter''). @code{eid} inserts this in front of the matching identifier -when composing the search argument. For @code{vi}, this should be -@samp{\<}. - -@item EIDRDEL -@vindex EIDRDEL -@cindex right delimiter editor argument -@cindex end-of-word editor argument -The end-of-word regular expression. For @code{vi}, this should be -@samp{\>}. - -@end table - -For Emacs users, the interface in @code{gid.el} is probably preferable -to @code{eid}. @xref{GNU Emacs gid interface}. - - -Here is an example: - -@example -prompt$ eid FILE \^print -FILE @{ansi2knr,fid,filenames,idfile,idx,iid,lid,misc,@dots{}@}.c -Edit? 
[y1-9^S/nq] n -^print @{ansi2knr,fid,getopt,getopt1,iid,lid,mkid,regex,scanners@}.c -Edit? [y1-9^S/nq] 2 -@end example - -@noindent This will start editing at @file{getopt.c}. - - -@node fid invocation -@section @code{fid}: Listing a file's identifiers - -@pindex fid -@cindex identifiers in a file - -@code{fid} lists the identifiers found in a given file. Synopsis: - -@example -fid [-f@var{dbfile}] @var{file1} [@var{file2}] -@end example - -@table @samp - -@item -f@var{dbfile} -Read the database from @var{dbfile} instead of @file{ID}. - -@item @var{file1} -List all the identifiers contained in @var{file1}. - -@item @var{file2} -With a second file argument, list only the identifiers both files have -in common. - -@end table - -The output is simply one identifier (or number) per line. - - -@node pid invocation -@chapter @code{pid}: Looking up filenames - -@pindex pid -@cindex filenames, matching -@cindex matching filenames - -@code{pid} matches the filenames stored in the ID database, rather than -the identifiers. Synopsis: - -@example -pid [-f@var{dbfile}] [-r@var{dir}] [-ebkgnc] @var{wildcard}@dots{} -@end example - -By default, the @var{wildcard} patterns are treated as shell globbing -patterns, rather than the regular expressions the other utilities -accept. See the section below for details. - -Besides the standard options given in the synopsis (@pxref{Query -options}), @code{pid} accepts the following: - -@table @samp - -@item -e -@opindex -e -Do the usual regular expression matching (@pxref{Patterns}), instead -of shell wildcard matching. - -@item -b -@opindex -b -@cindex basename match -Match the basenames of the files in the database. For example, -@samp{pid -b foo} will match the stored filename @file{dir/foo}, but not -@file{foo/file}. - -@end table - -For example, the command: - -@example -pid \*.c -@end example - -@noindent lists all the @file{.c} files in the database. (The @samp{\} -here protects the @samp{*} from being expanded by the shell.) 
- -@menu -* Wildcard patterns:: Shell-style globbing patterns. -@end menu - - -@node Wildcard patterns -@section Wildcard patterns - -@cindex globbing patterns -@cindex shell wildcard patterns -@cindex wildcard patterns - -@code{pid} does simplified shell wildcard matching (unless the @samp{-e} -option is specified), rather than the regular expression matching done -by the other utilities. Here is a description of wildcard matching, -also called @dfn{globbing}: - -@itemize - -@item -@kindex * @r{in globbing} -@samp{*} matches zero or more characters. - -@item -@kindex ? @r{in globbing} -@samp{?} matches any single character. - -@item -@kindex \ @r{in globbing} -@samp{\} forces the next character to be taken literally. - -@item -@kindex [@dots{}] @r{in globbing} -@samp{[@var{chars}]} matches any single character listed in @var{chars}. - -@item -@kindex [!@dots{}] @r{in globbing} -@samp{[!@var{chars}]} matches any character @emph{not} listed in @var{chars}. - -@end itemize - -Most shells treat @samp{/} and leading @samp{.} characters -specially. @code{pid} does not do this. It simply matches the filename -in the database against the wildcard pattern. - - -@node iid invocation -@chapter @code{iid}: Complex interactive queries - -@pindex iid -@cindex interactive queries -@cindex complex queries - -@code{iid} is an interactive query utility for ID databases. It -operates by running another query program (@code{lid} by default, -@code{aid} if @samp{-a} is specified) and manipulating the sets of -filenames returned by these queries. - -@menu -* iid command line options:: Command-line options. -* iid query expressions:: Operands to the commands. -* iid commands:: Printing matching filenames, etc. 
-@end menu - - -@node iid command line options -@section @code{iid} command line options - -@cindex options for @code{iid} -@pindex iid @r{options} - -@code{iid} recognizes the following options (the standard query options -described in @ref{Query options} are inapplicable): - -@table @samp - -@item -a -@opindex -a -@pindex aid @r{used for @code{iid} searches} -Use @code{aid} for searches, instead of @code{lid}. - -@item -c@var{command} -@pindex -c -Execute @var{command} and exit, instead of prompting for interactive -commands. - -@item -H -@pindex -H -@cindex help for @code{iid} -Print a usage message and exit successfully. The @code{help} command -inside @code{iid} gives more information. @xref{iid commands}. - -@end table - - -@node iid query expressions -@section @code{iid} query expressions - -@cindex queries for @code{iid} -@pindex iid @r{query expressions} - -An @code{iid} @dfn{query expression} generates a set of filenames or -manipulates existing sets. These expressions are operands to some of -the @code{iid} commands (see the next section), not commands themselves. - -Here are the possible constructs, highest precedence first: - -@table @samp - -@item s@var{set-number} -Refer to a set previously created by a query operation. During each -@code{iid} session, every query generates a different set number, so -any previously generated set may be used as part of any new query by -reference to its set number. - -@item @var{pattern} -@code{iid} treats any non-keyword input (i.e., anything not in this -table) as an identifier to be searched for in the database. It is -passed to the search program (@code{lid} by default, @code{aid} if the -@code{-a} option was specified). The result of this operation is a set -of filenames, and it is assigned a unique set number. - -@item lid @var{identifier-list} -@cmindex lid @r{iid operator} -Invoke the @code{lid} program on @var{identifier-list} and construct a -new set from the result. 
- -@item aid @var{identifier-list} -@cmindex aid @r{iid operator} -Like @code{lid}, but use the @code{aid} program. - -@item match @var{wildcards} -@cmindex match @r{iid operator} -Invoke the @code{pid} program on @var{wildcards}, therefore matching on -the filenames in the database instead of the identifiers. The resulting -set contains the filenames that match the specified patterns. @xref{pid -invocation}. - -@item not @var{expr} -@cmindex not @r{iid operator} -The result is those filenames in the database that are not in -@var{expr}. - -@item @var{expr1} and @var{expr2} -@cmindex and @r{iid operator} -The result is the intersection of the sets @var{expr1} and @var{expr2}, -i.e., only those filenames contained in both. - -@item @var{expr1} or @var{expr2} -@cmindex or @r{iid operator} -The result is the union of the sets @var{expr1} and @var{expr2}, i.e., -all the filenames contained in either or both. - -@end table - -Operator names are recognized independent of case, so @code{AND}, -@code{and}, and @code{aNd} are all the same as far as @code{iid} is -concerned. - -To pass a keyword as an operand, you must enclose it in double quotes: -the command @samp{lid "lid"} generates the set of all filenames matching -the string @samp{lid}. - -Patterns containing shell metacharacters (such as @samp{*} or @samp{?}) -must also be properly quoted, since the query commands are run by -invoking them with the shell. - -@c Summary of query expression syntax: -@c -@c A @var{query} is: -@c @example -@c -@c -@c lid -@c aid -@c match -@c or -@c and -@c not -@c ( ) -@c @end example - - -@node iid commands -@section @code{iid} commands - -@cindex commands for @code{iid} -@pindex iid @r{commands} - -This section describes the interactive commands that @code{iid} -recognizes. The database query expressions you can pass to the -@samp{ss} and @samp{files} commands are described in the previous -section. - -Some commands output a @dfn{summary line} for sets. 
These lines show the -set number, the number of filenames in the set, and the command that -generated it. - -@table @samp - -@item ss @var{query} -@cmindex ss iid @r{command} -Build the set(s) of filenames resulting from the query expression -@var{query}. The output is a summary line for each set. - -@item files @var{query} -@itemx f @var{query} -@cmindex files iid @r{command} -@cmindex f iid @r{command} -Evaluate the query expression @var{query} as in @code{ss}, but output -the full list of matching filenames instead of a summary. - -@item sets -@cmindex sets iid @r{command} -Output a summary line for each extant set. - -@item show @var{set} -@itemx p @var{set} -@cmindex show iid @r{command} -@cmindex p iid @r{command} -@vindex PAGER -@pindex emacsclient -Pass the filenames in set number @var{set} to the program named in -the @code{PAGER} environment variable. Typically, this is a -page-at-a-time display program like @code{less} or @code{more}. If you -use Emacs, you might want to set @samp{PAGER} to @code{emacsclient} -(@pxref{Emacs Server,,, emacs, The GNU Emacs Manual}). - -@item @r{anything else} -@cindex shell commands in @code{iid} -When @code{iid} does not recognize the first word on an input line as a -builtin @code{iid} command, it assumes the input is a shell command -which will write a list of filenames to standard output, which it -gathers into a set as usual. - -Any set numbers that appear in the input are expanded into the lists of -filenames they represent prior to running the command. - -@item !@var{shell-command} -@cmindex ! iid @r{command} -@cindex shell escape -Expand set numbers appearing in @var{shell-command} into the filenames they -represent, and pass the result to @file{/bin/sh}. The output is not -interpreted. - -@item begin @var{directory} -@itemx b @var{directory} -@cmindex begin iid @r{command} -@cmindex b iid @r{command} -Begin a new @code{iid} session in a different directory (which -presumably contains a different database). 
It deletes all the sets -created so far and switches to the specified directory. It is -equivalent to exiting @code{iid}, changing directories in the shell, and -running @code{iid} again. - -@item help -@itemx h -@itemx ? -@cmindex help iid @r{command} -@cmindex h iid @r{command} -@cmindex ? iid @r{command} -Display a short help file using the program named in @samp{PAGER}. - -@item quit -@itemx q -@itemx off -@cmindex quit iid @r{command} -@cmindex q iid @r{command} -@cmindex off iid @r{command} -Quit @code{iid}. An end-of-file character (usually @kbd{CTRL-D}) also exits. - -@end table - - -@node Index -@unnumbered Index - -@printindex cp - -@contents -@bye diff --git a/idarg.h b/idarg.h deleted file mode 100644 index 7570ebd..0000000 --- a/idarg.h +++ /dev/null @@ -1,33 +0,0 @@ -/* idarg.h -- defs for internal form of command-line arguments - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _idarg_h_ -#define _idarg_h_ - -struct idarg -{ - struct idarg *ida_next; - char *ida_arg; - int ida_index; - char ida_flags; -#define IDA_RELATIVE 0x01 /* file name is now relative (lid) */ -#define IDA_SCAN_ME 0x01 /* file should be scanned (mkid) */ -#define IDA_PREFIX_US 0x02 /* file has names with prefixed underscores */ -}; - -#endif /* not _idarg_h_ */ diff --git a/idfile.c b/idfile.c deleted file mode 100644 index f244a0f..0000000 --- a/idfile.c +++ /dev/null @@ -1,246 +0,0 @@ -/* idfile.c -- read & write mkid database file header - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include -#include - -#include -#include "alloc.h" -#include "idfile.h" -#include "strxtra.h" - -typedef int (*iof_t) __P((FILE *, void *, unsigned int, int)); -static int io_idhead __P((FILE *fp, iof_t iof, struct idhead *idh)); -static int io_write __P((FILE *output_FILE, void *addr, unsigned int size, int is_int)); -static int io_read __P((FILE *input_FILE, void *addr, unsigned int size, int is_int)); -static int io_size __P((FILE *, void *, unsigned int size, int)); - -extern char *program_name; - -/* init_id_file opens the ID file, reads header fields into idh, - verifies the magic number and version, and reads the constituent - file names. 
Any errors are considered fatal and cause an exit. */ - -FILE * -init_id_file (char const *id_file_name, struct idhead *idh) -{ - FILE *id_FILE = maybe_init_id_file (id_file_name, idh); - if (id_FILE) - return id_FILE; - error (1, errno, "Can't open `%s'", id_file_name); - return NULL; -} - -/* maybe_init_id_file does everything that init_id_file does, but is - tolerant of errors opening the ID file, returning NULL in this case - (this is called from mkid where an ID might or might not already - exist). All other errors are considered fatal. */ - -FILE * -maybe_init_id_file (char const *id_file_name, struct idhead *idh) -{ - FILE *id_FILE; - unsigned int i; - char *strings; - struct idarg *ida; - - id_FILE = fopen (id_file_name, "r"); - if (id_FILE == NULL) - return NULL; - - read_idhead (id_FILE, idh); - if (idh->idh_magic[0] != IDH_MAGIC_0 || idh->idh_magic[1] != IDH_MAGIC_1) - error (1, 0, "`%s' is not an ID file! (bad magic #)", id_file_name); - if (idh->idh_version != IDH_VERSION) - error (1, 0, "`%s' is version %d, but I only grok version %d", - id_file_name, idh->idh_version, IDH_VERSION); - - fseek (id_FILE, idh->idh_args_offset, 0); - /* NEEDSWORK */ - fseek (id_FILE, idh->idh_files_offset, 0); - - i = idh->idh_tokens_offset - idh->idh_args_offset; - strings = malloc (i); - fread (strings, i, 1, id_FILE); - ida = *id_args = CALLOC (struct idarg, idh->idh_files); - for (i = 0; i < idh->idh_files; i++) - { - while (*strings == '+' || *strings == '-') - { - while (*strings++) - ; - } - ida->ida_flags = 0; - ida->ida_arg = strings; - ida->ida_next = ida + 1; - ida->ida_index = i; - ida++; - while (*strings++) - ; - } - (--ida)->ida_next = NULL; - return id_FILE; -} - - -unsigned long -file_link_hash_1 (void const *key) -{ - unsigned long result = 0; - ADDRESS_HASH_1 (((struct file_link const *) key)->fl_parent, result); - STRING_HASH_1 (((struct file_link const *) key)->fl_name, result); - return result; -} - -unsigned long -file_link_hash_2 (void const *key) 
-{ - unsigned long result = 0; - ADDRESS_HASH_2 (((struct file_link const *) key)->fl_parent, result); - STRING_HASH_2 (((struct file_link const *) key)->fl_name, result); - return result; -} - -int -file_link_hash_cmp (void const *x, void const *y) -{ - int result; - ADDRESS_CMP (((struct file_link const *) x)->fl_parent, - ((struct file_link const *) y)->fl_parent, result); - if (result) - return result; - STRING_CMP (((struct file_link const *) x)->fl_name, - ((struct file_link const *) y)->fl_name, result); - return result; -} - - -int -read_idhead (FILE *input_FILE, struct idhead *idh) -{ - return io_idhead (input_FILE, io_read, idh); -} - -int -write_idhead (FILE *input_FILE, struct idhead *idh) -{ - return io_idhead (input_FILE, io_write, idh); -} - -int -sizeof_idhead () -{ - return io_idhead (0, io_size, 0); -} - -static int -io_size (FILE *ignore_FILE, void *ignore_addr, unsigned int size, int ignore_int) -{ - return size; -} - -static int -io_read (FILE *input_FILE, void *addr, unsigned int size, int is_int) -{ - if (is_int) - { - switch (size) - { - case 4: /* This must be a literal 4. Don't use sizeof (unsigned long)! */ - *(unsigned long *)addr = getc (input_FILE); - *(unsigned long *)addr += getc (input_FILE) << 010; - *(unsigned long *)addr += getc (input_FILE) << 020; - *(unsigned long *)addr += getc (input_FILE) << 030; - break; - case 2: - *(unsigned short *)addr = getc (input_FILE); - *(unsigned short *)addr += getc (input_FILE) << 010; - break; - case 1: - *(unsigned char *)addr = getc (input_FILE); - break; - default: - fprintf (stderr, "Unsupported size in io_write (): %d\n", size); - abort (); - } - } - else if (size > 1) - fread (addr, size, 1, input_FILE); - else - *(char *)addr = getc (input_FILE); - return size; -} - -static int -io_write (FILE *output_FILE, void *addr, unsigned int size, int is_int) -{ - if (is_int) - { - switch (size) - { - case 4: /* This must be a literal 4. Don't use sizeof (unsigned long)! 
*/ - putc (*(unsigned long *)addr, output_FILE); - putc (*(unsigned long *)addr >> 010, output_FILE); - putc (*(unsigned long *)addr >> 020, output_FILE); - putc (*(unsigned long *)addr >> 030, output_FILE); - break; - case 2: - putc (*(unsigned short *)addr, output_FILE); - putc (*(unsigned short *)addr >> 010, output_FILE); - break; - case 1: - putc (*(unsigned char *)addr, output_FILE); - break; - default: - fprintf (stderr, "Unsupported size in io_write (): %d\n", size); - abort (); - } - } - else if (size > 1) - fwrite (addr, size, 1, output_FILE); - else - putc (*(char *)addr, output_FILE); - return size; -} - -/* The sizes of the fields must be hard-coded. They aren't - necessarily the sizes of the struct members, because some - architectures don't have any way to declare 4-byte integers - (e.g., Cray) */ - -static int -io_idhead (FILE *fp, iof_t iof, struct idhead *idh) -{ - unsigned int size = 0; - unsigned char pad = 0; - if (fp) - fseek (fp, 0L, 0); - size += iof (fp, idh->idh_magic, 2, 0); - size += iof (fp, &pad, 1, 0); - size += iof (fp, &idh->idh_version, 1, 0); - size += iof (fp, &idh->idh_flags, 2, 1); - size += iof (fp, &idh->idh_links, 4, 1); - size += iof (fp, &idh->idh_files, 4, 1); - size += iof (fp, &idh->idh_tokens, 4, 1); - size += iof (fp, &idh->idh_buf_size, 4, 1); - size += iof (fp, &idh->idh_vec_size, 4, 1); - size += iof (fp, &idh->idh_args_offset, 4, 1); - size += iof (fp, &idh->idh_tokens_offset, 4, 1); - size += iof (fp, &idh->idh_end_offset, 4, 1); - return size; -} diff --git a/idfile.h b/idfile.h deleted file mode 100644 index be5b00e..0000000 --- a/idfile.h +++ /dev/null @@ -1,102 +0,0 @@ -/* idfile.h -- decls for ID file header and constituent file names - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _idfile_h_ -#define _idfile_h_ 1 - -#include -#include -#include "hash.h" - -#define IDFILE "ID" - -struct idhead -{ - unsigned char idh_magic[2]; -#define IDH_MAGIC_0 ('I'|0x80) -#define IDH_MAGIC_1 ('D'|0x80) - unsigned char idh_version; -#define IDH_VERSION 3 - unsigned short idh_flags; -#define IDH_COUNTS 0x0001 /* include occurrence counts for each token */ -#define IDH_FOLLOW_SL 0x0002 /* follow symlinks to directories */ -#define IDH_COMMENTS 0x0004 /* include tokens found in comments */ -#define IDH_LOCALS 0x0008 /* include names of formal params & local vars */ -#define IDH_DECL_DEFN_USE 0x0100 /* include decl/defn/use info */ -#define IDH_L_R_VALUE 0x0200 /* include lvalue/rvalue info */ -#define IDH_CALL_ER_EE 0x0400 /* include caller/callee relationship info */ - unsigned long idh_links; /* total # of file name components */ - unsigned long idh_files; /* total # of constituent source files */ - unsigned long idh_tokens; /* total # of constituent tokens */ - /* idh_*_size: max buffer-sizes for ID file reading programs */ - unsigned long idh_buf_size; /* # of bytes in longest entry */ - unsigned long idh_vec_size; /* # of hits in longest entry */ - unsigned long idh_path_size; /* # of bytes in longest file name path */ - /* idh_*_offset: ID file offsets for start of various sections */ - long idh_args_offset; /* command-line options section */ - long idh_files_offset; /* constituent file & directory names section */ - long idh_tokens_offset; /* constituent tokens section */ - long 
idh_end_offset; /* end of tokens section */ - /* */ - struct hash_table ia_link_table; /* all file and dir name name links */ - struct arg_file **ia_file_order; /* sequence in ID file */ - struct arg_file **ia_scan_order; /* sequence in summaries */ -}; - -struct file_link -{ - struct file_link *fl_parent; - unsigned char fl_flags; -#define FL_IS_ARG 0x01 /* is an explicit command-line argument */ -#define FL_SYM_LINK 0x02 /* is a symlink (only used for dirs) */ -#define FL_TYPE_MASK 0x10 -# define FL_TYPE_DIR 0x00 -# define FL_TYPE_FILE 0x10 - char fl_name[1]; -}; - -struct arg_file -{ - struct file_link *af_name; - short af_old_index; /* order in extant ID file */ - short af_new_index; /* order in new ID file */ - short af_scan_index; /* order of scanning in summary */ -}; - -#if HAVE_LINK - -/* If the system supports filesystem links (e.g., any UN*X variant), - we should detect file name aliases. */ - -struct dev_ino -{ - dev_t di_dev; - ino_t di_ino; - struct file_link *di_file_link; -}; - -extern struct hash_table dev_ino_table; - -#endif - -FILE *init_id_file __P((char const *id_file, struct idhead *idh)); -int read_idhead __P((FILE *input_FILE, struct idhead *idh)); -int write_idhead __P((FILE *input_FILE, struct idhead *idh)); -int sizeof_idhead __P((void)); - -#endif /* not _idfile_h_ */ diff --git a/idx.c b/idx.c deleted file mode 100644 index 2af1039..0000000 --- a/idx.c +++ /dev/null @@ -1,95 +0,0 @@ -/* static char copyright[] = "@(#)Copyright (c) 1986, Greg McGary"; - static char sccsid[] = "@(#)idx.c 1.2 86/10/17"; */ - -#include -#include - -#include -#include "misc.h" -#include "filenames.h" -#include "scanners.h" - -void idxtract __P((char *path)); - -char const *program_name; - -static void -usage (void) -{ - fprintf (stderr, "\ -Usage: %s [(+|-)S] files\n\ - -S- Pass arg to scanner\n\ - -S.= Scan files with . as \n\ - -S? 
Print usage documentation for \n", - program_name); - - exit (1); -} - -int -main (int argc, char **argv) -{ - char *arg; - int op; - - program_name = basename ((argc--, *argv++)); - - init_scanners (); - - while (argc) - { - arg = (argc--, *argv++); - switch (op = *arg++) - { - case '-': - case '+': - break; - default: - (argc++, --argv); - goto argsdone; - } - switch (*arg++) - { - case 'S': - set_scan_args (op, arg); - break; - default: - usage (); - } - } -argsdone: - - if (argc == 0) - usage (); - - while (argc) - idxtract ((argc--, *argv++)); - - return 0; -} - -void -idxtract (char *file_name) -{ - char const *key; - FILE *source_FILE; - int flags; - char const *suffix; - char const *filter; - char const *lang_name; - get_token_t scanner; - - suffix = strrchr (file_name, '.'); - lang_name = get_lang_name (suffix); - scanner = get_scanner (lang_name); - if (scanner == NULL) - return; - source_FILE = open_source_FILE (file_name, filter = get_filter (suffix)); - if (source_FILE == NULL) - return; - - while ((key = (*scanner) (source_FILE, &flags)) != NULL) - puts (key); - - close_source_FILE (source_FILE, filter); -} diff --git a/iid.1 b/iid.1 deleted file mode 100644 index 0cc256f..0000000 --- a/iid.1 +++ /dev/null @@ -1,235 +0,0 @@ -.TH IID 1 -.SH NAME -iid \- interactive query for ID database -.SH SYNOPSIS -.PP -.B iid -.RB [ \-a] -.RB [ \-c \^command] -.RB [ \-H] -.SH DESCRIPTION -This command provides an interactive query interface to the -.I ID -database. -.I Iid\^ -allows you to query an -.I ID -database in a fashion similar to using \fIDIALOG\fP. Any individual -query command results in a list of files that satisfy that query, -each set of files is retained by -.I iid -and assigned a set number. The sets may be combined with -.IR AND , -.I OR -and -.I NOT -operators to produce additional sets. The primitive operators that -produce sets are invocations of the -.I lid -or -.I aid -programs. -.SH OPTIONS -Normally -.I iid -runs interactively. 
Options may be used to run it in batch mode. -.TP 8 -.B \-a -Use the -.I aid -program as the default query program, normally -.I lid -is used. -.TP 8 -.B \-c -Accept a single command as an argument, run that command, and exit -.IR Iid . -.TP -.B \-H -Print a brief help message and exit. -.SH SUBCOMMANDS -The subcommands are used to carry on a dialog with -.I iid -after invoking the program. -.PP -Two basic query commands are available: -.B SS -and -.BR FILES . -The -.B SS -command shows the sets generated by a query, but does not display -the actual file names that satisfy the query. -The -.B FILES -command only displays the list of files, it does not show any -of the sets created during the query. -.PP -Queries consist of keywords and identifier strings. The keywords are: -.B and or not lid aid match -and -.B s -where -.B s -is a set number consisting of the letter -.B s -followed (with no space) by a decimal set number. -A clause of the form -.B lid -invokes -.I lid -with the -.B -as arguments and produces a set of files as a result. -Substituting -.B aid -for -.B lid -runs the -.I aid -program to generate the list of files. -As a shorthand notation for -.B lid -you may simply use -.B . -The -.B match -operator runs the standard system -.I ls -utility to produce a set of files. This allows sets to be -constructed based on the names of files (using wild cards) -rather than contents. -The -.B and or -and -.B not -operators can be used to combine sets in the obvious fashion. -If you need to pass any of the keywords as actual arguments to -programs, or if the search strings contain any shell escape -characters place the argument in quotes. -.PP -The -.B NOT -operator has highest precedence, followed by -.B AND -and -.B OR -in that order. Parenthesis may be used for grouping. -.PP -The remaining commands are: -.PP -.B BEGIN -accepts a directory name and switches to that directory. By changing -directories you control which -.I ID -database is searched. 
Changing directories automatically deletes -all the sets constructed so far. The -.B BEGIN -command may be abbreviated as -.BR B . -.PP -.B SETS -shows the description of all the sets created so far. Each set -description has the set number, the number of files in the set, -and a symbolic description of the query that created the set. -.PP -.B SHOW -runs a pager program, passing as arguments all the files in -the specified set. The pager program comes from the -.B $PAGER -environment variable. This command may be abbreviated -.BR P . -.PP -.B HELP -runs the pager on the help file. The commands -.B H -and -.B ? -also act as help commands. -.PP -.B OFF -exits the program. -.B Q -is short for -.BR OFF . -.PP -All commands and keywords are case insensitive, so that -.B SHOW ShOW -and -.B show -all work equally well. -.SH INTERFACE -Two forms of commands are provided for interface with arbitrary -programs. Any command that is not recognized as one -of the above built in -.I iid -commands, is assumed to be a program which, when run, will print -a list of file names. -.I Iid -runs the command as typed, and records the output as a new set -which may be combined with other sets in subsequent queries. -.PP -If the command starts with a -.BR !, -.I iid -strips off the leading -.B ! -and simply runs the command. Any output goes to stdout and -is not recorded as a set. -.PP -In both types of shell commands, any set numbers specified as -arguments are expanded into a list of file names before running -the command. -.SH EXAMPLE -.nf -.ft L -===> iid -iid> ss lid "^get" or lid "Arg$" - S0 14 lid -kmn "^get" - S1 3 lid -kmn "Arg$" - S2 15 (lid -kmn "^get") OR (lid -kmn "Arg$") -iid> f s1 -lid.c -paths.c -init.c -iid> off -.FT P -.fi -.EX off -.PP -In this example the -.B ss -command displays the sets it creates as it -does the parts of the query. In this case 3 sets are created, set S0 -has 14 files in it, set S1 has 3 files and the union of the two sets, -S2, has 15 files. 
A description of the query that created any given -set is kept along with the set and displayed when sets are printed. -.PP -The -.B f s1 -command lists the three files in set S1. -.PP -The -.B off -command terminates the example session. -.SH HINTS -The shell interface commands can be used to generate file sets by -running the -.I find -or -.I ls -utilities, or compiles of a selected group of files can be done -using the -.BR ! cc -command with a set number as the argument. -.BR ! lp -can be used to print a selected group of files. -.PP -This program interfaces nicely with -.I emacs -if you run the server program and specify the client program -as your $PAGER. -.SH SEE ALSO -mkid(1), -lid(1), -aid(1). diff --git a/iid.c b/iid.c deleted file mode 100644 index 61edcca..0000000 --- a/iid.c +++ /dev/null @@ -1,2329 +0,0 @@ - -/* A Bison parser, made from ./iid.y with Bison version GNU Bison version 1.22 - */ - -#define YYBISON 1 /* Identify Bison output. */ - -#define SET 258 -#define ID 259 -#define SHELL_QUERY 260 -#define SHELL_COMMAND 261 -#define LID 262 -#define AID 263 -#define BEGIN 264 -#define SETS 265 -#define SS 266 -#define FILES 267 -#define SHOW 268 -#define HELP 269 -#define OFF 270 -#define MATCH 271 -#define OR 272 -#define AND 273 -#define NOT 274 - -#line 1 "./iid.y" - -/* iid.y -- interactive mkid query language - Copyright (C) 1991 Tom Horsley - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. 
If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include -#include -#include -#include -#include -#include - -#include -#include "strxtra.h" -#include "obstack.h" -#include "xmalloc.h" - -FILE *popen (); - -#define obstack_chunk_alloc xmalloc -#define obstack_chunk_free free - -#if HAVE_ALLOCA - -#if HAVE_ALLOCA_H -#include -#endif -#define TEMP_ALLOC(s) alloca(s) -#define TEMP_FREE(s) - -#else /* not HAVE_ALLOCA */ - -#define TEMP_ALLOC(s) malloc(s) -#define TEMP_FREE(s) free(s) - -#endif /* not HAVE_ALLOCA */ - -#define HASH_SIZE 947 /* size of hash table for file names */ -#define INIT_FILES 8000 /* start with bits for this many */ -#define INIT_SETSPACE 500 /* start with room for this many */ -#define MAXCMD 1024 /* input command buffer size */ - -#define MAX(a,b) (((a)<(b))?(b):(a)) -#define MIN(a,b) (((a)>(b))?(b):(a)) - -#ifndef PAGER -#define PAGER "pg" -#endif - -#define PROMPT "iid> " - -/* set_type is the struct defining a set of file names - * The file names are stored in a symbol table and assigned - * unique numbers. The set is a bit set of file numbers. - * One of these set structs is calloced for each new set - * constructed, the size allocated depends on the max file - * bit number. An array of pointers to sets are kept to - * represent the complete set of sets. 
- */ - -struct set_struct { - char * set_desc ; /* string describing the set */ - int set_num ; /* the set number */ - int set_size ; /* number of long words in set */ - unsigned long int set_tail ; /* set extended with these bits */ - unsigned long int set_data[1] ;/* the actual set data (calloced) */ -} ; -typedef struct set_struct set_type ; - -/* id_type is one element of an id_list - */ - -struct id_struct { - struct id_struct * next_id ; /* Linked list of IDs */ - char id [ 1 ] ; /* calloced data holding id string */ -} ; -typedef struct id_struct id_type ; - -/* id_list_type is used during parsing to build lists of - * identifiers that will eventually represent arguments - * to be passed to the database query programs. - */ - -struct id_list_struct { - int id_count ; /* count of IDs in the list */ - id_type * * end_ptr_ptr ;/* pointer to link word at end of list */ - id_type * id_list ; /* pointer to list of IDs */ -} ; -typedef struct id_list_struct id_list_type ; - -/* symtab_type is used to record file names in the symbol table. - */ -struct symtab_struct { - struct symtab_struct * hash_link ; /* list of files with same hash code */ - int mask_word ; /* word in bit vector */ - unsigned long mask_bit ; /* bit in word */ - char name [ 1 ] ; /* the file name */ -} ; -typedef struct symtab_struct symtab_type ; - -/* LidCommand is the command to run for a Lid_group. It is set - * to "lid -kmn" if explicitly preceeded by "lid", otherwise - * it is the default command which is determined by an option. - */ -char const * LidCommand ; - -/* DefaultCommand is the default command for a Lid_group. If - * the -a option is given to iid, it is set to use 'aid'. - */ -char const * DefaultCommand = "lid -kmn" ; - -/* FileList is a lexically ordered list of file symbol table - * pointers. It is dynamically expanded when necessary. - */ -symtab_type * * FileList = NULL ; - -/* FileSpace is the number of long ints in TheFiles array. 
- */ -int FileSpace = 0 ; - -/* HashTable is the symbol table used to store file names. Each - * new name installed is assigned the next consecutive file number. - */ -symtab_type * HashTable [ HASH_SIZE ] ; - -/* HelpSet is a dummy set containing only one bit set which corresponds - * to the help file name. Simply a cheesy way to maximize sharing of - * the code that runs the pager. - */ -set_type * HelpSet ; - -/* high_bit is a unsigned long with the most significant bit set. - */ -unsigned long high_bit ; - -/* ListSpace is the amount of space avail in the FileList. - */ -int ListSpace = 0 ; - -/* MaxCurFile - max word that has any bit currently set in the - * TheFiles array. - */ -int MaxCurFile = 0 ; - -/* NextFileNum is the file number that will be assigned to the next - * new file name seen when it is installed in the symtab. - */ -int NextFileNum = 0 ; - -/* NextMaskBit is the bit within the next mask word that will - * correspond to the next file added to the symbol table. - */ -unsigned long NextMaskBit ; - -/* NextMaskWord is the next word number to be assigned to a file - * bit mask entry. - */ -int NextMaskWord = 0 ; - -/* NextSetNum is the number that will be assigned to the next set - * created. Starts at 0 because I am a C programmer. - */ -int NextSetNum = 0 ; - -/* The PAGER program to run on a SHOW command. - */ -char Pager[MAXCMD] ; - -/* Prompt - the string to use for a prompt. - */ -char Prompt[MAXCMD] ; - -/* SetSpace is the number of pointers available in TheSets. TheSets - * is realloced when we run out of space. - */ -int SetSpace = 0 ; - -/* TheFiles is a bit set used to construct the initial set of files - * generated while running one of the subprograms. It is copied to - * the alloced set once we know how many bits are set. - */ -unsigned long * TheFiles = NULL ; - -/* TheSets is a dynamically allocated array of pointers pointing - * the sets that have been allocated. It represents the set of - * sets. 
- */ -set_type * * TheSets = NULL ; - -/* VerboseQuery controls the actions of the semantic routines during - * the process of a query. If TRUE the sets are described as they - * are constructed. - */ -int VerboseQuery ; - -char const *program_name ; - -int yyerror __P(( char const * s )) ; -void ScanInit __P(( char * line )) ; -int yylex __P(( void )) ; -int ArgListSize __P(( id_list_type * idlp )) ; -int SetListSize __P(( set_type * sp )) ; -void FlushFiles __P(( void )) ; -void fatal __P(( char const * s )) ; -int CountBits __P(( set_type * sp )) ; -void OneDescription __P(( set_type * sp )) ; -void DescribeSets __P(( void )) ; -id_list_type * SetList __P(( id_list_type * idlp , set_type * sp )) ; -void PrintSet __P(( set_type * sp )) ; -void FlushSets __P(( void )) ; -id_list_type * InitList __P(( void )) ; -id_list_type * ExtendList __P(( id_list_type * idlp , id_type * idp )) ; -void InitIid __P(( void )) ; -symtab_type * InstallFile __P(( char const * fp )) ; -void RunPager __P(( char * pp , set_type * sp )) ; -void AddSet __P(( set_type * sp )) ; -set_type * RunProg __P(( char const * pp , id_list_type * idlp )) ; -void SetDirectory __P(( id_type * dir )) ; -set_type * SetIntersect __P(( set_type * sp1 , set_type * sp2 )) ; -set_type * SetUnion __P(( set_type * sp1 , set_type * sp2 )) ; -set_type * SetInverse __P(( set_type * sp )) ; -void RunShell __P(( char * pp , id_list_type * idlp )) ; - - -#line 240 "./iid.y" -typedef union { - set_type * setdef ; - id_type * strdef ; - id_list_type * listdef ; -} YYSTYPE; - -#ifndef YYLTYPE -typedef - struct yyltype - { - int timestamp; - int first_line; - int first_column; - int last_line; - int last_column; - char *text; - } - yyltype; - -#define YYLTYPE yyltype -#endif - -#include - -#ifndef __cplusplus -#ifndef __STDC__ -#define const -#endif -#endif - - - -#define YYFINAL 46 -#define YYFLAG -32768 -#define YYNTBASE 22 - -#define YYTRANSLATE(x) ((unsigned)(x) <= 274 ? 
yytranslate[x] : 31) - -static const char yytranslate[] = { 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 20, - 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19 -}; - -#if YYDEBUG != 0 -static const short yyprhs[] = { 0, - 0, 3, 6, 9, 12, 14, 16, 18, 21, 24, - 26, 28, 30, 34, 38, 41, 43, 45, 47, 50, - 54, 56, 59, 62, 64, 66, 69, 72, 74 -}; - -static const short yyrhs[] = { 9, - 4, 0, 23, 25, 0, 24, 25, 0, 13, 3, - 0, 10, 0, 14, 0, 15, 0, 5, 29, 0, - 6, 29, 0, 11, 0, 12, 0, 26, 0, 25, - 18, 25, 0, 25, 17, 25, 0, 19, 25, 0, - 3, 0, 27, 0, 28, 0, 16, 30, 0, 20, - 25, 21, 0, 4, 0, 7, 30, 0, 8, 30, - 0, 4, 0, 3, 0, 29, 4, 0, 29, 3, - 0, 4, 0, 30, 4, 0 -}; - -#endif - -#if YYDEBUG != 0 -static const short yyrline[] = { 0, - 266, 274, 275, 281, 287, 293, 299, 303, 310, 319, - 328, 337, 344, 353, 362, 373, 380, 389, 398, 406, - 414, 423, 432, 441, 449, 456, 462, 470, 478 -}; - -static const char * const yytname[] = { "$","error","$illegal.","SET","ID", -"SHELL_QUERY","SHELL_COMMAND","LID","AID","BEGIN","SETS","SS","FILES","SHOW", -"HELP","OFF","MATCH","OR","AND","NOT","'('","')'","Command","Set_query","File_query", -"Query","Primitive","Lid_group","Aid_group","Command_list","Id_list","" -}; 
-#endif - -static const short yyr1[] = { 0, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, - 24, 25, 25, 25, 25, 26, 26, 26, 26, 26, - 27, 27, 28, 29, 29, 29, 29, 30, 30 -}; - -static const short yyr2[] = { 0, - 2, 2, 2, 2, 1, 1, 1, 2, 2, 1, - 1, 1, 3, 3, 2, 1, 1, 1, 2, 3, - 1, 2, 2, 1, 1, 2, 2, 1, 2 -}; - -static const short yydefact[] = { 0, - 0, 0, 0, 5, 10, 11, 0, 6, 7, 0, - 0, 25, 24, 8, 9, 1, 4, 16, 21, 0, - 0, 0, 0, 0, 2, 12, 17, 18, 3, 27, - 26, 28, 22, 23, 19, 15, 0, 0, 0, 29, - 20, 14, 13, 0, 0, 0 -}; - -static const short yydefgoto[] = { 44, - 10, 11, 25, 26, 27, 28, 14, 33 -}; - -static const short yypact[] = { 10, - 5, 5, 22,-32768,-32768,-32768, 28,-32768,-32768, -2, - -2,-32768,-32768, 7, 7,-32768,-32768,-32768,-32768, 30, - 30, 30, -2, -2, 12,-32768,-32768,-32768, 12,-32768, --32768,-32768, 31, 31, 31,-32768, -14, -2, -2,-32768, --32768, 18,-32768, 37, 38,-32768 -}; - -static const short yypgoto[] = {-32768, --32768,-32768, -11,-32768,-32768,-32768, 39, 11 -}; - - -#define YYLAST 41 - - -static const short yytable[] = { 29, - 18, 19, 38, 39, 20, 21, 41, 12, 13, 30, - 31, 36, 37, 22, 1, 2, 23, 24, 3, 4, - 5, 6, 7, 8, 9, 16, 42, 43, 38, 39, - 17, 34, 35, 32, 40, 39, 45, 46, 0, 0, - 15 -}; - -static const short yycheck[] = { 11, - 3, 4, 17, 18, 7, 8, 21, 3, 4, 3, - 4, 23, 24, 16, 5, 6, 19, 20, 9, 10, - 11, 12, 13, 14, 15, 4, 38, 39, 17, 18, - 3, 21, 22, 4, 4, 18, 0, 0, -1, -1, - 2 -}; -/* -*-C-*- Note some compilers choke on comments on `#line' lines. */ -#line 3 "/usr/lib/bison.simple" - -/* Skeleton output parser for bison, - Copyright (C) 1984, 1989, 1990 Bob Corbett and Richard Stallman - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - - -#ifndef alloca -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not GNU C. */ -#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || defined (__sgi) -#include -#else /* not sparc */ -#if defined (MSDOS) && !defined (__TURBOC__) -#include -#else /* not MSDOS, or __TURBOC__ */ -#if defined(_AIX) -#include - #pragma alloca -#else /* not MSDOS, __TURBOC__, or _AIX */ -#ifdef __hpux -#ifdef __cplusplus -extern "C" { -void *alloca (unsigned int); -}; -#else /* not __cplusplus */ -void *alloca (); -#endif /* not __cplusplus */ -#endif /* __hpux */ -#endif /* not _AIX */ -#endif /* not MSDOS, or __TURBOC__ */ -#endif /* not sparc. */ -#endif /* not GNU C. */ -#endif /* alloca not defined. */ - -/* This is the parser code that is written into each bison parser - when the %semantic_parser declaration is not specified in the grammar. - It was written by Richard Stallman by simplifying the hairy parser - used when %semantic_parser is specified. */ - -/* Note: there must be only one dollar sign in this file. - It is replaced by the list of actions, each action - as one case of the switch. */ - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY -2 -#define YYEOF 0 -#define YYACCEPT return(0) -#define YYABORT return(1) -#define YYERROR goto yyerrlab1 -/* Like YYERROR except do call yyerror. - This remains here temporarily to ease the - transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. 
*/ -#define YYFAIL goto yyerrlab -#define YYRECOVERING() (!!yyerrstatus) -#define YYBACKUP(token, value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { yychar = (token), yylval = (value); \ - yychar1 = YYTRANSLATE (yychar); \ - YYPOPSTACK; \ - goto yybackup; \ - } \ - else \ - { yyerror ("syntax error: cannot back up"); YYERROR; } \ -while (0) - -#define YYTERROR 1 -#define YYERRCODE 256 - -#ifndef YYLEX -#ifndef YYPURE -#define YYLEX yylex() -#else -#ifdef YYLSP_NEEDED -#define YYLEX yylex(&yylval, &yylloc) -#else -#define YYLEX yylex(&yylval) -#endif -#endif -#endif - -/* If nonreentrant, generate the variables here */ - -#ifndef YYPURE - -int yychar; /* the lookahead symbol */ -YYSTYPE yylval; /* the semantic value of the */ - /* lookahead symbol */ - -#ifdef YYLSP_NEEDED -YYLTYPE yylloc; /* location data for the lookahead */ - /* symbol */ -#endif - -int yynerrs; /* number of parse errors so far */ -#endif /* not YYPURE */ - -#if YYDEBUG != 0 -int yydebug; /* nonzero means print parse trace */ -/* Since this is uninitialized, it does not stop multiple parsers - from coexisting. */ -int yydebug_reducing = 0; -#endif - -/* YYINITDEPTH indicates the initial size of the parser's stacks */ - -#ifndef YYINITDEPTH -#define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH is the maximum size the stacks can grow to - (effective only if the built-in stack extension method is used). */ - -#if YYMAXDEPTH == 0 -#undef YYMAXDEPTH -#endif - -#ifndef YYMAXDEPTH -#define YYMAXDEPTH 10000 -#endif - -/* Prevent warning if -Wstrict-prototypes. */ -#ifdef __GNUC__ -int yyparse (void); -#endif - -#if __GNUC__ > 1 /* GNU C and GNU C++ define this. */ -#define __yy_bcopy(FROM,TO,COUNT) __builtin_memcpy(TO,FROM,COUNT) -#else /* not GNU C or C++ */ -#ifndef __cplusplus - -/* This is the most reliable way to avoid incompatibilities - in available built-in functions on various systems. 
*/ -static void -__yy_bcopy (from, to, count) - char *from; - char *to; - int count; -{ - register char *f = from; - register char *t = to; - register int i = count; - - while (i-- > 0) - *t++ = *f++; -} - -#else /* __cplusplus */ - -/* This is the most reliable way to avoid incompatibilities - in available built-in functions on various systems. */ -static void -__yy_bcopy (char *from, char *to, int count) -{ - register char *f = from; - register char *t = to; - register int i = count; - - while (i-- > 0) - *t++ = *f++; -} - -#endif -#endif - -#line 185 "/usr/lib/bison.simple" -int -yyparse() -{ - register int yystate; - register int yyn; - register short *yyssp; - register YYSTYPE *yyvsp; - int yyerrstatus; /* number of tokens to shift before error messages enabled */ - int yychar1 = 0; /* lookahead token as an internal (translated) token number */ - - short yyssa[YYINITDEPTH]; /* the state stack */ - YYSTYPE yyvsa[YYINITDEPTH]; /* the semantic value stack */ - - short *yyss = yyssa; /* refer to the stacks thru separate pointers */ - YYSTYPE *yyvs = yyvsa; /* to allow yyoverflow to reallocate them elsewhere */ - -#ifdef YYLSP_NEEDED - YYLTYPE yylsa[YYINITDEPTH]; /* the location stack */ - YYLTYPE *yyls = yylsa; - YYLTYPE *yylsp; - -#define YYPOPSTACK (yyvsp--, yyssp--, yylsp--) -#else -#define YYPOPSTACK (yyvsp--, yyssp--) -#endif - - int yystacksize = YYINITDEPTH; - -#ifdef YYPURE - int yychar; - YYSTYPE yylval; - int yynerrs; -#ifdef YYLSP_NEEDED - YYLTYPE yylloc; -#endif -#endif - - YYSTYPE yyval; /* the variable used to return */ - /* semantic values from the action */ - /* routines */ - - int yylen; - -#if YYDEBUG != 0 - if (yydebug) -#ifndef __cplusplus - fprintf(stderr, "Starting parse\n"); -#else /* __cplusplus */ - clog << "Starting parse" << endl; -#endif /* __cplusplus */ -#endif - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. 
- Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ - - yyssp = yyss - 1; - yyvsp = yyvs; -#ifdef YYLSP_NEEDED - yylsp = yyls; -#endif - -/* Push a new state, which is found in yystate . */ -/* In all cases, when you get here, the value and location stacks - have just been pushed. so pushing a state here evens the stacks. */ -yynewstate: - - *++yyssp = yystate; - - if (yyssp >= yyss + yystacksize - 1) - { - /* Give user a chance to reallocate the stack */ - /* Use copies of these so that the &'s don't force the real ones into memory. */ - YYSTYPE *yyvs1 = yyvs; - short *yyss1 = yyss; -#ifdef YYLSP_NEEDED - YYLTYPE *yyls1 = yyls; -#endif - - /* Get the current used size of the three stacks, in elements. */ - int size = yyssp - yyss + 1; - -#ifdef yyoverflow - /* Each stack pointer address is followed by the size of - the data in use in that stack, in bytes. */ -#ifdef YYLSP_NEEDED - /* This used to be a conditional around just the two extra args, - but that might be undefined if yyoverflow is a macro. */ - yyoverflow("parser stack overflow", - &yyss1, size * sizeof (*yyssp), - &yyvs1, size * sizeof (*yyvsp), - &yyls1, size * sizeof (*yylsp), - &yystacksize); -#else - yyoverflow("parser stack overflow", - &yyss1, size * sizeof (*yyssp), - &yyvs1, size * sizeof (*yyvsp), - &yystacksize); -#endif - - yyss = yyss1; yyvs = yyvs1; -#ifdef YYLSP_NEEDED - yyls = yyls1; -#endif -#else /* no yyoverflow */ - /* Extend the stack our own way. 
*/ - if (yystacksize >= YYMAXDEPTH) - { - yyerror("parser stack overflow"); - return 2; - } - yystacksize *= 2; - if (yystacksize > YYMAXDEPTH) - yystacksize = YYMAXDEPTH; - yyss = (short *) alloca (yystacksize * sizeof (*yyssp)); - __yy_bcopy ((char *)yyss1, (char *)yyss, size * sizeof (*yyssp)); - yyvs = (YYSTYPE *) alloca (yystacksize * sizeof (*yyvsp)); - __yy_bcopy ((char *)yyvs1, (char *)yyvs, size * sizeof (*yyvsp)); -#ifdef YYLSP_NEEDED - yyls = (YYLTYPE *) alloca (yystacksize * sizeof (*yylsp)); - __yy_bcopy ((char *)yyls1, (char *)yyls, size * sizeof (*yylsp)); -#endif -#endif /* no yyoverflow */ - - yyssp = yyss + size - 1; - yyvsp = yyvs + size - 1; -#ifdef YYLSP_NEEDED - yylsp = yyls + size - 1; -#endif - -#if YYDEBUG != 0 - if (yydebug >= 3) -#ifndef __cplusplus - fprintf(stderr, "Stack size increased to %d\n", yystacksize); -#else /* __cplusplus */ - clog << "Stack size increased to " << yystacksize << endl; -#endif /* __cplusplus */ -#endif - - if (yyssp >= yyss + yystacksize - 1) - YYABORT; - } - -#if YYDEBUG != 0 - if (yydebug >= 3) -#ifndef __cplusplus - fprintf(stderr, "Entering state %d\n", yystate); -#else /* __cplusplus */ - clog << "Entering state " << yystate << endl; -#endif /* __cplusplus */ -#endif - - goto yybackup; - yybackup: - -/* Do appropriate processing given the current state. */ -/* Read a lookahead token if we need one and don't already have one. */ -/* yyresume: */ - - /* First try to decide what to do without reference to lookahead token. */ - - yyn = yypact[yystate]; - if (yyn == YYFLAG) - goto yydefault; - - /* Not known => get a lookahead token if don't already have one. */ - - /* yychar is either YYEMPTY or YYEOF - or a valid token in external form. 
*/ - - if (yychar == YYEMPTY) - { -#if YYDEBUG != 0 - if (yydebug >= 3) -#ifndef __cplusplus - fprintf(stderr, "Reading a token: "); -#else /* __cplusplus */ - clog << "Reading a token: "; -#endif /* __cplusplus */ -#endif - yychar = YYLEX; - } - - /* Convert token to internal form (in yychar1) for indexing tables with */ - - if (yychar <= 0) /* This means end of input. */ - { - yychar1 = 0; - yychar = YYEOF; /* Don't call YYLEX any more */ - -#if YYDEBUG != 0 - if (yydebug) -#ifndef __cplusplus - fprintf(stderr, "Now at end of input.\n"); -#else /* __cplusplus */ - clog << "Now at end of input." << endl; -#endif /* __cplusplus */ -#endif - } - else - { - yychar1 = YYTRANSLATE(yychar); - -#if YYDEBUG != 0 - if (yydebug >= 3) - { -#ifndef __cplusplus - fprintf (stderr, "Next token is %d (%s", yychar, yytname[yychar1]); -#else /* __cplusplus */ - clog << "Next token is " << yychar << " (" << yytname[yychar1]; -#endif /* __cplusplus */ -#ifdef YYPRINT -#ifndef __cplusplus - YYPRINT (stderr, yychar, yylval); -#else /* __cplusplus */ - YYPRINT (yychar, yylval); -#endif /* __cplusplus */ -#endif -#ifndef __cplusplus - fprintf (stderr, ")\n"); -#else /* __cplusplus */ - clog << ')' << endl; -#endif /* __cplusplus */ - } -#endif - } - - yyn += yychar1; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) - goto yydefault; - - yyn = yytable[yyn]; - - /* yyn is what to do for this token type in this state. - Negative => reduce, -yyn is rule number. - Positive => shift, yyn is new state. - New state is final state => don't bother to shift, - just return success. - 0, or most negative number => error. */ - - if (yyn < 0) - { - if (yyn == YYFLAG) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - else if (yyn == 0) - goto yyerrlab; - - if (yyn == YYFINAL) - YYACCEPT; - - /* Shift the lookahead token. 
*/ - -#if YYDEBUG != 0 - if (yydebug) - { - if (yydebug_reducing) - { -#ifndef __cplusplus - fprintf(stderr, "\nShift:"); -#else /* __cplusplus */ - clog << endl << "Shift:"; -#endif /* __cplusplus */ - yydebug_reducing = 0; - } - if (yydebug >= 2) -#ifndef __cplusplus - fprintf (stderr, "Shifting token %d: %s", yychar, yytname[yychar1]); -#else /* __cplusplus */ - clog << "Shifting token " << yychar << ": " << yytname[yychar1]; -#endif /* __cplusplus */ - else -#ifndef __cplusplus - fprintf (stderr, " %s", yytname[yychar1]); -#else /* __cplusplus */ - clog << ' ' << yytname[yychar1]; -#endif /* __cplusplus */ -#ifdef YYPRINT -#ifndef __cplusplus - YYPRINT (stderr, yychar, yylval); -#else /* __cplusplus */ - YYPRINT (yychar, yylval); -#endif /* __cplusplus */ -#endif - if (yydebug >= 2) -#ifndef __cplusplus - fputc ('\n', stderr); -#else /* __cplusplus */ - clog << endl; -#endif /* __cplusplus */ - } -#endif - - /* Discard the token being shifted unless it is eof. */ - if (yychar != YYEOF) - yychar = YYEMPTY; - - *++yyvsp = yylval; -#ifdef YYLSP_NEEDED - *++yylsp = yylloc; -#endif - - /* count tokens shifted since error; after three, turn off error status. */ - if (yyerrstatus) yyerrstatus--; - - yystate = yyn; - goto yynewstate; - -/* Do the default action for the current state. */ -yydefault: - - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - -/* Do a reduction. yyn is the number of a rule to reduce with. 
*/ -yyreduce: - yylen = yyr2[yyn]; - if (yylen > 0) - yyval = yyvsp[1-yylen]; /* implement default value of the action */ - -#if YYDEBUG != 0 - if (yydebug) - { - int i; - if (!yydebug_reducing) - { -#ifndef __cplusplus - fputc('\n', stderr); -#else /* __cplusplus */ - clog << endl; -#endif /* __cplusplus */ - yydebug_reducing = 1; - } - if (yydebug >= 2) -#ifndef __cplusplus - fprintf (stderr, "Reducing via rule %d (line %d): ", yyn, yyrline[yyn]); -#else /* __cplusplus */ - clog << "Reducing via rule " << yyn << " (line " << yyrline[yyn] << " ): "; -#endif /* __cplusplus */ - else -#ifndef YYFILE -#define YYFILE "" -#endif -#ifndef __cplusplus - fprintf (stderr, YYFILE ":%d: ", yyrline[yyn]); -#else /* __cplusplus */ - clog << YYFILE ":" << yyrline[yyn] << ": "; -#endif /* __cplusplus */ - - /* Print the symbols being reduced, and their result. */ -#ifdef __cplusplus - clog << yytname[yyr1[yyn]] << " <-"; -#endif /* __cplusplus */ - for (i = yyprhs[yyn]; yyrhs[i] > 0; i++) -#ifndef __cplusplus - fprintf (stderr, "%s ", yytname[yyrhs[i]]); - fprintf (stderr, "-> %s\n", yytname[yyr1[yyn]]); -#else /* __cplusplus */ - clog << ' ' << yytname[yyrhs[i]]; - clog << endl; -#endif /* __cplusplus */ - } -#endif - - - switch (yyn) { - -case 1: -#line 268 "./iid.y" -{ - /* cd to the directory specified as argument, flush sets */ - - SetDirectory(yyvsp[0]. strdef ) ; - FlushSets() ; - ; - break;} -case 3: -#line 276 "./iid.y" -{ - /* print the list of files resulting from Query */ - - PrintSet(yyvsp[0]. setdef ) ; - ; - break;} -case 4: -#line 282 "./iid.y" -{ - /* run PAGER on the list of files in SET */ - - RunPager(Pager, yyvsp[0]. 
setdef ) ; - ; - break;} -case 5: -#line 288 "./iid.y" -{ - /* describe sets created so far */ - - DescribeSets() ; - ; - break;} -case 6: -#line 294 "./iid.y" -{ - /* run PAGER on the help file */ - - RunPager(Pager, HelpSet) ; - ; - break;} -case 7: -#line 300 "./iid.y" -{ - exit(0) ; - ; - break;} -case 8: -#line 304 "./iid.y" -{ - /* run the shell command and eat the results as a file set */ - - OneDescription(RunProg(yyvsp[-1]. strdef ->id, yyvsp[0]. listdef )) ; - free(yyvsp[-1]. strdef ) ; - ; - break;} -case 9: -#line 311 "./iid.y" -{ - /* run the shell command */ - - RunShell(yyvsp[-1]. strdef ->id, yyvsp[0]. listdef ) ; - free(yyvsp[-1]. strdef ) ; - ; - break;} -case 10: -#line 321 "./iid.y" -{ - /* Turn on verbose query flag */ - - VerboseQuery = 1 ; - ; - break;} -case 11: -#line 330 "./iid.y" -{ - /* Turn off verbose query flag */ - - VerboseQuery = 0 ; - ; - break;} -case 12: -#line 339 "./iid.y" -{ - /* value of query is set associated with primitive */ - - yyval. setdef = yyvsp[0]. setdef ; - ; - break;} -case 13: -#line 345 "./iid.y" -{ - /* value of query is intersection of the two query sets */ - - yyval. setdef = SetIntersect(yyvsp[-2]. setdef , yyvsp[0]. setdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 14: -#line 354 "./iid.y" -{ - /* value of query is union of the two query sets */ - - yyval. setdef = SetUnion(yyvsp[-2]. setdef , yyvsp[0]. setdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 15: -#line 363 "./iid.y" -{ - /* value of query is inverse of other query */ - - yyval. setdef = SetInverse(yyvsp[0]. setdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 16: -#line 375 "./iid.y" -{ - /* Value of primitive is value of recorded set */ - - yyval. setdef = yyvsp[0]. setdef ; - ; - break;} -case 17: -#line 381 "./iid.y" -{ - /* Value of primitive is obtained by running an lid query */ - - yyval. 
setdef = RunProg(LidCommand, yyvsp[0]. listdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 18: -#line 390 "./iid.y" -{ - /* Value of primitive is obtained by running an aid query */ - - yyval. setdef = RunProg("aid -kmn", yyvsp[0]. listdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 19: -#line 399 "./iid.y" -{ - /* Match names from database against pattern */ - yyval. setdef = RunProg("pid -kmn", yyvsp[0]. listdef ) ; - if (VerboseQuery) { - OneDescription(yyval. setdef ) ; - } - ; - break;} -case 20: -#line 407 "./iid.y" -{ - /* value of primitive is value of query */ - - yyval. setdef = yyvsp[-1]. setdef ; - ; - break;} -case 21: -#line 416 "./iid.y" -{ - /* make arg list holding single ID */ - - yyval. listdef = InitList() ; - yyval. listdef = ExtendList(yyval. listdef , yyvsp[0]. strdef ) ; - LidCommand = DefaultCommand ; - ; - break;} -case 22: -#line 424 "./iid.y" -{ - /* arg list is Id_list */ - - yyval. listdef = yyvsp[0]. listdef ; - LidCommand = "lid -kmn" ; - ; - break;} -case 23: -#line 434 "./iid.y" -{ - /* arg list is Id_list */ - - yyval. listdef = yyvsp[0]. listdef ; - ; - break;} -case 24: -#line 443 "./iid.y" -{ - /* make arg list holding single ID */ - - yyval. listdef = InitList() ; - yyval. listdef = ExtendList(yyval. listdef , yyvsp[0]. strdef ) ; - ; - break;} -case 25: -#line 450 "./iid.y" -{ - /* make arg list holding names from set */ - - yyval. listdef = InitList() ; - yyval. listdef = SetList(yyval. listdef , yyvsp[0]. setdef ) ; - ; - break;} -case 26: -#line 457 "./iid.y" -{ - /* extend arg list with additional ID */ - - yyval. listdef = ExtendList(yyvsp[-1]. listdef , yyvsp[0]. strdef ) ; - ; - break;} -case 27: -#line 463 "./iid.y" -{ - /* extend arg list with additional file names */ - - yyval. listdef = SetList(yyvsp[-1]. listdef , yyvsp[0]. setdef ) ; - ; - break;} -case 28: -#line 472 "./iid.y" -{ - /* make arg list holding single ID */ - - yyval. 
listdef = InitList() ; - yyval. listdef = ExtendList(yyval. listdef , yyvsp[0]. strdef ) ; - ; - break;} -case 29: -#line 479 "./iid.y" -{ - /* extend arg list with additional ID */ - - yyval. listdef = ExtendList(yyvsp[-1]. listdef , yyvsp[0]. strdef ) ; - ; - break;} -} - /* the action file gets copied in in place of this dollarsign */ -#line 557 "/usr/lib/bison.simple" - - yyvsp -= yylen; - yyssp -= yylen; -#ifdef YYLSP_NEEDED - yylsp -= yylen; -#endif - -#if YYDEBUG != 0 - if (yydebug >= 3) - { - short *ssp1 = yyss - 1; -#ifndef __cplusplus - fprintf (stderr, "state stack now"); -#else /* __cplusplus */ - clog << "state stack now"; -#endif /* __cplusplus */ - while (ssp1 != yyssp) -#ifndef __cplusplus - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); -#else /* __cplusplus */ - clog << ' ' << *++ssp1; - clog << endl; -#endif /* __cplusplus */ - } -#endif - - *++yyvsp = yyval; - -#ifdef YYLSP_NEEDED - yylsp++; - if (yylen == 0) - { - yylsp->first_line = yylloc.first_line; - yylsp->first_column = yylloc.first_column; - yylsp->last_line = (yylsp-1)->last_line; - yylsp->last_column = (yylsp-1)->last_column; - yylsp->text = 0; - } - else - { - yylsp->last_line = (yylsp+yylen-1)->last_line; - yylsp->last_column = (yylsp+yylen-1)->last_column; - } -#endif - - /* Now "shift" the result of the reduction. - Determine what state that goes to, - based on the state we popped back to - and the rule number reduced by. */ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTBASE] + *yyssp; - if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTBASE]; - - goto yynewstate; - -yyerrlab: /* here on detecting error */ - - if (! yyerrstatus) - /* If not already recovering from an error, report this error. 
*/ - { - ++yynerrs; - -#ifdef YYERROR_VERBOSE - yyn = yypact[yystate]; - - if (yyn > YYFLAG && yyn < YYLAST) - { - int size = 0; - char *msg; - int x, count; - - count = 0; - /* Start X at -yyn if nec to avoid negative indexes in yycheck. */ - for (x = (yyn < 0 ? -yyn : 0); - x < (sizeof(yytname) / sizeof(char *)); x++) - if (yycheck[x + yyn] == x) - size += strlen(yytname[x]) + 15, count++; - msg = (char *) malloc(size + 15); - if (msg != 0) - { - strcpy(msg, "parse error"); - - if (count < 5) - { - count = 0; - for (x = (yyn < 0 ? -yyn : 0); - x < (sizeof(yytname) / sizeof(char *)); x++) - if (yycheck[x + yyn] == x) - { - strcat(msg, count == 0 ? ", expecting `" : " or `"); - strcat(msg, yytname[x]); - strcat(msg, "'"); - count++; - } - } - yyerror(msg); - free(msg); - } - else - yyerror ("parse error; also virtual memory exceeded"); - } - else -#endif /* YYERROR_VERBOSE */ - yyerror("parse error"); - } - - goto yyerrlab1; -yyerrlab1: /* here on error raised explicitly by an action */ - - if (yyerrstatus == 3) - { - /* if just tried and failed to reuse lookahead token after an error, discard it. */ - - /* return failure if at end of input */ - if (yychar == YYEOF) - YYABORT; - -#if YYDEBUG != 0 - if (yydebug) -#ifndef __cplusplus - fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]); -#else /* __cplusplus */ - clog << "Discarding token " << yychar << " (" << yytname[yychar1] << ")." << endl; -#endif /* __cplusplus */ -#endif - - yychar = YYEMPTY; - } - - /* Else will try to reuse lookahead token - after shifting the error token. */ - - yyerrstatus = 3; /* Each real token shifted decrements this */ - - goto yyerrhandle; - -yyerrdefault: /* current state does not do anything special for the error token. */ - -#if 0 - /* This is wrong; only states that explicitly want error tokens - should shift them. */ - yyn = yydefact[yystate]; /* If its default is to accept any token, ok. 
Otherwise pop it.*/ - if (yyn) goto yydefault; -#endif - -yyerrpop: /* pop the current state because it cannot handle the error token */ - - if (yyssp == yyss) YYABORT; - yyvsp--; - yystate = *--yyssp; -#ifdef YYLSP_NEEDED - yylsp--; -#endif - -#if YYDEBUG != 0 - if (yydebug) - { - short *ssp1 = yyss - 1; -#ifndef __cplusplus - fprintf (stderr, "Error: state stack now"); -#else /* __cplusplus */ - clog << "Error: state stack now"; -#endif /* __cplusplus */ - while (ssp1 != yyssp) -#ifndef __cplusplus - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); -#else /* __cplusplus */ - clog << ' ' << *++ssp1; - clog << endl; -#endif /* __cplusplus */ - } -#endif - -yyerrhandle: - - yyn = yypact[yystate]; - if (yyn == YYFLAG) - goto yyerrdefault; - - yyn += YYTERROR; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR) - goto yyerrdefault; - - yyn = yytable[yyn]; - if (yyn < 0) - { - if (yyn == YYFLAG) - goto yyerrpop; - yyn = -yyn; - goto yyreduce; - } - else if (yyn == 0) - goto yyerrpop; - - if (yyn == YYFINAL) - YYACCEPT; - -#if YYDEBUG != 0 - if (yydebug) -#ifndef __cplusplus - fprintf(stderr, "Shifting error token, "); -#else /* __cplusplus */ - clog << "Shifting error token, "; -#endif /* __cplusplus */ -#endif - - *++yyvsp = yylval; -#ifdef YYLSP_NEEDED - *++yylsp = yylloc; -#endif - - yystate = yyn; - goto yynewstate; -} -#line 486 "./iid.y" - - -/* ScanLine - a global variable holding a pointer to the current - * command being scanned. - */ -char * ScanLine ; - -/* ScanPtr - a global pointer to the current scan position in ScanLine. - */ -char * ScanPtr ; - -/* yytext - buffer holding the token. - */ -char yytext [ MAXCMD ] ; - -/* yyerror - process syntax errors. - */ -int -yyerror( char const * s ) -{ - if (*ScanPtr == '\0') { - fprintf(stderr,"Syntax error near end of command.\n") ; - } else { - fprintf(stderr,"Syntax error on or before %s\n",ScanPtr) ; - } - return(0) ; -} - -/* ScanInit - initialize the yylex routine for the new line of input. 
- * Basically just initializes the global variables that hold the char - * ptrs the scanner uses. - */ -void -ScanInit( char * line ) -{ - /* skip the leading white space - the yylex routine is sensitive - * to keywords in the first position on the command line. - */ - - while (isspace(*line)) ++line ; - ScanLine = line ; - ScanPtr = line ; -} - -/* yylex - the scanner for iid. Basically a kludge ad-hoc piece of junk, - * but what the heck, if it works... - * - * Mostly just scans for non white space strings and returns ID for them. - * Does check especially for '(' and ')'. Just before returning ID it - * checks for command names if it is the first token on line or - * AND, OR, LID, AID if it is in the middle of a line. - */ -int -yylex( void ) -{ - char * bp ; - char c ; - int code = ID ; - char * dp ; - char * sp ; - int val ; - - bp = ScanPtr ; - while (isspace(*bp)) ++bp ; - sp = bp ; - c = *sp++ ; - if ((c == '(') || (c == ')') || (c == '\0')) { - ScanPtr = sp ; - if (c == '\0') { - --ScanPtr ; - } - return(c) ; - } else { - dp = yytext ; - while (! 
((c == '(') || (c == ')') || (c == '\0') || isspace(c))) { - *dp++ = c ; - c = *sp++ ; - } - *dp++ = '\0' ; - ScanPtr = sp - 1 ; - if (bp == ScanLine) { - - /* first token on line, check for command names */ - - if (strcaseequ(yytext, "SS")) return(SS) ; - if (strcaseequ(yytext, "FILES")) return(FILES) ; - if (strcaseequ(yytext, "F")) return(FILES) ; - if (strcaseequ(yytext, "HELP")) return(HELP) ; - if (strcaseequ(yytext, "H")) return(HELP) ; - if (strcaseequ(yytext, "?")) return(HELP) ; - if (strcaseequ(yytext, "BEGIN")) return(BEGIN) ; - if (strcaseequ(yytext, "B")) return(BEGIN) ; - if (strcaseequ(yytext, "SETS")) return(SETS) ; - if (strcaseequ(yytext, "SHOW")) return(SHOW) ; - if (strcaseequ(yytext, "P")) return(SHOW) ; - if (strcaseequ(yytext, "OFF")) return(OFF) ; - if (strcaseequ(yytext, "Q")) return(OFF) ; - if (strcaseequ(yytext, "QUIT")) return(OFF) ; - if (yytext[0] == '!') { - code = SHELL_COMMAND ; - } else { - code = SHELL_QUERY ; - } - } else { - - /* not first token, check for operator names */ - - if (strcaseequ(yytext, "LID")) return(LID) ; - if (strcaseequ(yytext, "AID")) return(AID) ; - if (strcaseequ(yytext, "AND")) return(AND) ; - if (strcaseequ(yytext, "OR")) return(OR) ; - if (strcaseequ(yytext, "NOT")) return(NOT) ; - if (strcaseequ(yytext, "MATCH")) return(MATCH) ; - if ((yytext[0] == 's' || yytext[0] == 'S') && isdigit(yytext[1])) { - - /* this might be a set specification */ - - sp = &yytext[1] ; - val = 0 ; - for ( ; ; ) { - c = *sp++ ; - if (c == '\0') { - if (val < NextSetNum) { - yylval.setdef = TheSets[val] ; - return(SET) ; - } - } - if (isdigit(c)) { - val = (val * 10) + (c - '0') ; - } else { - break ; - } - } - } - } - yylval.strdef = (id_type *)malloc(sizeof(id_type) + strlen(yytext)) ; - if (yylval.strdef == NULL) { - fatal("Out of memory in yylex") ; - } - yylval.strdef->next_id = NULL ; - if (code == SHELL_COMMAND) { - strcpy(yylval.strdef->id, &yytext[1]) ; - } else { - strcpy(yylval.strdef->id, yytext) ; - } - 
return(code) ; - } -} - -/* The main program for iid - parse the command line, initialize processing, - * loop processing one command at a time. - */ -int -main( int argc , char * argv [ ] ) -{ - int c ; /* current option */ - char * CmdPtr = NULL ; /* Points to the command string */ - char Command [ MAXCMD ] ; /* Buffer for reading commands */ - int DoPrompt ; /* 1 if should write a prompt */ - int errors = 0 ; /* error count */ - - program_name = argv[0]; - DoPrompt = isatty(fileno(stdin)) ; - while ((c = getopt(argc, argv, "Hac:")) != EOF) { - switch(c) { - case 'a': - DefaultCommand = "aid -kmn" ; - break ; - case 'c': - CmdPtr = optarg ; - break ; - case 'H': - fputs("\ -iid: interactive ID database query tool. Call with:\n\ - iid [-a] [-c] [-H]\n\ -\n\ --a\tUse the aid as the default query command (not lid).\n\ --c cmd\tExecute the single query cmd and exit.\n\ --H\tPrint this message and exit.\n\ -\n\ -To get help after starting program type 'help'.\n\ -",stderr) ; - exit(0) ; - default: - ++errors ; - break ; - } - } - if (argc != optind) { - fputs("iid: Excess arguments ignored.\n",stderr) ; - ++errors ; - } - if (errors) { - fputs("run iid -H for help.\n",stderr) ; - exit(1) ; - } - - /* initialize global data */ - - InitIid() ; - - /* run the parser */ - - if (CmdPtr) { - ScanInit(CmdPtr) ; - exit(yyparse()) ; - } else { - for ( ; ; ) { - if (DoPrompt) { - fputs(Prompt, stdout) ; - fflush(stdout) ; - } - gets(Command) ; - if (feof(stdin)) { - if (DoPrompt) fputs("\n", stdout) ; - strcpy(Command, "off") ; - } - ScanInit(Command) ; - errors += yyparse() ; - } - } -} - - -/* ArgListSize - count the size of an arg list so can alloca() enough - * space for the command. 
- */ -int -ArgListSize( id_list_type * idlp ) -{ - id_type * idep ; - int size = 0; - - idep = idlp->id_list ; - while (idep != NULL) { - size += 1 + strlen(idep->id); - idep = idep->next_id; - } - return size; -} - -/* SetListSize - count the size of a string build up from a set so we can - * alloca() enough space for args. - */ -int -SetListSize( set_type * sp ) -{ - int i ; - int size = 0 ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - size += 1 + strlen(FileList[i]->name); - } - } - } - return size; -} - -/* FlushFiles - clear out the TheFiles array for the start of a new - * query. - */ -void -FlushFiles( void ) -{ - int i ; - - if (TheFiles != NULL) { - for (i = 0; i <= MaxCurFile; ++i) { - TheFiles[i] = 0 ; - } - } - MaxCurFile = 0 ; -} - -/* fatal - sometimes the only thing to do is die... - */ -void -fatal( char const * s ) -{ - fprintf(stderr,"Fatal error: %s\n", s) ; - exit(1) ; -} - -/* CountBits - count the number of bits in a bit set. Actually fairly - * tricky since it needs to deal with sets having infinite tails - * as a result of a NOT operation. - */ -int -CountBits( set_type * sp ) -{ - unsigned long bit_mask ; - int count = 0 ; - int i ; - - i = 0; - for ( ; ; ) { - for (bit_mask = high_bit; bit_mask != 0; bit_mask >>= 1) { - if (bit_mask == NextMaskBit && i == NextMaskWord) { - return(count) ; - } - if (i < sp->set_size) { - if (sp->set_data[i] & bit_mask) { - ++count ; - } - } else { - if (sp->set_tail == 0) return count; - if (sp->set_tail & bit_mask) { - ++count; - } - } - } - ++i; - } -} - -/* OneDescription - Print a description of a set. This includes - * the set number, the number of files in the set, and the - * set description string. 
- */ -void -OneDescription( set_type * sp ) -{ - int elt_count ; - char setnum[20] ; - - sprintf(setnum,"S%d",sp->set_num) ; - elt_count = CountBits(sp) ; - printf("%5s %6d %s\n",setnum,elt_count,sp->set_desc) ; -} - -/* DescribeSets - Print description of all the sets. - */ -void -DescribeSets( void ) -{ - int i ; - - if (NextSetNum > 0) { - for (i = 0; i < NextSetNum; ++i) { - OneDescription(TheSets[i]) ; - } - } else { - printf("No sets defined yet.\n") ; - } -} - -/* SetList - Go through the bit set and add the file names in - * it to an identifier list. - */ -id_list_type * -SetList( id_list_type * idlp , set_type * sp ) -{ - int i ; - id_type * idep ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - idep = (id_type *)malloc(sizeof(id_type) + - strlen(FileList[i]->name)) ; - if (idep == NULL) { - fatal("Out of memory in SetList") ; - } - idep->next_id = NULL ; - strcpy(idep->id, FileList[i]->name) ; - idlp = ExtendList(idlp, idep) ; - } - } - } - return(idlp) ; -} - -/* PrintSet - Go through the bit set and print the file names - * corresponding to all the set bits. - */ -void -PrintSet( set_type * sp ) -{ - int i ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - printf("%s\n",FileList[i]->name) ; - } - } - } -} - -/* Free up all space used by current set of sets and reset all - * set numbers. - */ -void -FlushSets( void ) -{ - int i ; - - for (i = 0; i < NextSetNum; ++i) { - free(TheSets[i]->set_desc) ; - free(TheSets[i]) ; - } - NextSetNum = 0 ; -} - -/* InitList - create an empty identifier list. 
- */ -id_list_type * -InitList( void ) -{ - id_list_type * idlp ; - - idlp = (id_list_type *)malloc(sizeof(id_list_type)) ; - if (idlp == NULL) { - fatal("Out of memory in InitList") ; - } - idlp->id_count = 0 ; - idlp->end_ptr_ptr = & (idlp->id_list) ; - idlp->id_list = NULL ; - return(idlp) ; -} - -/* ExtendList - add one identifier to an ID list. - */ -id_list_type * -ExtendList( id_list_type * idlp , id_type * idp ) -{ - *(idlp->end_ptr_ptr) = idp ; - idlp->end_ptr_ptr = &(idp->next_id) ; - return(idlp) ; -} - -/* InitIid - do all initial processing for iid. - * 1) Determine the size of a unsigned long for bit set stuff. - * 2) Find out the name of the pager program to use. - * 3) Create the HelpSet (pointing to the help file). - * 4) Setup the prompt. - */ -void -InitIid( void ) -{ - unsigned long bit_mask = 1 ; /* find number of bits in long */ - int i ; - char const * page ; /* pager program */ - - do { - high_bit = bit_mask ; - bit_mask <<= 1 ; - } while (bit_mask != 0) ; - - NextMaskBit = high_bit ; - - page = getenv("PAGER") ; - if (page == NULL) { - page = PAGER ; - } - strcpy(Pager, page) ; - - FlushFiles() ; - InstallFile(IID_HELP_FILE) ; - HelpSet = (set_type *) - malloc(sizeof(set_type) + sizeof(unsigned long) * MaxCurFile) ; - if (HelpSet == NULL) { - fatal("No memory for set in InitIid") ; - } - HelpSet->set_tail = 0 ; - HelpSet->set_desc = NULL ; - HelpSet->set_size = MaxCurFile + 1 ; - for (i = 0; i <= MaxCurFile; ++i) { - HelpSet->set_data[i] = TheFiles[i] ; - } - - page = getenv("PS1") ; - if (page == NULL) { - page = PROMPT ; - } - strcpy(Prompt, page) ; -} - -/* InstallFile - install a file name in the symtab. Return the - * symbol table pointer of the file. 
- */ -symtab_type * -InstallFile( char const * fp ) -{ - char c ; - unsigned long hash_code ; - int i ; - char const * sp ; - symtab_type * symp ; - - hash_code = 0 ; - sp = fp ; - while ((c = *sp++) != '\0') { - hash_code <<= 1 ; - hash_code ^= (unsigned long)(c) ; - if (hash_code & high_bit) { - hash_code &= ~ high_bit ; - hash_code ^= 1 ; - } - } - hash_code %= HASH_SIZE ; - symp = HashTable[hash_code] ; - while (symp != NULL && strcmp(symp->name, fp)) { - symp = symp->hash_link ; - } - if (symp == NULL) { - symp = (symtab_type *)malloc(sizeof(symtab_type) + strlen(fp)) ; - if (symp == NULL) { - fatal("No memory for symbol table entry in InstallFile") ; - } - strcpy(symp->name, fp) ; - symp->hash_link = HashTable[hash_code] ; - HashTable[hash_code] = symp ; - if (NextMaskWord >= FileSpace) { - FileSpace += 1000 ; - if (TheFiles != NULL) { - TheFiles = (unsigned long *) - realloc(TheFiles, sizeof(unsigned long) * FileSpace) ; - } else { - TheFiles = (unsigned long *) - malloc(sizeof(unsigned long) * FileSpace) ; - } - if (TheFiles == NULL) { - fatal("No memory for TheFiles in InstallFile") ; - } - for (i = NextMaskWord; i < FileSpace; ++i) { - TheFiles[i] = 0 ; - } - } - symp->mask_word = NextMaskWord ; - symp->mask_bit = NextMaskBit ; - NextMaskBit >>= 1 ; - if (NextMaskBit == 0) { - NextMaskBit = high_bit ; - ++NextMaskWord ; - } - if (NextFileNum >= ListSpace) { - ListSpace += 1000 ; - if (FileList == NULL) { - FileList = (symtab_type **) - malloc(sizeof(symtab_type *) * ListSpace) ; - } else { - FileList = (symtab_type **) - realloc(FileList, ListSpace * sizeof(symtab_type *)) ; - } - if (FileList == NULL) { - fatal("No memory for FileList in InstallFile") ; - } - } - FileList[NextFileNum++] = symp ; - /* put code here to sort the file list by name someday */ - } - TheFiles[symp->mask_word] |= symp->mask_bit ; - if (symp->mask_word > MaxCurFile) { - MaxCurFile = symp->mask_word ; - } - return(symp) ; -} - -/* RunPager - run the users pager program on the list 
of files - * in the set. - */ -void -RunPager( char * pp , set_type * sp ) -{ - char * cmd ; - int i ; - - cmd = (char *)TEMP_ALLOC(SetListSize(sp) + strlen(pp) + 2); - strcpy(cmd, pp) ; - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - strcat(cmd, " ") ; - strcat(cmd, FileList[i]->name) ; - } - } - } - system(cmd) ; - TEMP_FREE(cmd) ; -} - -/* AddSet - add a new set to the universal list of sets. Assign - * it the next set number. - */ -void -AddSet( set_type * sp ) -{ - if (NextSetNum >= SetSpace) { - SetSpace += 1000 ; - if (TheSets != NULL) { - TheSets = (set_type **) - realloc(TheSets, sizeof(set_type *) * SetSpace) ; - } else { - TheSets = (set_type **) - malloc(sizeof(set_type *) * SetSpace) ; - } - if (TheSets == NULL) { - fatal("No memory for TheSets in AddSet") ; - } - } - sp->set_num = NextSetNum ; - TheSets[NextSetNum++] = sp ; -} - -/* RunProg - run a program with arguments from id_list and - * accept list of file names back from the program which - * are installed in the symbol table and used to construct - * a new set. - */ -set_type * -RunProg( char const * pp , id_list_type * idlp ) -{ - int c ; - char * cmd ; - char * dp ; - struct obstack pipe_output_obstack; - int i ; - id_type * idep ; - id_type * next_id ; - FILE * prog ; - set_type * sp ; - - cmd = (char *)TEMP_ALLOC(ArgListSize(idlp) + strlen(pp) + 2); - FlushFiles() ; - strcpy(cmd, pp) ; - idep = idlp->id_list ; - while (idep != NULL) { - strcat(cmd, " ") ; - strcat(cmd, idep->id) ; - next_id = idep->next_id ; - free(idep) ; - idep = next_id ; - } - free(idlp) ; - - /* run program with popen, reading the output. Assume each - * white space terminated string is a file name. 
- */ - - prog = popen(cmd, "r") ; - obstack_init (&pipe_output_obstack); - - while (1) - { - c = getc (prog); - if (c == EOF || isspace (c)) - { - int n; - if ((n = obstack_object_size (&pipe_output_obstack)) > 0) - { - char *_file; - - obstack_1grow (&pipe_output_obstack, 0); - ++n; - _file = obstack_finish (&pipe_output_obstack); - InstallFile(_file) ; - if (n != strlen (_file) + 1) - abort (); - obstack_free (&pipe_output_obstack, _file); - } - if (c == EOF) - break; - } - else - { - obstack_1grow (&pipe_output_obstack, c); - } - } - obstack_free (&pipe_output_obstack, NULL); - - if (pclose(prog) != 0) { - /* if there was an error make an empty set, who knows what - * garbage the program printed. - */ - FlushFiles() ; - } - - sp = (set_type *) - malloc(sizeof(set_type) + sizeof(unsigned long) * MaxCurFile) ; - if (sp == NULL) { - fatal("No memory for set in RunProg") ; - } - sp->set_tail = 0 ; - sp->set_desc = (char *)malloc(strlen(cmd) + 1) ; - if (sp->set_desc == NULL) { - fatal("No memory for set description in RunProg") ; - } - strcpy(sp->set_desc, cmd) ; - sp->set_size = MaxCurFile + 1 ; - for (i = 0; i <= MaxCurFile; ++i) { - sp->set_data[i] = TheFiles[i] ; - } - AddSet(sp) ; - TEMP_FREE(cmd); - return(sp) ; -} - -/* SetDirectory - change the working directory. This will - * determine which ID file is found by the subprograms. - */ -void -SetDirectory( id_type * dir ) -{ - if (chdir(dir->id) != 0) { - fprintf(stderr,"Directory %s not accessible.\n", dir->id) ; - } - free(dir) ; -} - -/* SetIntersect - construct a new set from the intersection - * of two others. Also construct a new description string. 
- */ -set_type * -SetIntersect( set_type * sp1 , set_type * sp2 ) -{ - char * desc ; - int i ; - int len1 ; - int len2 ; - set_type * new_set ; - int new_size ; - - if (sp1->set_tail || sp2->set_tail) { - new_size = MAX(sp1->set_size, sp2->set_size) ; - } else { - new_size = MIN(sp1->set_size, sp2->set_size) ; - } - new_set = (set_type *)malloc(sizeof(set_type) + - (new_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetIntersect") ; - } - len1 = strlen(sp1->set_desc) ; - len2 = strlen(sp2->set_desc) ; - desc = (char *)malloc(len1 + len2 + 10) ; - if (desc == NULL) { - fatal("No memory for set description in SetIntersect") ; - } - new_set->set_desc = desc ; - strcpy(desc,"(") ; - ++desc ; - strcpy(desc, sp1->set_desc) ; - desc += len1 ; - strcpy(desc, ") AND (") ; - desc += 7 ; - strcpy(desc, sp2->set_desc) ; - desc += len2 ; - strcpy(desc, ")") ; - AddSet(new_set) ; - new_set->set_size = new_size ; - for (i = 0; i < new_size; ++i) { - new_set->set_data[i] = - ((i < sp1->set_size) ? sp1->set_data[i] : sp1->set_tail) & - ((i < sp2->set_size) ? sp2->set_data[i] : sp2->set_tail) ; - } - new_set->set_tail = sp1->set_tail & sp2->set_tail ; - return(new_set) ; -} - -/* SetUnion - construct a new set from the union of two others. - * Also construct a new description string. 
- */ -set_type * -SetUnion( set_type * sp1 , set_type * sp2 ) -{ - char * desc ; - int i ; - int len1 ; - int len2 ; - set_type * new_set ; - int new_size ; - - new_size = MAX(sp1->set_size, sp2->set_size) ; - new_set = (set_type *)malloc(sizeof(set_type) + - (new_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetUnion") ; - } - len1 = strlen(sp1->set_desc) ; - len2 = strlen(sp2->set_desc) ; - desc = (char *)malloc(len1 + len2 + 9) ; - if (desc == NULL) { - fatal("No memory for set description in SetUnion") ; - } - new_set->set_desc = desc ; - strcpy(desc,"(") ; - ++desc ; - strcpy(desc, sp1->set_desc) ; - desc += len1 ; - strcpy(desc, ") OR (") ; - desc += 6 ; - strcpy(desc, sp2->set_desc) ; - desc += len2 ; - strcpy(desc, ")") ; - AddSet(new_set) ; - new_set->set_size = new_size ; - for (i = 0; i < new_size; ++i) { - new_set->set_data[i] = - ((i < sp1->set_size) ? (sp1->set_data[i]) : sp1->set_tail) | - ((i < sp2->set_size) ? (sp2->set_data[i]) : sp2->set_tail) ; - } - new_set->set_tail = sp1->set_tail | sp2->set_tail ; - return(new_set) ; -} - -/* SetInverse - construct a new set from the inverse of another. - * Also construct a new description string. - * - * This is kind of tricky. An inverse set in iid may grow during - * the course of a session. By NOTing the set_tail extension the - * inverse at any given time will be defined as the inverse against - * a universe that grows as additional queries are made and new files - * are added to the database. - * - * Several alternative definitions were possible (snapshot the - * universe at the time of the NOT, go read the ID file to - * determine the complete universe), but this one was the one - * I picked. 
- */ -set_type * -SetInverse( set_type * sp ) -{ - char * desc ; - int i ; - set_type * new_set ; - - new_set = (set_type *)malloc(sizeof(set_type) + - (sp->set_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetInverse") ; - } - desc = (char *)malloc(strlen(sp->set_desc) + 5) ; - if (desc == NULL) { - fatal("No memory for set description in SetInverse") ; - } - new_set->set_desc = desc ; - strcpy(desc,"NOT ") ; - desc += 4 ; - strcpy(desc, sp->set_desc) ; - AddSet(new_set) ; - new_set->set_size = sp->set_size ; - for (i = 0; i < sp->set_size; ++i) { - new_set->set_data[i] = ~ sp->set_data[i] ; - } - new_set->set_tail = ~ sp->set_tail ; - return(new_set) ; -} - -/* RunShell - run a program with arguments from id_list. - */ -void -RunShell( char * pp , id_list_type * idlp ) -{ - char * cmd ; - id_type * idep ; - id_type * next_id ; - - cmd = (char *)TEMP_ALLOC(ArgListSize(idlp) + strlen(pp) + 2); - strcpy(cmd, pp) ; - idep = idlp->id_list ; - while (idep != NULL) { - strcat(cmd, " ") ; - strcat(cmd, idep->id) ; - next_id = idep->next_id ; - free(idep) ; - idep = next_id ; - } - free(idlp) ; - system(cmd) ; - TEMP_FREE(cmd); -} diff --git a/iid.help b/iid.help deleted file mode 100644 index 6ec102c..0000000 --- a/iid.help +++ /dev/null @@ -1,92 +0,0 @@ -The iid program is an interactive shell on top of the mkid, lid, aid -database programs. It allows interactive queries of an ID database in -a fashion similar to a DIALOG session. Iid remembers the sets of files -that were reported by any lid or aid request. These sets are refered -to by set numbers. The commands available are: - -BEGIN cd to directory (presumably containing an ID file). -B short for BEGIN -SS run query displaying the sets generated -FILES run query listing the files in the final set -F short for FILES -SHOW run pager program on files in set -P short for SHOW -SETS show currently defined sets -HELP run pager on this file -? 
or H short commands for HELP -OFF exit iid - run a shell command as a file name query -! run a shell command - -A is the letter 's' (or 'S') followed (with no space) by -a number. Set numbers may be used as terms in a query. - -A is: - - - lid - aid - match - or - and - -The words "lid", "aid", "match", "or", and "and" are keywords, along -with any word that looks like a set number. If you have to use one of -these (or in arguments to lid, aid or match, shell escape characters) -then quote the name. - -The "match" operator constructs a set of files by running the "pid" -program with the wild card pattern as an argument. This is the only -operator which constructs sets based on file names rather than -contents. - -An identifier by itself is simply shorthand for "lid identifier". (If -the -a option was used to invoke iid, then a simple identifier is -shorthand for "aid identifier"). - -Example run: - -===> iid -===> ss lid "^get" or lid "Arg$" - S0 14 lid -kmn "^get" - S1 3 lid -kmn "Arg$" - S2 15 (lid -kmn "^get") OR (lid -kmn "Arg$") -===> f s1 -lid.c -paths.c -init.c -===> ls *.c - S3 28 ls *.c -===> ls s* - S4 9 ls s* -===> ss s3 and s4 - S5 4 (ls *.c) AND (ls s*) -===> !grep vhil s5 -scan-c.c: setCArgs("vhil",'+',"v"); -scan-c.c: setCArgs("vhil",'+',"v"); -===> off - -In this example the 'ss' command displays the sets it creats as it -does the parts of the query. In this case 3 sets are created, set S0 -has 14 files in it, set S1 has 3 files and the union of the two sets, -S2, has 15 files. A description of the query that created any given -set is kept along with the set and displayed when sets are printed. - -The 'f s1' command says list the files in set S1, and the three files -in the set are displayed. - -The 'ls' commands are examples of using arbitrary shell commands to -generate lists of files. In this case the 'ls' command. (This could -have been done as part of another query using the 'match' operator). 
- -The '!grep vhil s5' command runs the 'grep' shell command passing as -arguments 'vhil' and the names of all the files in s5. - -The 'off' command terminated the example session. - -Keywords, commands, and set numbers are recognized regardless of case -(and is And is aNd). Other parameters are case sensitive. - -The iid program can also be run in a batch mode using the -c option. -For more information on command line options, run "iid -H", or use the -Unix 'man' command. diff --git a/iid.y b/iid.y deleted file mode 100644 index f73f4b3..0000000 --- a/iid.y +++ /dev/null @@ -1,1359 +0,0 @@ -%{ -/* iid.y -- interactive mkid query language - Copyright (C) 1991 Tom Horsley - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include -#include -#include -#include -#include -#include - -#include -#include "strxtra.h" -#include "obstack.h" -#include "xmalloc.h" - -FILE *popen (); - -#define obstack_chunk_alloc xmalloc -#define obstack_chunk_free free - -#if HAVE_ALLOCA - -#if HAVE_ALLOCA_H -#include -#endif -#define TEMP_ALLOC(s) alloca(s) -#define TEMP_FREE(s) - -#else /* not HAVE_ALLOCA */ - -#define TEMP_ALLOC(s) malloc(s) -#define TEMP_FREE(s) free(s) - -#endif /* not HAVE_ALLOCA */ - -#define HASH_SIZE 947 /* size of hash table for file names */ -#define INIT_FILES 8000 /* start with bits for this many */ -#define INIT_SETSPACE 500 /* start with room for this many */ -#define MAXCMD 1024 /* input command buffer size */ - -#define MAX(a,b) (((a)<(b))?(b):(a)) -#define MIN(a,b) (((a)>(b))?(b):(a)) - -#ifndef PAGER -#define PAGER "pg" -#endif - -#define PROMPT "iid> " - -/* set_type is the struct defining a set of file names - * The file names are stored in a symbol table and assigned - * unique numbers. The set is a bit set of file numbers. - * One of these set structs is calloced for each new set - * constructed, the size allocated depends on the max file - * bit number. An array of pointers to sets are kept to - * represent the complete set of sets. - */ - -struct set_struct { - char * set_desc ; /* string describing the set */ - int set_num ; /* the set number */ - int set_size ; /* number of long words in set */ - unsigned long int set_tail ; /* set extended with these bits */ - unsigned long int set_data[1] ;/* the actual set data (calloced) */ -} ; -typedef struct set_struct set_type ; - -/* id_type is one element of an id_list - */ - -struct id_struct { - struct id_struct * next_id ; /* Linked list of IDs */ - char id [ 1 ] ; /* calloced data holding id string */ -} ; -typedef struct id_struct id_type ; - -/* id_list_type is used during parsing to build lists of - * identifiers that will eventually represent arguments - * to be passed to the database query programs. 
- */ - -struct id_list_struct { - int id_count ; /* count of IDs in the list */ - id_type * * end_ptr_ptr ;/* pointer to link word at end of list */ - id_type * id_list ; /* pointer to list of IDs */ -} ; -typedef struct id_list_struct id_list_type ; - -/* symtab_type is used to record file names in the symbol table. - */ -struct symtab_struct { - struct symtab_struct * hash_link ; /* list of files with same hash code */ - int mask_word ; /* word in bit vector */ - unsigned long mask_bit ; /* bit in word */ - char name [ 1 ] ; /* the file name */ -} ; -typedef struct symtab_struct symtab_type ; - -/* LidCommand is the command to run for a Lid_group. It is set - * to "lid -kmn" if explicitly preceeded by "lid", otherwise - * it is the default command which is determined by an option. - */ -char const * LidCommand ; - -/* DefaultCommand is the default command for a Lid_group. If - * the -a option is given to iid, it is set to use 'aid'. - */ -char const * DefaultCommand = "lid -kmn" ; - -/* FileList is a lexically ordered list of file symbol table - * pointers. It is dynamically expanded when necessary. - */ -symtab_type * * FileList = NULL ; - -/* FileSpace is the number of long ints in TheFiles array. - */ -int FileSpace = 0 ; - -/* HashTable is the symbol table used to store file names. Each - * new name installed is assigned the next consecutive file number. - */ -symtab_type * HashTable [ HASH_SIZE ] ; - -/* HelpSet is a dummy set containing only one bit set which corresponds - * to the help file name. Simply a cheesy way to maximize sharing of - * the code that runs the pager. - */ -set_type * HelpSet ; - -/* high_bit is a unsigned long with the most significant bit set. - */ -unsigned long high_bit ; - -/* ListSpace is the amount of space avail in the FileList. - */ -int ListSpace = 0 ; - -/* MaxCurFile - max word that has any bit currently set in the - * TheFiles array. 
- */ -int MaxCurFile = 0 ; - -/* NextFileNum is the file number that will be assigned to the next - * new file name seen when it is installed in the symtab. - */ -int NextFileNum = 0 ; - -/* NextMaskBit is the bit within the next mask word that will - * correspond to the next file added to the symbol table. - */ -unsigned long NextMaskBit ; - -/* NextMaskWord is the next word number to be assigned to a file - * bit mask entry. - */ -int NextMaskWord = 0 ; - -/* NextSetNum is the number that will be assigned to the next set - * created. Starts at 0 because I am a C programmer. - */ -int NextSetNum = 0 ; - -/* The PAGER program to run on a SHOW command. - */ -char Pager[MAXCMD] ; - -/* Prompt - the string to use for a prompt. - */ -char Prompt[MAXCMD] ; - -/* SetSpace is the number of pointers available in TheSets. TheSets - * is realloced when we run out of space. - */ -int SetSpace = 0 ; - -/* TheFiles is a bit set used to construct the initial set of files - * generated while running one of the subprograms. It is copied to - * the alloced set once we know how many bits are set. - */ -unsigned long * TheFiles = NULL ; - -/* TheSets is a dynamically allocated array of pointers pointing - * the sets that have been allocated. It represents the set of - * sets. - */ -set_type * * TheSets = NULL ; - -/* VerboseQuery controls the actions of the semantic routines during - * the process of a query. If TRUE the sets are described as they - * are constructed. 
- */ -int VerboseQuery ; - -char const *program_name ; - -int yyerror __P(( char const * s )) ; -void ScanInit __P(( char * line )) ; -int yylex __P(( void )) ; -int ArgListSize __P(( id_list_type * idlp )) ; -int SetListSize __P(( set_type * sp )) ; -void FlushFiles __P(( void )) ; -void fatal __P(( char const * s )) ; -int CountBits __P(( set_type * sp )) ; -void OneDescription __P(( set_type * sp )) ; -void DescribeSets __P(( void )) ; -id_list_type * SetList __P(( id_list_type * idlp , set_type * sp )) ; -void PrintSet __P(( set_type * sp )) ; -void FlushSets __P(( void )) ; -id_list_type * InitList __P(( void )) ; -id_list_type * ExtendList __P(( id_list_type * idlp , id_type * idp )) ; -void InitIid __P(( void )) ; -symtab_type * InstallFile __P(( char const * fp )) ; -void RunPager __P(( char * pp , set_type * sp )) ; -void AddSet __P(( set_type * sp )) ; -set_type * RunProg __P(( char const * pp , id_list_type * idlp )) ; -void SetDirectory __P(( id_type * dir )) ; -set_type * SetIntersect __P(( set_type * sp1 , set_type * sp2 )) ; -set_type * SetUnion __P(( set_type * sp1 , set_type * sp2 )) ; -set_type * SetInverse __P(( set_type * sp )) ; -void RunShell __P(( char * pp , id_list_type * idlp )) ; - -%} - -%union { - set_type * setdef ; - id_type * strdef ; - id_list_type * listdef ; -} - -%token < setdef > SET - -%token < strdef > ID SHELL_QUERY SHELL_COMMAND - -%type < setdef > Query Primitive - -%type < listdef > Lid_group Aid_group Id_list Command_list - -%token LID AID BEGIN SETS SS FILES SHOW HELP OFF MATCH - -%left OR - -%left AND - -%left NOT - -%start Command - -%% - -Command : - BEGIN ID - { - /* cd to the directory specified as argument, flush sets */ - - SetDirectory($2) ; - FlushSets() ; - } -| Set_query Query -| File_query Query - { - /* print the list of files resulting from Query */ - - PrintSet($2) ; - } -| SHOW SET - { - /* run PAGER on the list of files in SET */ - - RunPager(Pager, $2) ; - } -| SETS - { - /* describe sets created so far 
*/ - - DescribeSets() ; - } -| HELP - { - /* run PAGER on the help file */ - - RunPager(Pager, HelpSet) ; - } -| OFF - { - exit(0) ; - } -| SHELL_QUERY Command_list - { - /* run the shell command and eat the results as a file set */ - - OneDescription(RunProg($1->id, $2)) ; - free($1) ; - } -| SHELL_COMMAND Command_list - { - /* run the shell command */ - - RunShell($1->id, $2) ; - free($1) ; - } -; - -Set_query : - SS - { - /* Turn on verbose query flag */ - - VerboseQuery = 1 ; - } -; - -File_query : - FILES - { - /* Turn off verbose query flag */ - - VerboseQuery = 0 ; - } -; - -Query : - Primitive - { - /* value of query is set associated with primitive */ - - $$ = $1 ; - } -| Query AND Query - { - /* value of query is intersection of the two query sets */ - - $$ = SetIntersect($1, $3) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| Query OR Query - { - /* value of query is union of the two query sets */ - - $$ = SetUnion($1, $3) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| NOT Query - { - /* value of query is inverse of other query */ - - $$ = SetInverse($2) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -; - -Primitive : - SET - { - /* Value of primitive is value of recorded set */ - - $$ = $1 ; - } -| Lid_group - { - /* Value of primitive is obtained by running an lid query */ - - $$ = RunProg(LidCommand, $1) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| Aid_group - { - /* Value of primitive is obtained by running an aid query */ - - $$ = RunProg("aid -kmn", $1) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| MATCH Id_list - { - /* Match names from database against pattern */ - $$ = RunProg("pid -kmn", $2) ; - if (VerboseQuery) { - OneDescription($$) ; - } - } -| '(' Query ')' - { - /* value of primitive is value of query */ - - $$ = $2 ; - } -; - -Lid_group : - ID - { - /* make arg list holding single ID */ - - $$ = InitList() ; - $$ = ExtendList($$, $1) ; - LidCommand = DefaultCommand ; - } -| LID 
Id_list - { - /* arg list is Id_list */ - - $$ = $2 ; - LidCommand = "lid -kmn" ; - } -; - -Aid_group : - AID Id_list - { - /* arg list is Id_list */ - - $$ = $2 ; - } -; - -Command_list : - ID - { - /* make arg list holding single ID */ - - $$ = InitList() ; - $$ = ExtendList($$, $1) ; - } -| SET - { - /* make arg list holding names from set */ - - $$ = InitList() ; - $$ = SetList($$, $1) ; - } -| Command_list ID - { - /* extend arg list with additional ID */ - - $$ = ExtendList($1, $2) ; - } -| Command_list SET - { - /* extend arg list with additional file names */ - - $$ = SetList($1, $2) ; - } -; - -Id_list : - ID - { - /* make arg list holding single ID */ - - $$ = InitList() ; - $$ = ExtendList($$, $1) ; - } -| Id_list ID - { - /* extend arg list with additional ID */ - - $$ = ExtendList($1, $2) ; - } -; - -%% - -/* ScanLine - a global variable holding a pointer to the current - * command being scanned. - */ -char * ScanLine ; - -/* ScanPtr - a global pointer to the current scan position in ScanLine. - */ -char * ScanPtr ; - -/* yytext - buffer holding the token. - */ -char yytext [ MAXCMD ] ; - -/* yyerror - process syntax errors. - */ -int -yyerror( char const * s ) -{ - if (*ScanPtr == '\0') { - fprintf(stderr,"Syntax error near end of command.\n") ; - } else { - fprintf(stderr,"Syntax error on or before %s\n",ScanPtr) ; - } - return(0) ; -} - -/* ScanInit - initialize the yylex routine for the new line of input. - * Basically just initializes the global variables that hold the char - * ptrs the scanner uses. - */ -void -ScanInit( char * line ) -{ - /* skip the leading white space - the yylex routine is sensitive - * to keywords in the first position on the command line. - */ - - while (isspace(*line)) ++line ; - ScanLine = line ; - ScanPtr = line ; -} - -/* yylex - the scanner for iid. Basically a kludge ad-hoc piece of junk, - * but what the heck, if it works... - * - * Mostly just scans for non white space strings and returns ID for them. 
- * Does check especially for '(' and ')'. Just before returning ID it - * checks for command names if it is the first token on line or - * AND, OR, LID, AID if it is in the middle of a line. - */ -int -yylex( void ) -{ - char * bp ; - char c ; - int code = ID ; - char * dp ; - char * sp ; - int val ; - - bp = ScanPtr ; - while (isspace(*bp)) ++bp ; - sp = bp ; - c = *sp++ ; - if ((c == '(') || (c == ')') || (c == '\0')) { - ScanPtr = sp ; - if (c == '\0') { - --ScanPtr ; - } - return(c) ; - } else { - dp = yytext ; - while (! ((c == '(') || (c == ')') || (c == '\0') || isspace(c))) { - *dp++ = c ; - c = *sp++ ; - } - *dp++ = '\0' ; - ScanPtr = sp - 1 ; - if (bp == ScanLine) { - - /* first token on line, check for command names */ - - if (strcaseequ(yytext, "SS")) return(SS) ; - if (strcaseequ(yytext, "FILES")) return(FILES) ; - if (strcaseequ(yytext, "F")) return(FILES) ; - if (strcaseequ(yytext, "HELP")) return(HELP) ; - if (strcaseequ(yytext, "H")) return(HELP) ; - if (strcaseequ(yytext, "?")) return(HELP) ; - if (strcaseequ(yytext, "BEGIN")) return(BEGIN) ; - if (strcaseequ(yytext, "B")) return(BEGIN) ; - if (strcaseequ(yytext, "SETS")) return(SETS) ; - if (strcaseequ(yytext, "SHOW")) return(SHOW) ; - if (strcaseequ(yytext, "P")) return(SHOW) ; - if (strcaseequ(yytext, "OFF")) return(OFF) ; - if (strcaseequ(yytext, "Q")) return(OFF) ; - if (strcaseequ(yytext, "QUIT")) return(OFF) ; - if (yytext[0] == '!') { - code = SHELL_COMMAND ; - } else { - code = SHELL_QUERY ; - } - } else { - - /* not first token, check for operator names */ - - if (strcaseequ(yytext, "LID")) return(LID) ; - if (strcaseequ(yytext, "AID")) return(AID) ; - if (strcaseequ(yytext, "AND")) return(AND) ; - if (strcaseequ(yytext, "OR")) return(OR) ; - if (strcaseequ(yytext, "NOT")) return(NOT) ; - if (strcaseequ(yytext, "MATCH")) return(MATCH) ; - if ((yytext[0] == 's' || yytext[0] == 'S') && isdigit(yytext[1])) { - - /* this might be a set specification */ - - sp = &yytext[1] ; - val = 0 ; - 
for ( ; ; ) { - c = *sp++ ; - if (c == '\0') { - if (val < NextSetNum) { - yylval.setdef = TheSets[val] ; - return(SET) ; - } - } - if (isdigit(c)) { - val = (val * 10) + (c - '0') ; - } else { - break ; - } - } - } - } - yylval.strdef = (id_type *)malloc(sizeof(id_type) + strlen(yytext)) ; - if (yylval.strdef == NULL) { - fatal("Out of memory in yylex") ; - } - yylval.strdef->next_id = NULL ; - if (code == SHELL_COMMAND) { - strcpy(yylval.strdef->id, &yytext[1]) ; - } else { - strcpy(yylval.strdef->id, yytext) ; - } - return(code) ; - } -} - -/* The main program for iid - parse the command line, initialize processing, - * loop processing one command at a time. - */ -int -main( int argc , char * argv [ ] ) -{ - int c ; /* current option */ - char * CmdPtr = NULL ; /* Points to the command string */ - char Command [ MAXCMD ] ; /* Buffer for reading commands */ - int DoPrompt ; /* 1 if should write a prompt */ - int errors = 0 ; /* error count */ - - program_name = argv[0]; - DoPrompt = isatty(fileno(stdin)) ; - while ((c = getopt(argc, argv, "Hac:")) != EOF) { - switch(c) { - case 'a': - DefaultCommand = "aid -kmn" ; - break ; - case 'c': - CmdPtr = optarg ; - break ; - case 'H': - fputs("\ -iid: interactive ID database query tool. 
Call with:\n\ - iid [-a] [-c] [-H]\n\ -\n\ --a\tUse the aid as the default query command (not lid).\n\ --c cmd\tExecute the single query cmd and exit.\n\ --H\tPrint this message and exit.\n\ -\n\ -To get help after starting program type 'help'.\n\ -",stderr) ; - exit(0) ; - default: - ++errors ; - break ; - } - } - if (argc != optind) { - fputs("iid: Excess arguments ignored.\n",stderr) ; - ++errors ; - } - if (errors) { - fputs("run iid -H for help.\n",stderr) ; - exit(1) ; - } - - /* initialize global data */ - - InitIid() ; - - /* run the parser */ - - if (CmdPtr) { - ScanInit(CmdPtr) ; - exit(yyparse()) ; - } else { - for ( ; ; ) { - if (DoPrompt) { - fputs(Prompt, stdout) ; - fflush(stdout) ; - } - gets(Command) ; - if (feof(stdin)) { - if (DoPrompt) fputs("\n", stdout) ; - strcpy(Command, "off") ; - } - ScanInit(Command) ; - errors += yyparse() ; - } - } -} - - -/* ArgListSize - count the size of an arg list so can alloca() enough - * space for the command. - */ -int -ArgListSize( id_list_type * idlp ) -{ - id_type * idep ; - int size = 0; - - idep = idlp->id_list ; - while (idep != NULL) { - size += 1 + strlen(idep->id); - idep = idep->next_id; - } - return size; -} - -/* SetListSize - count the size of a string build up from a set so we can - * alloca() enough space for args. - */ -int -SetListSize( set_type * sp ) -{ - int i ; - int size = 0 ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - size += 1 + strlen(FileList[i]->name); - } - } - } - return size; -} - -/* FlushFiles - clear out the TheFiles array for the start of a new - * query. - */ -void -FlushFiles( void ) -{ - int i ; - - if (TheFiles != NULL) { - for (i = 0; i <= MaxCurFile; ++i) { - TheFiles[i] = 0 ; - } - } - MaxCurFile = 0 ; -} - -/* fatal - sometimes the only thing to do is die... 
- */ -void -fatal( char const * s ) -{ - fprintf(stderr,"Fatal error: %s\n", s) ; - exit(1) ; -} - -/* CountBits - count the number of bits in a bit set. Actually fairly - * tricky since it needs to deal with sets having infinite tails - * as a result of a NOT operation. - */ -int -CountBits( set_type * sp ) -{ - unsigned long bit_mask ; - int count = 0 ; - int i ; - - i = 0; - for ( ; ; ) { - for (bit_mask = high_bit; bit_mask != 0; bit_mask >>= 1) { - if (bit_mask == NextMaskBit && i == NextMaskWord) { - return(count) ; - } - if (i < sp->set_size) { - if (sp->set_data[i] & bit_mask) { - ++count ; - } - } else { - if (sp->set_tail == 0) return count; - if (sp->set_tail & bit_mask) { - ++count; - } - } - } - ++i; - } -} - -/* OneDescription - Print a description of a set. This includes - * the set number, the number of files in the set, and the - * set description string. - */ -void -OneDescription( set_type * sp ) -{ - int elt_count ; - char setnum[20] ; - - sprintf(setnum,"S%d",sp->set_num) ; - elt_count = CountBits(sp) ; - printf("%5s %6d %s\n",setnum,elt_count,sp->set_desc) ; -} - -/* DescribeSets - Print description of all the sets. - */ -void -DescribeSets( void ) -{ - int i ; - - if (NextSetNum > 0) { - for (i = 0; i < NextSetNum; ++i) { - OneDescription(TheSets[i]) ; - } - } else { - printf("No sets defined yet.\n") ; - } -} - -/* SetList - Go through the bit set and add the file names in - * it to an identifier list. 
- */ -id_list_type * -SetList( id_list_type * idlp , set_type * sp ) -{ - int i ; - id_type * idep ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - idep = (id_type *)malloc(sizeof(id_type) + - strlen(FileList[i]->name)) ; - if (idep == NULL) { - fatal("Out of memory in SetList") ; - } - idep->next_id = NULL ; - strcpy(idep->id, FileList[i]->name) ; - idlp = ExtendList(idlp, idep) ; - } - } - } - return(idlp) ; -} - -/* PrintSet - Go through the bit set and print the file names - * corresponding to all the set bits. - */ -void -PrintSet( set_type * sp ) -{ - int i ; - - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - printf("%s\n",FileList[i]->name) ; - } - } - } -} - -/* Free up all space used by current set of sets and reset all - * set numbers. - */ -void -FlushSets( void ) -{ - int i ; - - for (i = 0; i < NextSetNum; ++i) { - free(TheSets[i]->set_desc) ; - free(TheSets[i]) ; - } - NextSetNum = 0 ; -} - -/* InitList - create an empty identifier list. - */ -id_list_type * -InitList( void ) -{ - id_list_type * idlp ; - - idlp = (id_list_type *)malloc(sizeof(id_list_type)) ; - if (idlp == NULL) { - fatal("Out of memory in InitList") ; - } - idlp->id_count = 0 ; - idlp->end_ptr_ptr = & (idlp->id_list) ; - idlp->id_list = NULL ; - return(idlp) ; -} - -/* ExtendList - add one identifier to an ID list. - */ -id_list_type * -ExtendList( id_list_type * idlp , id_type * idp ) -{ - *(idlp->end_ptr_ptr) = idp ; - idlp->end_ptr_ptr = &(idp->next_id) ; - return(idlp) ; -} - -/* InitIid - do all initial processing for iid. - * 1) Determine the size of a unsigned long for bit set stuff. - * 2) Find out the name of the pager program to use. - * 3) Create the HelpSet (pointing to the help file). - * 4) Setup the prompt. 
- */ -void -InitIid( void ) -{ - unsigned long bit_mask = 1 ; /* find number of bits in long */ - int i ; - char const * page ; /* pager program */ - - do { - high_bit = bit_mask ; - bit_mask <<= 1 ; - } while (bit_mask != 0) ; - - NextMaskBit = high_bit ; - - page = getenv("PAGER") ; - if (page == NULL) { - page = PAGER ; - } - strcpy(Pager, page) ; - - FlushFiles() ; - InstallFile(IID_HELP_FILE) ; - HelpSet = (set_type *) - malloc(sizeof(set_type) + sizeof(unsigned long) * MaxCurFile) ; - if (HelpSet == NULL) { - fatal("No memory for set in InitIid") ; - } - HelpSet->set_tail = 0 ; - HelpSet->set_desc = NULL ; - HelpSet->set_size = MaxCurFile + 1 ; - for (i = 0; i <= MaxCurFile; ++i) { - HelpSet->set_data[i] = TheFiles[i] ; - } - - page = getenv("PS1") ; - if (page == NULL) { - page = PROMPT ; - } - strcpy(Prompt, page) ; -} - -/* InstallFile - install a file name in the symtab. Return the - * symbol table pointer of the file. - */ -symtab_type * -InstallFile( char const * fp ) -{ - char c ; - unsigned long hash_code ; - int i ; - char const * sp ; - symtab_type * symp ; - - hash_code = 0 ; - sp = fp ; - while ((c = *sp++) != '\0') { - hash_code <<= 1 ; - hash_code ^= (unsigned long)(c) ; - if (hash_code & high_bit) { - hash_code &= ~ high_bit ; - hash_code ^= 1 ; - } - } - hash_code %= HASH_SIZE ; - symp = HashTable[hash_code] ; - while (symp != NULL && strcmp(symp->name, fp)) { - symp = symp->hash_link ; - } - if (symp == NULL) { - symp = (symtab_type *)malloc(sizeof(symtab_type) + strlen(fp)) ; - if (symp == NULL) { - fatal("No memory for symbol table entry in InstallFile") ; - } - strcpy(symp->name, fp) ; - symp->hash_link = HashTable[hash_code] ; - HashTable[hash_code] = symp ; - if (NextMaskWord >= FileSpace) { - FileSpace += 1000 ; - if (TheFiles != NULL) { - TheFiles = (unsigned long *) - realloc(TheFiles, sizeof(unsigned long) * FileSpace) ; - } else { - TheFiles = (unsigned long *) - malloc(sizeof(unsigned long) * FileSpace) ; - } - if (TheFiles == 
NULL) { - fatal("No memory for TheFiles in InstallFile") ; - } - for (i = NextMaskWord; i < FileSpace; ++i) { - TheFiles[i] = 0 ; - } - } - symp->mask_word = NextMaskWord ; - symp->mask_bit = NextMaskBit ; - NextMaskBit >>= 1 ; - if (NextMaskBit == 0) { - NextMaskBit = high_bit ; - ++NextMaskWord ; - } - if (NextFileNum >= ListSpace) { - ListSpace += 1000 ; - if (FileList == NULL) { - FileList = (symtab_type **) - malloc(sizeof(symtab_type *) * ListSpace) ; - } else { - FileList = (symtab_type **) - realloc(FileList, ListSpace * sizeof(symtab_type *)) ; - } - if (FileList == NULL) { - fatal("No memory for FileList in InstallFile") ; - } - } - FileList[NextFileNum++] = symp ; - /* put code here to sort the file list by name someday */ - } - TheFiles[symp->mask_word] |= symp->mask_bit ; - if (symp->mask_word > MaxCurFile) { - MaxCurFile = symp->mask_word ; - } - return(symp) ; -} - -/* RunPager - run the users pager program on the list of files - * in the set. - */ -void -RunPager( char * pp , set_type * sp ) -{ - char * cmd ; - int i ; - - cmd = (char *)TEMP_ALLOC(SetListSize(sp) + strlen(pp) + 2); - strcpy(cmd, pp) ; - for (i = 0; i < NextFileNum; ++i) { - if (FileList[i]->mask_word < sp->set_size) { - if (sp->set_data[FileList[i]->mask_word] & FileList[i]->mask_bit) { - strcat(cmd, " ") ; - strcat(cmd, FileList[i]->name) ; - } - } - } - system(cmd) ; - TEMP_FREE(cmd) ; -} - -/* AddSet - add a new set to the universal list of sets. Assign - * it the next set number. 
- */ -void -AddSet( set_type * sp ) -{ - if (NextSetNum >= SetSpace) { - SetSpace += 1000 ; - if (TheSets != NULL) { - TheSets = (set_type **) - realloc(TheSets, sizeof(set_type *) * SetSpace) ; - } else { - TheSets = (set_type **) - malloc(sizeof(set_type *) * SetSpace) ; - } - if (TheSets == NULL) { - fatal("No memory for TheSets in AddSet") ; - } - } - sp->set_num = NextSetNum ; - TheSets[NextSetNum++] = sp ; -} - -/* RunProg - run a program with arguments from id_list and - * accept list of file names back from the program which - * are installed in the symbol table and used to construct - * a new set. - */ -set_type * -RunProg( char const * pp , id_list_type * idlp ) -{ - int c ; - char * cmd ; - char * dp ; - struct obstack pipe_output_obstack; - int i ; - id_type * idep ; - id_type * next_id ; - FILE * prog ; - set_type * sp ; - - cmd = (char *)TEMP_ALLOC(ArgListSize(idlp) + strlen(pp) + 2); - FlushFiles() ; - strcpy(cmd, pp) ; - idep = idlp->id_list ; - while (idep != NULL) { - strcat(cmd, " ") ; - strcat(cmd, idep->id) ; - next_id = idep->next_id ; - free(idep) ; - idep = next_id ; - } - free(idlp) ; - - /* run program with popen, reading the output. Assume each - * white space terminated string is a file name. - */ - - prog = popen(cmd, "r") ; - obstack_init (&pipe_output_obstack); - - while (1) - { - c = getc (prog); - if (c == EOF || isspace (c)) - { - int n; - if ((n = obstack_object_size (&pipe_output_obstack)) > 0) - { - char *_file; - - obstack_1grow (&pipe_output_obstack, 0); - ++n; - _file = obstack_finish (&pipe_output_obstack); - InstallFile(_file) ; - if (n != strlen (_file) + 1) - abort (); - obstack_free (&pipe_output_obstack, _file); - } - if (c == EOF) - break; - } - else - { - obstack_1grow (&pipe_output_obstack, c); - } - } - obstack_free (&pipe_output_obstack, NULL); - - if (pclose(prog) != 0) { - /* if there was an error make an empty set, who knows what - * garbage the program printed. 
- */ - FlushFiles() ; - } - - sp = (set_type *) - malloc(sizeof(set_type) + sizeof(unsigned long) * MaxCurFile) ; - if (sp == NULL) { - fatal("No memory for set in RunProg") ; - } - sp->set_tail = 0 ; - sp->set_desc = (char *)malloc(strlen(cmd) + 1) ; - if (sp->set_desc == NULL) { - fatal("No memory for set description in RunProg") ; - } - strcpy(sp->set_desc, cmd) ; - sp->set_size = MaxCurFile + 1 ; - for (i = 0; i <= MaxCurFile; ++i) { - sp->set_data[i] = TheFiles[i] ; - } - AddSet(sp) ; - TEMP_FREE(cmd); - return(sp) ; -} - -/* SetDirectory - change the working directory. This will - * determine which ID file is found by the subprograms. - */ -void -SetDirectory( id_type * dir ) -{ - if (chdir(dir->id) != 0) { - fprintf(stderr,"Directory %s not accessible.\n", dir->id) ; - } - free(dir) ; -} - -/* SetIntersect - construct a new set from the intersection - * of two others. Also construct a new description string. - */ -set_type * -SetIntersect( set_type * sp1 , set_type * sp2 ) -{ - char * desc ; - int i ; - int len1 ; - int len2 ; - set_type * new_set ; - int new_size ; - - if (sp1->set_tail || sp2->set_tail) { - new_size = MAX(sp1->set_size, sp2->set_size) ; - } else { - new_size = MIN(sp1->set_size, sp2->set_size) ; - } - new_set = (set_type *)malloc(sizeof(set_type) + - (new_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetIntersect") ; - } - len1 = strlen(sp1->set_desc) ; - len2 = strlen(sp2->set_desc) ; - desc = (char *)malloc(len1 + len2 + 10) ; - if (desc == NULL) { - fatal("No memory for set description in SetIntersect") ; - } - new_set->set_desc = desc ; - strcpy(desc,"(") ; - ++desc ; - strcpy(desc, sp1->set_desc) ; - desc += len1 ; - strcpy(desc, ") AND (") ; - desc += 7 ; - strcpy(desc, sp2->set_desc) ; - desc += len2 ; - strcpy(desc, ")") ; - AddSet(new_set) ; - new_set->set_size = new_size ; - for (i = 0; i < new_size; ++i) { - new_set->set_data[i] = - ((i < sp1->set_size) ? 
sp1->set_data[i] : sp1->set_tail) & - ((i < sp2->set_size) ? sp2->set_data[i] : sp2->set_tail) ; - } - new_set->set_tail = sp1->set_tail & sp2->set_tail ; - return(new_set) ; -} - -/* SetUnion - construct a new set from the union of two others. - * Also construct a new description string. - */ -set_type * -SetUnion( set_type * sp1 , set_type * sp2 ) -{ - char * desc ; - int i ; - int len1 ; - int len2 ; - set_type * new_set ; - int new_size ; - - new_size = MAX(sp1->set_size, sp2->set_size) ; - new_set = (set_type *)malloc(sizeof(set_type) + - (new_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetUnion") ; - } - len1 = strlen(sp1->set_desc) ; - len2 = strlen(sp2->set_desc) ; - desc = (char *)malloc(len1 + len2 + 9) ; - if (desc == NULL) { - fatal("No memory for set description in SetUnion") ; - } - new_set->set_desc = desc ; - strcpy(desc,"(") ; - ++desc ; - strcpy(desc, sp1->set_desc) ; - desc += len1 ; - strcpy(desc, ") OR (") ; - desc += 6 ; - strcpy(desc, sp2->set_desc) ; - desc += len2 ; - strcpy(desc, ")") ; - AddSet(new_set) ; - new_set->set_size = new_size ; - for (i = 0; i < new_size; ++i) { - new_set->set_data[i] = - ((i < sp1->set_size) ? (sp1->set_data[i]) : sp1->set_tail) | - ((i < sp2->set_size) ? (sp2->set_data[i]) : sp2->set_tail) ; - } - new_set->set_tail = sp1->set_tail | sp2->set_tail ; - return(new_set) ; -} - -/* SetInverse - construct a new set from the inverse of another. - * Also construct a new description string. - * - * This is kind of tricky. An inverse set in iid may grow during - * the course of a session. By NOTing the set_tail extension the - * inverse at any given time will be defined as the inverse against - * a universe that grows as additional queries are made and new files - * are added to the database. 
- * - * Several alternative definitions were possible (snapshot the - * universe at the time of the NOT, go read the ID file to - * determine the complete universe), but this one was the one - * I picked. - */ -set_type * -SetInverse( set_type * sp ) -{ - char * desc ; - int i ; - set_type * new_set ; - - new_set = (set_type *)malloc(sizeof(set_type) + - (sp->set_size - 1) * sizeof(unsigned long)) ; - if (new_set == NULL) { - fatal("No memory for set in SetInverse") ; - } - desc = (char *)malloc(strlen(sp->set_desc) + 5) ; - if (desc == NULL) { - fatal("No memory for set description in SetInverse") ; - } - new_set->set_desc = desc ; - strcpy(desc,"NOT ") ; - desc += 4 ; - strcpy(desc, sp->set_desc) ; - AddSet(new_set) ; - new_set->set_size = sp->set_size ; - for (i = 0; i < sp->set_size; ++i) { - new_set->set_data[i] = ~ sp->set_data[i] ; - } - new_set->set_tail = ~ sp->set_tail ; - return(new_set) ; -} - -/* RunShell - run a program with arguments from id_list. - */ -void -RunShell( char * pp , id_list_type * idlp ) -{ - char * cmd ; - id_type * idep ; - id_type * next_id ; - - cmd = (char *)TEMP_ALLOC(ArgListSize(idlp) + strlen(pp) + 2); - strcpy(cmd, pp) ; - idep = idlp->id_list ; - while (idep != NULL) { - strcat(cmd, " ") ; - strcat(cmd, idep->id) ; - next_id = idep->next_id ; - free(idep) ; - idep = next_id ; - } - free(idlp) ; - system(cmd) ; - TEMP_FREE(cmd); -} diff --git a/lid.1 b/lid.1 deleted file mode 100644 index ce98212..0000000 --- a/lid.1 +++ /dev/null @@ -1,211 +0,0 @@ -.TH LID 1 -.SH NAME -lid, gid, eid, aid, pid \- query id database -.SH SYNOPSIS -.B lid -.RB [ \-f \^file] -.RB [ \-u \^n] -.RB [ \-r \^dir] -.RB [ \-edoxamseknc] -patterns... -.PP -.B gid -.RB [ \-f \^file] -.RB [ \-r \^dir] -.RB [ \-edoxamsec] -patterns... -.PP -.B eid -.RB [ \-f \^file] -.RB [ \-r \^dir] -.RB [ \-doxamsec] -patterns... -.PP -.B aid -.RB [ \-f \^file] -.RB [ \-r \^dir] -.RB [ \-doxamsc] -patterns... 
-.PP -.B pid -.RB [ \-f \^file] -.RB [ \-r \^dir] -.RB [ \-ekncb] -patterns... -.SH DESCRIPTION -These commands provide a flexible query interface to the -.I id -database. -.I Lid\^ -does a lookup on -.IR patters -and prints out lines in this way: -.PP -.nf -idname ../hdir/hfile.h ../cdir/{cfile1,cfile2}.c -.fi -.PP -Notice that multiple files with the same directory prefix -and suffix are concatenated in the globbing-set-notation of -.IR csh (1). -Also notice that all of the -.I id -database query commands adjust the list of pathnames to be relative -to your current working directory, provided that -.IR mkid (1) -was used to build the database, and your working directory -is located within the sub-tree covered by the -.I id -database. -.PP -If multiple names match on pattern, then there will be one line -of output per name. The mnemonic significance of the name is -\fI\|l(ookup) id\fP. -.PP -.I Gid -does a lookup and then searches for the names it matches in the -files where they occur. The mnemonic for this name is -\fI\|g(rep)id\fP. -.PP -.I Eid -does a lookup, and then invokes an editor on all files with -the matched name as an initial search string. Of course, this -name stands for -\fI\|e(dit) id\fP. -.PP -.I Eid -uses four environment variables to control its invocation of the -editor. -Naturally, -.B EDITOR -is used to locate the editing program. -.B EIDARG -is a -.IR printf (3S) -string used to specify the form of the initial-search-string -argument. If the editor does not support such an argument, -this variable may be left unset. -.B EIDLDEL -and -.B EIDRDEL -specify the form of the left and right word-delimiters respectively. -The best way to explain the use of these last three variables is -with an example. 
Here are the proper settings for vi(1): -.nf -EIDARG='+/%s/' # initial search argument template -EIDLDEL='\\<' # left word-delimiter -EIDRDEL='\\>' # right word-delimiter -.fi -.PP -.I Patterns -may be simple alpha-numeric strings, or regular expressions in the -style of -.IR regcmp (3). -If the string contains no regular-expression meta-characters, it is -searched for as a -.IR word . -If the string contains meta-characters, or if the \-e argument is -supplied, it is searched for as regular-expression. -.PP -.I Aid\^ -produces output in the style of -.I lid\^ -but its pattern arguments are searched for as substrings within -the identifiers in the database. No regular-expression search -is performed, even if the pattern contains meta-characters. -The search is conducted in an alphabetic case insensitive manner. -The mnemonic for this name is -\fI\|a(propos) id\fP. -.PP -.I Pid\^ -is used to match the input patterns against the names of the files -in the database rather than the contents of the files. The pattern -is assumed to be a simple shell wild card pattern unless the -.B \-e -option is given, in which case full regular expression matching -is used. -The -.B \-b -option can be used to restrict the match to just the basename portion -of the full absolute path name of the file. -The mnemonic for this name is -\fI\|p(ath) id\fP. -.PP -The following options are recognized: -.TP 10 -.BR \-f file\^ -Use -.I file\^ -as the database instead of the default -.BR ID . -.TP 10 -.BR \-u n -Lists all identifiers in the database that are non-unique within the first -.I n -characters. This facility is particularly helpful when porting a program -to a system whose compiler or linker has fewer significant characters -for identifiers. -.TP 10 -.BR \-r dir\^ -Assume the names stored in the database are relative to this directory. -This option is useful if you create the database in one place, then move -it somewhere else. 
Normally all the query tools assume the names in -the database are relative to the location of the database. -.TP 10 -.B \-c -This option is similar to -.BR \-r , -but it tells the id query tool to assume the names in the ID database -are stored relative to the current working directory. -.TP 10 -.B \-k -Suppresses the use of \fL{\fP and \fL}\fP as a shorthand in the -generated list of file names. Each name is output in full. -.TP 10 -.B \-n -Suppresses printing the name of the search string, only the names of -the files containing the string are printed. Together with the \fB\-k\fP -option this can be used to generate lists of files to pass to other -programs. -.PP -.TP 10 -.B \-b -In the -.I pid -program, the -.B \-b -option is used to force pattern matching on just the base names of the -file, otherwise the pattern matching is done on the full absolute file -name. -.PP -The remaining options are for use in conjunction with numeric patterns: -.TP 10 -.B \-doxa -These options may be specified in any combination. -They limit numeric matches to specific radixes. -The -.BR \-d , -.BR \-o , -and -.B \-x -options limit matches to decimal, octal, and hexadecimal respectively. -The -.BR \-a -option is a shorthand for specifying all three radixes. -.PP -Searches for numbers -are conducted numerically rather than lexically, so that all -representations for a given number are potentially available -from a single search. -.TP 10 -.B \-m -Merge multiple lines of output into a single line. -.TP 10 -.B \-s -Limit the results of the search to identifiers that occur only -once in the entire set of sources covered by the database. -This option is useful for finding identifiers that are defined -but never used. -.SH SEE ALSO -mkid(1), -fid(1). 
diff --git a/lid.c b/lid.c deleted file mode 100644 index 3ce5f85..0000000 --- a/lid.c +++ /dev/null @@ -1,1365 +0,0 @@ -/* lid.c -- primary query interface for mkid database - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "alloc.h" -#include "idfile.h" -#include "token.h" -#include "bitops.h" -#include "strxtra.h" -#include "misc.h" -#include "filenames.h" - -typedef void (*doit_t) __P((char const *name, char **argv)); - -unsigned char *tree8_to_bits __P((unsigned char *bits_vec, unsigned char const *hits_tree8)); -void tree8_to_bits_1 __P((unsigned char **bits_vec, unsigned char const **hits_tree8, int level)); -char **tree8_to_argv __P((unsigned char const *hits_tree8)); -char **bits_to_argv __P((unsigned char const *bits_vec)); - -static void usage __P((void)); -int common_prefix_suffix __P((char const *path1, char const *path2)); -void look_id __P((char const *name, char **argv)); -void grep_id __P((char const *name, char **argv)); -void edit_id __P((char const *name, char **argv)); -int skip_to_argv __P((char **argv)); -int find_plain __P((char const *arg, doit_t doit)); -int find_anchor __P((char const *arg, doit_t doit)); -int find_regexp 
__P((char const *arg, doit_t doit)); -int find_number __P((char const *arg, doit_t doit)); -int find_non_unique __P((unsigned int, doit_t doit)); -int find_apropos __P((char const *arg, doit_t doit)); -void parse_frequency_arg __P((char const *arg)); -int frequency_wanted __P((char const *tok)); -char const *strcpos __P((char const *s1, char const *s2)); -char const *file_regexp __P((char const *name0, char const *left_delimit, char const *right_delimit)); -off_t find_token __P((char const *token)); -int is_regexp __P((char *name)); -char **vec_to_argv __P((int const *vec)); -int file_name_wildcard __P((char const *re, char const *fn)); -int match_file_names __P((char const *re, doit_t doit)); -int word_match __P((char const *name0, char const *line)); -int radix __P((char const *name)); -int stoi __P((char const *name)); -int otoi __P((char const *name)); -int dtoi __P((char const *name)); -int xtoi __P((char const *name)); -void savetty __P((void)); -void restoretty __P((void)); -void linetty __P((void)); -void chartty __P((void)); - -enum radix { - RADIX_OCT = 1, - RADIX_DEC = 2, - RADIX_HEX = 4, - RADIX_ALL = RADIX_DEC | RADIX_OCT | RADIX_HEX -}; - -#define TOLOWER(c) (isupper (c) ? tolower (c) : (c)) -#define IS_ALNUM(c) (isalnum (c) || (c) == '_') - -#ifndef BRACE_NOTATION_DEFAULT -#define BRACE_NOTATION_DEFAULT 1 -#endif - -/* Sorry about all the globals, but it's really cleaner this way. 
*/ -FILE *id_FILE; -int merging; -int radix_arg; -int echo_on = 1; -int brace_notation_on = BRACE_NOTATION_DEFAULT; -int file_name_regexp = 0; -int match_base = 0; -char *anchor_dir; -int tree8_levels; -unsigned int bits_vec_size; -char PWD_buf[MAXPATHLEN]; -struct idhead idh; -int (*find_func) __P((char const *, doit_t)); -unsigned short frequency_low = 1; -unsigned short frequency_high = USHRT_MAX; -char *buf; -char *buf2; -unsigned char *bits_vec; - -char const *program_name; - -static void -usage (void) -{ - fprintf (stderr, "Usage: %s [-f] [-u] [-r] [-mewdoxaskncg] patterns...\n", program_name); - exit (1); -} - -int -main (int argc, char **argv) -{ - char const *id_file_name = IDFILE; - doit_t doit = look_id; - int force_merge = 0; - unsigned int unique_limit = 0; - int (*forced_find_func) __P((char const *, doit_t)) = NULL; - - program_name = basename ((argc--, *argv++)); - - while (argc) - { - char const *arg = (argc--, *argv++); - int op = *arg++; - switch (op) - { - case '-': - case '+': - break; - default: - (argc++, --argv); - goto argsdone; - } - while (*arg) - switch (*arg++) - { - case 'f': - id_file_name = arg; - goto nextarg; - case 'u': - unique_limit = stoi (arg); - goto nextarg; - case 'm': - force_merge = 1; - break; - case 'e': - forced_find_func = find_regexp; - file_name_regexp = 1; - break; - case 'w': - forced_find_func = find_plain; - break; - case 'd': - radix_arg |= RADIX_DEC; - break; - case 'o': - radix_arg |= RADIX_OCT; - break; - case 'x': - radix_arg |= RADIX_HEX; - break; - case 'a': - radix_arg |= RADIX_ALL; - break; - case 'F': - parse_frequency_arg (arg); - goto nextarg; - case 'k': - brace_notation_on = 0; - break; - case 'g': - brace_notation_on = 1; - break; - case 'n': - echo_on = 0; - break; - case 'b': - match_base = 1; - break; - case 'c': - maybe_anchor_usage (); - anchor_dir = PWD_buf; - break; - case 'r': - maybe_anchor_usage (); - anchor_dir = arg; - goto nextarg; - default: - usage (); - } - nextarg:; - } -argsdone: 
- - get_PWD (PWD_buf); - id_file_name = find_id_file (id_file_name); - - if (anchor_dir == NULL) - anchor_dir = strdup (span_dir_name (PWD_buf, id_file_name)); - else if (anchor_dir != PWD_buf) - anchor_dir = strdup (span_dir_name (PWD_buf, anchor_dir)); - - id_FILE = init_id_file (id_file_name, &idh); - bits_vec_size = (idh.idh_files + 7) >> 3; - tree8_levels = tree8_count_levels (idh.idh_files); - - switch (program_name[0]) - { - case 'a': - forced_find_func = find_apropos; - /*FALLTHROUGH*/ - case 'l': - doit = look_id; - break; - case 'g': - doit = grep_id; - break; - case 'e': - doit = edit_id; - break; - case 'p': - forced_find_func = match_file_names; - doit = look_id; - break; - default: - program_name = "[algep]id"; - usage (); - } - - if (argc == 0) - { - (argc++, --argv); - *(char const **)argv = "."; - } - - while (argc) - { - long val = -1; - char *arg = (argc--, *argv++); - - if (forced_find_func) - find_func = forced_find_func; - else if (radix (arg) && (val = stoi (arg)) >= 0) - find_func = find_number; - else if (is_regexp (arg)) - find_func = find_regexp; - else if (arg[0] == '^') - find_func = find_anchor; - else - find_func = find_plain; - - if ((doit == look_id && !force_merge) - || (find_func == find_number - && val > 7 - && radix_arg != RADIX_DEC - && radix_arg != RADIX_OCT - && radix_arg != RADIX_HEX)) - merging = 0; - else - merging = 1; - - buf = malloc (idh.idh_buf_size); - buf2 = malloc (idh.idh_buf_size); - bits_vec = MALLOC (unsigned char, bits_vec_size); - - if (unique_limit) - { - if (!find_non_unique (unique_limit, doit)) - fprintf (stderr, "All identifiers are unique within the first %d characters\n", unique_limit); - exit (0); - } - else if (!(*find_func) (arg, doit)) - { - fprintf (stderr, "%s: not found\n", arg); - continue; - } - } - exit (0); -} - -/* common_prefix_suffix returns non-zero if two file names have a - fully common directory prefix and a common suffix (i.e., they're - eligible for coalescing with brace notation. 
*/ - -int -common_prefix_suffix (char const *file_name_1, char const *file_name_2) -{ - char const *slash_1; - char const *slash_2; - - slash_1 = strrchr (file_name_1, '/'); - slash_2 = strrchr (file_name_2, '/'); - - if (slash_1 == NULL && slash_2 == NULL) - return strequ (suff_name (file_name_1), suff_name (file_name_2)); - if ((slash_1 - file_name_1) != (slash_2 - file_name_2)) - return 0; - if (!strnequ (file_name_1, file_name_2, slash_1 - file_name_1)) - return 0; - return strequ (suff_name (slash_1), suff_name (slash_2)); -} - -void -look_id (char const *name, char **argv) -{ - char const *arg; - char const *dir; - int using_braces = 0; - - if (echo_on) - printf ("%-14s ", name); - while (*argv) - { - arg = *argv++; - if (*argv && brace_notation_on && common_prefix_suffix (arg, *argv)) - { - if (using_braces) - printf (",%s", root_name (arg)); - else - { - dir = dirname (arg); - if (dir && !strequ (dir, ".")) - printf ("%s/", dir); - printf ("{%s", root_name (arg)); - } - using_braces = 1; - } - else - { - if (using_braces) - printf (",%s}%s", root_name (arg), suff_name (arg)); - else - fputs (arg, stdout); - using_braces = 0; - if (*argv) - putchar (' '); - } - } - putchar ('\n'); -} - -void -grep_id (char const *name, char **argv) -{ - char line[BUFSIZ]; - char const *re = NULL; - int line_number; - - if (merging) - { - re = file_regexp (name, "[^a-zA-Z0-9_]_*", "[^a-zA-Z0-9_]"); - if (re) - { - char const *regexp_error = re_comp (re); - if (regexp_error) - { - fprintf (stderr, "%s: Syntax Error: %s (%s)\n", program_name, re, regexp_error); - return; - } - } - } - - line[0] = ' '; /* sentry */ - while (*argv) - { - char const *file_name = *argv++; - FILE *gid_FILE = fopen (file_name, "r"); - - if (gid_FILE == NULL) - { - filerr ("open", file_name); - continue; - } - line_number = 0; - while (fgets (&line[1], sizeof (line), gid_FILE)) - { - line_number++; - if (re) - { - if (!re_exec (line)) - continue; - } - else if (!word_match (name, line)) - continue; - 
printf ("%s:%d: %s", file_name, line_number, &line[1]); - } - fclose (gid_FILE); - } -} - -void -edit_id (char const *name, char **argv) -{ - char re_buffer[BUFSIZ]; - char ed_arg_buffer[BUFSIZ]; - char const *re; - int c; - int skip; - static char const *editor; - static char const *eid_arg; - static char const *eid_right_del; - static char const *eid_left_del; - - if (editor == NULL) - { - editor = getenv ("EDITOR"); - if (editor == NULL) - { - editor = "vi"; - eid_arg = "+1;/%s/"; - eid_left_del = "\\<"; - eid_right_del = "\\>"; - } - } - if (eid_left_del == NULL) - { - eid_arg = getenv ("EIDARG"); - eid_left_del = getenv ("EIDLDEL"); - if (eid_left_del == NULL) - eid_left_del = ""; - eid_right_del = getenv ("EIDRDEL"); - if (eid_right_del == NULL) - eid_right_del = ""; - } - - look_id (name, argv); - savetty (); - for (;;) - { - printf ("Edit? [y1-9^S/nq] "); - fflush (stdout); - chartty (); - c = (getchar () & 0177); - restoretty (); - switch (TOLOWER (c)) - { - case '/': - case ('s' & 037): - putchar ('/'); - skip = skip_to_argv (argv); - if (skip < 0) - continue; - argv += skip; - goto editit; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - putchar (c); - skip = c - '0'; - break; - case 'y': - putchar (c); - /*FALLTHROUGH*/ - case '\n': - case '\r': - skip = 0; - break; - case 'q': - putchar (c); - putchar ('\n'); - exit (0); - case 'n': - putchar (c); - putchar ('\n'); - return; - default: - putchar (c); - putchar ('\n'); - continue; - } - - putchar ('\n'); - while (skip--) - if (*++argv == NULL) - continue; - break; - } -editit: - - if (merging) - re = file_regexp (name, eid_left_del, eid_right_del); - else - re = NULL; - if (re == NULL) - { - re = re_buffer; - sprintf (re_buffer, "%s%s%s", eid_left_del, name, eid_right_del); - } - - switch (fork ()) - { - case -1: - fprintf (stderr, "%s: Cannot fork (%s)\n", program_name, strerror (errno)); - exit (1); - case 0: - argv--; - if (eid_arg) - { - 
argv--; - sprintf (ed_arg_buffer, eid_arg, re); - argv[1] = ed_arg_buffer; - } - *(char const **) argv = editor; - execvp (editor, argv); - filerr ("exec", editor); - default: - { - void (*oldint) __P((int)) = signal (SIGINT, SIG_IGN); - void (*oldquit) __P((int)) = signal (SIGQUIT, SIG_IGN); - - while (wait (0) == -1 && errno == EINTR) - ; - - signal (SIGINT, oldint); - signal (SIGQUIT, oldquit); - } - break; - } -} - -int -skip_to_argv (char **argv) -{ - char pattern[BUFSIZ]; - unsigned int count; - - if (gets (pattern) == NULL) - return -1; - - for (count = 0; *argv; count++, argv++) - if (strcpos (*argv, pattern)) - return count; - return -1; -} - -int -find_plain (char const *arg, doit_t doit) -{ - if (find_token (arg) == 0) - return 0; - gets_past_00 (buf, id_FILE); - assert (*buf); - if (!frequency_wanted (buf)) - return 0; - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - return 1; -} - -int -find_anchor (char const *arg, doit_t doit) -{ - int count; - unsigned int length; - - if (find_token (++arg) == 0) - return 0; - - length = strlen (arg); - count = 0; - if (merging) - memset (bits_vec, 0, bits_vec_size); - while (gets_past_00 (buf, id_FILE) > 0) - { - assert (*buf); - if (!frequency_wanted (buf)) - continue; - if (!strnequ (arg, buf, length)) - break; - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (buf)); - else - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - count++; - } - if (merging && count) - (*doit) (--arg, bits_to_argv (bits_vec)); - - return count; -} - -int -find_regexp (char const *re, doit_t doit) -{ - int count; - char const *regexp_error; - - regexp_error = re_comp (re); - if (regexp_error) - { - fprintf (stderr, "%s: Syntax Error: %s (%s)\n", program_name, re, regexp_error); - return 0; - } - fseek (id_FILE, idh.idh_tokens_offset, SEEK_SET); - - count = 0; - if (merging) - memset (bits_vec, 0, bits_vec_size); - while (gets_past_00 (buf, id_FILE) > 0) - { - assert (*buf); - if (!frequency_wanted (buf)) - continue; - 
if (!re_exec (buf)) - continue; - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (buf)); - else - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - count++; - } - if (merging && count) - (*doit) (re, bits_to_argv (bits_vec)); - - return count; -} - -int -find_number (char const *arg, doit_t doit) -{ - int count; - int rdx; - int val; - int hit_digits = 0; - - rdx = (val = stoi (arg)) ? RADIX_ALL : radix (arg); - fseek (id_FILE, idh.idh_tokens_offset, SEEK_SET); - - count = 0; - if (merging) - memset (bits_vec, 0, bits_vec_size); - while (gets_past_00 (buf, id_FILE) > 0) - { - if (hit_digits) - { - if (!isdigit (*buf)) - break; - } - else - { - if (isdigit (*buf)) - hit_digits = 1; - } - - if (!((radix_arg ? radix_arg : rdx) & radix (buf)) - || stoi (buf) != val) - continue; - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (buf)); - else - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - count++; - } - if (merging && count) - (*doit) (arg, bits_to_argv (bits_vec)); - - return count; -} - -/* Find identifiers that are non-unique within the first `count' - characters. 
*/ -int -find_non_unique (unsigned int limit, doit_t doit) -{ - char *old = buf; - char *new = buf2; - int consecutive = 0; - int count = 0; - char name[1024]; - - if (limit <= 1) - usage (); - assert (limit < sizeof(name)); - - name[0] = '^'; - *new = '\0'; - fseek (id_FILE, idh.idh_tokens_offset, SEEK_SET); - while (gets_past_00 (old, id_FILE) > 0) - { - char *tmp; - if (!(tok_flags (old) & TOK_NAME)) - continue; - tmp = old; - old = new; - new = tmp; - if (!strnequ (new, old, limit)) - { - if (consecutive && merging) - { - strncpy (&name[1], old, limit); - (*doit) (name, bits_to_argv (bits_vec)); - } - consecutive = 0; - continue; - } - if (!consecutive++) - { - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (old)); - else - (*doit) (old, tree8_to_argv (tok_hits_addr (old))); - count++; - } - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (new)); - else - (*doit) (new, tree8_to_argv (tok_hits_addr (new))); - count++; - } - if (consecutive && merging) - { - strncpy (&name[1], new, limit); - (*doit) (name, bits_to_argv (bits_vec)); - } - return count; -} - -int -find_apropos (char const *arg, doit_t doit) -{ - int count; - - fseek (id_FILE, idh.idh_tokens_offset, SEEK_SET); - - count = 0; - if (merging) - memset (bits_vec, 0, bits_vec_size); - while (gets_past_00 (buf, id_FILE) > 0) - { - assert (*buf); - if (!frequency_wanted (buf)) - continue; - if (strcpos (buf, arg) == NULL) - continue; - if (merging) - tree8_to_bits (bits_vec, tok_hits_addr (buf)); - else - (*doit) (buf, tree8_to_argv (tok_hits_addr (buf))); - count++; - } - if (merging && count) - (*doit) (arg, bits_to_argv (bits_vec)); - - return count; -} - -void -parse_frequency_arg (char const *arg) -{ - if (*arg == '-') - frequency_low = 1; - else - { - frequency_low = atoi (arg); - while (isdigit (*arg)) - arg++; - if (*arg == '-') - arg++; - } - if (*arg) - frequency_high = atoi (arg); - else if (arg[-1] == '-') - frequency_high = USHRT_MAX; - else - frequency_high = frequency_low; - if 
(frequency_low > frequency_high) - fprintf (stderr, "Bogus frequencies: %u > %u\n", frequency_low, frequency_high); -} - -int -frequency_wanted (char const *tok) -{ - unsigned int count = tok_count (tok); - return (frequency_low <= count && count <= frequency_high); -} - -/* if string `s2' occurs in `s1', return a pointer to the first match. - Ignore differences in alphabetic case. */ -char const * -strcpos (char const *s1, char const *s2) -{ - char const *s1p; - char const *s2p; - char const *s1last; - - for (s1last = &s1[strlen (s1) - strlen (s2)]; s1 <= s1last; s1++) - for (s1p = s1, s2p = s2; TOLOWER (*s1p) == TOLOWER (*s2p); s1p++) - if (*++s2p == '\0') - return s1; - return NULL; -} - -/* Convert the regular expression that we used to locate identifiers - in the id database into one suitable for locating the identifiers - in files. */ -char const * -file_regexp (char const *name0, char const *left_delimit, char const *right_delimit) -{ - static char re_buffer[BUFSIZ]; - char *name = (char *) name0; - - if (find_func == find_number && merging) - { - sprintf (re_buffer, "%s0*[Xx]*0*%d[Ll]*%s", left_delimit, stoi (name), right_delimit); - return re_buffer; - } - - if (!is_regexp (name) && name[0] != '^') - return NULL; - - if (name[0] == '^') - name0++; - else - left_delimit = ""; - while (*++name) - ; - if (*--name == '$') - *name = '\0'; - else - right_delimit = ""; - - sprintf (re_buffer, "%s%s%s", left_delimit, name0, right_delimit); - return re_buffer; -} - -off_t -find_token (char const *token_0) -{ - off_t offset = 0; - off_t start = idh.idh_tokens_offset - 2; - off_t end = idh.idh_end_offset; - off_t anchor_offset = 0; - int order = -1; - - while (start < end) - { - int c; - int incr = 1; - char const *token; - - offset = start + (end - start) / 2; - fseek (id_FILE, offset, SEEK_SET); - offset += skip_past_00 (id_FILE); - if (offset >= end) - { - offset = start + 2; - fseek (id_FILE, offset, SEEK_SET); - } - - /* compare the token names */ - token = 
token_0; - while (*token == (c = getc (id_FILE)) && *token && c) - { - token++; - incr++; - } - if (c && !*token && find_func == find_anchor) - anchor_offset = offset; - order = *token - c; - - if (order < 0) - end = offset - 2; - else if (order > 0) - start = offset + incr + skip_past_00 (id_FILE) - 2; - else - break; - } - - if (order) - { - if (anchor_offset) - offset = anchor_offset; - else - return 0; - } - fseek (id_FILE, offset, SEEK_SET); - return offset; -} - -/* Are there any regexp meta-characters in name?? */ -int -is_regexp (char *name) -{ - int backslash = 0; - - if (*name == '^') - name++; - while (*name) - { - if (*name == '\\') - { - if (strchr ("<>", name[1])) - return 1; - name++, backslash++; - } - else if (strchr ("[]{}().*+^$", *name)) - return 1; - name++; - } - if (backslash) - while (*name) - { - if (*name == '\\') - strcpy (name, name + 1); - name++; - } - return 0; -} - -/* file_name_wildcard implements a simple pattern matcher that - emulates the shell wild card capability. - - * - any string of chars - ? 
- any char - [] - any char in set (if first char is !, any not in set) - \ - literal match next char */ -int -file_name_wildcard (char const *re, char const *fn) -{ - int c; - int i; - char set[256]; - int revset; - - while ((c = *re++) != '\0') - { - if (c == '*') - { - if (*re == '\0') - return 1; /* match anything at end */ - while (*fn != '\0') - { - if (file_name_wildcard (re, fn)) - return 1; - ++fn; - } - return 0; - } - else if (c == '?') - { - if (*fn++ == '\0') - return 0; - } - else if (c == '[') - { - c = *re++; - memset (set, 0, 256); - if (c == '!') - { - revset = 1; - c = *re++; - } - else - revset = 0; - while (c != ']') - { - if (c == '\\') - c = *re++; - set[c] = 1; - if ((*re == '-') && (*(re + 1) != ']')) - { - re += 1; - while (++c <= *re) - set[c] = 1; - ++re; - } - c = *re++; - } - if (revset) - for (i = 1; i < 256; ++i) - set[i] = !set[i]; - if (!set[(int)*fn++]) - return 0; - } - else - { - if (c == '\\') - c = *re++; - if (c != *fn++) - return 0; - } - } - return (*fn == '\0'); -} - -/* match_file_names implements the pid tool. This matches the *names* - of files in the database against the input pattern rather than the - *contents* of the files. 
*/ - -int -match_file_names (char const *re, doit_t doit) -{ - char const *abs_name; - struct idarg *ida = id_args; - int i; - int count = 0; - int matched; - - if (file_name_regexp) - { - char const *regexp_error = re_comp (re); - if (regexp_error) - { - fprintf (stderr, "%s: Syntax Error: %s (%s)\n", program_name, re, regexp_error); - return 0; - } - } - - for (i = 0; i < idh.idh_files; i++, ida++) - { - if (*ida->ida_arg == 0) - continue; - if (match_base) - { - abs_name = strrchr (ida->ida_arg, '/'); - if (abs_name == NULL) - abs_name = ida->ida_arg; - } - else - abs_name = span_file_name (anchor_dir, ida->ida_arg); - if (file_name_regexp) - matched = re_exec (abs_name); - else - matched = file_name_wildcard (re, abs_name); - if (matched) - { - BITSET (bits_vec, i); - ++count; - } - } - if (count) - (*doit) (re, bits_to_argv (bits_vec)); - return count; -} - -/* Does `name' occur in `line' delimited by non-alphanumerics?? */ -int -word_match (char const *name0, char const *line) -{ - char const *name = name0; - - for (;;) - { - /* find an initial-character match */ - while (*line != *name) - { - if (*line == '\0' || *line == '\n') - return 0; - line++; - } - /* do we have a word delimiter on the left ?? */ - if (isalnum (line[-1])) - { - line++; - continue; - } - /* march down both strings as long as we match */ - while (*++name == *++line) - ; - /* is this the end of `name', is there a word delimiter ?? */ - if (*name == '\0' && !IS_ALNUM (*line)) - return 1; - name = name0; - } -} - -/* Use the C lexical rules to determine an ascii number's radix. The - radix is returned as a bit map, so that more than one radix may - apply. In particular, it is impossible to determine the radix of - 0, so return all possibilities. 
*/ -int -radix (char const *name) -{ - if (!isdigit (*name)) - return 0; - if (*name != '0') - return RADIX_DEC; - name++; - if (*name == 'x' || *name == 'X') - return RADIX_HEX; - while (*name && *name == '0') - name++; - return (RADIX_OCT | ((*name) ? 0 : RADIX_DEC)); -} - -/* Convert an ascii string number to an integer. Determine the radix - before converting. */ -int -stoi (char const *name) -{ - switch (radix (name)) - { - case RADIX_DEC: - return (dtoi (name)); - case RADIX_OCT: - return (otoi (&name[1])); - case RADIX_HEX: - return (xtoi (&name[2])); - case RADIX_DEC | RADIX_OCT: - return 0; - default: - return -1; - } -} - -/* Convert an ascii octal number to an integer. */ -int -otoi (char const *name) -{ - int n = 0; - - while (*name >= '0' && *name <= '7') - { - n *= 010; - n += *name++ - '0'; - } - if (*name == 'l' || *name == 'L') - name++; - return (*name ? -1 : n); -} - -/* Convert an ascii decimal number to an integer. */ -int -dtoi (char const *name) -{ - int n = 0; - - while (isdigit (*name)) - { - n *= 10; - n += *name++ - '0'; - } - if (*name == 'l' || *name == 'L') - name++; - return (*name ? -1 : n); -} - -/* Convert an ascii hex number to an integer. */ -int -xtoi (char const *name) -{ - int n = 0; - - while (isxdigit (*name)) - { - n *= 0x10; - if (isdigit (*name)) - n += *name++ - '0'; - else if (islower (*name)) - n += 0xa + *name++ - 'a'; - else - n += 0xA + *name++ - 'A'; - } - if (*name == 'l' || *name == 'L') - name++; - return (*name ? 
-1 : n); -} - -unsigned char * -tree8_to_bits (unsigned char *bv_0, unsigned char const *hits_tree8) -{ - unsigned char* bv = bv_0; - tree8_to_bits_1 (&bv, &hits_tree8, tree8_levels); - return bv_0; -} - -void -tree8_to_bits_1 (unsigned char **bv, unsigned char const **hits_tree8, int level) -{ - int hits = *(*hits_tree8)++; - - if (--level) - { - int incr = 1 << ((level - 1) * 3); - int bit; - for (bit = 1; bit & 0xff; bit <<= 1) - { - if (bit & hits) - tree8_to_bits_1 (bv, hits_tree8, level); - else - *bv += incr; - } - } - else - *(*bv)++ |= hits; -} - -char ** -bits_to_argv (unsigned char const *bv) -{ - int const reserved_argv_slots = 3; - static char **argv_0; - char **argv; - struct idarg *ida = id_args; - struct idarg *end = &id_args[idh.idh_files]; - - if (argv_0 == NULL) - argv_0 = MALLOC (char *, idh.idh_files + reserved_argv_slots + 2); - argv = &argv_0[reserved_argv_slots]; - - for (;;) - { - int hits; - int bit; - - while (*bv == 0) - { - bv++; - ida += 8; - if (ida >= end) - goto out; - } - hits = *bv++; - for (bit = 1; bit & 0xff; bit <<= 1) - { - if (bit & hits) - { - if (!(ida->ida_flags & IDA_RELATIVE)) - { - char const *abs_name = span_file_name (anchor_dir, ida->ida_arg); - char const *rel_name = relative_file_name (PWD_buf, abs_name); - char const *short_name = (strlen (rel_name) > strlen (abs_name) - ? 
abs_name : rel_name); - if (!strequ (short_name, ida->ida_arg)) - ida->ida_arg = strdup (short_name); - ida->ida_flags |= IDA_RELATIVE; - } - *argv++ = ida->ida_arg; - } - if (++ida >= end) - goto out; - } - } -out: - *argv = NULL; - return &argv_0[reserved_argv_slots]; -} - -char ** -tree8_to_argv (unsigned char const *hits_tree8) -{ - memset (bits_vec, 0, bits_vec_size); - return bits_to_argv (tree8_to_bits (bits_vec, hits_tree8)); -} - -#if HAVE_TERMIOS_H - -#include -struct termios linemode; -struct termios charmode; -struct termios savemode; -#define GET_TTY_MODES(modes) tcgetattr (0, (modes)) -#define SET_TTY_MODES(modes) tcsetattr(0, TCSANOW, (modes)) - -#else /* not HAVE_TERMIOS_H */ - -# if HAVE_SYS_IOCTL_H -# include -# endif - -# if HAVE_TERMIO_H - -# include -struct termio linemode; -struct termio charmode; -struct termio savemode; -#define GET_TTY_MODES(modes) ioctl (0, TCGETA, (modes)) -#define SET_TTY_MODES(modes) ioctl (0, TCSETA, (modes)) - -# else /* not HAVE_TERMIO_H */ - -# if HAVE_SGTTY_H - -# include -struct sgttyb linemode; -struct sgttyb charmode; -struct sgttyb savemode; - -# ifdef TIOCGETP -#define GET_TTY_MODES(modes) ioctl (0, TIOCGETP, (modes)) -#define SET_TTY_MODES(modes) ioctl (0, TIOCSETP, (modes)) -# else -#define GET_TTY_MODES(modes) gtty (0, (modes)) -#define SET_TTY_MODES(modes) stty (0, (modes)) -# endif - -void -savetty (void) -{ -# ifdef TIOCGETP - ioctl(0, TIOCGETP, &savemode); -# else - gtty(0, &savemode); -# endif - charmode = linemode = savemode; - - charmode.sg_flags &= ~ECHO; - charmode.sg_flags |= RAW; - - linemode.sg_flags |= ECHO; - linemode.sg_flags &= ~RAW; -} - -# endif /* not HAVE_SGTTY_H */ -# endif /* not HAVE_TERMIO_H */ -#endif /* not HAVE_TERMIOS_H */ - -#if HAVE_TERMIOS_H || HAVE_TERMIO_H - -void -savetty (void) -{ - GET_TTY_MODES (&savemode); - charmode = linemode = savemode; - - charmode.c_lflag &= ~(ECHO | ICANON | ISIG); - charmode.c_cc[VMIN] = 1; - charmode.c_cc[VTIME] = 0; - - linemode.c_lflag |= 
(ECHO | ICANON | ISIG); - linemode.c_cc[VEOF] = 'd' & 037; - linemode.c_cc[VEOL] = 0377; -} - -#endif - -#if HAVE_TERMIOS_H || HAVE_TERMIO_H || HAVE_SGTTY_H - -void -restoretty (void) -{ - SET_TTY_MODES (&savemode); -} - -void -linetty (void) -{ - SET_TTY_MODES (&linemode); -} - -void -chartty (void) -{ - SET_TTY_MODES (&charmode); -} - -#endif diff --git a/misc.c b/misc.c deleted file mode 100644 index 8e98763..0000000 --- a/misc.c +++ /dev/null @@ -1,126 +0,0 @@ -/* misc.c -- miscellaneous common functions - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include -#include -#include - -#include -#include "strxtra.h" -#include "misc.h" - -#if !HAVE_BASENAME -char * -basename (char const *path) -{ - char *base; - - base = strrchr (path, '/'); - if (base) - return ++base; - else - return path; -} -#endif - -#if !HAVE_DIRNAME -char * -dirname (char const *path) -{ - char *base; - - base = strrchr (path, '/'); - if (base) - return strndup (path, base - path); - else - return "."; -} -#endif - -/* This is like fgets(3s), except that lines are delimited by NULs - rather than newlines. Also, we return the number of characters - gotten rather than the address of buf0. 
*/ -int -fgets0 (char *buf0, int size, FILE * in_FILE) -{ - char *buf; - int c; - char *end; - - buf = buf0; - end = &buf[size]; - while ((c = getc (in_FILE)) > 0 && buf < end) - *buf++ = c; - *buf = '\0'; - return (buf - buf0); -} - -extern char const *program_name; - -void -filerr (char const *syscall, char const *file_name) -{ - fprintf (stderr, "%s: Cannot %s `%s' (%s)\n", program_name, syscall, file_name, strerror (errno)); -} - -int -tree8_count_levels (unsigned int cardinality) -{ - int levels = 1; - cardinality--; - while (cardinality >>= 3) - ++levels; - return levels; -} - -int -gets_past_00 (char *tok, FILE *input_FILE) -{ - int got = 0; - int c; - do - { - do - { - got++; - c = getc (input_FILE); - *tok++ = c; - } - while (c > 0); - got++; - c = getc (input_FILE); - *tok++ = c; - } - while (c > 0); - return got - 2; -} - -int -skip_past_00 (FILE *input_FILE) -{ - int skipped = 0; - do - { - do - skipped++; - while (getc (input_FILE) > 0); - skipped++; - } - while (getc (input_FILE) > 0); - return skipped; -} diff --git a/misc.h b/misc.h deleted file mode 100644 index e4ee028..0000000 --- a/misc.h +++ /dev/null @@ -1,38 +0,0 @@ -/* misc.c -- defs for interface to misc.c - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _misc_h_ -#define _misc_h_ - -#if HAVE_BASENAME -char *basename (); -#else -char *basename __P((char const *path)); -#endif -#if HAVE_DIRNAME -char *dirname (); -#else -char *dirname __P((char const *path)); -#endif -int fgets0 __P((char *buf0, int size, FILE *in_FILE)); -void filerr __P((char const *syscall, char const *file_name)); -int tree8_count_levels __P((unsigned int cardinality)); -int gets_past_00 __P((char *tok, FILE *input_FILE)); -int skip_past_00 __P((FILE *input_FILE)); - -#endif /* not _misc_h_ */ diff --git a/mkid.1 b/mkid.1 deleted file mode 100644 index 6cdf1a7..0000000 --- a/mkid.1 +++ /dev/null @@ -1,187 +0,0 @@ -.TH MKID 1 -.SH NAME -mkid \- make an id database -.SH SYNOPSIS -.B mkid -.RB [ \-v ] -.RB [ \-f \^out-file] -.RB [ \-s \^directory] -.RB [ \-r \^directory] -.RB [ \-S \^scanarg] -.RB [ \-a \^arg-file] -.RB [ \- ] -.RB [ \-u ] -.RB [ files... ] -.SH DESCRIPTION -.I Mkid\^ -builds a database that stores numbers and identifier names, as well -as the names of the files in which they occur. -.I Mkid\^ -is particularly useful with large programs spread out across multiple -source files. It serves as an aid for program maintenance and as a -.I guide\^ -for perusing a program. -.PP -The following options are recognized: -.TP 10 -.B \-v -Verbose. -Report -.IR mkid 's -progress in building the database. The output comes on standard error. -.TP 10 -.BI \-f out-file\^ -Write the finished database into -.IR out-file . -.B ID\^ -is the default. -Normally the names of the files scanned are written to the database -as specified in the argument list. If the database sepcified with -.B \-f -is not located in the current directory, then the file names are -adjusted so that they are relative to the directory that the -database is located in. -.TP 10 -.BI \-s directory\^ -.TP 10 -.BI \-r directory\^ -If -.IR mkid 's -attempt to open a source-file fails, it will try to checkout the -corresponding SCCS or RCS file if present. 
The -.B \-s -option tells -.I mkid\^ -which directory holds the SCCS file. -Similarly, the -.B \-r -option tells -.I mkid\^ -which directory holds the RCS file. -If neither the RCS or SCCS directories are specified, -.I mkid\^ -will first look for an SCCS file in the current directory, then in -.BI sccs , -and finally in -.BI SCCS . -It will then look for an RCS file in the current directory, and finally in -.BI RCS . -.TP 10 -.BI \-a arg-file\^ -Open and read -.I arg-file\^ -in order to obtain a list of source file arguments. Source file names -must appear one to a line. -.BI \-S , -.BI \-r , -and -.BI \-s -arguments may also be placed one per line in -.IR file . -They are distinguished from source file names by their leading `-'. If a file name begins -with `-', it can be distinguished from an argument by explicitly prepending the current -directory string: `./'. -.TP 10 -.B \- -This operates in the same manner as the -.B \-a -option described above, but reads from the standard input instead of a file. -.TP 10 -.B \-u -Update an existing database. Only those files that have been modified -since the database was built will be rescanned. This is a significant -time-saver for updating large databases where few sources have changed. -.TP 10 -.B files... -If neither the -.BI \-a , -.BI \- , -nor -.BI \-u , -arguments have been specified, take file names from the command line. -.TP 10 -.BI \-S scanarg\^ -.I Mkid\^ -scans source files in order to obtain numbers and identifier names. -Since the lexical rules of languages differ, -.I mkid\^ -applies a different scanning function to each language in order -to conform to that language's lexical rules. -.I Mkid\^ -determines the source file's language by examining its filename -suffix which commonly occurs after a dot (`.'). -The -.B \-S -argument is a way of passing language specific arguments to the -scanner for that language. 
This argument takes a number of forms: -.br --S= -.br --S- -.br -+S- -.br --S// -.br -The first form associates a suffix with a language. -For example -S.c=vhil would cause all .c files to be scanned -as though they were language vhil rather than c. -You may find -out which suffixes are defined for which languages with the following -options: `-S=?' tells which language is bound to -.IR , -`-S?=' tells which suffixes are bound to -.IR , -and `-S?=?' reports all bindings between suffixes and languages. -.PP -The second form passes an argument for processing by the scanner -for a specific language. The third form passes an argument to -all scanners. -.PP -Finally, the // form defines a shell command -to filter the file with. This can be used to run an arbitrary -program to filter the contents of a file before it is passed -to one of the existing language scanners. It is typically -used in conjunction with the plain text scanner. -The first defines a new language, the second -specifies an existing language whose scanner will be used, -and the remaining is an arbitrary shell command. -.PP -You may get a brief summary of the scanner-specific options for a -language by supplying the following option: `-S?'. -.PP -Here is a brief summary of the options for the -.I `asm'\^ -(assembler) language. -.PP -The -.B \-u\^ -option controls whether or not the assembler scanner should strip -off a leading -.I underscore\^ -(`_') character. If your assembler prepends an -.I underscore\^ -to external symbols, then you should tell the scanner to strip it -off, so that references to the same symbol from assembly and from -a high-level language will look the same. -.PP -The -.B \-c\^ -option supplies the character(s) used to begin a comment that extends -to the end of the line. -.PP -The -.B \-a\^ -option indicates character(s) that are legal in names, in addition to -the alpha-numeric characters. If the option appears as `-a', names -that contain these characters are ignored. 
If it appears as `+a', these -names are added to the database. -.SH BUGS -This manual page needs to be more complete about the scanner-specific -arguments. -.PP -At the moment, the only scanners implemented are for C, assembly -language, and plain text. There ought to be scanners for Ada, Pascal, -Fortran, and Lisp. -.SH SEE ALSO -lid(1), deroff(1), detex(1). diff --git a/mkid.c b/mkid.c deleted file mode 100644 index daee166..0000000 --- a/mkid.c +++ /dev/null @@ -1,999 +0,0 @@ -/* mkid.c -- build an identifer database - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "strxtra.h" -#include "alloc.h" -#include "idfile.h" -#include "token.h" -#include "bitops.h" -#include "misc.h" -#include "filenames.h" -#include "hash.h" -#include "scanners.h" - -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#endif - -struct summary -{ - struct token **sum_tokens; - unsigned char const *sum_hits; - struct summary *sum_parent; - union { - struct summary *u_kids[8]; /* when sum_level > 0 */ - struct idarg *u_files[8]; /* when sum_level == 0 */ - } sum_u; -#define sum_kids sum_u.u_kids -#define sum_files sum_u.u_files - unsigned long sum_tokens_size; - unsigned long sum_hits_count; - int sum_free_index; - int sum_level; -}; - -#define MAX_LEVELS 5 /* log_8 of the max # of files: log_8(32768) == 5 */ - -struct token -{ - unsigned short tok_count; - unsigned char tok_flags; - unsigned char tok_hits[MAX_LEVELS]; - char tok_name[1]; -}; - -char *bitsset __P((char *s1, char const *s2, int n)); -char *bitsclr __P((char *s1, char const *s2, int n)); -char *bitsand __P((char *s1, char const *s2, int n)); -char *bitsxor __P((char *s1, char const *s2, int n)); -int bitstst __P((char const *s1, char const *s2, int n)); -int bitsany __P((char const *s, int n)); -struct token *make_token __P((char const *name, int)); -void scan_1_file __P((char const *(*get_token) (FILE*, int*), FILE *source_FILE)); -struct idarg *parse_idargs __P((int argc, char **argv)); -struct idarg *parse_idargs_from_FILE __P((FILE *arg_FILE, struct idarg *idarg)); -void scan_files __P((struct idarg *idarg)); -void report_statistics __P((void)); - -unsigned long token_hash_1 __P((void const *key)); -unsigned long token_hash_2 __P((void const *key)); -int token_hash_cmp __P((void const *x, void const *y)); - -void write_idfile __P((char const *id_file, struct idarg *idargs)); -void bump_current_hits_signature 
__P((void)); -void init_hits_signature __P((int i)); -int bit_to_index __P((int bit)); -int token_qsort_cmp __P((void const *x, void const *y)); -void free_summary_tokens __P((void)); -void summarize __P((void)); -void assert_hits __P((struct summary *summary)); -void write_hits __P((FILE *fp, struct summary *summary, unsigned char const *tail_hits)); -void sign_token __P((struct token *token)); -void add_token_to_summary __P((struct summary *summary, struct token *token)); -void init_summary __P((void)); -struct summary *make_sibling_summary __P((struct summary *summary)); -int count_vec_size __P((struct summary *summary, unsigned char const *tail_hits)); -int count_buf_size __P((struct summary *summary, unsigned char const *tail_hits)); -void usage __P((void)); - -struct hash_table token_table; -struct hash_table file_table; - -/* Miscellaneous statistics */ -long input_chars; -long name_tokens; -long number_tokens; -long string_tokens; -long literal_tokens; -long comment_tokens; -long occurrences; -long heap_size; -long hits_length = 0; -long tokens_length = 0; -long output_length = 0; - -int verbose_flag = 0; -int statistics_flag = 1; - -int args_count = 0; /* # of args to save */ -int scan_count = 0; /* # of files to scan */ -int file_name_count = 0; /* # of files in database */ -int levels = 0; /* ceil(log(8)) of file_name_count */ - -unsigned char current_hits_signature[MAX_LEVELS]; -#define INIT_TOKENS_SIZE(level) (1 << ((level) + 13)) -struct summary *summary_root; -struct summary *summary_leaf; - -char PWD_buf[BUFSIZ]; /* The current working directory */ -char absolute_idfile_name[BUFSIZ]; /* The absolute name of the database */ -char const *id_file_name = IDFILE; - -char const *program_name; - -void -usage (void) -{ - fprintf (stderr, "\ -Usage: %s [-v] [-f] [(+|-)l[]] [(+|-)S] [-a] [-] [files...]\n\ - -v Verbose: print reports of progress\n\ - -a Open file for arguments\n\ - - Read newline-separated args from stdin\n\ - -l Force files to be scanned as 
until +l\n\ - -S- Pass arg to scanner\n\ - -S.= Scan files with . as \n\ - -S? Print usage documentation for \n\ -\n\ -Version %s", - program_name, VERSION); -#ifdef __DATE__ - fprintf (stderr, "; Made %s %s", __DATE__, __TIME__); -#endif - fputc ('\n', stderr); - exit (1); -} - -void *sbrk (); - -int -main (int argc, char **argv) -{ - struct idarg *idarg_0; - char const *sbrk0; - - program_name = basename ((argc--, *argv++)); - init_scanners (); - - idarg_0 = parse_idargs (argc, argv); - if (idarg_0 == NULL) - { - fprintf (stderr, "Nothing to do...\n"); - return 0; - } - - sbrk0 = (char const *) sbrk (0); - hash_init (&token_table, scan_count * 64, token_hash_1, token_hash_2, token_hash_cmp); - - get_PWD (PWD_buf); - strcpy (absolute_idfile_name, span_file_name (PWD_buf, id_file_name)); - if (access (id_file_name, 06) < 0 - && (errno != ENOENT || access (dirname (id_file_name), 06) < 0)) - { - filerr ("modify", id_file_name); - return 1; - } - - init_hits_signature (0); - init_summary (); - - scan_files (idarg_0); - - if (token_table.ht_fill == 0) - return 0; - - free_summary_tokens (); - free (token_table.ht_vec); - - write_idfile (id_file_name, idarg_0); - heap_size = (char const *) sbrk (0) - sbrk0; - - if (statistics_flag) - report_statistics (); - return 0; -} - -void -scan_files (struct idarg *idarg) -{ - int keep_lang = 0; - - for ( ; idarg->ida_next; idarg = idarg->ida_next) - { - char const *(*scanner) __P((FILE*, int*)); - FILE *source_FILE; - char *arg = idarg->ida_arg; - char const *lang_name = NULL; - char const *suff; - char const *filter; - - if (idarg->ida_index < 0) - { - int op = *arg++; - switch (*arg++) - { - case 'l': - if (*arg == '\0') - { - keep_lang = 0; - lang_name = NULL; - break; - } - if (op == '+') - keep_lang = 1; - lang_name = arg; - break; - case 'S': - set_scan_args (op, strdup (arg)); - break; - default: - usage (); - } - continue; - } - if (!(idarg->ida_flags & IDA_SCAN_ME)) - goto skip; - - suff = strrchr (arg, '.'); - if 
(lang_name == NULL) - { - if (suff == NULL) - suff = ""; - lang_name = get_lang_name (suff); - if (lang_name == NULL) - lang_name = get_lang_name (""); - if (lang_name == NULL) - { - fprintf (stderr, "%s: No language assigned to suffix: `%s'\n", program_name, suff); - goto skip; - } - } - scanner = get_scanner (lang_name); - if (scanner == NULL) - { - fprintf (stderr, "%s: No scanner for language: `%s'\n", program_name, lang_name); - goto skip; - } - filter = get_filter (suff); - source_FILE = open_source_FILE (arg, filter); - if (source_FILE == NULL) - goto skip; - if (verbose_flag) - { - printf ("%s: ", lang_name); - printf (filter ? filter : "%s", arg); - fflush (stdout); - } - scan_1_file (scanner, source_FILE); - if (verbose_flag) - putchar ('\n'); - close_source_FILE (source_FILE, filter); - skip: - if (!keep_lang) - lang_name = NULL; - if (idarg->ida_index < file_name_count) - { - if (current_hits_signature[0] & 0x80) - summarize (); - bump_current_hits_signature (); - } - } -} - -void -report_statistics (void) -{ - printf ("Name=%ld, ", name_tokens); - printf ("Number=%ld, ", number_tokens); - printf ("String=%ld, ", string_tokens); - printf ("Literal=%ld, ", literal_tokens); - printf ("Comment=%ld\n", comment_tokens); - - printf ("Files=%d, ", scan_count); - printf ("Tokens=%ld, ", occurrences); - printf ("Bytes=%ld Kb, ", input_chars / 1024); - printf ("Heap=%ld Kb, ", heap_size / 1024); - printf ("Output=%ld (%ld tok, %ld hit)\n", output_length, tokens_length, hits_length); - - printf ("Load=%ld/%ld=%.2f, ", token_table.ht_fill, token_table.ht_size, - (double) token_table.ht_fill / (double) token_table.ht_size); - printf ("Rehash=%d, ", token_table.ht_rehashes); - printf ("Probes=%ld/%ld=%.2f, ", token_table.ht_probes, token_table.ht_lookups, - (double) token_table.ht_probes / (double) token_table.ht_lookups); - printf ("Freq=%ld/%ld=%.2f\n", occurrences, token_table.ht_fill, - (double) occurrences / (double) token_table.ht_fill); -} - -struct idarg * 
-parse_idargs (int argc, char **argv) -{ - struct idarg *idarg; - struct idarg *idarg_0; - char *arg; - int op; - FILE *arg_FILE = NULL; - int args_from = 0; - enum { - AF_CMDLINE = 0x1, /* file args came on command line */ - AF_FILE = 0x2, /* file args came from a file (-f) */ - AF_USAGE = 0x8 - }; /* no file args necessary: usage query */ - - idarg = idarg_0 = CALLOC (struct idarg, 1); - - /* Process some arguments, and snarf-up some others for processing - later. */ - while (argc) - { - arg = (argc--, *argv++); - if (*arg != '-' && *arg != '+') - { - /* arguments are from command line (not pipe) */ - args_from |= AF_CMDLINE; - idarg->ida_arg = arg; - idarg->ida_flags = IDA_SCAN_ME; - idarg->ida_index = file_name_count++; - scan_count++; - idarg = (idarg->ida_next = CALLOC (struct idarg, 1)); - - continue; - } - op = *arg++; - switch (*arg++) - { - case '\0': - args_from |= AF_FILE; - idarg = parse_idargs_from_FILE (stdin, idarg); - break; - case 'a': - arg_FILE = fopen (arg, "r"); - if (arg_FILE == NULL) - filerr ("open", arg); - else - { - args_from |= AF_FILE; - idarg = parse_idargs_from_FILE (arg_FILE, idarg); - } - break; - case 'f': - id_file_name = arg; - break; - case 'v': - verbose_flag = 1; - break; - case 'S': - if (strchr (&arg[-2], '?')) - { - set_scan_args (op, arg); - args_from |= AF_USAGE; - } - /*FALLTHROUGH */ - case 'l': - idarg->ida_arg = &arg[-2]; - idarg->ida_index = -1; - idarg = (idarg->ida_next = CALLOC (struct idarg, 1)); - - args_count++; - break; - default: - usage (); - } - } - - if (args_from & AF_USAGE) - exit (0); - /* File args should only come from one place. Ding the user if - arguments came from multiple places, or if none were supplied at - all. 
*/ - switch (args_from) - { - case AF_CMDLINE: - case AF_FILE: - if (file_name_count > 0) - break; - /*FALLTHROUGH */ - case 0: - fprintf (stderr, "%s: Use -u, -f, or cmd-line for file args!\n", program_name); - usage (); - default: - fprintf (stderr, "%s: Use only one of: -u, -f, or cmd-line for file args!\n", program_name); - usage (); - } - - if (scan_count == 0) - return NULL; - - return idarg_0; -} - - -/* Cons up a list of idarg as supplied in a file. */ -struct idarg * -parse_idargs_from_FILE (FILE *arg_FILE, struct idarg *idarg) -{ - int file_count; - char buf[BUFSIZ]; - char *arg; - - file_count = 0; - while (fgets (buf, sizeof (buf), arg_FILE)) - { - idarg->ida_arg = arg = strndup (buf, strlen (buf) - 1); - if (*arg == '+' || *arg == '-') - idarg->ida_index = -1; - else - { - idarg->ida_flags = IDA_SCAN_ME; - idarg->ida_index = file_name_count++; - scan_count++; - } - idarg = idarg->ida_next = CALLOC (struct idarg, 1); - } - return idarg; -} - -void -scan_1_file (get_token_t get_token, FILE *source_FILE) -{ - struct stat stat_buf; - struct token **slot; - char const *key; - int bytes = 0; - int total_tokens = 0; - int new_tokens = 0; - int distinct_tokens = 0; - int flags; - struct token *token; - - if (fstat (fileno (source_FILE), &stat_buf) == 0) - { - bytes = stat_buf.st_size; - input_chars += bytes; - } - - while ((key = (*get_token) (source_FILE, &flags)) != NULL) - { - if (*key == '\0') - continue; - total_tokens++; - slot = (struct token **) hash_lookup (&token_table, key - offsetof (struct token, tok_name)); - token = *slot; - if (token) - { - token->tok_flags |= flags; - if (token->tok_count < USHRT_MAX) - token->tok_count++; - if (!(token->tok_hits[0] & current_hits_signature[0])) - { - sign_token (token); - distinct_tokens++; - } - } else { - *slot = token = make_token (key, flags); - sign_token (token); - distinct_tokens++; - new_tokens++; - if (token_table.ht_fill++ >= token_table.ht_capacity) - rehash (&token_table); - } - } - if 
(verbose_flag) - { - printf (" uniq=%d/%d", distinct_tokens, total_tokens); - if (total_tokens != 0) - printf ("=%.2f", (double) distinct_tokens / (double) total_tokens); - printf (", new=%d/%d", new_tokens, distinct_tokens); - if (distinct_tokens != 0) - printf ("=%.2f", (double) new_tokens / (double) distinct_tokens); - } -} - -/* As the database is written, may need to adjust the file names. If - we are generating the ID file in a remote directory, then adjust - the file names to be relative to the location of the ID database. - - (This would be a common useage if you want to make a database for a - directory which you have no write access to, so you cannot create - the ID file.) */ -void -write_idfile (char const *file_name, struct idarg *idarg) -{ - struct token **tokens; - int i; - FILE *id_FILE; - struct idhead idh; - int fixup_names; - char *lsl; - int buf_size; - int vec_size; - int tok_size; - int max_buf_size = 0; - int max_vec_size = 0; - - if (verbose_flag) - printf ("Sorting tokens...\n"); - assert (summary_root->sum_hits_count == token_table.ht_fill); - tokens = REALLOC (summary_root->sum_tokens, struct token *, token_table.ht_fill); - qsort (tokens, token_table.ht_fill, sizeof (struct token *), token_qsort_cmp); - - if (verbose_flag) - printf ("Writing `%s'...\n", file_name); - lsl = strrchr (relative_file_name (PWD_buf, absolute_idfile_name), '/'); - if (lsl == NULL) - { - /* The database is in the cwd, don't adjust the names */ - fixup_names = 0; - } - else - { - /* The database is not in cwd, adjust names so they are relative - to the location of the database, make absolute_idfile_name just be the - directory path to ID. 
*/ - fixup_names = 1; - *(lsl + 1) = '\0'; - } - id_FILE = fopen (file_name, "w+b"); - if (id_FILE == NULL) - { - filerr ("create", file_name); - exit (1); - } - idh.idh_magic[0] = IDH_MAGIC_0; - idh.idh_magic[1] = IDH_MAGIC_1; - idh.idh_version = IDH_VERSION; - idh.idh_flags = IDH_COUNTS; - - /* write out the list of pathnames */ - fseek (id_FILE, sizeof_idhead (), 0); - idh.idh_args_offset = ftell (id_FILE); - for ( ; idarg->ida_next; idarg = idarg->ida_next) - { - if (*idarg->ida_arg != '-' && fixup_names) - fputs (relative_file_name (absolute_idfile_name, span_file_name (PWD_buf, idarg->ida_arg)), id_FILE); - else - fputs (idarg->ida_arg, id_FILE); - putc ('\0', id_FILE); - } - idh.idh_files = file_name_count; - - /* write out the list of identifiers */ - - putc ('\0', id_FILE); - putc ('\0', id_FILE); - idh.idh_tokens_offset = ftell (id_FILE); - - for (i = 0; i < token_table.ht_fill; i++, tokens++) - { - struct token *token = *tokens; - occurrences += token->tok_count; - if (token->tok_flags & TOK_NUMBER) - number_tokens++; - if (token->tok_flags & TOK_NAME) - name_tokens++; - if (token->tok_flags & TOK_STRING) - string_tokens++; - if (token->tok_flags & TOK_LITERAL) - literal_tokens++; - if (token->tok_flags & TOK_COMMENT) - comment_tokens++; - - fputs (token->tok_name, id_FILE); - putc ('\0', id_FILE); - if (token->tok_count > 0xff) - token->tok_flags |= TOK_SHORT_COUNT; - putc (token->tok_flags, id_FILE); - putc (token->tok_count & 0xff, id_FILE); - if (token->tok_flags & TOK_SHORT_COUNT) - putc (token->tok_count >> 8, id_FILE); - - vec_size = count_vec_size (summary_root, token->tok_hits + levels); - buf_size = count_buf_size (summary_root, token->tok_hits + levels); - hits_length += buf_size; - tok_size = strlen (token->tok_name) + 1; - tokens_length += tok_size; - buf_size += tok_size + sizeof (token->tok_flags) + sizeof (token->tok_count) + 2; - if (buf_size > max_buf_size) - max_buf_size = buf_size; - if (vec_size > max_vec_size) - max_vec_size = 
vec_size; - - write_hits (id_FILE, summary_root, token->tok_hits + levels); - putc ('\0', id_FILE); - putc ('\0', id_FILE); - } - assert_hits (summary_root); - idh.idh_tokens = token_table.ht_fill; - output_length = ftell (id_FILE); - idh.idh_end_offset = output_length - 2; - idh.idh_buf_size = max_buf_size; - idh.idh_vec_size = max_vec_size; - - write_idhead (id_FILE, &idh); - fclose (id_FILE); -} - -unsigned long -token_hash_1 (void const *key) -{ - return_STRING_HASH_1 (((struct token const *) key)->tok_name); -} - -unsigned long -token_hash_2 (void const *key) -{ - return_STRING_HASH_2 (((struct token const *) key)->tok_name); -} - -int -token_hash_cmp (void const *x, void const *y) -{ - return_STRING_COMPARE (((struct token const *) x)->tok_name, - ((struct token const *) y)->tok_name); -} - -int -token_qsort_cmp (void const *x, void const *y) -{ - return_STRING_COMPARE ((*(struct token const *const *) x)->tok_name, - (*(struct token const *const *) y)->tok_name); -} - -struct token * -make_token (char const *name, int flags) -{ - struct token *token = (struct token *) malloc (sizeof (struct token) + strlen (name)); - - if (!token) - { - fprintf (stderr, "malloc failure! 
\n"); - exit (1); - } - token->tok_count = 1; - token->tok_flags = flags; - memset (token->tok_hits, 0, sizeof (token->tok_hits)); - strcpy (token->tok_name, name); - - return token; -} - -/* ///////////// summary stuff //////////////////////////////////////////// */ - -void -bump_current_hits_signature (void) -{ - unsigned char *hits = current_hits_signature; - while (*hits & 0x80) - *hits++ = 1; - *hits <<= 1; -} - -void -init_hits_signature (int i) -{ - unsigned char *hits = current_hits_signature; - unsigned char const *end = ¤t_hits_signature[MAX_LEVELS]; - while (hits < end) - { - *hits = 1 << (i & 7); - i >>= 3; - hits++; - } -} - -int -bit_to_index (int bit) -{ - int i = 0; - while (bit >>= 1) - i++; - return i; -} - -void -free_summary_tokens (void) -{ - struct summary *summary = summary_leaf; - while (summary != summary_root) - { - free (summary->sum_tokens); - summary = summary->sum_parent; - } -} - -void -summarize (void) -{ - unsigned char const *hits_sig = current_hits_signature; - struct summary *summary = summary_leaf; - - do - { - unsigned long count = summary->sum_hits_count; - unsigned char *hits = MALLOC (unsigned char, count + 1); - unsigned int level = summary->sum_level; - struct token **tokens = summary->sum_tokens; - unsigned long init_size = INIT_TOKENS_SIZE (summary->sum_level); - - if (verbose_flag) - { - char const *fmt; - if (count < init_size / 2) - fmt = "level %d: %ld < %ld/2\n"; - else if (count > init_size * 2) - fmt = "level %d: %ld > %ld*2\n"; - else if (count < init_size) - fmt = "level %d: %ld < %ld\n"; - else if (count > init_size) - fmt = "level %d: %ld > %ld\n"; - else - fmt = "level %d: %ld == %ld\n"; - printf (fmt, summary->sum_level, count, init_size); - } - - qsort (tokens, count, sizeof (struct token *), token_qsort_cmp); - summary->sum_hits = hits; - while (count--) - { - unsigned char *hit = &(*tokens++)->tok_hits[level]; - *hits++ = *hit; - *hit = 0; - } - *hits++ = 0; - if (summary->sum_parent) - { - free 
(summary->sum_tokens); - summary->sum_tokens = 0; - } - summary = summary->sum_parent; - } - while (*++hits_sig & 0x80); - summary_leaf = make_sibling_summary (summary_leaf); -} - -void -init_summary (void) -{ - unsigned long size = INIT_TOKENS_SIZE (0); - summary_root = summary_leaf = CALLOC (struct summary, 1); - summary_root->sum_tokens_size = size; - summary_root->sum_tokens = MALLOC (struct token *, size); -} - -struct summary * -make_sibling_summary (struct summary *summary) -{ - struct summary *parent = summary->sum_parent; - unsigned long size; - - if (parent == NULL) - { - levels++; - summary_root = summary->sum_parent = parent = CALLOC (struct summary, 1); - parent->sum_level = levels; - parent->sum_kids[0] = summary; - parent->sum_hits_count = summary->sum_hits_count; - parent->sum_free_index = 1; - size = INIT_TOKENS_SIZE (levels); - if (summary->sum_tokens_size >= size) - { - parent->sum_tokens_size = summary->sum_tokens_size; - parent->sum_tokens = summary->sum_tokens; - } - else - { - parent->sum_tokens_size = size; - parent->sum_tokens = REALLOC (summary->sum_tokens, struct token *, size); - } - summary->sum_tokens = 0; - } - if (parent->sum_free_index == 8) - parent = make_sibling_summary (parent); - summary = CALLOC (struct summary, 1); - summary->sum_level = parent->sum_level - 1; - parent->sum_kids[parent->sum_free_index++] = summary; - summary->sum_parent = parent; - size = INIT_TOKENS_SIZE (summary->sum_level); - summary->sum_tokens_size = size; - summary->sum_tokens = MALLOC (struct token *, size); - return summary; -} - -int -count_vec_size (struct summary *summary, unsigned char const *tail_hits) -{ - struct summary **kids; - unsigned int hits = (summary->sum_hits ? 
*summary->sum_hits : *tail_hits); - - kids = summary->sum_kids; - if (*kids == NULL) - { - static char bits_per_nybble[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 }; - return bits_per_nybble[hits & 0xf] + bits_per_nybble[hits >> 4]; - } - else - { - int bit; - int count = 0; - --tail_hits; - for (bit = 1; bit & 0xff; bit <<= 1, ++kids) - if (bit & hits) - count += count_vec_size (*kids, tail_hits); - return count; - } -} - -int -count_buf_size (struct summary *summary, unsigned char const *tail_hits) -{ - struct summary **kids; - unsigned int hits = (summary->sum_hits ? *summary->sum_hits : *tail_hits); - - kids = summary->sum_kids; - if (*kids == NULL) - return 1; - else - { - int bit; - int count = 1; - --tail_hits; - for (bit = 1; bit & 0xff; bit <<= 1, ++kids) - if (bit & hits) - count += count_buf_size (*kids, tail_hits); - return count; - } -} - -void -assert_hits (struct summary* summary) -{ - struct summary **kids = summary->sum_kids; - struct summary **end = &kids[8]; - - assert (summary->sum_hits == NULL || *summary->sum_hits == 0); - - if (end[-1] == 0) - while (*--end == 0) - ; - while (kids < end) - assert_hits (*kids++); -} - -void -write_hits (FILE *fp, struct summary *summary, unsigned char const *tail_hits) -{ - struct summary **kids; - unsigned int hits = (summary->sum_hits ? 
*summary->sum_hits++ : *tail_hits); - - assert (hits); - putc (hits, fp); - - kids = summary->sum_kids; - if (*kids) - { - int bit; - --tail_hits; - for (bit = 1; (bit & 0xff) && *kids; bit <<= 1, ++kids) - if (bit & hits) - write_hits (fp, *kids, tail_hits); - } -} - -void -sign_token (struct token *token) -{ - unsigned char *tok_hits = token->tok_hits; - unsigned char *hits_sig = current_hits_signature; - unsigned char *end = ¤t_hits_signature[MAX_LEVELS]; - struct summary *summary = summary_leaf; - - while (summary) - { - if (*tok_hits == 0) - add_token_to_summary (summary, token); - if (*tok_hits & *hits_sig) - break; - *tok_hits |= *hits_sig; - summary = summary->sum_parent; - tok_hits++; - hits_sig++; - } - while (hits_sig < end) - { - if (*tok_hits & *hits_sig) - break; - *tok_hits |= *hits_sig; - tok_hits++; - hits_sig++; - } -} - -void -add_token_to_summary (struct summary *summary, struct token *token) -{ - unsigned long size = summary->sum_tokens_size; - - if (summary->sum_hits_count >= size) - { - size *= 2; - summary->sum_tokens = REALLOC (summary->sum_tokens, struct token *, size); - summary->sum_tokens_size = size; - } - summary->sum_tokens[summary->sum_hits_count++] = token; -} - -int -bitsany (char const *s, int n) -{ - while (n--) - if (*s++) - return 1; - - return 0; -} - -char * -bitsset (char *s1, char const *s2, int n) -{ - while (n--) - *s1++ |= *s2++; - - return s1; -} - -char * -bitsclr (char *s1, char const *s2, int n) -{ - while (n--) - *s1++ &= ~*s2++; - - return s1; -} - -#if 0 - -char * -bitsand (char *s1, char const *s2, int n) -{ - while (n--) - *s1++ &= *s2++; - - return s1; -} - -char * -bitsxor (char *s1, char const *s2, int n) -{ - while (n--) - *s1++ ^= *s2++; - - return s1; -} - -int -bitstst (char const *s1, char const *s2, int n) -{ - while (n--) - if (*s1++ & *s2++) - return 1; - - return 0; -} - -#endif diff --git a/mkid.info b/mkid.info deleted file mode 100644 index 72eee25..0000000 --- a/mkid.info +++ /dev/null @@ 
-1,1097 +0,0 @@ -This is Info file mkid.info, produced by Makeinfo-1.55 from the input -file mkid.texinfo. - -START-INFO-DIR-ENTRY -* mkid: (mkid). Identifier database utilities -END-INFO-DIR-ENTRY - - This file documents the `mkid' identifier database utilities. - - Copyright (C) 1991 Tom Horsley - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - - Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation. - - -File: mkid.info, Node: Top, Next: Overview, Prev: (dir), Up: (dir) - -GNU `mkid' -********** - -* Menu: - -* Overview:: What is an ID database and what tools manipulate it? -* Mkid:: Mkid -* Database Query Tools:: Database Query Tools -* Iid:: Iid -* Other Tools:: Other Tools -* Command Index:: Command Index - - -File: mkid.info, Node: Overview, Next: Mkid, Prev: Top, Up: Top - -Overview -******** - - An ID database is simply a file containing a list of file names, a -list of identifiers, and a binary relation (stored as a bit matrix) -indicating which of the identifiers appear in each file. With this -database and some tools to manipulate the data, a host of tasks become -simpler and faster. You can `grep' through hundreds of files for a -name, skipping the files that don't contain the name. You can search -for all the memos containing references to a project. You can edit -every file that calls some function, adding a new required argument. 
-Anyone with a large software project to maintain, or a large set of -text files to organize can benefit from the ID database and the tools -that manipulate it. - - There are several programs in the ID family. The `mkid' program -scans the files, finds the identifiers and builds the ID database. The -`lid' and `aid' tools are used to generate lists of file names -containing an identifier (perhaps to recompile every file that -references a macro which just changed). The `eid' program will invoke -an editor on each of the files containing an identifier and the `gid' -program will `grep' for an identifier in the subset of files known to -contain it. The `pid' tool is used to query the path names of the -files in the database (rather than the contents). Finally, the `iid' -tool is an interactive program supporting complex queries to intersect -and join sets of file names. - -* Menu: - -* History:: History - - -File: mkid.info, Node: History, Prev: Overview, Up: Overview - -History -======= - - Greg McGary conceived of the ideas behind mkid when he began hacking -the UNIX kernel in 1984. He needed a navigation tool to help him find -his way through the expansive, unfamiliar landscape. The first mkid-like tools -were built with shell scripts, and produced an ascii database that looks -much like the output of `lid' with no arguments. It took over an hour -on a VAX 11/750 to build a database for a 4.1BSDish kernel. Lookups -were done with the UNIX command `look', modified to handle very long -lines. - - In 1986, Greg rewrote mkid, lid, fid and idx in C to improve -performance. Database-build times were shortened by an order of -magnitude. The mkid tools were first posted to `comp.sources.unix' -in September of 1987. - - Over the next few years, several versions diverged from the original -source. Tom Horsley at Harris Computer Systems Division stepped forward -to take over maintenance and integrated some of the fixes from divergent -versions. He also wrote the `iid' program. 
A pre-release of `mkid2' -was posted to `alt.sources' near the end of 1990. At that time Tom -wrote this texinfo manual with the encouragement of the net community. -(Tom thanks Doug Scofield and Bill Leonard whom I dragooned into -helping me poorf raed and edit -- they found several problems in the -initial version.) - - In January, 1995, Greg McGary reemerged as the primary maintainer and -is hereby launching `mkid-3' whose primary new feature is an efficient -algorithm for building databases that is linear over the size of the -input text for both time and space. (The old algorithm was quadratic -for space and choked on very large source trees.) The code is now under -GPL and might become a part of the GNU system. `Mkid-3' is an interim -release, since several significant enhancements are in the works. These -include an optional coupling with GNU grep, so that grep can use an ID -database for hints; a cscope work-alike query interface; incremental -update of the ID database; and an automatic file-tree walker so you -need not explicitly supply every file name argument to the `mkid' -program. - - -File: mkid.info, Node: Mkid, Next: Database Query Tools, Prev: Overview, Up: Top - -Mkid -**** - - The `mkid' program builds the ID database. To do this it must scan -each of the files included in the database. This takes some time, but -once the work is done the query programs run very rapidly. - - The `mkid' program knows how to scan a variety of files. For -example, it knows how to skip over comments and strings in a C program, -only picking out the identifiers used in the code. - - Identifiers are not the only thing included in the database. -Numbers are also scanned and included in the database indexed by their -binary value. Since the same number can be written many different ways -(47, 0x2f, 057 in a C program for instance), this feature allows you to -find hard coded uses of constants without regard to the radix used to -specify them. 
- - All the places in this document where identifiers are written about -should really mention identifiers and numbers, but that gets fairly -clumsy after a while, so you should always keep in mind that numbers are -included in the database as well as identifiers. - -* Menu: - -* Mkid Command Line Options:: Mkid Command Line Options -* Builtin Scanners:: Builtin Scanners -* Adding Your Own Scanner:: Adding Your Own Scanner -* Mkid Examples:: Mkid Examples - - -File: mkid.info, Node: Mkid Command Line Options, Next: Builtin Scanners, Prev: Mkid, Up: Mkid - -Mkid Command Line Options -========================= - - - Command: mkid [`-v'] [`-SSCANARG'] [`-aARG-FILE'] [`-'] - [`-fOUT-FILE'] [`-u'] [`files'...] - `-v' - Verbose. Mkid tells you as it scans each file and indicates - which scanner it is using. It also summarizes some statistics - about the database at the end. - - `-SSCANARG' - The `-S' option is used to specify arguments to the various - language scanners. *Note Scanner Arguments::, for details. - - `-aARG-FILE' - Name a file containing additional command line arguments (one - per line). This may be used to specify lists of file names - longer than will fit on a command line. - - `-' - A simple `-' by itself means read arguments from stdin. - - `-fOUT-FILE' - Specify the name of the database file to create. The default - name is `ID' (in the current directory), but you may specify - any name. The file names stored in the database will be - stored relative to the directory containing the database, so - if you move the database after creating it, you may have - trouble finding files unless they remain in the same relative - position. - - `-u' - The `-u' option updates an existing database by rescanning - any files that have changed since the database was written. - Unfortunately you cannot incrementally add new files to a - database. - - `files' - Remaining arguments are names of files to be scanned and - included in the database. 
- -* Menu: - -* Scanner Arguments:: Scanner Arguments - - -File: mkid.info, Node: Scanner Arguments, Prev: Mkid Command Line Options, Up: Mkid Command Line Options - -Scanner Arguments ------------------ - - Scanner arguments all start with `-S'. Scanner arguments are used to -tell `mkid' which language scanner to use for which files, to pass -language specific options to the individual scanners, and to get some -limited online help about scanner options. - - `Mkid' usually determines which language scanner to use on a file by -looking at the suffix of the file name. The suffix starts at the last -`.' in a file name and includes the `.' and all remaining characters -(for example the suffix of `fred.c' is `.c'). Not all files have a -suffix, and not all suffixes are bound to a specific language by mkid. -If `mkid' cannot determine what language a file is, it will use the -language bound to the `.default' suffix. The plain text scanner is -normally bound to `.default', but the `-S' option can be used to change -any language bindings. - - There are several different forms for scanner options: -`-S.=' - `Mkid' determines which language scanner to use on a file by - examining the file name suffix. The `.' is part of the suffix and - must be specified in this form of the `-S' option. For example - `-S.y=c' tells `mkid' to use the `c' language scanner for all - files ending in the `.y' suffix. - -`-S.=?' - `Mkid' has several built in suffixes it already recognizes. Passing - a `?' will cause it to print the language it will use to scan files - with that suffix. - -`-S?=' - This form will print which suffixes are scanned with the given - language. - -`-S?=?' - This prints all the suffix==>language bindings recognized by - `mkid'. - -`-S-' - Each language scanner accepts scanner dependent arguments. This - form of the `-S' option is used to pass arbitrary arguments to the - language scanners. - -`-S?' - Passing a `?' 
instead of a language option will print a brief - summary of the options recognized by the specified language - scanner. - -`-S//' - This form specifies a new language defined in terms of a builtin - language and a shell command that will be used to filter the file - prior to passing on to the builtin language scanner. - - -File: mkid.info, Node: Builtin Scanners, Next: Adding Your Own Scanner, Prev: Mkid Command Line Options, Up: Mkid - -Builtin Scanners -================ - - If you run `mkid -S?=?' you will find bindings for a number of -languages; unfortunately pascal, though mentioned in the list, is not -actually supported. The supported languages are documented below (1). - -* Menu: - -* C:: C -* Plain Text:: Plain Text -* Assembler:: Assembler - - ---------- Footnotes ---------- - - (1) This is not strictly true -- vhil is a supported language, but -it is an obsolete and arcane dialect of C and should be ignored - - -File: mkid.info, Node: C, Next: Plain Text, Prev: Builtin Scanners, Up: Builtin Scanners - -C -- - - The C scanner is probably the most popular. It scans identifiers out -of C programs, skipping over comments and strings in the process. The -normal `.c' and `.h' suffixes are automatically recognized as C -language, as well as the more obscure `.y' (yacc) and `.l' (lex) -suffixes. - - The `-S' options recognized by the C scanner are: - -`-Sc-s' - Allow the specified character in identifiers (some dialects of C - allow `$' in identifiers, so you could say `-Sc-s$' to accept that - dialect). - -`-Sc-u' - Don't strip leading underscores from identifier names (this is the - default mode of operation). - -`-Sc+u' - Do strip leading underscores from identifier names (I don't know - why you would want to do this in C programs, but the option is - available). - - -File: mkid.info, Node: Plain Text, Next: Assembler, Prev: C, Up: Builtin Scanners - -Plain Text ----------- - - The plain text scanner is designed for scanning documents. 
This is -typically the scanner used when adding custom scanners, and several -custom scanners are built in to `mkid' and defined in terms of filters -and the text scanner. A troff scanner runs `deroff' over the file then -feeds the result to the text scanner. A compressed man page scanner -runs `pcat' piped into `col -b', and a TeX scanner runs `detex'. - - Options: - -`-Stext+a' - Include the specified character in identifiers. By default, - standard C identifiers are recognized. - -`-Stext-a' - Exclude the specified character from identifiers. - -`-Stext+s' - Squeeze the specified character out of identifiers. By default, the - characters `'', `-', and `.' are squeezed out of identifiers. - This generates transformations like FRED'S==>FREDS or - A.S.P.C.A.==>ASPCA. - -`-Stext-s' - Do not squeeze out the specified character. - - -File: mkid.info, Node: Assembler, Prev: Plain Text, Up: Builtin Scanners - -Assembler ---------- - - Assemblers come in several flavors, so there are several options to -control scanning of assembly code: - -`-Sasm-c' - The specified character starts a comment that extends to end of - line (in many assemblers this is a semicolon or number sign -- - there is no default value for this). - -`-Sasm+u' - Strip the leading underscores off identifiers (the default - behavior). - -`-Sasm-u' - Do not strip the leading underscores. - -`-Sasm+a' - The specified character is allowed in identifiers. - -`-Sasm-a' - The specified character is allowed in identifiers, but any - identifier containing that character is ignored (often a `.' or `@' - will be used to indicate an internal temp label, you may want to - ignore these). - -`-Sasm+p' - Recognize C preprocessor directives in assembler source (default). - -`-Sasm-p' - Do not recognize C preprocessor directives in assembler source. - -`-Sasm+C' - Skip over C style comments in assembler source (default). - -`-Sasm-C' - Do not skip over C style comments in assembler source. 
- - -File: mkid.info, Node: Adding Your Own Scanner, Next: Mkid Examples, Prev: Builtin Scanners, Up: Mkid - -Adding Your Own Scanner -======================= - - There are two ways to add new scanners to `mkid'. The first is to -modify the code in `getscan.c' and add a new `scan-*.c' file with the -code for your scanner. This is not too hard, but it requires relinking -and installing a new version of `mkid', which might be inconvenient, -and would lead to the proliferation of `mkid' versions. - - The second technique uses the `-S//' form of -the `-S' option to specify a new language scanner. In this form the -first language is the name of the new language to be defined, the -second language is the name of an existing language scanner to be -invoked on the output of the filter command specified as the third -component of the `-S' option. - - The filter is an arbitrary shell command. Somewhere in the filter -string, a `%s' should occur. This `%s' is replaced by the name of the -source file being scanned, the shell command is invoked, and whatever -comes out on STDOUT is scanned using the builtin scanner. - - For example, no scanner is provided for texinfo files (like this -one). If I wished to index the contents of this file, but avoid -indexing the texinfo directives, I would need a filter that stripped -out the texinfo directives, but left the remainder of the file intact. -I could then use the plain text scanner on the remainder. A quick way -to specify this might be: - - '-S/texinfo/text/sed s,@[a-z]*,,g < %s' - - This defines a new language scanner (TEXINFO) defined in terms of a -`sed' command to strip out texinfo directives (at signs followed by -letters). Once the directives are stripped, the remaining text is run -through the plain text scanner. - - This is just an example, to do a better job I would actually need to -delete some lines (such as those beginning with `@end') as well as -deleting the `@' directives embedded in the text. 
- - -File: mkid.info, Node: Mkid Examples, Prev: Adding Your Own Scanner, Up: Mkid - -Mkid Examples -============= - - The simplest example of `mkid' is something like: - - mkid *.[chy] - - This will build an ID database indexing all the identifiers and -numbers in the `.c', `.h', and `.y' files in the current directory. -Because those suffixes are already known to `mkid' as C language files, -no other special arguments are required. - - From a simple example, let's go to a more complex one. Suppose you -want to build a database indexing the contents of all the MAN pages. -Since `mkid' already knows how to deal with `.z' files, let's assume -your system is using the `compress' program to store compressed -cattable versions of the MAN pages. The `compress' program creates -files with a `.Z' suffix, so `mkid' will have to be told how to scan -`.Z' files. The following code shows how to combine the `find' command -with the special scanner arguments to `mkid' to generate the required ID -database: - - cd /usr/catman - find . -name '*.Z' -print | mkid '-Sman/text/uncompress -c < %s' -S.Z=man - - - This example first switches to the `/usr/catman' directory where the -compressed MAN pages are stored. The `find' command then finds all the -`.Z' files under that directory and prints their names. This list is -piped into the `mkid' program. The `-' argument by itself (at the end -of the line) tells `mkid' to read arguments (in this case the list of -file names) from STDIN. The first `-S' argument defines a new language -(MAN) in terms of the `uncompress' utility and the existing text -scanner. The second `-S' argument tells `mkid' to treat all `.Z' files -as language MAN. In practice, you might find the `mkid' arguments need -to be even more complex, something like: - - mkid '-Sman/text/uncompress -c < %s | col -b' -S.Z=man - - - This will take the additional step of getting rid of any underlining -and backspacing which might be present in the compressed MAN pages. 
- - -File: mkid.info, Node: Database Query Tools, Next: Iid, Prev: Mkid, Up: Top - -Database Query Tools -******************** - - The ID database is useless without database query tools. The -remainder of this document describes those tools. - - The `lid', `gid', `aid', `eid', and `pid' programs are all the same -program installed with links to different names. The name used to -invoke the program determines how it will act. - - The `iid' program is an interactive query shell that sits on top of -the other query tools. - -* Menu: - -* Common Options:: Common command line options -* Patterns:: Identifier pattern matching -* Lid:: Look up identifiers -* Aid:: Case insensitive lid -* Gid:: Grep for identifiers -* Eid:: Edit files with matching identifiers -* Pid:: Look up path names in database - - -File: mkid.info, Node: Common Options, Next: Patterns, Prev: Database Query Tools, Up: Database Query Tools - -Common Options -============== - - Since many of the programs are really links to one common program, it -is only reasonable to expect that most of the query tools would share -common command line options. Not all options make sense for all -programs, but they are all described here. The description of each -program gives the options that program uses. - -`-f<file-name>' - Read the database specified by <file-name>. Normally the tools look for - a file named `ID' in either the current directory or in any of the - directories above the current directory. This means you can keep a - global `ID' database in the root of a large source tree and use - the query tools from anywhere within that tree. - -`-r<directory>' - The query tools usually assume the file names in the database are - relative to the directory holding the database. The `-r' option - tells the tools to look for the files relative to <directory> - regardless of the location of the database. - -`-c' - This is shorthand for `-r`pwd`'. It tells the query tools to assume - the file names are stored relative to the current working - directory. 
- -`-e' - Force the pattern arguments to be treated as regular expressions. - Normally the query tools attempt to guess if the patterns are - regular expressions or simple identifiers by looking for special - characters in the pattern. - -`-w' - Force the pattern arguments to be treated as simple words even if - they contain special regular expression characters. - -`-k' - Normally the query tools that generate lists of file names attempt - to compress the lists using the `csh' brace notation. This option - suppresses the file name compression and outputs each name in full. - (This is particularly useful if you are a `ksh' user and want to - feed the list of names to another command -- the `-k' option comes - from the `k' in `ksh'). - -`-g' - It is possible to build the query tools so the `-k' option is the - default behavior. If this is the case for your system, the `-g' - option turns on the globbing of file names using the `csh' brace - notation. - -`-n' - Normally the query tools that generate lists of file names also - list the matching identifier at the head of the list of names. - This is irritating if you want just a list of names to feed to - another command, so the `-n' option suppresses the identifier and - lists only file names. - -`-b' - This option is only used by the `pid' tool. It restricts `pid' to - pattern match only the basename part of a file name. Normally the - absolute file name is matched against the pattern. - -`-d -o -x -a' - These options may be used in any combination to limit the radix of - numeric matches. The `-d' option will allow matches on decimal - numbers, `-o' on octal, and `-x' on hexadecimal numbers. The `-a' - option is shorthand for specifying all three. Any combination of - these options may be used. - -`-m' - Merge multiple lines of output into a single line. (If your query - matches more than one identifier the default action is to generate - a separate line of output for each matching identifier). 
- -`-s' - Search for identifiers that appear only once in the database. This - helps to locate identifiers that are defined but never used. - -`-u' - List identifiers that conflict in the first characters. - This could be useful porting programs to brain-dead computers that - refuse to support long identifiers, but your best long term option - is to set such computers on fire. - - -File: mkid.info, Node: Patterns, Next: Lid, Prev: Common Options, Up: Database Query Tools - -Patterns -======== - - You can attempt to match either simple identifiers or numbers in a -query, or you can specify a regular expression pattern which may match -many different identifiers in the database. The query programs use -either REGEX and REGCMP or RE_COMP and RE_EXEC, depending on which one -is available in the library on your system. These might not always -support the exact same regular expression syntax, so consult your local -MAN pages to find out. Any regular expression routines should support -the following syntax: - -`.' - A dot matches any character. - -`[ ]' - Brackets match any of the characters specified within the - brackets. You can match any characters *except* the ones in - brackets by typing `^' as the first character. A range of - characters can be specified using `-'. - -`*' - An asterisk means repeat the previous pattern zero or more times. - -`^' - An `^' at the beginning of a pattern means the pattern must match - starting at the first character of the identifier. - -`$' - A `$' at the end of the pattern means the pattern must match ending - at the last character in the identifier. - - -File: mkid.info, Node: Lid, Next: Aid, Prev: Patterns, Up: Database Query Tools - -Lid -=== - - - Command: lid [`-f'] [`-u'] [`-r'] [`-ewdoxamskgnc'] - PATTERNS... - - The `lid' program stands for LOOKUP IDENTIFIER. It searches the -database for any identifiers matching the patterns and prints the names -of the files that match each pattern. 
The exact format of the output -depends on the options. - - -File: mkid.info, Node: Aid, Next: Gid, Prev: Lid, Up: Database Query Tools - -Aid -=== - - - Command: aid [`-f'] [`-u'] [`-r'] [`-doxamskgnc'] - PATTERNS... - - The `aid' command is an abbreviation for APROPOS IDENTIFIER. The -patterns cannot be regular expressions, but it looks for them using a -case insensitive match, and any pattern that is a substring of an -identifier in the database will match that identifier. - - For example `aid get' might match the identifiers `fgets', -`GETLINE', and `getchar'. - - -File: mkid.info, Node: Gid, Next: Eid, Prev: Aid, Up: Database Query Tools - -Gid -=== - - - Command: gid [`-f'] [`-u'] [`-r'] [`-doxasc'] - PATTERNS... - - The `gid' command stands for GREP FOR IDENTIFIERS. It finds -identifiers in the database that match the specified patterns, then -`greps' for those identifiers in just the set of files containing -matches. In a large source tree, this saves a fantastic amount of time. - - There is an EMACS interface to this program (*note GNU Emacs -Interface::.). If you are an EMACS user, you will probably prefer the -EMACS interface over the `eid' tool. - - -File: mkid.info, Node: Eid, Next: Pid, Prev: Gid, Up: Database Query Tools - -Eid -=== - - - Command: eid [`-f'] [`-u'] [`-r'] [`-doxasc'] - PATTERNS... - - The `eid' command allows you to invoke an editor on each file -containing a matching pattern. The `EDITOR' environment variable is the -name of the program to be invoked. If the specified editor can accept -an initial search argument on the command line, you can use the -`EIDARG', `EIDLDEL', and `EIDRDEL' environment variables to specify the -form of that argument. - -`EDITOR' - The name of the editor program to invoke. - -`EIDARG' - A printf string giving the form of the argument to pass containing - the initial search string (the matching identifier). For `vi' it - should be set to `+/%s/''. 
- -`EIDLDEL' - A string giving the regular expression pattern that forces a match - at the beginning (left end) of a word. This string is inserted in - front of the matching identifier when composing the search - argument. For `vi', this should be `\<'. - -`EIDRDEL' - The matching right end word delimiter. For `vi', use `\>'. - - -File: mkid.info, Node: Pid, Prev: Eid, Up: Database Query Tools - -Pid -=== - - - Command: pid [`-f'] [`-u'] [`-r'] [`-ebkgnc'] - PATTERNS... - - The `pid' tool is unlike all the other tools. It matches the -patterns against the file names in the database rather than the -identifiers in the database. Patterns are treated as shell wild card -patterns unless the `-e' option is given, in which case full regular -expression matching is done. - - The wild card pattern is matched against the absolute path name of -the file. Most shells treat slashes `/' and file names that start with -dot `.' specially, `pid' does not do this. It simply attempts to match -the absolute path name string against the wild card pattern. - - The `-b' option restricts the pattern matching to the base name of -the file (all the leading directory names are stripped prior to pattern -matching). - - -File: mkid.info, Node: Iid, Next: Other Tools, Prev: Database Query Tools, Up: Top - -Iid -*** - - - Command: iid [`-a'] [`-c'] [`-H'] - `-a' - Normally `iid' uses the `lid' command to search for names. - If you give the `-a' option on the command line, then it will - use `aid' as the default search engine. - - `-c' - In normal operation, `iid' starts up and prompts you for - commands used to build sets of files. The `-c' option is used - to pass a single query command to `iid' which it then - executes and exits. - - `-H' - The `-H' option prints a short help message and exits. To get - more help use the `help' command from inside `iid'. - - The `iid' program is an interactive ID query tool. 
It operates by -running the other query programs (such as `lid' and `aid') and creating -sets of file names returned by these queries. It also provides -operators for `anding' and `oring' these sets to create new sets. - - The `PAGER' environment variable names the program `iid' uses to -display files. If you use `emacs', you might want to set `PAGER' so it -invokes the `emacsclient' program. Check the file `lisp/server.el' in -the emacs source tree for documentation on this. It is useful not only -with X windows, but also when running `iid' from an emacs shell buffer. -There is also a somewhat spiffier version called gnuserv by Andy Norman -(`ange%anorman@hplabs.hp.com') which appeared in `comp.emacs' sometime -in 1989. - -* Menu: - -* Ss and Files commands:: Ss and Files commands -* Sets:: Sets -* Show:: Show -* Begin:: Begin -* Help:: Help -* Off:: Off -* Shell Commands as Queries:: Shell Commands as Queries -* Shell Escape:: Shell Escape - - -File: mkid.info, Node: Ss and Files commands, Next: Sets, Prev: Iid, Up: Iid - -Ss and Files commands -===================== - - The primary query commands are `ss' (for select sets) and `files' -(for show file names). These commands both take a query expression as an -argument. - - - Subcommand: ss QUERY - The `ss' command runs a query and builds a set (or sets) of file - names. The result is printed as a summary of the sets constructed - showing how many file names are in each set. - - - Subcommand: files QUERY - The `files' command is like the `ss' command, but rather than - printing a summary, it displays the full list of matching file - names. - - - Subcommand: f QUERY - The `f' command is merely a shorthand notation for `files'. - - Database queries are simple expressions with operators like `and' -and `or'. Parentheses can be used to group operations. 
The complete set -of operators is summarized below: - -`PATTERN' - Any pattern not recognized as one of the keywords in this table is - treated as an identifier to be searched for in the database. It is - passed as an argument to the default search program (normally - `lid', but `aid' is used if the `-a' option was given when `iid' - was started). The result of this operation is a set of file - names, and it is assigned a unique set number. - -`lid' - `lid' is a keyword. It is used to invoke `lid' with the list of - identifiers following it as arguments. This forces the use of `lid' - regardless of the state of the `-a' option (*note Lid::.). - -`aid' - The `aid' keyword is like the `lid' keyword, but it forces the use - of the `aid' program (*note Aid::.). - -`match' - The `match' operator invokes the `pid' program to do pattern - matching on file names rather than identifiers. The set generated - contains the file names that match the specified patterns (*note - Pid::.). - -`or' - The `or' operator takes two sets of file names as arguments and - generates a new set containing all the files from both sets. - -`and' - The `and' operator takes two sets of file names and generates a new - set containing only files from both sets. - -`not' - The `not' operator inverts a set of file names, producing the set - of all files not in the input set. - -`set number' - A set number consists of the letter `s' followed immediately by a - number. This refers to one of the sets created by a previous - query operation. During one `iid' session, each query generates a - unique set number, so any previously generated set may be used as - part of any new query by referring to the set number. - - The `not' operator has the highest precedence with `and' coming in -the middle and `or' having the lowest precedence. The operator names -are recognized using case insensitive matching, so `AND', `and', and -`aNd' are all the same as far as `iid' is concerned. 
If you wish to use -a keyword as an operand to one of the query programs, you must enclose -it in quotes. Any patterns containing shell special characters must -also be properly quoted or escaped, since the query commands are run by -invoking them with the shell. - - Summary of query expression syntax: - - A is: - - - lid - aid - match - or - and - not - ( ) - - -File: mkid.info, Node: Sets, Next: Show, Prev: Ss and Files commands, Up: Iid - -Sets -==== - - - Subcommand: sets - - The `sets' command displays all the sets created so far. Each one is -described by the query command that generated it. - - -File: mkid.info, Node: Show, Next: Begin, Prev: Sets, Up: Iid - -Show -==== - - - Subcommand: show SET - - - Subcommand: p SET - - The `show' and `p' commands are equivalent. They both accept a set -number as an argument and run the program given in the `PAGER' -environment variable with the file names in that set as arguments. - - -File: mkid.info, Node: Begin, Next: Help, Prev: Show, Up: Iid - -Begin -===== - - - Subcommand: begin DIRECTORY - - - Subcommand: b DIRECTORY - - The `begin' command (and its abbreviated version `b') is used to -begin a new `iid' session in a different directory (which presumably -contains a different database). It flushes all the sets created so far -and switches to the specified directory. It is equivalent to exiting -`iid', changing directories in the shell, and running `iid' again. - - -File: mkid.info, Node: Help, Next: Off, Prev: Begin, Up: Iid - -Help -==== - - - Subcommand: help - - - Subcommand: h - - - Subcommand: ? - - The `help', `h', and `?' command are three different ways to ask for -help. They all invoke the `PAGER' program to display a short help file. - - -File: mkid.info, Node: Off, Next: Shell Commands as Queries, Prev: Help, Up: Iid - -Off -=== - - - Subcommand: off - - - Subcommand: quit - - - Subcommand: q - - These three command (or just an end of file) all cause `iid' to exit. 
- - -File: mkid.info, Node: Shell Commands as Queries, Next: Shell Escape, Prev: Off, Up: Iid - -Shell Commands as Queries -========================= - - When the first word on an `iid' command is not recognized as a -builtin `iid' command, `iid' assumes the command is a shell command -which will write a list of file names to STDOUT. This list of file -names is used to generate a new set of files. - - Any set numbers that appear as arguments to this command are expanded -into lists of file names prior to running the command. - - -File: mkid.info, Node: Shell Escape, Prev: Shell Commands as Queries, Up: Iid - -Shell Escape -============ - - If a command starts with a bang (`!') character, the remainder of -the line is run as a shell command. Any set numbers that appear as -arguments to this command are expanded into lists of file names prior to -running the command. - - -File: mkid.info, Node: Other Tools, Next: Command Index, Prev: Iid, Up: Top - -Other Tools -*********** - - This chapter describes some support tools that work with the other ID -programs. - -* Menu: - -* GNU Emacs Interface:: Using gid.el -* Fid:: List identifiers in a file. -* Idx:: Extract identifiers from source file. - - -File: mkid.info, Node: GNU Emacs Interface, Next: Fid, Prev: Other Tools, Up: Other Tools - -GNU Emacs Interface -=================== - - The source distribution comes with a file named `gid.el'. This is a -GNU emacs interface to the `gid' tool. If you put the file where emacs -can find it (somewhere in your `EMACSLOADPATH') and put `(autoload 'gid -"gid" nil t)' in your `.emacs' file, you will be able to invoke the -`gid' function using `M-x gid'. - - This function prompts you with the word the cursor is on. If you want -to search for a different pattern, simply delete the line and type the -pattern of interest. 
- - It runs `gid' in a `*compilation*' buffer, so the normal -`next-error' function can be used to visit all the places the -identifier is found (*note Compilation: (emacs)Compilation.). - - -File: mkid.info, Node: Fid, Next: Idx, Prev: GNU Emacs Interface, Up: Other Tools - -Fid -=== - - - Command: fid [`-f'] FILE1 [FILE2] - `-f' - Look in the named database. - - `FILE1' - List the identifiers contained in file1 according to the - database. - - `FILE2' - If a second file is given, list only the identifiers both - files have in common. - - The `fid' program provides an inverse query. Instead of listing -files containing some identifier, it lists the identifiers found in a -file. - - -File: mkid.info, Node: Idx, Prev: Fid, Up: Other Tools - -Idx -=== - - - Command: idx [`-s'] [`-r'] [`-S'] - FILES... - The `-s', `-r', and `-S' arguments to `idx' are identical to the - same arguments on `mkid' (*note Mkid Command Line Options::.). - - The `idx' command is more of a test frame for scanners than a tool -designed to be independently useful. It takes the same scanner arguments -as `mkid', but rather than building a database, it prints the -identifiers found to STDOUT, one per line. You can use it to try out a -scanner on a sample file to make sure it is extracting the identifiers -you believe it should extract. - - -File: mkid.info, Node: Command Index, Prev: Other Tools, Up: Top - -Command Index -************* - -* Menu: - -* ?: Help. -* aid: Aid. -* b: Begin. -* begin: Begin. -* eid: Eid. -* f: Ss and Files commands. -* fid: Fid. -* files: Ss and Files commands. -* gid: Gid. -* h: Help. -* help: Help. -* idx: Idx. -* iid: Iid. -* lid: Lid. -* mkid: Mkid Command Line Options. -* off: Off. -* p: Show. -* pid: Pid. -* q: Off. -* quit: Off. -* sets: Sets. -* show: Show. -* ss: Ss and Files commands. 
- - - -Tag Table: -Node: Top913 -Node: Overview1321 -Node: History2885 -Node: Mkid5050 -Node: Mkid Command Line Options6386 -Node: Scanner Arguments8147 -Node: Builtin Scanners10502 -Node: C11167 -Node: Plain Text12062 -Node: Assembler13130 -Node: Adding Your Own Scanner14318 -Node: Mkid Examples16295 -Node: Database Query Tools18272 -Node: Common Options19213 -Node: Patterns22929 -Node: Lid24171 -Node: Aid24593 -Node: Gid25124 -Node: Eid25744 -Node: Pid26868 -Node: Iid27758 -Node: Ss and Files commands29628 -Node: Sets33091 -Node: Show33331 -Node: Begin33659 -Node: Help34146 -Node: Off34427 -Node: Shell Commands as Queries34657 -Node: Shell Escape35183 -Node: Other Tools35525 -Node: GNU Emacs Interface35902 -Node: Fid36708 -Node: Idx37260 -Node: Command Index37935 - -End Tag Table diff --git a/mkid.texinfo b/mkid.texinfo deleted file mode 100644 index 076b313..0000000 --- a/mkid.texinfo +++ /dev/null @@ -1,957 +0,0 @@ -\input texinfo -@comment %**start of header (This is for running Texinfo on a region.) -@setfilename mkid.info -@settitle The ID Database -@setchapternewpage odd -@comment %**end of header (This is for running Texinfo on a region.) - -@include version.texi - -@ifinfo -@format -START-INFO-DIR-ENTRY -* mkid: (mkid). Identifier database utilities -END-INFO-DIR-ENTRY -@end format -@end ifinfo - -@ifinfo -This file documents the @code{mkid} identifier database utilities. - -Copyright (C) 1991 Tom Horsley - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). 
- -@end ignore -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation. -@end ifinfo - -@titlepage -@title The MKID Identifier Database, version @value{VERSION} -@subtitle A Simple, Fast, High-Capacity Cross-Referencer -@subtitle lid, gid, aid, eid, pid, iid -@author by Tom Horsley - -@page -@vskip 0pt plus 1filll -Copyright @copyright{} 1991 Tom Horsley - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation. -@end titlepage - -@ifinfo -@node Top, Overview, (dir), (dir) -@top GNU @code{mkid} - -@menu -* Overview:: What is an ID database and what tools manipulate it? 
-* Mkid:: Mkid -* Database Query Tools:: Database Query Tools -* Iid:: Iid -* Other Tools:: Other Tools -* Command Index:: Command Index -@end menu - -@end ifinfo - -@node Overview, Mkid, Top, Top -@chapter Overview -@cindex Reference to First Chapter -An ID database is simply a file containing a list of file names, a list of -identifiers, and a binary relation (stored as a bit matrix) indicating which -of the identifiers appear in each file. With this database and some tools -to manipulate the data, a host of tasks become simpler and faster. You can -@code{grep} through hundreds of files for a name, skipping the files that -don't contain the name. You can search for all the memos containing -references to a project. You can edit every file that calls some function, -adding a new required argument. Anyone with a large software project to -maintain, or a large set of text files to organize can benefit from the ID -database and the tools that manipulate it. - -There are several programs in the ID family. The @code{mkid} program -scans the files, finds the identifiers and builds the ID database. The -@code{lid} and @code{aid} tools are used to generate lists of file names -containing an identifier (perhaps to recompile every file that -references a macro which just changed). The @code{eid} program will -invoke an editor on each of the files containing an identifier and the -@code{gid} program will @code{grep} for an identifier in the subset of -files known to contain it. The @code{pid} tool is used to query the -path names of the files in the database (rather than the contents). -Finally, the @code{iid} tool is an interactive program supporting -complex queries to intersect and join sets of file names. - -@menu -* History:: History -@end menu - -@node History, , Overview, Overview -@section History -Greg McGary conceived of the ideas behind mkid when he began hacking -the UNIX kernel in 1984. 
He needed a navigation tool to help him find -his way around the expansive, unfamiliar landscape. The first mkid-like tools -were built with shell scripts, and produced an ascii database that looks -much like the output of `lid' with no arguments. It took over an hour -on a VAX 11/750 to build a database for a 4.1BSDish kernel. Lookups were -done with the UNIX command @code{look}, modified to handle very long lines. - -In 1986, Greg rewrote mkid, lid, fid and idx in C to improve -performance. Database-build times were shortened by an order of -magnitude. The mkid tools were first posted to @file{comp.sources.unix} in -September of 1987. - -Over the next few years, several versions diverged from the original -source. Tom Horsley at Harris Computer Systems Division stepped forward -to take over maintenance and integrated some of the fixes from divergent -versions. He also wrote the @code{iid} program. A pre-release of -@code{mkid2} was posted to @file{alt.sources} near the end of 1990. At -that time Tom wrote this texinfo manual with the encouragement of the net -community. (Tom thanks Doug Scofield and Bill Leonard whom I dragooned -into helping me poorf raed and edit --- they found several problems in -the initial version.) - -In January, 1995, Greg McGary reemerged as the primary maintainer and is -hereby launching @code{mkid-3} whose primary new feature is an efficient -algorithm for building databases that is linear over the size of the -input text for both time and space. (The old algorithm was quadratic -for space and choked on very large source trees.) The code is now under -GPL and might become a part of the GNU system. @code{Mkid-3} is an -interim release, since several significant enhancements are in the works. 
These include an optional coupling with GNU grep, so that grep can use -an ID database for hints; a cscope work-alike query interface; -incremental update of the ID database; and an automatic file-tree walker -so you need not explicitly supply every file name argument to -the @code{mkid} program. - -@node Mkid, Database Query Tools, Overview, Top -@chapter Mkid -The @code{mkid} program builds the ID database. To do this it must scan -each of the files included in the database. This takes some time, but -once the work is done the query programs run very rapidly. - -The @code{mkid} program knows how to scan a variety of files. For -example, it knows how to skip over comments and strings in a C program, -only picking out the identifiers used in the code. - -Identifiers are not the only thing included in the database. -Numbers are also scanned and included in the database indexed by -their binary value. Since the same number can be written many -different ways (47, 0x2f, 057 in a C program for instance), this -feature allows you to find hard coded uses of constants without -regard to the radix used to specify them. - -All the places in this document where identifiers are written about -should really mention identifiers and numbers, but that gets fairly -clumsy after a while, so you should always keep in mind that numbers are -included in the database as well as identifiers. - -@menu -* Mkid Command Line Options:: Mkid Command Line Options -* Builtin Scanners:: Builtin Scanners -* Adding Your Own Scanner:: Adding Your Own Scanner -* Mkid Examples:: Mkid Examples -@end menu - -@node Mkid Command Line Options, Builtin Scanners, Mkid, Mkid -@section Mkid Command Line Options -@deffn Command mkid [@code{-v}] [@code{-S@var{scanarg}}] [@code{-a@var{arg-file}}] [@code{-}] [@code{-f@var{out-file}}] [@code{-u}] [@code{files}@dots{}] -@table @code -@item -v -Verbose. Mkid tells you as it scans each file and indicates which scanner -it is using. 
It also summarizes some statistics about the database at -the end. -@item -S@var{scanarg} -The @code{-S} option is used to specify arguments to the various language -scanners. @xref{Scanner Arguments}, for details. -@item -a@var{arg-file} -Name a file containing additional command line arguments (one per line). This -may be used to specify lists of file names longer than will fit on a command -line. -@item - -A simple @code{-} by itself means read arguments from stdin. -@item -f@var{out-file} -Specify the name of the database file to create. The default name is @code{ID} -(in the current directory), but you may specify any name. The file names -stored in the database will be stored relative to the directory containing -the database, so if you move the database after creating it, you may have -trouble finding files unless they remain in the same relative position. -@item -u -The @code{-u} option updates an existing database by rescanning any files -that have changed since the database was written. Unfortunately you cannot -incrementally add new files to a database. -@item files -Remaining arguments are names of files to be scanned and included in the -database. -@end table -@end deffn - -@menu -* Scanner Arguments:: Scanner Arguments -@end menu - -@node Scanner Arguments, , Mkid Command Line Options, Mkid Command Line Options -@subsection Scanner Arguments -Scanner arguments all start with @code{-S}. Scanner arguments are used to tell -@code{mkid} which language scanner to use for which files, to pass language -specific options to the individual scanners, and to get some limited -online help about scanner options. - -@code{Mkid} usually determines which language scanner to use on a file -by looking at the suffix of the file name. The suffix starts at the last -@samp{.} in a file name and includes the @samp{.} and all remaining -characters (for example the suffix of @file{fred.c} is @file{.c}). 
Not -all files have a suffix, and not all suffixes are bound to a specific -language by mkid. If @code{mkid} cannot determine what language a file -is, it will use the language bound to the @file{.default} suffix. The -plain text scanner is normally bound to @file{.default}, but the -@code{-S} option can be used to change any language bindings. - -There are several different forms for scanner options: -@table @code -@item -S.@var{}=@var{} -@code{Mkid} determines which language scanner to use on a file by examining the -file name suffix. The @samp{.} is part of the suffix and must be specified -in this form of the @code{-S} option. For example @samp{-S.y=c} tells -@code{mkid} to use the @samp{c} language scanner for all files ending in -the @samp{.y} suffix. -@item -S.@var{}=? -@code{Mkid} has several built in suffixes it already recognizes. Passing -a @samp{?} will cause it to print the language it will use to scan files -with that suffix. -@item -S?=@var{} -This form will print which suffixes are scanned with the given language. -@item -S?=? -This prints all the suffix@expansion{}language bindings recognized by -@code{mkid}. -@item -S@var{}-@var{} -Each language scanner accepts scanner dependent arguments. This form of the -@code{-S} option is used to pass arbitrary arguments to the language scanners. -@item -S@var{}? -Passing a @samp{?} instead of a language option will print a brief summary -of the options recognized by the specified language scanner. -@item -S@var{}/@var{}/@var{} -This form specifies a new language defined in terms of a builtin language -and a shell command that will be used to filter the file prior to passing -on to the builtin language scanner. -@end table - -@node Builtin Scanners, Adding Your Own Scanner, Mkid Command Line Options, Mkid -@section Builtin Scanners -If you run @code{mkid -S?=?} you will find bindings for a number of -languages; unfortunately pascal, though mentioned in the list, is not -actually supported. 
The supported languages are documented below -@footnote{This is not strictly true --- vhil is a supported language, but -it is an obsolete and arcane dialect of C and should be ignored}. - -@menu -* C:: C -* Plain Text:: Plain Text -* Assembler:: Assembler -@end menu - -@node C, Plain Text, Builtin Scanners, Builtin Scanners -@subsection C - -The C scanner is probably the most popular. It scans identifiers out of -C programs, skipping over comments and strings in the process. The -normal @file{.c} and @file{.h} suffixes are automatically recognized as -C language, as well as the more obscure @file{.y} (yacc) and @file{.l} -(lex) suffixes. - -The @code{-S} options recognized by the C scanner are: - -@table @code -@item -Sc-s@var{} -Allow the specified @var{} in identifiers (some dialects of -C allow @code{$} in identifiers, so you could say @code{-Sc-s$} to -accept that dialect). -@item -Sc-u -Don't strip leading underscores from identifier names (this is the default -mode of operation). -@item -Sc+u -Do strip leading underscores from identifier names (I don't know why you -would want to do this in C programs, but the option is available). -@end table - -@node Plain Text, Assembler, C, Builtin Scanners -@subsection Plain Text -The plain text scanner is designed for scanning documents. This is -typically the scanner used when adding custom scanners, and several -custom scanners are built in to @code{mkid} and defined in terms of filters -and the text scanner. A troff scanner runs @code{deroff} over the file -then feeds the result to the text scanner. A compressed man page scanner -runs @code{pcat} piped into @code{col -b}, and a @TeX{} scanner runs -@code{detex}. - -Options: - -@table @code -@item -Stext+a@var{} -Include the specified character in identifiers. By default, standard -C identifiers are recognized. -@item -Stext-a@var{} -Exclude the specified character from identifiers. -@item -Stext+s@var{} -Squeeze the specified character out of identifiers. 
By default, the -characters @samp{'}, @samp{-}, and @samp{.} are squeezed out of identifiers. -This generates transformations like @var{fred's}@expansion{}@var{freds} or -@var{a.s.p.c.a.}@expansion{}@var{aspca}. -@item -Stext-s@var{} -Do not squeeze out the specified character. -@end table - -@node Assembler, , Plain Text, Builtin Scanners -@subsection Assembler -Assemblers come in several flavors, so there are several options to -control scanning of assembly code: - -@table @code -@item -Sasm-c@var{} -The specified character starts a comment that extends to end of line -(in many assemblers this is a semicolon or number sign --- there is -no default value for this). -@item -Sasm+u -Strip the leading underscores off identifiers (the default behavior). -@item -Sasm-u -Do not strip the leading underscores. -@item -Sasm+a@var{} -The specified character is allowed in identifiers. -@item -Sasm-a@var{} -The specified character is allowed in identifiers, but any identifier -containing that character is ignored (often a @samp{.} or @samp{@@} -will be used to indicate an internal temp label, you may want to -ignore these). -@item -Sasm+p -Recognize C preprocessor directives in assembler source (default). -@item -Sasm-p -Do not recognize C preprocessor directives in assembler source. -@item -Sasm+C -Skip over C style comments in assembler source (default). -@item -Sasm-C -Do not skip over C style comments in assembler source. -@end table - -@node Adding Your Own Scanner, Mkid Examples, Builtin Scanners, Mkid -@section Adding Your Own Scanner - -There are two ways to add new scanners to @code{mkid}. The first is to -modify the code in @file{getscan.c} and add a new @file{scan-*.c} file -with the code for your scanner. This is not too hard, but it requires -relinking and installing a new version of @code{mkid}, which might be -inconvenient, and would lead to the proliferation of @code{mkid} -versions. 
- -The second technique uses the @code{-S//} form -of the @code{-S} option to specify a new language scanner. In this form -the first language is the name of the new language to be defined, -the second language is the name of an existing language scanner to -be invoked on the output of the filter command specified as the -third component of the @code{-S} option. - -The filter is an arbitrary shell command. Somewhere in the filter string, -a @code{%s} should occur. This @code{%s} is replaced by the name of the -source file being scanned, the shell command is invoked, and whatever -comes out on @var{stdout} is scanned using the builtin scanner. - -For example, no scanner is provided for texinfo files (like this one). -If I wished to index the contents of this file, but avoid indexing the -texinfo directives, I would need a filter that stripped out the texinfo -directives, but left the remainder of the file intact. I could then use -the plain text scanner on the remainder. A quick way to specify this -might be: - -@example -'-S/texinfo/text/sed s,@@[a-z]*,,g < %s' -@end example - -This defines a new language scanner (@var{texinfo}) defined in terms of -a @code{sed} command to strip out texinfo directives (at signs followed -by letters). Once the directives are stripped, the remaining text is run -through the plain text scanner. - -This is just an example, to do a better job I would actually need to -delete some lines (such as those beginning with @code{@@end}) as well -as deleting the @code{@@} directives embedded in the text. - -@node Mkid Examples, , Adding Your Own Scanner, Mkid -@section Mkid Examples - -The simplest example of @code{mkid} is something like: - -@example -mkid *.[chy] -@end example - -This will build an ID database indexing all the -identifiers and numbers in the @file{.c}, @file{.h}, and @file{.y} files -in the current directory. Because those suffixes are already known to -@code{mkid} as C language files, no other special arguments are required. 
- -From a simple example, let's go to a more complex one. Suppose you want -to build a database indexing the contents of all the @var{man} pages. -Since @code{mkid} already knows how to deal with @file{.z} files, let's -assume your system is using the @code{compress} program to store -compressed cattable versions of the @var{man} pages. The -@code{compress} program creates files with a @code{.Z} suffix, so -@code{mkid} will have to be told how to scan @file{.Z} files. The -following code shows how to combine the @code{find} command with the -special scanner arguments to @code{mkid} to generate the required ID -database: - -@example -cd /usr/catman -find . -name '*.Z' -print | mkid '-Sman/text/uncompress -c < %s' -S.Z=man - -@end example - -This example first switches to the @file{/usr/catman} directory where -the compressed @var{man} pages are stored. The @code{find} command then -finds all the @file{.Z} files under that directory and prints their -names. This list is piped into the @code{mkid} program. The @code{-} -argument by itself (at the end of the line) tells @code{mkid} to read -arguments (in this case the list of file names) from @var{stdin}. The -first @code{-S} argument defines a new language (@var{man}) in terms of -the @code{uncompress} utility and the existing text scanner. The second -@code{-S} argument tells @code{mkid} to treat all @file{.Z} files as -language @var{man}. In practice, you might find the @code{mkid} -arguments need to be even more complex, something like: - -@example -mkid '-Sman/text/uncompress -c < %s | col -b' -S.Z=man - -@end example - -This will take the additional step of getting rid of any underlining and -backspacing which might be present in the compressed @var{man} pages. - -@node Database Query Tools, Iid, Mkid, Top -@chapter Database Query Tools - -The ID database is useless without database query tools. The remainder -of this document describes those tools. 
- -The @code{lid}, @code{gid}, -@code{aid}, @code{eid}, and @code{pid} programs are all the same program -installed with links to different names. The name used to invoke the -program determines how it will act. - -The @code{iid} program is an interactive query shell that sits on top -of the other query tools. - -@menu -* Common Options:: Common command line options -* Patterns:: Identifier pattern matching -* Lid:: Look up identifiers -* Aid:: Case insensitive lid -* Gid:: Grep for identifiers -* Eid:: Edit files with matching identifiers -* Pid:: Look up path names in database -@end menu - -@node Common Options, Patterns, Database Query Tools, Database Query Tools -@section Common Options - -Since many of the programs are really links to one common program, it -is only reasonable to expect that most of the query tools would share -common command line options. Not all options make sense for all programs, -but they are all described here. The description of each program -gives the options that program uses. - -@table @code -@item -f@var{} -Read the database specified by @var{}. Normally the tools look -for a file named @file{ID} in either the current directory or in any -of the directories above the current directory. This means you can keep -a global @file{ID} database in the root of a large source tree and use -the query tools from anywhere within that tree. -@item -r@var{} -The query tools usually assume the file names in the database are relative -to the directory holding the database. The @code{-r} option tells the -tools to look for the files relative to @var{} regardless -of the location of the database. -@item -c -This is shorthand for @code{-r`pwd`}. It tells the query tools to assume -the file names are stored relative to the current working directory. -@item -e -Force the pattern arguments to be treated as regular expressions. 
-Normally the query tools attempt to guess if the patterns are regular -expressions or simple identifiers by looking for special characters -in the pattern. -@item -w -Force the pattern arguments to be treated as simple words even if -they contain special regular expression characters. -@item -k -Normally the query tools that generate lists of file names attempt to -compress the lists using the @code{csh} brace notation. This option -suppresses the file name compression and outputs each name in full. -(This is particularly useful if you are a @code{ksh} user and want to -feed the list of names to another command --- the @code{-k} option -comes from the @code{k} in @code{ksh}). -@item -g -It is possible to build the query tools so the @code{-k} option is the -default behavior. If this is the case for your system, the @code{-g} -option turns on the globbing of file names using the @code{csh} brace -notation. -@item -n -Normally the query tools that generate lists of file names also list -the matching identifier at the head of the list of names. This is -irritating if you want just a list of names to feed to another command, -so the @code{-n} option suppresses the identifier and lists only -file names. -@item -b -This option is only used by the @code{pid} tool. It restricts @code{pid} -to pattern match only the basename part of a file name. Normally the -absolute file name is matched against the pattern. -@item -d -o -x -a -These options may be used in any combination to limit the radix of -numeric matches. The @code{-d} option will allow matches on decimal -numbers, @code{-o} on octal, and @code{-x} on hexadecimal numbers. -The @code{-a} option is shorthand for specifying all three. Any -combination of these options may be used. -@item -m -Merge multiple lines of output into a single line. (If your query -matches more than one identifier the default action is to generate -a separate line of output for each matching identifier). 
-@item -s -Search for identifiers that appear only once in the database. This -helps to locate identifiers that are defined but never used. -@item -u@var{} -List identifiers that conflict in the first @var{} characters. -This could be useful porting programs to brain-dead computers that -refuse to support long identifiers, but your best long term option -is to set such computers on fire. -@end table - -@node Patterns, Lid, Common Options, Database Query Tools -@section Patterns - -You can attempt to match either simple identifiers or numbers in a -query, or you can specify a regular expression pattern which may -match many different identifiers in the database. The query -programs use either @var{regex} and @var{regcmp} or @var{re_comp} -and @var{re_exec}, depending on which one is available in the library -on your system. These might not always support the exact same -regular expression syntax, so consult your local @var{man} pages -to find out. Any regular expression routines should support the following -syntax: - -@table @code -@item . -A dot matches any character. -@item [ ] -Brackets match any of the characters specified within the brackets. You -can match any characters @emph{except} the ones in brackets by typing -@code{^} as the first character. A range of characters can be specified -using @code{-}. -@item * -An asterisk means repeat the previous pattern zero or more times. -@item ^ -An @code{^} at the beginning of a pattern means the pattern must match -starting at the first character of the identifier. -@item $ -A @code{$} at the end of the pattern means the pattern must match ending -at the last character in the identifier. -@end table - -@node Lid, Aid, Patterns, Database Query Tools -@section Lid - -@deffn Command lid [@code{-f@var{}}] [@code{-u@var{}}] [@code{-r@var{}}] [@code{-ewdoxamskgnc}] patterns@dots{} -@end deffn - -The @code{lid} program stands for @var{lookup identifier}. 
-It searches the database for any identifiers matching the patterns -and prints the names of the files that match each pattern. The exact -format of the output depends on the options. - -@node Aid, Gid, Lid, Database Query Tools -@section Aid - -@deffn Command aid [@code{-f@var{}}] [@code{-u@var{}}] [@code{-r@var{}}] [@code{-doxamskgnc}] patterns@dots{} -@end deffn - -The @code{aid} command is an abbreviation for @var{apropos identifier}. -The patterns cannot be regular expressions, but it looks for them using -a case insensitive match, and any pattern that is a substring of an -identifier in the database will match that identifier. - -For example @samp{aid get} might match the identifiers @code{fgets}, -@code{GETLINE}, and @code{getchar}. - -@node Gid, Eid, Aid, Database Query Tools -@section Gid - -@deffn Command gid [@code{-f@var{}}] [@code{-u@var{}}] [@code{-r@var{}}] [@code{-doxasc}] patterns@dots{} -@end deffn - -The @code{gid} command stands for @var{grep for identifiers}. It finds -identifiers in the database that match the specified patterns, then -@code{greps} for those identifiers in just the set of files containing -matches. In a large source tree, this saves a fantastic amount of time. - -There is an @var{emacs} interface to this program (@pxref{GNU Emacs Interface}). -If you are an @var{emacs} user, you will probably prefer the @var{emacs} -interface over the @code{eid} tool. - -@node Eid, Pid, Gid, Database Query Tools -@section Eid - -@deffn Command eid [@code{-f@var{}}] [@code{-u@var{}}] [@code{-r@var{}}] [@code{-doxasc}] patterns@dots{} -@end deffn - -The @code{eid} command allows you to invoke an editor on each file containing -a matching pattern. The @code{EDITOR} environment variable is the name of the -program to be invoked. If the specified editor can accept an initial search -argument on the command line, you can use the @code{EIDARG}, @code{EIDLDEL}, -and @code{EIDRDEL} environment variables to specify the form of that argument. 
- -@table @code -@item EDITOR -The name of the editor program to invoke. -@item EIDARG -A printf string giving the form of the argument to pass containing the -initial search string (the matching identifier). For @code{vi} -it should be set to @samp{+/%s/'}. -@item EIDLDEL -A string giving the regular expression pattern that forces a match at -the beginning (left end) of a word. This string is inserted in front -of the matching identifier when composing the search argument. For @code{vi}, -this should be @samp{\<}. -@item EIDRDEL -The matching right end word delimiter. For @code{vi}, use @samp{\>}. -@end table - -@node Pid, , Eid, Database Query Tools -@section Pid - -@deffn Command pid [@code{-f@var{}}] [@code{-u@var{}}] [@code{-r@var{}}] [@code{-ebkgnc}] patterns@dots{} -@end deffn - -The @code{pid} tool is unlike all the other tools. It matches the -patterns against the file names in the database rather than the -identifiers in the database. Patterns are treated as shell wild card -patterns unless the @code{-e} option is given, in which case full -regular expression matching is done. - -The wild card pattern is matched against the absolute path name of the -file. Most shells treat slashes @samp{/} and file names that start with -dot @samp{.} specially, @code{pid} does not do this. It simply attempts -to match the absolute path name string against the wild card pattern. - -The @code{-b} option restricts the pattern matching to the base name of -the file (all the leading directory names are stripped prior to pattern -matching). - -@node Iid, Other Tools, Database Query Tools, Top -@chapter Iid - -@deffn Command iid [@code{-a}] [@code{-c@var{}}] [@code{-H}] -@table @code -@item -a -Normally @code{iid} uses the @code{lid} command to search for names. -If you give the @code{-a} option on the command line, then it will -use @code{aid} as the default search engine. 
-@item -c@var{} -In normal operation, @code{iid} starts up and prompts you for commands -used to build sets of files. The @code{-c} option is used to pass a -single query command to @code{iid} which it then executes and exits. -@item -H -The @code{-H} option prints a short help message and exits. To get more -help use the @code{help} command from inside @code{iid}. -@end table -@end deffn - -The @code{iid} program is an interactive ID query tool. It operates by -running the other query programs (such as @code{lid} and @code{aid}) -and creating sets of file names returned by these queries. It also -provides operators for @code{anding} and @code{oring} these sets to -create new sets. - -The @code{PAGER} environment variable names the program @code{iid} uses -to display files. If you use @code{emacs}, you might want to set -@code{PAGER} so it invokes the @code{emacsclient} program. Check the -file @file{lisp/server.el} in the emacs source tree for documentation on -this. It is useful not only with X windows, but also when running -@code{iid} from an emacs shell buffer. There is also a somewhat spiffier -version called gnuserv by Andy Norman -(@code{ange%anorman@@hplabs.hp.com}) which appeared in @file{comp.emacs} -sometime in 1989. - -@menu -* Ss and Files commands:: Ss and Files commands -* Sets:: Sets -* Show:: Show -* Begin:: Begin -* Help:: Help -* Off:: Off -* Shell Commands as Queries:: Shell Commands as Queries -* Shell Escape:: Shell Escape -@end menu - -@node Ss and Files commands, Sets, Iid, Iid -@section Ss and Files commands - -The primary query commands are @code{ss} (for select sets) and @code{files} -(for show file names). These commands both take a query expression as an -argument. - -@deffn Subcommand ss query -The @code{ss} command runs a query and builds a set (or sets) of file names. The -result is printed as a summary of the sets constructed showing how many file -names are in each set. 
-@end deffn - -@deffn Subcommand files query -The @code{files} command is like the @code{ss} command, but rather than printing -a summary, it displays the full list of matching file names. -@end deffn - -@deffn Subcommand f query -The @code{f} command is merely a shorthand notation for @code{files}. -@end deffn - -Database queries are simple expressions with operators like @code{and} -and @code{or}. Parentheses can be used to group operations. The complete -set of operators is summarized below: - -@table @code -@item @var{pattern} -Any pattern not recognized as one of the keywords in this table is treated -as an identifier to be searched for in the database. It is passed as an -argument to the default search program (normally @code{lid}, but @code{aid} -is used if the @code{-a} option was given when @code{iid} was started). -The result of this operation is a set of file names, and it is assigned a -unique set number. -@item lid -@code{lid} is a keyword. It is used to invoke @code{lid} with the list of -identifiers following it as arguments. This forces the use of @code{lid} -regardless of the state of the @code{-a} option (@pxref{Lid}). -@item aid -The @code{aid} keyword is like the @code{lid} keyword, but it forces the -use of the @code{aid} program (@pxref{Aid}). -@item match -The @code{match} operator invokes the @code{pid} program to do pattern -matching on file names rather than identifiers. The set generated contains -the file names that match the specified patterns (@pxref{Pid}). -@item or -The @code{or} operator takes two sets of file names as arguments and generates -a new set containing all the files from both sets. -@item and -The @code{and} operator takes two sets of file names and generates a new -set containing only the files that appear in both sets. -@item not -The @code{not} operator inverts a set of file names, producing the set of -all files not in the input set. -@item set number -A set number consists of the letter @code{s} followed immediately by a number. 
-This refers to one of the sets created by a previous query operation. During -one @code{iid} session, each query generates a unique set number, so any -previously generated set may be used as part of any new query by referring -to the set number. -@end table - -The @code{not} operator has the highest precedence with @code{and} -coming in the middle and @code{or} having the lowest precedence. The -operator names are recognized using case insensitive matching, so -@code{AND}, @code{and}, and @code{aNd} are all the same as far as -@code{iid} is concerned. If you wish to use a keyword as an operand to -one of the query programs, you must enclose it in quotes. Any patterns -containing shell special characters must also be properly quoted or -escaped, since the query commands are run by invoking them with the -shell. - -Summary of query expression syntax: - -@example -A is: - - - lid - aid - match - or - and - not - ( ) -@end example - -@node Sets, Show, Ss and Files commands, Iid -@section Sets - -@deffn Subcommand sets -@end deffn - -The @code{sets} command displays all the sets created so far. Each one -is described by the query command that generated it. - -@node Show, Begin, Sets, Iid -@section Show - -@deffn Subcommand show set -@end deffn - -@deffn Subcommand p set -@end deffn - -The @code{show} and @code{p} commands are equivalent. They both accept -a set number as an argument and run the program given in the @code{PAGER} -environment variable with the file names in that set as arguments. - -@node Begin, Help, Show, Iid -@section Begin - -@deffn Subcommand begin directory -@end deffn - -@deffn Subcommand b directory -@end deffn - -The @code{begin} command (and its abbreviated version @code{b}) is used -to begin a new @code{iid} session in a different directory (which presumably -contains a different database). It flushes all the sets created so far -and switches to the specified directory. 
It is equivalent to exiting @code{iid}, -changing directories in the shell, and running @code{iid} again. - -@node Help, Off, Begin, Iid -@section Help - -@deffn Subcommand help -@end deffn - -@deffn Subcommand h -@end deffn - -@deffn Subcommand ? -@end deffn - -The @code{help}, @code{h}, and @code{?} commands are three different ways to -ask for help. They all invoke the @code{PAGER} program to display a short -help file. - -@node Off, Shell Commands as Queries, Help, Iid -@section Off - -@deffn Subcommand off -@end deffn - -@deffn Subcommand quit -@end deffn - -@deffn Subcommand q -@end deffn - -These three commands (or just an end of file) all cause @code{iid} to exit. - -@node Shell Commands as Queries, Shell Escape, Off, Iid -@section Shell Commands as Queries - -When the first word on an @code{iid} command is not recognized as a -builtin @code{iid} command, @code{iid} assumes the command is a shell -command which will write a list of file names to @var{stdout}. This list -of file names is used to generate a new set of files. - -Any set numbers that appear as arguments to this command are expanded -into lists of file names prior to running the command. - -@node Shell Escape, , Shell Commands as Queries, Iid -@section Shell Escape - -If a command starts with a bang (@code{!}) character, the remainder of -the line is run as a shell command. Any set numbers that appear as -arguments to this command are expanded into lists of file names prior to -running the command. - -@node Other Tools, Command Index, Iid, Top -@chapter Other Tools - -This chapter describes some support tools that work with the other ID -programs. - -@menu -* GNU Emacs Interface:: Using gid.el -* Fid:: List identifiers in a file. -* Idx:: Extract identifiers from source file. -@end menu - -@node GNU Emacs Interface, Fid, Other Tools, Other Tools -@section GNU Emacs Interface - -The source distribution comes with a file named @file{gid.el}. This is -a GNU emacs interface to the @code{gid} tool. 
If you put the file where -emacs can find it (somewhere in your @code{EMACSLOADPATH}) and put -@code{(autoload 'gid "gid" nil t)} in your @file{.emacs} file, you will -be able to invoke the @code{gid} function using @kbd{M-x gid}. - -This function prompts you with the word the cursor is on. If you want -to search for a different pattern, simply delete the line and type the -pattern of interest. - -It runs @code{gid} in a @code{*compilation*} buffer, so the normal -@code{next-error} function can be used to visit all the places the -identifier is found (@pxref{Compilation,,,emacs,The GNU Emacs Manual}). - -@node Fid, Idx, GNU Emacs Interface, Other Tools -@section Fid - -@deffn Command fid [@code{-f@var{}}] file1 [file2] -@table @code -@item -f@var{} -Look in the named database. -@item @var{file1} -List the identifiers contained in file1 according to the database. -@item @var{file2} -If a second file is given, list only the identifiers both files have -in common. -@end table -@end deffn - -The @code{fid} program provides an inverse query. Instead of listing -files containing some identifier, it lists the identifiers found in -a file. - -@node Idx, , Fid, Other Tools -@section Idx - -@deffn Command idx [@code{-s@var{}}] [@code{-r@var{}}] [@code{-S@var{}}] files@dots{} -The @code{-s}, @code{-r}, and @code{-S} arguments to @code{idx} -are identical to the same arguments on @code{mkid} -(@pxref{Mkid Command Line Options}). -@end deffn - -The @code{idx} command is more of a test frame for scanners than a tool -designed to be independently useful. It takes the same scanner arguments -as @code{mkid}, but rather than building a database, it prints the -identifiers found to @var{stdout}, one per line. You can use it to try -out a scanner on a sample file to make sure it is extracting the -identifiers you believe it should extract. 
- -@node Command Index, , Other Tools, Top -@unnumbered Command Index - -@printindex fn - -@contents -@bye diff --git a/regex.c b/regex.c deleted file mode 100644 index 3900958..0000000 --- a/regex.c +++ /dev/null @@ -1,5244 +0,0 @@ -/* Extended regular expression matching and search library, - version 0.12. - (Implements POSIX draft P10003.2/D11.2, except for - internationalization features.) - - Copyright (C) 1993, 1994 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* AIX requires this to be the first thing in the file. */ -#if defined (_AIX) && !defined (REGEX_MALLOC) - #pragma alloca -#endif - -#define _GNU_SOURCE - -#if HAVE_CONFIG_H -#include -#endif - -/* We need this for `regex.h', and perhaps for the Emacs include files. */ -#include - -/* The `emacs' switch turns on certain matching commands - that make sense only in Emacs. */ -#ifdef emacs - -#include "lisp.h" -#include "buffer.h" -#include "syntax.h" - -/* Emacs uses `NULL' as a predicate. */ -#undef NULL - -#else /* not emacs */ - -#ifdef STDC_HEADERS -#include -#else -char *malloc (); -char *realloc (); -#endif - - -/* We used to test for `BSTRING' here, but only GCC and Emacs define - `BSTRING', as far as I know, and neither of them use this code. 
*/ -#ifndef INHIBIT_STRING_HEADER -#if HAVE_STRING_H || STDC_HEADERS -#include -#ifndef bcmp -#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) -#endif -#ifndef bcopy -#define bcopy(s, d, n) memcpy ((d), (s), (n)) -#endif -#ifndef bzero -#define bzero(s, n) memset ((s), 0, (n)) -#endif -#else -#include -#endif -#endif - -/* Define the syntax stuff for \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -#ifndef Sword -#define Sword 1 -#endif - -#ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -#else /* not SYNTAX_TABLE */ - -/* How many characters in the character set. */ -#define CHAR_SET_SIZE 256 - -static char re_syntax_table[CHAR_SET_SIZE]; - -static void -init_syntax_once () -{ - register int c; - static int done = 0; - - if (done) - return; - - bzero (re_syntax_table, sizeof re_syntax_table); - - for (c = 'a'; c <= 'z'; c++) - re_syntax_table[c] = Sword; - - for (c = 'A'; c <= 'Z'; c++) - re_syntax_table[c] = Sword; - - for (c = '0'; c <= '9'; c++) - re_syntax_table[c] = Sword; - - re_syntax_table['_'] = Sword; - - done = 1; -} - -#endif /* not SYNTAX_TABLE */ - -#define SYNTAX(c) re_syntax_table[c] - -#endif /* not emacs */ - -/* Get the interface, including the syntax bits. */ -#include "regex.h" - -/* isalpha etc. are used for the character classes. */ -#include - -/* Jim Meyering writes: - - "... Some ctype macros are valid only for character codes that - isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when - using /bin/cc or gcc but without giving an ansi option). So, all - ctype uses should be through macros like ISPRINT... If - STDC_HEADERS is defined, then autoconf has verified that the ctype - macros don't need to be guarded with references to isascii. ... - Defining isascii to 1 should let any compiler worth its salt - eliminate the && through constant folding." 
*/ - -#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) -#define ISASCII(c) 1 -#else -#define ISASCII(c) isascii(c) -#endif - -#ifdef isblank -#define ISBLANK(c) (ISASCII (c) && isblank (c)) -#else -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -#define ISGRAPH(c) (ISASCII (c) && isgraph (c)) -#else -#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) -#endif - -#define ISPRINT(c) (ISASCII (c) && isprint (c)) -#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) -#define ISALNUM(c) (ISASCII (c) && isalnum (c)) -#define ISALPHA(c) (ISASCII (c) && isalpha (c)) -#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) -#define ISLOWER(c) (ISASCII (c) && islower (c)) -#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) -#define ISSPACE(c) (ISASCII (c) && isspace (c)) -#define ISUPPER(c) (ISASCII (c) && isupper (c)) -#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) - -#ifndef NULL -#define NULL 0 -#endif - -/* We remove any previous definition of `SIGN_EXTEND_CHAR', - since ours (we hope) works properly with all combinations of - machines, compilers, `char' and `unsigned char' argument types. - (Per Bothner suggested the basic approach.) */ -#undef SIGN_EXTEND_CHAR -#if __STDC__ -#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -#else /* not __STDC__ */ -/* As in Harbison and Steele. */ -#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -#endif - -/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we - use `alloca' instead of `malloc'. This is because using malloc in - re_search* or re_match* could cause memory leaks when C-g is used in - Emacs; also, malloc is slower and causes storage fragmentation. On - the other hand, malloc is more portable, and easier to debug. - - Because we sometimes use alloca, some routines have to be macros, - not functions -- `alloca'-allocated space disappears at the end of the - function it is called in. 
*/ - -#ifdef REGEX_MALLOC - -#define REGEX_ALLOCATE malloc -#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) - -#else /* not REGEX_MALLOC */ - -/* Emacs already defines alloca, sometimes. */ -#ifndef alloca - -/* Make alloca work the best possible way. */ -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not __GNUC__ */ -#if HAVE_ALLOCA_H -#include -#else /* not __GNUC__ or HAVE_ALLOCA_H */ -#ifndef _AIX /* Already did AIX, up at the top. */ -char *alloca (); -#endif /* not _AIX */ -#endif /* not HAVE_ALLOCA_H */ -#endif /* not __GNUC__ */ - -#endif /* not alloca */ - -#define REGEX_ALLOCATE alloca - -/* Assumes a `char *destination' variable. */ -#define REGEX_REALLOCATE(source, osize, nsize) \ - (destination = (char *) alloca (nsize), \ - bcopy (source, destination, osize), \ - destination) - -#endif /* not REGEX_MALLOC */ - - -/* True if `size1' is non-NULL and PTR is pointing anywhere inside - `string1' or just past its end. This works if PTR is NULL, which is - a good thing. */ -#define FIRST_STRING_P(ptr) \ - (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) - -/* (Re)Allocate N items of type T using malloc, or fail. */ -#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -#define RETALLOC_IF(addr, n, t) \ - if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) -#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) - -#define BYTEWIDTH 8 /* In bits. */ - -#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) - -#undef MAX -#undef MIN -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -typedef char boolean; -#define false 0 -#define true 1 - -static int re_match_2_internal (); - -/* These are the command codes that appear in compiled regular - expressions. Some opcodes are followed by argument bytes. A - command code can specify any interpretation whatsoever for its - arguments. 
Zero bytes may appear in the compiled regular expression. */ - -typedef enum -{ - no_op = 0, - - /* Followed by one byte giving n, then by n literal bytes. */ - exactn, - - /* Matches any (more or less) character. */ - anychar, - - /* Matches any one char belonging to specified set. First - following byte is number of bitmap bytes. Then come bytes - for a bitmap saying which chars are in. Bits in each byte - are ordered low-bit-first. A character is in the set if its - bit is 1. A character too large to have a bit in the map is - automatically not in the set. */ - charset, - - /* Same parameters as charset, but match any character that is - not one of those specified. */ - charset_not, - - /* Start remembering the text that is matched, for storing in a - register. Followed by one byte with the register number, in - the range 0 to one less than the pattern buffer's re_nsub - field. Then followed by one byte with the number of groups - inner to this one. (This last has to be part of the - start_memory only because we need it in the on_failure_jump - of re_match_2.) */ - start_memory, - - /* Stop remembering the text that is matched and store it in a - memory register. Followed by one byte with the register - number, in the range 0 to one less than `re_nsub' in the - pattern buffer, and one byte with the number of inner groups, - just like `start_memory'. (We need the number of inner - groups here because we don't have any easy way of finding the - corresponding start_memory when we're at a stop_memory.) */ - stop_memory, - - /* Match a duplicate of something remembered. Followed by one - byte containing the register number. */ - duplicate, - - /* Fail unless at beginning of line. */ - begline, - - /* Fail unless at end of line. */ - endline, - - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). */ - begbuf, - - /* Analogously, for end of buffer/string. 
*/ - endbuf, - - /* Followed by two byte relative address to which to jump. */ - jump, - - /* Same as jump, but marks the end of an alternative. */ - jump_past_alt, - - /* Followed by two-byte relative address of place to resume at - in case of failure. */ - on_failure_jump, - - /* Like on_failure_jump, but pushes a placeholder instead of the - current string position when executed. */ - on_failure_keep_string_jump, - - /* Throw away latest failure point and then jump to following - two-byte relative address. */ - pop_failure_jump, - - /* Change to pop_failure_jump if know won't have to backtrack to - match; otherwise change to jump. This is used to jump - back to the beginning of a repeat. If what follows this jump - clearly won't match what the repeat does, such that we can be - sure that there is no use backtracking out of repetitions - already matched, then we change it to a pop_failure_jump. - Followed by two-byte address. */ - maybe_pop_jump, - - /* Jump to following two-byte address, and push a dummy failure - point. This failure point will be thrown away if an attempt - is made to use it for a failure. A `+' construct makes this - before the first repeat. Also used as an intermediary kind - of jump when compiling an alternative. */ - dummy_failure_jump, - - /* Push a dummy failure point and continue. Used at the end of - alternatives. */ - push_dummy_failure, - - /* Followed by two-byte relative address and two-byte number n. - After matching N times, jump to the address upon failure. */ - succeed_n, - - /* Followed by two-byte relative address, and two-byte number n. - Jump to the address N times, then fail. */ - jump_n, - - /* Set the following two-byte relative address to the - subsequent two-byte number. The address *includes* the two - bytes of number. */ - set_number_at, - - wordchar, /* Matches any word-constituent character. */ - notwordchar, /* Matches any char that is not a word-constituent. */ - - wordbeg, /* Succeeds if at word beginning. 
*/ - wordend, /* Succeeds if at word end. */ - - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ - -#ifdef emacs - ,before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. */ - - /* Matches any character whose syntax is specified. Followed by - a byte which contains a syntax code, e.g., Sword. */ - syntaxspec, - - /* Matches any character whose syntax is not that specified. */ - notsyntaxspec -#endif /* emacs */ -} re_opcode_t; - -/* Common operations on the compiled pattern. */ - -/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ - -#define STORE_NUMBER(destination, number) \ - do { \ - (destination)[0] = (number) & 0377; \ - (destination)[1] = (number) >> 8; \ - } while (0) - -/* Same as STORE_NUMBER, except increment DESTINATION to - the byte after where the number is stored. Therefore, DESTINATION - must be an lvalue. */ - -#define STORE_NUMBER_AND_INCR(destination, number) \ - do { \ - STORE_NUMBER (destination, number); \ - (destination) += 2; \ - } while (0) - -/* Put into DESTINATION a number stored in two contiguous bytes starting - at SOURCE. */ - -#define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source) & 0377; \ - (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ - } while (0) - -#ifdef DEBUG -static void -extract_number (dest, source) - int *dest; - unsigned char *source; -{ - int temp = SIGN_EXTEND_CHAR (*(source + 1)); - *dest = *source & 0377; - *dest += temp << 8; -} - -#ifndef EXTRACT_MACROS /* To debug the macros. */ -#undef EXTRACT_NUMBER -#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. - SOURCE must be an lvalue. 
*/ - -#define EXTRACT_NUMBER_AND_INCR(destination, source) \ - do { \ - EXTRACT_NUMBER (destination, source); \ - (source) += 2; \ - } while (0) - -#ifdef DEBUG -static void -extract_number_and_incr (destination, source) - int *destination; - unsigned char **source; -{ - extract_number (destination, *source); - *source += 2; -} - -#ifndef EXTRACT_MACROS -#undef EXTRACT_NUMBER_AND_INCR -#define EXTRACT_NUMBER_AND_INCR(dest, src) \ - extract_number_and_incr (&dest, &src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* If DEBUG is defined, Regex prints many voluminous messages about what - it is doing (if the variable `debug' is nonzero). If linked with the - main program in `iregex.c', you can enter patterns and strings - interactively. And if linked with the main program in `main.c' and - the other test files, you can run the already-written tests. */ - -#ifdef DEBUG - -/* We use standard I/O for debugging. */ -#include - -/* It is useful to test things that ``must'' be true when debugging. */ -#include - -static int debug = 0; - -#define DEBUG_STATEMENT(e) e -#define DEBUG_PRINT1(x) if (debug) printf (x) -#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) print_partial_compiled_pattern (s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) print_double_string (w, s1, sz1, s2, sz2) - - -extern void printchar (); - -/* Print the fastmap in human-readable form. 
*/ - -void -print_fastmap (fastmap) - char *fastmap; -{ - unsigned was_a_range = 0; - unsigned i = 0; - - while (i < (1 << BYTEWIDTH)) - { - if (fastmap[i++]) - { - was_a_range = 0; - printchar (i - 1); - while (i < (1 << BYTEWIDTH) && fastmap[i]) - { - was_a_range = 1; - i++; - } - if (was_a_range) - { - printf ("-"); - printchar (i - 1); - } - } - } - putchar ('\n'); -} - - -/* Print a compiled pattern string in human-readable form, starting at - the START pointer into it and ending just before the pointer END. */ - -void -print_partial_compiled_pattern (start, end) - unsigned char *start; - unsigned char *end; -{ - int mcnt, mcnt2; - unsigned char *p = start; - unsigned char *pend = end; - - if (start == NULL) - { - printf ("(null)\n"); - return; - } - - /* Loop over pattern commands. */ - while (p < pend) - { - printf ("%d:\t", p - start); - - switch ((re_opcode_t) *p++) - { - case no_op: - printf ("/no_op"); - break; - - case exactn: - mcnt = *p++; - printf ("/exactn/%d", mcnt); - do - { - putchar ('/'); - printchar (*p++); - } - while (--mcnt); - break; - - case start_memory: - mcnt = *p++; - printf ("/start_memory/%d/%d", mcnt, *p++); - break; - - case stop_memory: - mcnt = *p++; - printf ("/stop_memory/%d/%d", mcnt, *p++); - break; - - case duplicate: - printf ("/duplicate/%d", *p++); - break; - - case anychar: - printf ("/anychar"); - break; - - case charset: - case charset_not: - { - register int c, last = -100; - register int in_range = 0; - - printf ("/charset [%s", - (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); - - assert (p + *p < pend); - - for (c = 0; c < 256; c++) - if (c / 8 < *p - && (p[1 + (c/8)] & (1 << (c % 8)))) - { - /* Are we starting a range? */ - if (last + 1 == c && ! in_range) - { - putchar ('-'); - in_range = 1; - } - /* Have we broken a range? */ - else if (last + 1 != c && in_range) - { - printchar (last); - in_range = 0; - } - - if (! 
in_range) - printchar (c); - - last = c; - } - - if (in_range) - printchar (last); - - putchar (']'); - - p += 1 + *p; - } - break; - - case begline: - printf ("/begline"); - break; - - case endline: - printf ("/endline"); - break; - - case on_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_jump to %d", p + mcnt - start); - break; - - case on_failure_keep_string_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); - break; - - case dummy_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/dummy_failure_jump to %d", p + mcnt - start); - break; - - case push_dummy_failure: - printf ("/push_dummy_failure"); - break; - - case maybe_pop_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/maybe_pop_jump to %d", p + mcnt - start); - break; - - case pop_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/pop_failure_jump to %d", p + mcnt - start); - break; - - case jump_past_alt: - extract_number_and_incr (&mcnt, &p); - printf ("/jump_past_alt to %d", p + mcnt - start); - break; - - case jump: - extract_number_and_incr (&mcnt, &p); - printf ("/jump to %d", p + mcnt - start); - break; - - case succeed_n: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); - break; - - case jump_n: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); - break; - - case set_number_at: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); - break; - - case wordbound: - printf ("/wordbound"); - break; - - case notwordbound: - printf ("/notwordbound"); - break; - - case wordbeg: - printf ("/wordbeg"); - break; - - case wordend: - printf ("/wordend"); - -#ifdef emacs - case before_dot: - printf 
("/before_dot"); - break; - - case at_dot: - printf ("/at_dot"); - break; - - case after_dot: - printf ("/after_dot"); - break; - - case syntaxspec: - printf ("/syntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; - - case notsyntaxspec: - printf ("/notsyntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; -#endif /* emacs */ - - case wordchar: - printf ("/wordchar"); - break; - - case notwordchar: - printf ("/notwordchar"); - break; - - case begbuf: - printf ("/begbuf"); - break; - - case endbuf: - printf ("/endbuf"); - break; - - default: - printf ("?%d", *(p-1)); - } - - putchar ('\n'); - } - - printf ("%d:\tend of pattern.\n", p - start); -} - - -void -print_compiled_pattern (bufp) - struct re_pattern_buffer *bufp; -{ - unsigned char *buffer = bufp->buffer; - - print_partial_compiled_pattern (buffer, buffer + bufp->used); - printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); - - if (bufp->fastmap_accurate && bufp->fastmap) - { - printf ("fastmap: "); - print_fastmap (bufp->fastmap); - } - - printf ("re_nsub: %d\t", bufp->re_nsub); - printf ("regs_alloc: %d\t", bufp->regs_allocated); - printf ("can_be_null: %d\t", bufp->can_be_null); - printf ("newline_anchor: %d\n", bufp->newline_anchor); - printf ("no_sub: %d\t", bufp->no_sub); - printf ("not_bol: %d\t", bufp->not_bol); - printf ("not_eol: %d\t", bufp->not_eol); - printf ("syntax: %d\n", bufp->syntax); - /* Perhaps we should print the translate table? 
*/ -} - - -void -print_double_string (where, string1, size1, string2, size2) - const char *where; - const char *string1; - const char *string2; - int size1; - int size2; -{ - unsigned this_char; - - if (where == NULL) - printf ("(null)"); - else - { - if (FIRST_STRING_P (where)) - { - for (this_char = where - string1; this_char < size1; this_char++) - printchar (string1[this_char]); - - where = string2; - } - - for (this_char = where - string2; this_char < size2; this_char++) - printchar (string2[this_char]); - } -} - -#else /* not DEBUG */ - -#undef assert -#define assert(e) - -#define DEBUG_STATEMENT(e) -#define DEBUG_PRINT1(x) -#define DEBUG_PRINT2(x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) - -#endif /* not DEBUG */ - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; - return ret; -} - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. 
*/ - -static const char *re_error_msg[] = - { NULL, /* REG_NOERROR */ - "No match", /* REG_NOMATCH */ - "Invalid regular expression", /* REG_BADPAT */ - "Invalid collation character", /* REG_ECOLLATE */ - "Invalid character class name", /* REG_ECTYPE */ - "Trailing backslash", /* REG_EESCAPE */ - "Invalid back reference", /* REG_ESUBREG */ - "Unmatched [ or [^", /* REG_EBRACK */ - "Unmatched ( or \\(", /* REG_EPAREN */ - "Unmatched \\{", /* REG_EBRACE */ - "Invalid content of \\{\\}", /* REG_BADBR */ - "Invalid range end", /* REG_ERANGE */ - "Memory exhausted", /* REG_ESPACE */ - "Invalid preceding regular expression", /* REG_BADRPT */ - "Premature end of regular expression", /* REG_EEND */ - "Regular expression too big", /* REG_ESIZE */ - "Unmatched ) or \\)", /* REG_ERPAREN */ - }; - -/* Avoiding alloca during matching, to placate r_alloc. */ - -/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the - searching and matching functions should not call alloca. On some - systems, alloca is implemented in terms of malloc, and if we're - using the relocating allocator routines, then malloc could cause a - relocation, which might (if the strings being searched are in the - ralloc heap) shift the data out from underneath the regexp - routines. - - Here's another reason to avoid allocation: Emacs - processes input from X in a signal handler; processing X input may - call malloc; if input arrives while a matching routine is calling - malloc, then we're scrod. But Emacs can't just block input while - calling matching routines; then we don't notice interrupts when - they come in. So, Emacs blocks input around all regexp calls - except the matching calls, which it leaves unprotected, in the - faith that they will not malloc. */ - -/* Normally, this is fine. */ -#define MATCH_MAY_ALLOCATE - -/* The match routines may not allocate if (1) they would do it with malloc - and (2) it's not safe for them to use malloc. 
*/ -#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && (defined (emacs) || defined (REL_ALLOC)) -#undef MATCH_MAY_ALLOCATE -#endif - - -/* Failure stack declarations and macros; both re_compile_fastmap and - re_match_2 use a failure stack. These have to be macros because of - REGEX_ALLOCATE. */ - - -/* Number of failure points for which to initially allocate space - when matching. If this number is exceeded, we allocate more - space, so it is not a hard limit. */ -#ifndef INIT_FAILURE_ALLOC -#define INIT_FAILURE_ALLOC 5 -#endif - -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_SPACE each time we failed. - This is a variable only so users of regex can assign to it; we never - change it ourselves. */ -int re_max_failures = 2000; - -typedef unsigned char *fail_stack_elt_t; - -typedef struct -{ - fail_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} fail_stack_type; - -#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) -#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) -#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) -#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) - - -/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */ - -#ifdef MATCH_MAY_ALLOCATE -#define INIT_FAIL_STACK() \ - do { \ - fail_stack.stack = (fail_stack_elt_t *) \ - REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ - \ - if (fail_stack.stack == NULL) \ - return -2; \ - \ - fail_stack.size = INIT_FAILURE_ALLOC; \ - fail_stack.avail = 0; \ - } while (0) -#else -#define INIT_FAIL_STACK() \ - do { \ - fail_stack.avail = 0; \ - } while (0) -#endif - - -/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. - - Return 1 if succeeds, and 0 if either ran out of memory - allocating space for it or it was already too large. - - REGEX_REALLOCATE requires `destination' be declared. 
*/ - -#define DOUBLE_FAIL_STACK(fail_stack) \ - ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ - ? 0 \ - : ((fail_stack).stack = (fail_stack_elt_t *) \ - REGEX_REALLOCATE ((fail_stack).stack, \ - (fail_stack).size * sizeof (fail_stack_elt_t), \ - ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ - \ - (fail_stack).stack == NULL \ - ? 0 \ - : ((fail_stack).size <<= 1, \ - 1))) - - -/* Push PATTERN_OP on FAIL_STACK. - - Return 1 if was able to do so and 0 if ran out of memory allocating - space to do so. */ -#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ - ((FAIL_STACK_FULL () \ - && !DOUBLE_FAIL_STACK (fail_stack)) \ - ? 0 \ - : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ - 1)) - -/* This pushes an item onto the failure stack. Must be a four-byte - value. Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_ITEM(item) \ - fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item - -/* The complement operation. Assumes `fail_stack' is nonempty. */ -#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail] - -/* Used to omit pushing failure point id's when we're not debugging. */ -#ifdef DEBUG -#define DEBUG_PUSH PUSH_FAILURE_ITEM -#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM () -#else -#define DEBUG_PUSH(item) -#define DEBUG_POP(item_addr) -#endif - - -/* Push the information about the state we will need - if we ever fail back to it. - - Requires variables fail_stack, regstart, regend, reg_info, and - num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be - declared. - - Does `return FAILURE_CODE' if runs out of memory. */ - -#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ - do { \ - char *destination; \ - /* Must be int, so when we don't save any registers, the arithmetic \ - of 0 + -1 isn't done as unsigned. 
*/ \ - int this_reg; \ - \ - DEBUG_STATEMENT (failure_id++); \ - DEBUG_STATEMENT (nfailure_points_pushed++); \ - DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ - DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ - DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ - \ - DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ - DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ - \ - /* Ensure we have enough space allocated for what we will push. */ \ - while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ - { \ - if (!DOUBLE_FAIL_STACK (fail_stack)) \ - return failure_code; \ - \ - DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ - (fail_stack).size); \ - DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ - } \ - \ - /* Push the info, starting with the registers. */ \ - DEBUG_PRINT1 ("\n"); \ - \ - for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ - this_reg++) \ - { \ - DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ - DEBUG_STATEMENT (num_regs_pushed++); \ - \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - PUSH_FAILURE_ITEM (regstart[this_reg]); \ - \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - PUSH_FAILURE_ITEM (regend[this_reg]); \ - \ - DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ - DEBUG_PRINT2 (" match_null=%d", \ - REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ - DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ - DEBUG_PRINT2 (" matched_something=%d", \ - MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT2 (" ever_matched=%d", \ - EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT1 ("\n"); \ - PUSH_FAILURE_ITEM (reg_info[this_reg].word); \ - } \ - \ - DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ - PUSH_FAILURE_ITEM (lowest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ - PUSH_FAILURE_ITEM (highest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing pattern 
0x%x: ", pattern_place); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ - PUSH_FAILURE_ITEM (pattern_place); \ - \ - DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ - DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ - size2); \ - DEBUG_PRINT1 ("'\n"); \ - PUSH_FAILURE_ITEM (string_place); \ - \ - DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ - DEBUG_PUSH (failure_id); \ - } while (0) - -/* This is the number of items that are pushed and popped on the stack - for each register. */ -#define NUM_REG_ITEMS 3 - -/* Individual items aside from the registers. */ -#ifdef DEBUG -#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ -#else -#define NUM_NONREG_ITEMS 4 -#endif - -/* We push at most this many items on the stack. */ -#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS) - -/* We actually push this many items. */ -#define NUM_FAILURE_ITEMS \ - ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ - + NUM_NONREG_ITEMS) - -/* How many items can still be added to the stack without overflowing it. */ -#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) - - -/* Pops what PUSH_FAIL_STACK pushes. - - We restore into the parameters, all of which should be lvalues: - STR -- the saved data position. - PAT -- the saved pattern position. - LOW_REG, HIGH_REG -- the highest and lowest active registers. - REGSTART, REGEND -- arrays of string positions. - REG_INFO -- array of information about each subexpression. - - Also assumes the variables `fail_stack' and (if debugging), `bufp', - `pend', `string1', `size1', `string2', and `size2'. */ - -#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ -{ \ - DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ - int this_reg; \ - const unsigned char *string_temp; \ - \ - assert (!FAIL_STACK_EMPTY ()); \ - \ - /* Remove failure points and point to how many regs pushed. 
*/ \ - DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ - DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ - \ - assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ - \ - DEBUG_POP (&failure_id); \ - DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ - \ - /* If the saved string location is NULL, it came from an \ - on_failure_keep_string_jump opcode, and we want to throw away the \ - saved NULL, thus retaining our current position in the string. */ \ - string_temp = POP_FAILURE_ITEM (); \ - if (string_temp != NULL) \ - str = (const char *) string_temp; \ - \ - DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ - DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ - DEBUG_PRINT1 ("'\n"); \ - \ - pat = (unsigned char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ - \ - /* Restore register info. */ \ - high_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ - \ - low_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ - \ - for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ - { \ - DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ - \ - reg_info[this_reg].word = POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ - \ - regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - \ - regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - } \ - \ - DEBUG_STATEMENT (nfailure_points_popped++); \ -} /* POP_FAILURE_POINT */ - - - -/* Structure for per-register (a.k.a. per-group) information. - This must not be longer than one word, because we push this value - onto the failure stack. 
Other register information, such as the - starting and ending positions (which are addresses), and the list of - inner groups (which is a bits list) are maintained in separate - variables. - - We are making a (strictly speaking) nonportable assumption here: that - the compiler will pack our bit fields into something that fits into - the type of `word', i.e., is something that fits into one item on the - failure stack. */ -typedef union -{ - fail_stack_elt_t word; - struct - { - /* This field is one if this group can match the empty string, - zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ -#define MATCH_NULL_UNSET_VALUE 3 - unsigned match_null_string_p : 2; - unsigned is_active : 1; - unsigned matched_something : 1; - unsigned ever_matched_something : 1; - } bits; -} register_info_type; - -#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) -#define IS_ACTIVE(R) ((R).bits.is_active) -#define MATCHED_SOMETHING(R) ((R).bits.matched_something) -#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) - - -/* Call this when have matched a real character; it sets `matched' flags - for the subexpressions which we are currently inside. Also records - that those subexprs have matched. */ -#define SET_REGS_MATCHED() \ - do \ - { \ - unsigned r; \ - for (r = lowest_active_reg; r <= highest_active_reg; r++) \ - { \ - MATCHED_SOMETHING (reg_info[r]) \ - = EVER_MATCHED_SOMETHING (reg_info[r]) \ - = 1; \ - } \ - } \ - while (0) - - -/* Registers are set to a sentinel when they haven't yet matched. */ -#define REG_UNSET_VALUE ((char *) -1) -#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) - - - -/* How do we implement a missing MATCH_MAY_ALLOCATE? - We make the fail stack a global thing, and then grow it to - re_max_failures when we compile. 
*/ -#ifndef MATCH_MAY_ALLOCATE -static fail_stack_type fail_stack; - -static const char ** regstart, ** regend; -static const char ** old_regstart, ** old_regend; -static const char **best_regstart, **best_regend; -static register_info_type *reg_info; -static const char **reg_dummy; -static register_info_type *reg_info_dummy; -#endif - - -/* Subroutine declarations and macros for regex_compile. */ - -static void store_op1 (), store_op2 (); -static void insert_op1 (), insert_op2 (); -static boolean at_begline_loc_p (), at_endline_loc_p (); -static boolean group_in_compile_stack (); -static reg_errcode_t compile_range (); - -/* Fetch the next character in the uncompiled pattern---translating it - if necessary. Also cast from a signed character in the constant - string passed to us by the user to an unsigned char that we can use - as an array index (in, e.g., `translate'). */ -#define PATFETCH(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - if (translate) c = translate[c]; \ - } while (0) - -/* Fetch the next character in the uncompiled pattern, with no - translation. */ -#define PATFETCH_RAW(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - } while (0) - -/* Go backwards one character in the pattern. */ -#define PATUNFETCH p-- - - -/* If `translate' is non-null, return translate[D], else just D. We - cast the subscript to translate because some data is declared as - `char *', to avoid warnings when a string constant is passed. But - when we use a character as a subscript we must make it unsigned. */ -#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) - - -/* Macros for outputting the compiled pattern into `buffer'. */ - -/* If the buffer isn't allocated when it comes in, use this. */ -#define INIT_BUF_SIZE 32 - -/* Make sure we have at least N more bytes of space in buffer. 
*/ -#define GET_BUFFER_SPACE(n) \ - while (b - bufp->buffer + (n) > bufp->allocated) \ - EXTEND_BUFFER () - -/* Make sure we have one more byte of buffer space and then add C to it. */ -#define BUF_PUSH(c) \ - do { \ - GET_BUFFER_SPACE (1); \ - *b++ = (unsigned char) (c); \ - } while (0) - - -/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ -#define BUF_PUSH_2(c1, c2) \ - do { \ - GET_BUFFER_SPACE (2); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - } while (0) - - -/* As with BUF_PUSH_2, except for three bytes. */ -#define BUF_PUSH_3(c1, c2, c3) \ - do { \ - GET_BUFFER_SPACE (3); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - *b++ = (unsigned char) (c3); \ - } while (0) - - -/* Store a jump with opcode OP at LOC to location TO. We store a - relative address offset by the three bytes the jump itself occupies. */ -#define STORE_JUMP(op, loc, to) \ - store_op1 (op, loc, (to) - (loc) - 3) - -/* Likewise, for a two-argument jump. */ -#define STORE_JUMP2(op, loc, to, arg) \ - store_op2 (op, loc, (to) - (loc) - 3, arg) - -/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP(op, loc, to) \ - insert_op1 (op, loc, (to) - (loc) - 3, b) - -/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP2(op, loc, to, arg) \ - insert_op2 (op, loc, (to) - (loc) - 3, arg, b) - - -/* This is not an arbitrary limit: the arguments which represent offsets - into the pattern are two bytes long. So if 2^16 bytes turns out to - be too small, many things would have to change. */ -#define MAX_BUF_SIZE (1L << 16) - - -/* Extend the buffer by twice its current size via realloc and - reset the pointers that pointed into the old block to point to the - correct places in the new one. If extending the buffer results in it - being larger than MAX_BUF_SIZE, then flag memory exhausted. 
*/ -#define EXTEND_BUFFER() \ - do { \ - unsigned char *old_buffer = bufp->buffer; \ - if (bufp->allocated == MAX_BUF_SIZE) \ - return REG_ESIZE; \ - bufp->allocated <<= 1; \ - if (bufp->allocated > MAX_BUF_SIZE) \ - bufp->allocated = MAX_BUF_SIZE; \ - bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ - if (bufp->buffer == NULL) \ - return REG_ESPACE; \ - /* If the buffer moved, move all the pointers into it. */ \ - if (old_buffer != bufp->buffer) \ - { \ - b = (b - old_buffer) + bufp->buffer; \ - begalt = (begalt - old_buffer) + bufp->buffer; \ - if (fixup_alt_jump) \ - fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ - if (laststart) \ - laststart = (laststart - old_buffer) + bufp->buffer; \ - if (pending_exact) \ - pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ - } \ - } while (0) - - -/* Since we have one byte reserved for the register number argument to - {start,stop}_memory, the maximum number of groups we can report - things about is what fits in that byte. */ -#define MAX_REGNUM 255 - -/* But patterns can have more than `MAX_REGNUM' registers. We just - ignore the excess. */ -typedef unsigned regnum_t; - - -/* Macros for the compile stack. */ - -/* Since offsets can go either forwards or backwards, this type needs to - be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ -typedef int pattern_offset_t; - -typedef struct -{ - pattern_offset_t begalt_offset; - pattern_offset_t fixup_alt_jump; - pattern_offset_t inner_group_offset; - pattern_offset_t laststart_offset; - regnum_t regnum; -} compile_stack_elt_t; - - -typedef struct -{ - compile_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} compile_stack_type; - - -#define INIT_COMPILE_STACK_SIZE 32 - -#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) -#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) - -/* The next available element. 
*/ -#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) - - -/* Set the bit for character C in a list. */ -#define SET_LIST_BIT(c) \ - (b[((unsigned char) (c)) / BYTEWIDTH] \ - |= 1 << (((unsigned char) c) % BYTEWIDTH)) - - -/* Get the next unsigned number in the uncompiled pattern. */ -#define GET_UNSIGNED_NUMBER(num) \ - { if (p != pend) \ - { \ - PATFETCH (c); \ - while (ISDIGIT (c)) \ - { \ - if (num < 0) \ - num = 0; \ - num = num * 10 + c - '0'; \ - if (p == pend) \ - break; \ - PATFETCH (c); \ - } \ - } \ - } - -#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ - -#define IS_CHAR_CLASS(string) \ - (STREQ (string, "alpha") || STREQ (string, "upper") \ - || STREQ (string, "lower") || STREQ (string, "digit") \ - || STREQ (string, "alnum") || STREQ (string, "xdigit") \ - || STREQ (string, "space") || STREQ (string, "print") \ - || STREQ (string, "punct") || STREQ (string, "graph") \ - || STREQ (string, "cntrl") || STREQ (string, "blank")) - -/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. - Returns one of error codes defined in `regex.h', or zero for success. - - Assumes the `allocated' (and perhaps `buffer') and `translate' - fields are set in BUFP on entry. - - If it succeeds, results are put in BUFP (if it returns an error, the - contents of BUFP are undefined): - `buffer' is the compiled pattern; - `syntax' is set to SYNTAX; - `used' is set to the length of the compiled pattern; - `fastmap_accurate' is zero; - `re_nsub' is the number of subexpressions in PATTERN; - `not_bol' and `not_eol' are zero; - - The `fastmap' and `newline_anchor' fields are neither - examined nor set. */ - -/* Return, freeing storage we allocated. */ -#define FREE_STACK_RETURN(value) \ - return (free (compile_stack.stack), value) - -static reg_errcode_t -regex_compile (pattern, size, syntax, bufp) - const char *pattern; - int size; - reg_syntax_t syntax; - struct re_pattern_buffer *bufp; -{ - /* We fetch characters from PATTERN here. 
Even though PATTERN is - `char *' (i.e., signed), we declare these variables as unsigned, so - they can be reliably used as array indices. */ - register unsigned char c, c1; - - /* A random temporary spot in PATTERN. */ - const char *p1; - - /* Points to the end of the buffer, where we should append. */ - register unsigned char *b; - - /* Keeps track of unclosed groups. */ - compile_stack_type compile_stack; - - /* Points to the current (ending) position in the pattern. */ - const char *p = pattern; - const char *pend = pattern + size; - - /* How to translate the characters in the pattern. */ - char *translate = bufp->translate; - - /* Address of the count-byte of the most recently inserted `exactn' - command. This makes it possible to tell if a new exact-match - character can be added to that command or if the character requires - a new `exactn' command. */ - unsigned char *pending_exact = 0; - - /* Address of start of the most recently finished expression. - This tells, e.g., postfix * where to find the start of its - operand. Reset at the beginning of groups and alternatives. */ - unsigned char *laststart = 0; - - /* Address of beginning of regexp, or inside of last group. */ - unsigned char *begalt; - - /* Place in the uncompiled pattern (i.e., the {) to - which to go back if the interval is invalid. */ - const char *beg_interval; - - /* Address of the place where a forward jump should go to the end of - the containing expression. Each alternative of an `or' -- except the - last -- ends with a forward jump of this sort. */ - unsigned char *fixup_alt_jump = 0; - - /* Counts open-groups as they are encountered. Remembered for the - matching close-group on the compile stack, so the same register - number is put in the stop_memory as the start_memory. 
*/ - regnum_t regnum = 0; - -#ifdef DEBUG - DEBUG_PRINT1 ("\nCompiling pattern: "); - if (debug) - { - unsigned debug_count; - - for (debug_count = 0; debug_count < size; debug_count++) - printchar (pattern[debug_count]); - putchar ('\n'); - } -#endif /* DEBUG */ - - /* Initialize the compile stack. */ - compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); - if (compile_stack.stack == NULL) - return REG_ESPACE; - - compile_stack.size = INIT_COMPILE_STACK_SIZE; - compile_stack.avail = 0; - - /* Initialize the pattern buffer. */ - bufp->syntax = syntax; - bufp->fastmap_accurate = 0; - bufp->not_bol = bufp->not_eol = 0; - - /* Set `used' to zero, so that if we return an error, the pattern - printer (for debugging) will think there's no pattern. We reset it - at the end. */ - bufp->used = 0; - - /* Always count groups, whether or not bufp->no_sub is set. */ - bufp->re_nsub = 0; - -#if !defined (emacs) && !defined (SYNTAX_TABLE) - /* Initialize the syntax table. */ - init_syntax_once (); -#endif - - if (bufp->allocated == 0) - { - if (bufp->buffer) - { /* If zero allocated, but buffer is non-null, try to realloc - enough space. This loses if buffer's address is bogus, but - that is the user's responsibility. */ - RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); - } - else - { /* Caller did not allocate a buffer. Do it for them. */ - bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); - } - if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); - - bufp->allocated = INIT_BUF_SIZE; - } - - begalt = b = bufp->buffer; - - /* Loop through the uncompiled pattern until we're at the end. */ - while (p != pend) - { - PATFETCH (c); - - switch (c) - { - case '^': - { - if ( /* If at start of pattern, it's an operator. */ - p == pattern + 1 - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's come before. 
*/ - || at_begline_loc_p (pattern, p, syntax)) - BUF_PUSH (begline); - else - goto normal_char; - } - break; - - - case '$': - { - if ( /* If at end of pattern, it's an operator. */ - p == pend - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's next. */ - || at_endline_loc_p (p, pend, syntax)) - BUF_PUSH (endline); - else - goto normal_char; - } - break; - - - case '+': - case '?': - if ((syntax & RE_BK_PLUS_QM) - || (syntax & RE_LIMITED_OPS)) - goto normal_char; - handle_plus: - case '*': - /* If there is no previous pattern... */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - FREE_STACK_RETURN (REG_BADRPT); - else if (!(syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - } - - { - /* Are we optimizing this jump? */ - boolean keep_string_p = false; - - /* 1 means zero (many) matches is allowed. */ - char zero_times_ok = 0, many_times_ok = 0; - - /* If there is a sequence of repetition chars, collapse it - down to just one (the right one). We can't combine - interval operators with these because of, e.g., `a{2}*', - which should only match an even number of `a's. */ - - for (;;) - { - zero_times_ok |= c != '+'; - many_times_ok |= c != '?'; - - if (p == pend) - break; - - PATFETCH (c); - - if (c == '*' - || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) - ; - - else if (syntax & RE_BK_PLUS_QM && c == '\\') - { - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - PATFETCH (c1); - if (!(c1 == '+' || c1 == '?')) - { - PATUNFETCH; - PATUNFETCH; - break; - } - - c = c1; - } - else - { - PATUNFETCH; - break; - } - - /* If we get here, we found another repeat character. */ - } - - /* Star, etc. applied to an empty pattern is equivalent - to an empty pattern. */ - if (!laststart) - break; - - /* Now we know whether or not zero matches is allowed - and also whether or not two or more matches is allowed. 
*/ - if (many_times_ok) - { /* More than one repetition is allowed, so put in at the - end a backward relative jump from `b' to before the next - jump we're going to put in below (which jumps from - laststart to after this jump). - - But if we are at the `*' in the exact sequence `.*\n', - insert an unconditional jump backwards to the ., - instead of the beginning of the loop. This way we only - push a failure point once, instead of every time - through the loop. */ - assert (p - 1 > pattern); - - /* Allocate the space for the jump. */ - GET_BUFFER_SPACE (3); - - /* We know we are not at the first character of the pattern, - because laststart was nonzero. And we've already - incremented `p', by the way, to be the character after - the `*'. Do we have to do something analogous here - for null bytes, because of RE_DOT_NOT_NULL? */ - if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') - && zero_times_ok - && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') - && !(syntax & RE_DOT_NEWLINE)) - { /* We have .*\n. */ - STORE_JUMP (jump, b, laststart); - keep_string_p = true; - } - else - /* Anything else. */ - STORE_JUMP (maybe_pop_jump, b, laststart - 3); - - /* We've added more stuff to the buffer. */ - b += 3; - } - - /* On failure, jump from laststart to b + 3, which will be the - end of the buffer after this jump is inserted. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump - : on_failure_jump, - laststart, b + 3); - pending_exact = 0; - b += 3; - - if (!zero_times_ok) - { - /* At least one repetition is required, so insert a - `dummy_failure_jump' before the initial - `on_failure_jump' instruction of the loop. This - effects a skip over that instruction the first time - we hit that loop. 
*/ - GET_BUFFER_SPACE (3); - INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); - b += 3; - } - } - break; - - - case '.': - laststart = b; - BUF_PUSH (anychar); - break; - - - case '[': - { - boolean had_char_class = false; - - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - /* Ensure that we have enough space to push a charset: the - opcode, the length count, and the bitset; 34 bytes in all. */ - GET_BUFFER_SPACE (34); - - laststart = b; - - /* We test `*p == '^' twice, instead of using an if - statement, so we only need one BUF_PUSH. */ - BUF_PUSH (*p == '^' ? charset_not : charset); - if (*p == '^') - p++; - - /* Remember the first position in the bracket expression. */ - p1 = p; - - /* Push the number of bytes in the bitmap. */ - BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); - - /* Clear the whole map. */ - bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); - - /* charset_not matches newline according to a syntax bit. */ - if ((re_opcode_t) b[-2] == charset_not - && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) - SET_LIST_BIT ('\n'); - - /* Read in characters and ranges, setting map bits. */ - for (;;) - { - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - PATFETCH (c); - - /* \ might escape characters inside [...] and [^...]. */ - if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') - { - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - PATFETCH (c1); - SET_LIST_BIT (c1); - continue; - } - - /* Could be the end of the bracket expression. If it's - not (i.e., when the bracket expression is `[]' so - far), the ']' character bit gets set way below. */ - if (c == ']' && p != p1 + 1) - break; - - /* Look ahead to see if it's a range when the last thing - was a character class. */ - if (had_char_class && c == '-' && *p != ']') - FREE_STACK_RETURN (REG_ERANGE); - - /* Look ahead to see if it's a range when the last thing - was a character: if this is a hyphen not at the - beginning or the end of a list, then it's the range - operator. 
*/ - if (c == '-' - && !(p - 2 >= pattern && p[-2] == '[') - && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') - && *p != ']') - { - reg_errcode_t ret - = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); - } - - else if (p[0] == '-' && p[1] != ']') - { /* This handles ranges made up of characters only. */ - reg_errcode_t ret; - - /* Move past the `-'. */ - PATFETCH (c1); - - ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); - } - - /* See if we're at the beginning of a possible character - class. */ - - else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') - { /* Leave room for the null. */ - char str[CHAR_CLASS_MAX_LENGTH + 1]; - - PATFETCH (c); - c1 = 0; - - /* If pattern is `[[:'. */ - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - for (;;) - { - PATFETCH (c); - if (c == ':' || c == ']' || p == pend - || c1 == CHAR_CLASS_MAX_LENGTH) - break; - str[c1++] = c; - } - str[c1] = '\0'; - - /* If isn't a word bracketed by `[:' and:`]': - undo the ending character, the letters, and leave - the leading `:' and `[' (but set bits for them). */ - if (c == ':' && *p == ']') - { - int ch; - boolean is_alnum = STREQ (str, "alnum"); - boolean is_alpha = STREQ (str, "alpha"); - boolean is_blank = STREQ (str, "blank"); - boolean is_cntrl = STREQ (str, "cntrl"); - boolean is_digit = STREQ (str, "digit"); - boolean is_graph = STREQ (str, "graph"); - boolean is_lower = STREQ (str, "lower"); - boolean is_print = STREQ (str, "print"); - boolean is_punct = STREQ (str, "punct"); - boolean is_space = STREQ (str, "space"); - boolean is_upper = STREQ (str, "upper"); - boolean is_xdigit = STREQ (str, "xdigit"); - - if (!IS_CHAR_CLASS (str)) - FREE_STACK_RETURN (REG_ECTYPE); - - /* Throw away the ] at the end of the character - class. 
*/ - PATFETCH (c); - - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - for (ch = 0; ch < 1 << BYTEWIDTH; ch++) - { - /* This was split into 3 if's to - avoid an arbitrary limit in some compiler. */ - if ( (is_alnum && ISALNUM (ch)) - || (is_alpha && ISALPHA (ch)) - || (is_blank && ISBLANK (ch)) - || (is_cntrl && ISCNTRL (ch))) - SET_LIST_BIT (ch); - if ( (is_digit && ISDIGIT (ch)) - || (is_graph && ISGRAPH (ch)) - || (is_lower && ISLOWER (ch)) - || (is_print && ISPRINT (ch))) - SET_LIST_BIT (ch); - if ( (is_punct && ISPUNCT (ch)) - || (is_space && ISSPACE (ch)) - || (is_upper && ISUPPER (ch)) - || (is_xdigit && ISXDIGIT (ch))) - SET_LIST_BIT (ch); - } - had_char_class = true; - } - else - { - c1++; - while (c1--) - PATUNFETCH; - SET_LIST_BIT ('['); - SET_LIST_BIT (':'); - had_char_class = false; - } - } - else - { - had_char_class = false; - SET_LIST_BIT (c); - } - } - - /* Discard any (non)matching list bytes that are all 0 at the - end of the map. Decrease the map-length byte too. */ - while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) - b[-1]--; - b += b[-1]; - } - break; - - - case '(': - if (syntax & RE_NO_BK_PARENS) - goto handle_open; - else - goto normal_char; - - - case ')': - if (syntax & RE_NO_BK_PARENS) - goto handle_close; - else - goto normal_char; - - - case '\n': - if (syntax & RE_NEWLINE_ALT) - goto handle_alt; - else - goto normal_char; - - - case '|': - if (syntax & RE_NO_BK_VBAR) - goto handle_alt; - else - goto normal_char; - - - case '{': - if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) - goto handle_interval; - else - goto normal_char; - - - case '\\': - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - /* Do not translate the character after the \, so that we can - distinguish, e.g., \B from \b, even if we normally would - translate, e.g., B to b. 
*/ - PATFETCH_RAW (c); - - switch (c) - { - case '(': - if (syntax & RE_NO_BK_PARENS) - goto normal_backslash; - - handle_open: - bufp->re_nsub++; - regnum++; - - if (COMPILE_STACK_FULL) - { - RETALLOC (compile_stack.stack, compile_stack.size << 1, - compile_stack_elt_t); - if (compile_stack.stack == NULL) return REG_ESPACE; - - compile_stack.size <<= 1; - } - - /* These are the values to restore when we hit end of this - group. They are all relative offsets, so that if the - whole pattern moves because of realloc, they will still - be valid. */ - COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; - COMPILE_STACK_TOP.fixup_alt_jump - = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; - COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; - COMPILE_STACK_TOP.regnum = regnum; - - /* We will eventually replace the 0 with the number of - groups inner to this one. But do not push a - start_memory for groups beyond the last one we can - represent in the compiled pattern. */ - if (regnum <= MAX_REGNUM) - { - COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; - BUF_PUSH_3 (start_memory, regnum, 0); - } - - compile_stack.avail++; - - fixup_alt_jump = 0; - laststart = 0; - begalt = b; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. */ - pending_exact = 0; - break; - - - case ')': - if (syntax & RE_NO_BK_PARENS) goto normal_backslash; - - if (COMPILE_STACK_EMPTY) - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_backslash; - else - FREE_STACK_RETURN (REG_ERPAREN); - - handle_close: - if (fixup_alt_jump) - { /* Push a dummy failure point at the end of the - alternative for a possible future - `pop_failure_jump' to pop. See comments at - `push_dummy_failure' in `re_match_2'. */ - BUF_PUSH (push_dummy_failure); - - /* We allocated space for this jump when we assigned - to `fixup_alt_jump', in the `handle_alt' case below. 
*/ - STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); - } - - /* See similar code for backslashed left paren above. */ - if (COMPILE_STACK_EMPTY) - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - else - FREE_STACK_RETURN (REG_ERPAREN); - - /* Since we just checked for an empty stack above, this - ``can't happen''. */ - assert (compile_stack.avail != 0); - { - /* We don't just want to restore into `regnum', because - later groups should continue to be numbered higher, - as in `(ab)c(de)' -- the second group is #2. */ - regnum_t this_group_regnum; - - compile_stack.avail--; - begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; - fixup_alt_jump - = COMPILE_STACK_TOP.fixup_alt_jump - ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 - : 0; - laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; - this_group_regnum = COMPILE_STACK_TOP.regnum; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. */ - pending_exact = 0; - - /* We're at the end of the group, so now we know how many - groups were inside this one. */ - if (this_group_regnum <= MAX_REGNUM) - { - unsigned char *inner_group_loc - = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; - - *inner_group_loc = regnum - this_group_regnum; - BUF_PUSH_3 (stop_memory, this_group_regnum, - regnum - this_group_regnum); - } - } - break; - - - case '|': /* `\|'. */ - if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) - goto normal_backslash; - handle_alt: - if (syntax & RE_LIMITED_OPS) - goto normal_char; - - /* Insert before the previous alternative a jump which - jumps to this alternative if the former fails. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (on_failure_jump, begalt, b + 6); - pending_exact = 0; - b += 3; - - /* The alternative before this one has a jump after it - which gets executed if it gets matched. 
Adjust that - jump so it will jump to this alternative's analogous - jump (put in below, which in turn will jump to the next - (if any) alternative's such jump, etc.). The last such - jump jumps to the correct final destination. A picture: - _____ _____ - | | | | - | v | v - a | b | c - - If we are at `b', then fixup_alt_jump right now points to a - three-byte space after `a'. We'll put in the jump, set - fixup_alt_jump to right after `b', and leave behind three - bytes which we'll fill in when we get to after `c'. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - /* Mark and leave space for a jump after this alternative, - to be filled in later either by next alternative or - when know we're at the end of a series of alternatives. */ - fixup_alt_jump = b; - GET_BUFFER_SPACE (3); - b += 3; - - laststart = 0; - begalt = b; - break; - - - case '{': - /* If \{ is a literal. */ - if (!(syntax & RE_INTERVALS) - /* If we're at `\{' and it's not the open-interval - operator. */ - || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - || (p - 2 == pattern && p == pend)) - goto normal_backslash; - - handle_interval: - { - /* If got here, then the syntax allows intervals. */ - - /* At least (most) this many matches must be made. */ - int lower_bound = -1, upper_bound = -1; - - beg_interval = p - 1; - - if (p == pend) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - FREE_STACK_RETURN (REG_EBRACE); - } - - GET_UNSIGNED_NUMBER (lower_bound); - - if (c == ',') - { - GET_UNSIGNED_NUMBER (upper_bound); - if (upper_bound < 0) upper_bound = RE_DUP_MAX; - } - else - /* Interval such as `{1}' => match exactly once. 
*/ - upper_bound = lower_bound; - - if (lower_bound < 0 || upper_bound > RE_DUP_MAX - || lower_bound > upper_bound) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - FREE_STACK_RETURN (REG_BADBR); - } - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); - - PATFETCH (c); - } - - if (c != '}') - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - FREE_STACK_RETURN (REG_BADBR); - } - - /* We just parsed a valid interval. */ - - /* If it's invalid to have no preceding re. */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - FREE_STACK_RETURN (REG_BADRPT); - else if (syntax & RE_CONTEXT_INDEP_OPS) - laststart = b; - else - goto unfetch_interval; - } - - /* If the upper bound is zero, don't want to succeed at - all; jump from `laststart' to `b + 3', which will be - the end of the buffer after we insert the jump. */ - if (upper_bound == 0) - { - GET_BUFFER_SPACE (3); - INSERT_JUMP (jump, laststart, b + 3); - b += 3; - } - - /* Otherwise, we have a nontrivial interval. When - we're all done, the pattern will look like: - set_number_at - set_number_at - succeed_n - - jump_n - (The upper bound and `jump_n' are omitted if - `upper_bound' is 1, though.) */ - else - { /* If the upper bound is > 1, we need to insert - more at the end of the loop. */ - unsigned nbytes = 10 + (upper_bound > 1) * 10; - - GET_BUFFER_SPACE (nbytes); - - /* Initialize lower bound of the `succeed_n', even - though it will be set during matching by its - attendant `set_number_at' (inserted next), - because `re_compile_fastmap' needs to know. - Jump to the `jump_n' we might insert below. */ - INSERT_JUMP2 (succeed_n, laststart, - b + 5 + (upper_bound > 1) * 5, - lower_bound); - b += 5; - - /* Code to initialize the lower bound. Insert - before the `succeed_n'. The `5' is the last two - bytes of this `set_number_at', plus 3 bytes of - the following `succeed_n'. 
*/ - insert_op2 (set_number_at, laststart, 5, lower_bound, b); - b += 5; - - if (upper_bound > 1) - { /* More than one repetition is allowed, so - append a backward jump to the `succeed_n' - that starts this interval. - - When we've reached this during matching, - we'll have matched the interval once, so - jump back only `upper_bound - 1' times. */ - STORE_JUMP2 (jump_n, b, laststart + 5, - upper_bound - 1); - b += 5; - - /* The location we want to set is the second - parameter of the `jump_n'; that is `b-2' as - an absolute address. `laststart' will be - the `set_number_at' we're about to insert; - `laststart+3' the number to set, the source - for the relative address. But we are - inserting into the middle of the pattern -- - so everything is getting moved up by 5. - Conclusion: (b - 2) - (laststart + 3) + 5, - i.e., b - laststart. - - We insert this at the beginning of the loop - so that if we fail during matching, we'll - reinitialize the bounds. */ - insert_op2 (set_number_at, laststart, b - laststart, - upper_bound - 1, b); - b += 5; - } - } - pending_exact = 0; - beg_interval = NULL; - } - break; - - unfetch_interval: - /* If an invalid interval, match the characters as literals. */ - assert (beg_interval); - p = beg_interval; - beg_interval = NULL; - - /* normal_char and normal_backslash need `c'. */ - PATFETCH (c); - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (p > pattern && p[-1] == '\\') - goto normal_backslash; - } - goto normal_char; - -#ifdef emacs - /* There is no way to specify the before_dot and after_dot - operators. rms says this is ok. 
--karl */ - case '=': - BUF_PUSH (at_dot); - break; - - case 's': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); - break; - - case 'S': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); - break; -#endif /* emacs */ - - - case 'w': - laststart = b; - BUF_PUSH (wordchar); - break; - - - case 'W': - laststart = b; - BUF_PUSH (notwordchar); - break; - - - case '<': - BUF_PUSH (wordbeg); - break; - - case '>': - BUF_PUSH (wordend); - break; - - case 'b': - BUF_PUSH (wordbound); - break; - - case 'B': - BUF_PUSH (notwordbound); - break; - - case '`': - BUF_PUSH (begbuf); - break; - - case '\'': - BUF_PUSH (endbuf); - break; - - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - if (syntax & RE_NO_BK_REFS) - goto normal_char; - - c1 = c - '0'; - - if (c1 > regnum) - FREE_STACK_RETURN (REG_ESUBREG); - - /* Can't back reference to a subexpression if inside of it. */ - if (group_in_compile_stack (compile_stack, c1)) - goto normal_char; - - laststart = b; - BUF_PUSH_2 (duplicate, c1); - break; - - - case '+': - case '?': - if (syntax & RE_BK_PLUS_QM) - goto handle_plus; - else - goto normal_backslash; - - default: - normal_backslash: - /* You might think it would be useful for \ to mean - not to translate; but if we don't translate it - it will never match anything. */ - c = TRANSLATE (c); - goto normal_char; - } - break; - - - default: - /* Expects the character in `c'. */ - normal_char: - /* If no exactn currently being built. */ - if (!pending_exact - - /* If last exactn not at current position. */ - || pending_exact + *pending_exact + 1 != b - - /* We have only one byte following the exactn for the count. */ - || *pending_exact == (1 << BYTEWIDTH) - 1 - - /* If followed by a repetition operator. */ - || *p == '*' || *p == '^' - || ((syntax & RE_BK_PLUS_QM) - ? 
*p == '\\' && (p[1] == '+' || p[1] == '?') - : (*p == '+' || *p == '?')) - || ((syntax & RE_INTERVALS) - && ((syntax & RE_NO_BK_BRACES) - ? *p == '{' - : (p[0] == '\\' && p[1] == '{')))) - { - /* Start building a new exactn. */ - - laststart = b; - - BUF_PUSH_2 (exactn, 0); - pending_exact = b - 1; - } - - BUF_PUSH (c); - (*pending_exact)++; - break; - } /* switch (c) */ - } /* while p != pend */ - - - /* Through the pattern now. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - if (!COMPILE_STACK_EMPTY) - FREE_STACK_RETURN (REG_EPAREN); - - free (compile_stack.stack); - - /* We have succeeded; set the length of the buffer. */ - bufp->used = b - bufp->buffer; - -#ifdef DEBUG - if (debug) - { - DEBUG_PRINT1 ("\nCompiled pattern: \n"); - print_compiled_pattern (bufp); - } -#endif /* DEBUG */ - -#ifndef MATCH_MAY_ALLOCATE - /* Initialize the failure stack to the largest possible stack. This - isn't necessary unless we're trying to avoid calling alloca in - the search and match routines. */ - { - int num_regs = bufp->re_nsub + 1; - - /* Since DOUBLE_FAIL_STACK refuses to double only if the current size - is strictly greater than re_max_failures, the largest possible stack - is 2 * re_max_failures failure points. */ - if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) - { - fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); - -#ifdef emacs - if (! fail_stack.stack) - fail_stack.stack - = (fail_stack_elt_t *) xmalloc (fail_stack.size - * sizeof (fail_stack_elt_t)); - else - fail_stack.stack - = (fail_stack_elt_t *) xrealloc (fail_stack.stack, - (fail_stack.size - * sizeof (fail_stack_elt_t))); -#else /* not emacs */ - if (! 
fail_stack.stack) - fail_stack.stack - = (fail_stack_elt_t *) malloc (fail_stack.size - * sizeof (fail_stack_elt_t)); - else - fail_stack.stack - = (fail_stack_elt_t *) realloc (fail_stack.stack, - (fail_stack.size - * sizeof (fail_stack_elt_t))); -#endif /* not emacs */ - } - - /* Initialize some other variables the matcher uses. */ - RETALLOC_IF (regstart, num_regs, const char *); - RETALLOC_IF (regend, num_regs, const char *); - RETALLOC_IF (old_regstart, num_regs, const char *); - RETALLOC_IF (old_regend, num_regs, const char *); - RETALLOC_IF (best_regstart, num_regs, const char *); - RETALLOC_IF (best_regend, num_regs, const char *); - RETALLOC_IF (reg_info, num_regs, register_info_type); - RETALLOC_IF (reg_dummy, num_regs, const char *); - RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); - } -#endif - - return REG_NOERROR; -} /* regex_compile */ - -/* Subroutines for `regex_compile'. */ - -/* Store OP at LOC followed by two-byte integer parameter ARG. */ - -static void -store_op1 (op, loc, arg) - re_opcode_t op; - unsigned char *loc; - int arg; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg); -} - - -/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -store_op2 (op, loc, arg1, arg2) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg1); - STORE_NUMBER (loc + 3, arg2); -} - - -/* Copy the bytes from LOC to END to open up three bytes of space at LOC - for OP followed by two-byte integer parameter ARG. */ - -static void -insert_op1 (op, loc, arg, end) - re_opcode_t op; - unsigned char *loc; - int arg; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 3; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op1 (op, loc, arg); -} - - -/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. 
*/ - -static void -insert_op2 (op, loc, arg1, arg2, end) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 5; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op2 (op, loc, arg1, arg2); -} - - -/* P points to just after a ^ in PATTERN. Return true if that ^ comes - after an alternative or a begin-subexpression. We assume there is at - least one character before the ^. */ - -static boolean -at_begline_loc_p (pattern, p, syntax) - const char *pattern, *p; - reg_syntax_t syntax; -{ - const char *prev = p - 2; - boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; - - return - /* After a subexpression? */ - (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) - /* After an alternative? */ - || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); -} - - -/* The dual of at_begline_loc_p. This one is for $. We assume there is - at least one character after the $, i.e., `P < PEND'. */ - -static boolean -at_endline_loc_p (p, pend, syntax) - const char *p, *pend; - int syntax; -{ - const char *next = p; - boolean next_backslash = *next == '\\'; - const char *next_next = p + 1 < pend ? p + 1 : NULL; - - return - /* Before a subexpression? */ - (syntax & RE_NO_BK_PARENS ? *next == ')' - : next_backslash && next_next && *next_next == ')') - /* Before an alternative? */ - || (syntax & RE_NO_BK_VBAR ? *next == '|' - : next_backslash && next_next && *next_next == '|'); -} - - -/* Returns true if REGNUM is in one of COMPILE_STACK's elements and - false if it's not. 
*/ - -static boolean -group_in_compile_stack (compile_stack, regnum) - compile_stack_type compile_stack; - regnum_t regnum; -{ - int this_element; - - for (this_element = compile_stack.avail - 1; - this_element >= 0; - this_element--) - if (compile_stack.stack[this_element].regnum == regnum) - return true; - - return false; -} - - -/* Read the ending character of a range (in a bracket expression) from the - uncompiled pattern *P_PTR (which ends at PEND). We assume the - starting character is in `P[-2]'. (`P[-1]' is the character `-'.) - Then we set the translation of all bits between the starting and - ending characters (inclusive) in the compiled pattern B. - - Return an error code. - - We use these short variable names so we can use the same macros as - `regex_compile' itself. */ - -static reg_errcode_t -compile_range (p_ptr, pend, translate, syntax, b) - const char **p_ptr, *pend; - char *translate; - reg_syntax_t syntax; - unsigned char *b; -{ - unsigned this_char; - - const char *p = *p_ptr; - int range_start, range_end; - - if (p == pend) - return REG_ERANGE; - - /* Even though the pattern is a signed `char *', we need to fetch - with unsigned char *'s; if the high bit of the pattern character - is set, the range endpoints will be negative if we fetch using a - signed char *. - - We also want to fetch the endpoints without translating them; the - appropriate translation is done in the bit-setting loop below. */ - /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ - range_start = ((const unsigned char *) p)[-2]; - range_end = ((const unsigned char *) p)[0]; - - /* Have to increment the pointer into the pattern string, so the - caller isn't still at the ending character. */ - (*p_ptr)++; - - /* If the start is after the end, the range is empty. */ - if (range_start > range_end) - return syntax & RE_NO_EMPTY_RANGES ? 
REG_ERANGE : REG_NOERROR; - - /* Here we see why `this_char' has to be larger than an `unsigned - char' -- the range is inclusive, so if `range_end' == 0xff - (assuming 8-bit characters), we would otherwise go into an infinite - loop, since all characters <= 0xff. */ - for (this_char = range_start; this_char <= range_end; this_char++) - { - SET_LIST_BIT (TRANSLATE (this_char)); - } - - return REG_NOERROR; -} - -/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in - BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible - characters can start a string that matches the pattern. This fastmap - is used by re_search to skip quickly over impossible starting points. - - The caller must supply the address of a (1 << BYTEWIDTH)-byte data - area as BUFP->fastmap. - - We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in - the pattern buffer. - - Returns 0 if we succeed, -2 if an internal error. */ - -int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; -{ - int j, k; -#ifdef MATCH_MAY_ALLOCATE - fail_stack_type fail_stack; -#endif -#ifndef REGEX_MALLOC - char *destination; -#endif - /* We don't push any register information onto the failure stack. */ - unsigned num_regs = 0; - - register char *fastmap = bufp->fastmap; - unsigned char *pattern = bufp->buffer; - unsigned long size = bufp->used; - unsigned char *p = pattern; - register unsigned char *pend = pattern + size; - - /* Assume that each path through the pattern can be null until - proven otherwise. We set this false at the bottom of switch - statement, to which we get only if a particular path doesn't - match the empty string. */ - boolean path_can_be_null = true; - - /* We aren't doing a `succeed_n' to begin with. */ - boolean succeed_n_p = false; - - assert (fastmap != NULL && p != NULL); - - INIT_FAIL_STACK (); - bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ - bufp->fastmap_accurate = 1; /* It will be when we're done. 
*/ - bufp->can_be_null = 0; - - while (p != pend || !FAIL_STACK_EMPTY ()) - { - if (p == pend) - { - bufp->can_be_null |= path_can_be_null; - - /* Reset for next path. */ - path_can_be_null = true; - - p = fail_stack.stack[--fail_stack.avail]; - } - - /* We should never be about to go beyond the end of the pattern. */ - assert (p < pend); - -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - - /* I guess the idea here is to simply not bother with a fastmap - if a backreference is used, since it's too hard to figure out - the fastmap for the corresponding group. Setting - `can_be_null' stops `re_search_2' from using the fastmap, so - that is all we do. */ - case duplicate: - bufp->can_be_null = 1; - return 0; - - - /* Following are the cases which match a character. These end - with `break'. */ - - case exactn: - fastmap[p[1]] = 1; - break; - - - case charset: - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) - fastmap[j] = 1; - break; - - - case charset_not: - /* Chars beyond end of map must be allowed. */ - for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) - fastmap[j] = 1; - break; - - - case wordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == Sword) - fastmap[j] = 1; - break; - - - case notwordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != Sword) - fastmap[j] = 1; - break; - - - case anychar: - { - int fastmap_newline = fastmap['\n']; - - /* `.' matches anything ... */ - for (j = 0; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - /* ... except perhaps newline. */ - if (!(bufp->syntax & RE_DOT_NEWLINE)) - fastmap['\n'] = fastmap_newline; - - /* Return if we have already set `can_be_null'; if we have, - then the fastmap is irrelevant. Something's wrong here. 
*/ - else if (bufp->can_be_null) - return 0; - - /* Otherwise, have to check alternative paths. */ - break; - } - -#ifdef emacs - case syntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - case notsyntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - /* All cases after this match the empty string. These end with - `continue'. */ - - - case before_dot: - case at_dot: - case after_dot: - continue; -#endif /* not emacs */ - - - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbound: - case notwordbound: - case wordbeg: - case wordend: - case push_dummy_failure: - continue; - - - case jump_n: - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case jump_past_alt: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - if (j > 0) - continue; - - /* Jump backward implies we just went through the body of a - loop and matched nothing. Opcode jumped to should be - `on_failure_jump' or `succeed_n'. Just treat it like an - ordinary jump. For a * loop, it has pushed its failure - point already; if so, discard that as redundant. */ - if ((re_opcode_t) *p != on_failure_jump - && (re_opcode_t) *p != succeed_n) - continue; - - p++; - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - - /* If what's on the stack is where we are now, pop it. */ - if (!FAIL_STACK_EMPTY () - && fail_stack.stack[fail_stack.avail - 1] == p) - fail_stack.avail--; - - continue; - - - case on_failure_jump: - case on_failure_keep_string_jump: - handle_on_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - - /* For some patterns, e.g., `(a?)?', `p+j' here points to the - end of the pattern. We don't want to push such a point, - since when we restore it above, entering the switch will - increment `p' past the end of the pattern. 
We don't need - to push such a point since we obviously won't find any more - fastmap entries beyond `pend'. Such a pattern can match - the null string, though. */ - if (p + j < pend) - { - if (!PUSH_PATTERN_OP (p + j, fail_stack)) - return -2; - } - else - bufp->can_be_null = 1; - - if (succeed_n_p) - { - EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ - succeed_n_p = false; - } - - continue; - - - case succeed_n: - /* Get to the number of times to succeed. */ - p += 2; - - /* Increment p past the n for when k != 0. */ - EXTRACT_NUMBER_AND_INCR (k, p); - if (k == 0) - { - p -= 4; - succeed_n_p = true; /* Spaghetti code alert. */ - goto handle_on_failure_jump; - } - continue; - - - case set_number_at: - p += 4; - continue; - - - case start_memory: - case stop_memory: - p += 2; - continue; - - - default: - abort (); /* We have listed all the cases. */ - } /* switch *p++ */ - - /* Getting here means we have found the possible starting - characters for one path of the pattern -- and that the empty - string does not match. We need not follow this path further. - Instead, look at the next alternative (remembered on the - stack), or quit if no more. The test at the top of the loop - does these things. */ - path_can_be_null = false; - p = pend; - } /* while p */ - - /* Set `can_be_null' for the last path (also the first path, if the - pattern is empty). */ - bufp->can_be_null |= path_can_be_null; - return 0; -} /* re_compile_fastmap */ - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use - this memory for recording register information. STARTS and ENDS - must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. 
- - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ - -void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - unsigned num_regs; - regoff_t *starts, *ends; -{ - if (num_regs) - { - bufp->regs_allocated = REGS_REALLOCATE; - regs->num_regs = num_regs; - regs->start = starts; - regs->end = ends; - } - else - { - bufp->regs_allocated = REGS_UNALLOCATED; - regs->num_regs = 0; - regs->start = regs->end = (regoff_t *) 0; - } -} - -/* Searching routines. */ - -/* Like re_search_2, below, but only one string is specified, and - doesn't let you say where to stop matching. */ - -int -re_search (bufp, string, size, startpos, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, startpos, range; - struct re_registers *regs; -{ - return re_search_2 (bufp, NULL, 0, string, size, startpos, range, - regs, size); -} - - -/* Using the compiled pattern in BUFP->buffer, first tries to match the - virtual concatenation of STRING1 and STRING2, starting first at index - STARTPOS, then at STARTPOS + 1, and so on. - - STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. - - RANGE is how far to scan while trying to match. RANGE = 0 means try - only at STARTPOS; in general, the last start tried is STARTPOS + - RANGE. - - In REGS, return the indices of the virtual concatenation of STRING1 - and STRING2 that matched the entire BUFP->buffer and its contained - subexpressions. - - Do not consider matching one past the index STOP in the virtual - concatenation of STRING1 and STRING2. - - We return either the position in the strings at which the match was - found, -1 if no match, or -2 if error (such as failure - stack overflow). 
*/ - -int -re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int startpos; - int range; - struct re_registers *regs; - int stop; -{ - int val; - register char *fastmap = bufp->fastmap; - register char *translate = bufp->translate; - int total_size = size1 + size2; - int endpos = startpos + range; - - /* Check for out-of-range STARTPOS. */ - if (startpos < 0 || startpos > total_size) - return -1; - - /* Fix up RANGE if it might eventually take us outside - the virtual concatenation of STRING1 and STRING2. */ - if (endpos < -1) - range = -1 - startpos; - else if (endpos > total_size) - range = total_size - startpos; - - /* If the search isn't to be a backwards one, don't waste time in a - search for a pattern that must be anchored. */ - if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) - { - if (startpos > 0) - return -1; - else - range = 1; - } - - /* Update the fastmap now if not correct already. */ - if (fastmap && !bufp->fastmap_accurate) - if (re_compile_fastmap (bufp) == -2) - return -2; - - /* Loop through the string, looking for a place to start matching. */ - for (;;) - { - /* If a fastmap is supplied, skip quickly over characters that - cannot be the start of a match. If the pattern can match the - null string, however, we don't need to skip characters; we want - the first null string. */ - if (fastmap && startpos < total_size && !bufp->can_be_null) - { - if (range > 0) /* Searching forwards. */ - { - register const char *d; - register int lim = 0; - int irange = range; - - if (startpos < size1 && startpos + range >= size1) - lim = range - (size1 - startpos); - - d = (startpos >= size1 ? string2 - size1 : string1) + startpos; - - /* Written out as an if-else to avoid testing `translate' - inside the loop. 
*/ - if (translate) - while (range > lim - && !fastmap[(unsigned char) - translate[(unsigned char) *d++]]) - range--; - else - while (range > lim && !fastmap[(unsigned char) *d++]) - range--; - - startpos += irange - range; - } - else /* Searching backwards. */ - { - register char c = (size1 == 0 || startpos >= size1 - ? string2[startpos - size1] - : string1[startpos]); - - if (!fastmap[(unsigned char) TRANSLATE (c)]) - goto advance; - } - } - - /* If can't match the null string, and that's all we have left, fail. */ - if (range >= 0 && startpos == total_size && fastmap - && !bufp->can_be_null) - return -1; - - val = re_match_2_internal (bufp, string1, size1, string2, size2, - startpos, regs, stop); -#ifndef REGEX_MALLOC -#ifdef C_ALLOCA - alloca (0); -#endif -#endif - - if (val >= 0) - return startpos; - - if (val == -2) - return -2; - - advance: - if (!range) - break; - else if (range > 0) - { - range--; - startpos++; - } - else - { - range++; - startpos--; - } - } - return -1; -} /* re_search_2 */ - -/* Declarations and macros for re_match_2. */ - -static int bcmp_translate (); -static boolean alt_match_null_string_p (), - common_op_match_null_string_p (), - group_match_null_string_p (); - -/* This converts PTR, a pointer into one of the search strings `string1' - and `string2' into an offset from the beginning of that string. */ -#define POINTER_TO_OFFSET(ptr) \ - (FIRST_STRING_P (ptr) \ - ? ((regoff_t) ((ptr) - string1)) \ - : ((regoff_t) ((ptr) - string2 + size1))) - -/* Macros for dealing with the split strings in re_match_2. */ - -#define MATCHING_IN_FIRST_STRING (dend == end_match_1) - -/* Call before fetching a character with *d. This switches over to - string2 if necessary. */ -#define PREFETCH() \ - while (d == dend) \ - { \ - /* End of string2 => fail. */ \ - if (dend == end_match_2) \ - goto fail; \ - /* End of string1 => advance to string2. 
*/ \ - d = string2; \ - dend = end_match_2; \ - } - - -/* Test if at very beginning or at very end of the virtual concatenation - of `string1' and `string2'. If only one string, it's `string2'. */ -#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) -#define AT_STRINGS_END(d) ((d) == end2) - - -/* Test if D points to a character which is word-constituent. We have - two special cases to check for: if past the end of string1, look at - the first character in string2; and if before the beginning of - string2, look at the last character in string1. */ -#define WORDCHAR_P(d) \ - (SYNTAX ((d) == end1 ? *string2 \ - : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ - == Sword) - -/* Test if the character before D and the one at D differ with respect - to being word-constituent. */ -#define AT_WORD_BOUNDARY(d) \ - (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ - || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) - - -/* Free everything we malloc. */ -#ifdef MATCH_MAY_ALLOCATE -#ifdef REGEX_MALLOC -#define FREE_VAR(var) if (var) free (var); var = NULL -#define FREE_VARIABLES() \ - do { \ - FREE_VAR (fail_stack.stack); \ - FREE_VAR (regstart); \ - FREE_VAR (regend); \ - FREE_VAR (old_regstart); \ - FREE_VAR (old_regend); \ - FREE_VAR (best_regstart); \ - FREE_VAR (best_regend); \ - FREE_VAR (reg_info); \ - FREE_VAR (reg_dummy); \ - FREE_VAR (reg_info_dummy); \ - } while (0) -#else /* not REGEX_MALLOC */ -/* This used to do alloca (0), but now we do that in the caller. */ -#define FREE_VARIABLES() /* Nothing */ -#endif /* not REGEX_MALLOC */ -#else -#define FREE_VARIABLES() /* Do nothing! */ -#endif /* not MATCH_MAY_ALLOCATE */ - -/* These values must meet several constraints. They must not be valid - register values; since we have a limit of 255 registers (because - we use only one byte in the pattern for the register number), we can - use numbers larger than 255. They must differ by 1, because of - NUM_FAILURE_ITEMS above. 
And the value for the lowest register must - be larger than the value for the highest register, so we do not try - to actually save any registers when none are active. */ -#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) -#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) - -/* Matching routines. */ - -#ifndef emacs /* Emacs never uses this. */ -/* re_match is like re_match_2 except it takes only a single string. */ - -int -re_match (bufp, string, size, pos, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, pos; - struct re_registers *regs; -{ - int result = re_match_2_internal (bufp, NULL, 0, string, size, - pos, regs, size); - alloca (0); - return result; -} -#endif /* not emacs */ - - -/* re_match_2 matches the compiled pattern in BUFP against the - the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 - and SIZE2, respectively). We start matching at POS, and stop - matching at STOP. - - If REGS is non-null and the `no_sub' field of BUFP is nonzero, we - store offsets for the substring each group matched in REGS. See the - documentation for exactly how many groups we fill. - - We return -1 if no match, -2 if an internal error (such as the - failure stack overflowing). Otherwise, we return the length of the - matched substring. */ - -int -re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; -{ - int result = re_match_2_internal (bufp, string1, size1, string2, size2, - pos, regs, stop); - alloca (0); - return result; -} - -/* This is a separate function so that we can force an alloca cleanup - afterwards. */ -static int -re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; -{ - /* General temporaries. 
*/ - int mcnt; - unsigned char *p1; - - /* Just past the end of the corresponding string. */ - const char *end1, *end2; - - /* Pointers into string1 and string2, just past the last characters in - each to consider matching. */ - const char *end_match_1, *end_match_2; - - /* Where we are in the data, and the end of the current string. */ - const char *d, *dend; - - /* Where we are in the pattern, and the end of the pattern. */ - unsigned char *p = bufp->buffer; - register unsigned char *pend = p + bufp->used; - - /* Mark the opcode just after a start_memory, so we can test for an - empty subpattern when we get to the stop_memory. */ - unsigned char *just_past_start_mem = 0; - - /* We use this to map every character in the string. */ - char *translate = bufp->translate; - - /* Failure point stack. Each place that can handle a failure further - down the line pushes a failure point on this stack. It consists of - restart, regend, and reg_info for all registers corresponding to - the subexpressions we're currently inside, plus the number of such - registers, and, finally, two char *'s. The first char * is where - to resume scanning the pattern; the second one is where to resume - scanning the strings. If the latter is zero, the failure point is - a ``dummy''; if a failure happens and the failure point is a dummy, - it gets discarded and the next next one is tried. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ - fail_stack_type fail_stack; -#endif -#ifdef DEBUG - static unsigned failure_id = 0; - unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; -#endif - - /* We fill all the registers internally, independent of what we - return, for use in backreferences. The number here includes - an element for register zero. */ - unsigned num_regs = bufp->re_nsub + 1; - - /* The currently active registers. 
*/ - unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; - unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; - - /* Information on the contents of registers. These are pointers into - the input strings; they record just what was matched (on this - attempt) by a subexpression part of the pattern, that is, the - regnum-th regstart pointer points to where in the pattern we began - matching and the regnum-th regend points to right after where we - stopped matching the regnum-th subexpression. (The zeroth register - keeps track of what the whole pattern matches.) */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **regstart, **regend; -#endif - - /* If a group that's operated upon by a repetition operator fails to - match anything, then the register for its start will need to be - restored because it will have been set to wherever in the string we - are when we last see its open-group operator. Similarly for a - register's end. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **old_regstart, **old_regend; -#endif - - /* The is_active field of reg_info helps us keep track of which (possibly - nested) subexpressions we are currently in. The matched_something - field of reg_info[reg_num] helps us tell whether or not we have - matched any of the pattern so far this time through the reg_num-th - subexpression. These two fields get reset each time through any - loop their register is in. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ - register_info_type *reg_info; -#endif - - /* The following record the register info as found in the above - variables when we find a match better than any we've seen before. - This happens as we backtrack through the failure points, which in - turn happens only if we have not yet matched the entire string. */ - unsigned best_regs_set = false; -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. 
*/ - const char **best_regstart, **best_regend; -#endif - - /* Logically, this is `best_regend[0]'. But we don't want to have to - allocate space for that if we're not allocating space for anything - else (see below). Also, we never need info about register 0 for - any of the other register vectors, and it seems rather a kludge to - treat `best_regend' differently than the rest. So we keep track of - the end of the best match so far in a separate variable. We - initialize this to NULL so that when we backtrack the first time - and need to test it, it's not garbage. */ - const char *match_end = NULL; - - /* Used when we pop values we don't care about. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **reg_dummy; - register_info_type *reg_info_dummy; -#endif - -#ifdef DEBUG - /* Counts the total number of registers pushed. */ - unsigned num_regs_pushed = 0; -#endif - - DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); - - INIT_FAIL_STACK (); - -#ifdef MATCH_MAY_ALLOCATE - /* Do not bother to initialize all the register variables if there are - no groups in the pattern, as it takes a fair amount of time. If - there are groups, we include space for register 0 (the whole - pattern), even though we never use it, since it simplifies the - array indexing. We should fix this. 
*/ - if (bufp->re_nsub) - { - regstart = REGEX_TALLOC (num_regs, const char *); - regend = REGEX_TALLOC (num_regs, const char *); - old_regstart = REGEX_TALLOC (num_regs, const char *); - old_regend = REGEX_TALLOC (num_regs, const char *); - best_regstart = REGEX_TALLOC (num_regs, const char *); - best_regend = REGEX_TALLOC (num_regs, const char *); - reg_info = REGEX_TALLOC (num_regs, register_info_type); - reg_dummy = REGEX_TALLOC (num_regs, const char *); - reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); - - if (!(regstart && regend && old_regstart && old_regend && reg_info - && best_regstart && best_regend && reg_dummy && reg_info_dummy)) - { - FREE_VARIABLES (); - return -2; - } - } -#if defined (REGEX_MALLOC) - else - { - /* We must initialize all our variables to NULL, so that - `FREE_VARIABLES' doesn't try to free them. */ - regstart = regend = old_regstart = old_regend = best_regstart - = best_regend = reg_dummy = NULL; - reg_info = reg_info_dummy = (register_info_type *) NULL; - } -#endif /* REGEX_MALLOC */ -#endif /* MATCH_MAY_ALLOCATE */ - - /* The starting position is bogus. */ - if (pos < 0 || pos > size1 + size2) - { - FREE_VARIABLES (); - return -1; - } - - /* Initialize subexpression text positions to -1 to mark ones that no - start_memory/stop_memory has been seen for. Also initialize the - register information struct. */ - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = regend[mcnt] - = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; - - REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; - IS_ACTIVE (reg_info[mcnt]) = 0; - MATCHED_SOMETHING (reg_info[mcnt]) = 0; - EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; - } - - /* We move `string1' into `string2' if the latter's empty -- but not if - `string1' is null. 
*/ - if (size2 == 0 && string1 != NULL) - { - string2 = string1; - size2 = size1; - string1 = 0; - size1 = 0; - } - end1 = string1 + size1; - end2 = string2 + size2; - - /* Compute where to stop matching, within the two strings. */ - if (stop <= size1) - { - end_match_1 = string1 + stop; - end_match_2 = string2; - } - else - { - end_match_1 = end1; - end_match_2 = string2 + stop - size1; - } - - /* `p' scans through the pattern as `d' scans through the data. - `dend' is the end of the input string that `d' points within. `d' - is advanced into the following input string whenever necessary, but - this happens before fetching; therefore, at the beginning of the - loop, `d' can be pointing at the end of a string, but it cannot - equal `string2'. */ - if (size1 > 0 && pos <= size1) - { - d = string1 + pos; - dend = end_match_1; - } - else - { - d = string2 + pos - size1; - dend = end_match_2; - } - - DEBUG_PRINT1 ("The compiled pattern is: "); - DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); - DEBUG_PRINT1 ("The string to match is: `"); - DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); - DEBUG_PRINT1 ("'\n"); - - /* This loops over pattern commands. It exits by returning from the - function if the match is complete, or it drops through if the match - fails at this starting point in the input data. */ - for (;;) - { - DEBUG_PRINT2 ("\n0x%x: ", p); - - if (p == pend) - { /* End of pattern means we might have succeeded. */ - DEBUG_PRINT1 ("end of pattern ... "); - - /* If we haven't matched the entire string, and we want the - longest match, try backtracking. */ - if (d != end_match_2) - { - /* 1 if this match ends in the same string (string1 or string2) - as the best previous match. */ - boolean same_str_p = (FIRST_STRING_P (match_end) - == MATCHING_IN_FIRST_STRING); - /* 1 if this match is the best seen so far. */ - boolean best_match_p; - - /* AIX compiler got confused when this was combined - with the previous declaration. 
*/ - if (same_str_p) - best_match_p = d > match_end; - else - best_match_p = !MATCHING_IN_FIRST_STRING; - - DEBUG_PRINT1 ("backtracking.\n"); - - if (!FAIL_STACK_EMPTY ()) - { /* More failure points to try. */ - - /* If exceeds best match so far, save it. */ - if (!best_regs_set || best_match_p) - { - best_regs_set = true; - match_end = d; - - DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - best_regstart[mcnt] = regstart[mcnt]; - best_regend[mcnt] = regend[mcnt]; - } - } - goto fail; - } - - /* If no failure points, don't restore garbage. And if - last match is real best match, don't restore second - best one. */ - else if (best_regs_set && !best_match_p) - { - restore_best_regs: - /* Restore best match. It may happen that `dend == - end_match_1' while the restored d is in string2. - For example, the pattern `x.*y.*z' against the - strings `x-' and `y-z-', if the two strings are - not consecutive in memory. */ - DEBUG_PRINT1 ("Restoring best registers.\n"); - - d = match_end; - dend = ((d >= string1 && d <= end1) - ? end_match_1 : end_match_2); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = best_regstart[mcnt]; - regend[mcnt] = best_regend[mcnt]; - } - } - } /* d != end_match_2 */ - - DEBUG_PRINT1 ("Accepting match.\n"); - - /* If caller wants register contents data back, do it. */ - if (regs && !bufp->no_sub) - { - /* Have the register data arrays been allocated? */ - if (bufp->regs_allocated == REGS_UNALLOCATED) - { /* No. So allocate them with malloc. We need one - extra element beyond `num_regs' for the `-1' marker - GNU code uses. */ - regs->num_regs = MAX (RE_NREGS, num_regs + 1); - regs->start = TALLOC (regs->num_regs, regoff_t); - regs->end = TALLOC (regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - bufp->regs_allocated = REGS_REALLOCATE; - } - else if (bufp->regs_allocated == REGS_REALLOCATE) - { /* Yes. 
If we need more elements than were already - allocated, reallocate them. If we need fewer, just - leave it alone. */ - if (regs->num_regs < num_regs + 1) - { - regs->num_regs = num_regs + 1; - RETALLOC (regs->start, regs->num_regs, regoff_t); - RETALLOC (regs->end, regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - } - } - else - { - /* These braces fend off a "empty body in an else-statement" - warning under GCC when assert expands to nothing. */ - assert (bufp->regs_allocated == REGS_FIXED); - } - - /* Convert the pointer data in `regstart' and `regend' to - indices. Register zero has to be set differently, - since we haven't kept track of any info for it. */ - if (regs->num_regs > 0) - { - regs->start[0] = pos; - regs->end[0] = (MATCHING_IN_FIRST_STRING - ? ((regoff_t) (d - string1)) - : ((regoff_t) (d - string2 + size1))); - } - - /* Go through the first `min (num_regs, regs->num_regs)' - registers, since that is all we initialized. */ - for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) - { - if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) - regs->start[mcnt] = regs->end[mcnt] = -1; - else - { - regs->start[mcnt] - = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); - regs->end[mcnt] - = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); - } - } - - /* If the regs structure we return has more elements than - were in the pattern, set the extra elements to -1. If - we (re)allocated the registers, this is the case, - because we always allocate enough to have at least one - -1 at the end. 
*/ - for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; - } /* regs && !bufp->no_sub */ - - FREE_VARIABLES (); - DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", - nfailure_points_pushed, nfailure_points_popped, - nfailure_points_pushed - nfailure_points_popped); - DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); - - mcnt = d - pos - (MATCHING_IN_FIRST_STRING - ? string1 - : string2 - size1); - - DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); - - return mcnt; - } - - /* Otherwise match next pattern command. */ -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - /* Ignore these. Used to ignore the n of succeed_n's which - currently have n == 0. */ - case no_op: - DEBUG_PRINT1 ("EXECUTING no_op.\n"); - break; - - - /* Match the next n pattern characters exactly. The following - byte in the pattern defines n, and the n bytes after that - are the characters to match. */ - case exactn: - mcnt = *p++; - DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); - - /* This is written out as an if-else so we don't waste time - testing `translate' inside the loop. */ - if (translate) - { - do - { - PREFETCH (); - if (translate[(unsigned char) *d++] != (char) *p++) - goto fail; - } - while (--mcnt); - } - else - { - do - { - PREFETCH (); - if (*d++ != (char) *p++) goto fail; - } - while (--mcnt); - } - SET_REGS_MATCHED (); - break; - - - /* Match any character except possibly a newline or a null. 
*/ - case anychar: - DEBUG_PRINT1 ("EXECUTING anychar.\n"); - - PREFETCH (); - - if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') - || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) - goto fail; - - SET_REGS_MATCHED (); - DEBUG_PRINT2 (" Matched `%d'.\n", *d); - d++; - break; - - - case charset: - case charset_not: - { - register unsigned char c; - boolean not = (re_opcode_t) *(p - 1) == charset_not; - - DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); - - PREFETCH (); - c = TRANSLATE (*d); /* The character to match. */ - - /* Cast to `unsigned' instead of `unsigned char' in case the - bit list is a full 32 bytes long. */ - if (c < (unsigned) (*p * BYTEWIDTH) - && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - p += 1 + *p; - - if (!not) goto fail; - - SET_REGS_MATCHED (); - d++; - break; - } - - - /* The beginning of a group is represented by start_memory. - The arguments are the register number in the next byte, and the - number of groups inner to this one in the next. The text - matched within the group is recorded (in the internal - registers data structure) under the register number. */ - case start_memory: - DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); - - /* Find out if this group can match the empty string. */ - p1 = p; /* To send to group_match_null_string_p. */ - - if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[*p]) - = group_match_null_string_p (&p1, pend, reg_info); - - /* Save the position in the string where we were the last time - we were at this open-group operator in case the group is - operated upon by a repetition operator, e.g., with `(a*)*b' - against `ab'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regstart[*p]) ? 
d : regstart[*p] - : regstart[*p]; - DEBUG_PRINT2 (" old_regstart: %d\n", - POINTER_TO_OFFSET (old_regstart[*p])); - - regstart[*p] = d; - DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); - - IS_ACTIVE (reg_info[*p]) = 1; - MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* This is the new highest active register. */ - highest_active_reg = *p; - - /* If nothing was active before, this is the new lowest active - register. */ - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *p; - - /* Move past the register number and inner group count. */ - p += 2; - just_past_start_mem = p; - break; - - - /* The stop_memory opcode represents the end of a group. Its - arguments are the same as start_memory's: the register - number, and the number of inner groups. */ - case stop_memory: - DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); - - /* We need to save the string position the last time we were at - this close-group operator in case the group is operated - upon by a repetition operator, e.g., with `((a*)*(b*)*)*' - against `aba'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regend[*p]) ? d : regend[*p] - : regend[*p]; - DEBUG_PRINT2 (" old_regend: %d\n", - POINTER_TO_OFFSET (old_regend[*p])); - - regend[*p] = d; - DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); - - /* This register isn't active anymore. */ - IS_ACTIVE (reg_info[*p]) = 0; - - /* If this was the only register active, nothing is active - anymore. */ - if (lowest_active_reg == highest_active_reg) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - { /* We must scan for the new highest active register, since - it isn't necessarily one less than now: consider - (a(b)c(d(e)f)g). When group 3 ends, after the f), the - new highest active register is 1. 
*/ - unsigned char r = *p - 1; - while (r > 0 && !IS_ACTIVE (reg_info[r])) - r--; - - /* If we end up at register zero, that means that we saved - the registers as the result of an `on_failure_jump', not - a `start_memory', and we jumped to past the innermost - `stop_memory'. For example, in ((.)*) we save - registers 1 and 2 as a result of the *, but when we pop - back to the second ), we are at the stop_memory 1. - Thus, nothing is active. */ - if (r == 0) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - highest_active_reg = r; - } - - /* If just failed to match something this time around with a - group that's operated on by a repetition operator, try to - force exit from the ``loop'', and restore the register - information for this group that we had before trying this - last match. */ - if ((!MATCHED_SOMETHING (reg_info[*p]) - || just_past_start_mem == p - 1) - && (p + 2) < pend) - { - boolean is_a_jump_n = false; - - p1 = p + 2; - mcnt = 0; - switch ((re_opcode_t) *p1++) - { - case jump_n: - is_a_jump_n = true; - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (is_a_jump_n) - p1 += 2; - break; - - default: - /* do nothing */ ; - } - p1 += mcnt; - - /* If the next operation is a jump backwards in the pattern - to an on_failure_jump right before the start_memory - corresponding to this stop_memory, exit from the loop - by forcing a failure after pushing on the stack the - on_failure_jump's jump in the pattern, and d. */ - if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump - && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) - { - /* If this group ever matched anything, then restore - what its registers were before trying this last - failed match, e.g., with `(a*)*b' against `ab' for - regstart[1], and, e.g., with `((a*)*(b*)*)*' - against `aba' for regend[3]. 
- - Also restore the registers for inner groups for, - e.g., `((a*)(b*))*' against `aba' (register 3 would - otherwise get trashed). */ - - if (EVER_MATCHED_SOMETHING (reg_info[*p])) - { - unsigned r; - - EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* Restore this and inner groups' (if any) registers. */ - for (r = *p; r < *p + *(p + 1); r++) - { - regstart[r] = old_regstart[r]; - - /* xx why this test? */ - if ((int) old_regend[r] >= (int) regstart[r]) - regend[r] = old_regend[r]; - } - } - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - PUSH_FAILURE_POINT (p1 + mcnt, d, -2); - - goto fail; - } - } - - /* Move past the register number and the inner group count. */ - p += 2; - break; - - - /* \ has been turned into a `duplicate' command which is - followed by the numeric value of as the register number. */ - case duplicate: - { - register const char *d2, *dend2; - int regno = *p++; /* Get which register to match against. */ - DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); - - /* Can't back reference a group which we've never matched. */ - if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) - goto fail; - - /* Where in input to try to start matching. */ - d2 = regstart[regno]; - - /* Where to stop matching; if both the place to start and - the place to stop matching are in the same string, then - set to the place to stop, otherwise, for now have to use - the end of the first string. */ - - dend2 = ((FIRST_STRING_P (regstart[regno]) - == FIRST_STRING_P (regend[regno])) - ? regend[regno] : end_match_1); - for (;;) - { - /* If necessary, advance to next segment in register - contents. */ - while (d2 == dend2) - { - if (dend2 == end_match_2) break; - if (dend2 == regend[regno]) break; - - /* End of string1 => advance to string2. */ - d2 = string2; - dend2 = regend[regno]; - } - /* At end of register contents => success */ - if (d2 == dend2) break; - - /* If necessary, advance to next segment in data. 
*/ - PREFETCH (); - - /* How many characters left in this segment to match. */ - mcnt = dend - d; - - /* Want how many consecutive characters we can match in - one shot, so, if necessary, adjust the count. */ - if (mcnt > dend2 - d2) - mcnt = dend2 - d2; - - /* Compare that many; failure if mismatch, else move - past them. */ - if (translate - ? bcmp_translate (d, d2, mcnt, translate) - : bcmp (d, d2, mcnt)) - goto fail; - d += mcnt, d2 += mcnt; - } - } - break; - - - /* begline matches the empty string at the beginning of the string - (unless `not_bol' is set in `bufp'), and, if - `newline_anchor' is set, after newlines. */ - case begline: - DEBUG_PRINT1 ("EXECUTING begline.\n"); - - if (AT_STRINGS_BEG (d)) - { - if (!bufp->not_bol) break; - } - else if (d[-1] == '\n' && bufp->newline_anchor) - { - break; - } - /* In all other cases, we fail. */ - goto fail; - - - /* endline is the dual of begline. */ - case endline: - DEBUG_PRINT1 ("EXECUTING endline.\n"); - - if (AT_STRINGS_END (d)) - { - if (!bufp->not_eol) break; - } - - /* We have to ``prefetch'' the next character. */ - else if ((d == end1 ? *string2 : *d) == '\n' - && bufp->newline_anchor) - { - break; - } - goto fail; - - - /* Match at the very beginning of the data. */ - case begbuf: - DEBUG_PRINT1 ("EXECUTING begbuf.\n"); - if (AT_STRINGS_BEG (d)) - break; - goto fail; - - - /* Match at the very end of the data. */ - case endbuf: - DEBUG_PRINT1 ("EXECUTING endbuf.\n"); - if (AT_STRINGS_END (d)) - break; - goto fail; - - - /* on_failure_keep_string_jump is used to optimize `.*\n'. It - pushes NULL as the value for the string on the stack. Then - `pop_failure_point' will keep the current value for the - string, instead of restoring it. To see why, consider - matching `foo\nbar' against `.*\n'. The .* matches the foo; - then the . fails against the \n. But the next thing we want - to do is match the \n against the \n; if we restored the - string value, we would be back at the foo. 
- - Because this is used only in specific cases, we don't need to - check all the things that `on_failure_jump' does, to make - sure the right things get saved on the stack. Hence we don't - share its code. The only reason to push anything on the - stack at all is that otherwise we would have to change - `anychar's code to do something besides goto fail in this - case; that seems worse than this. */ - case on_failure_keep_string_jump: - DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); - - PUSH_FAILURE_POINT (p + mcnt, NULL, -2); - break; - - - /* Uses of on_failure_jump: - - Each alternative starts with an on_failure_jump that points - to the beginning of the next alternative. Each alternative - except the last ends with a jump that in effect jumps past - the rest of the alternatives. (They really jump to the - ending jump of the following alternative, because tensioning - these jumps is a hassle.) - - Repeats start with an on_failure_jump that points past both - the repetition text and either the following jump or - pop_failure_jump back to this on_failure_jump. */ - case on_failure_jump: - on_failure: - DEBUG_PRINT1 ("EXECUTING on_failure_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); - - /* If this on_failure_jump comes right before a group (i.e., - the original * applied to a group), save the information - for that group and all inner ones, so that if we fail back - to this point, the group's information will be correct. - For example, in \(a*\)*\1, we need the preceding group, - and in \(\(a*\)b*\)\2, we need the inner group. */ - - /* We can't use `p' to check ahead because we push - a failure point to `p + mcnt' after we do this. 
*/ - p1 = p; - - /* We need to skip no_op's before we look for the - start_memory in case this on_failure_jump is happening as - the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 - against aba. */ - while (p1 < pend && (re_opcode_t) *p1 == no_op) - p1++; - - if (p1 < pend && (re_opcode_t) *p1 == start_memory) - { - /* We have a new highest active register now. This will - get reset at the start_memory we are about to get to, - but we will have saved all the registers relevant to - this repetition op, as described above. */ - highest_active_reg = *(p1 + 1) + *(p1 + 2); - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *(p1 + 1); - } - - DEBUG_PRINT1 (":\n"); - PUSH_FAILURE_POINT (p + mcnt, d, -2); - break; - - - /* A smart repeat ends with `maybe_pop_jump'. - We change it to either `pop_failure_jump' or `jump'. */ - case maybe_pop_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); - { - register unsigned char *p2 = p; - - /* Compare the beginning of the repeat with what in the - pattern follows its end. If we can establish that there - is nothing that they would both match, i.e., that we - would have to backtrack because of (as in, e.g., `a*a') - then we can change to pop_failure_jump, because we'll - never have to backtrack. - - This is not true in the case of alternatives: in - `(a|ab)*' we do need to backtrack to the `ab' alternative - (e.g., if the string was `ab'). But instead of trying to - detect that here, the alternative has put on a dummy - failure point which is what we will end up popping. */ - - /* Skip over open/close-group commands. - If what follows this loop is a ...+ construct, - look at what begins its body, since we will have to - match at least one of that. 
*/ - while (1) - { - if (p2 + 2 < pend - && ((re_opcode_t) *p2 == stop_memory - || (re_opcode_t) *p2 == start_memory)) - p2 += 3; - else if (p2 + 6 < pend - && (re_opcode_t) *p2 == dummy_failure_jump) - p2 += 6; - else - break; - } - - p1 = p + mcnt; - /* p1[0] ... p1[2] are the `on_failure_jump' corresponding - to the `maybe_finalize_jump' of this case. Examine what - follows. */ - - /* If we're at the end of the pattern, we can change. */ - if (p2 == pend) - { - /* Consider what happens when matching ":\(.*\)" - against ":/". I don't really understand this code - yet. */ - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 - (" End of pattern: change to `pop_failure_jump'.\n"); - } - - else if ((re_opcode_t) *p2 == exactn - || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) - { - register unsigned char c - = *p2 == (unsigned char) endline ? '\n' : p2[2]; - - if ((re_opcode_t) p1[3] == exactn && p1[5] != c) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", - c, p1[5]); - } - - else if ((re_opcode_t) p1[3] == charset - || (re_opcode_t) p1[3] == charset_not) - { - int not = (re_opcode_t) p1[3] == charset_not; - - if (c < (unsigned char) (p1[4] * BYTEWIDTH) - && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - /* `not' is equal to 1 if c would match, which means - that we can't change to pop_failure_jump. */ - if (!not) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - } - else if ((re_opcode_t) *p2 == charset) - { -#ifdef DEBUG - register unsigned char c - = *p2 == (unsigned char) endline ? '\n' : p2[2]; -#endif - - if ((re_opcode_t) p1[3] == exactn - && ! 
((int) p2[1] * BYTEWIDTH > (int) p1[4] - && (p2[1 + p1[4] / BYTEWIDTH] - & (1 << (p1[4] % BYTEWIDTH))))) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", - c, p1[5]); - } - - else if ((re_opcode_t) p1[3] == charset_not) - { - int idx; - /* We win if the charset_not inside the loop - lists every character listed in the charset after. */ - for (idx = 0; idx < (int) p2[1]; idx++) - if (! (p2[2 + idx] == 0 - || (idx < (int) p1[4] - && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) - break; - - if (idx == p2[1]) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - else if ((re_opcode_t) p1[3] == charset) - { - int idx; - /* We win if the charset inside the loop - has no overlap with the one after the loop. */ - for (idx = 0; - idx < (int) p2[1] && idx < (int) p1[4]; - idx++) - if ((p2[2 + idx] & p1[5 + idx]) != 0) - break; - - if (idx == p2[1] || idx == p1[4]) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - } - } - p -= 2; /* Point at relative address again. */ - if ((re_opcode_t) p[-1] != pop_failure_jump) - { - p[-1] = (unsigned char) jump; - DEBUG_PRINT1 (" Match => jump.\n"); - goto unconditional_jump; - } - /* Note fall through. */ - - - /* The end of a simple repeat has a pop_failure_jump back to - its matching on_failure_jump, where the latter will push a - failure point. The pop_failure_jump takes off failure - points put on by this pop_failure_jump's matching - on_failure_jump; we got through the pattern to here from the - matching on_failure_jump, so didn't fail. */ - case pop_failure_jump: - { - /* We need to pass separate storage for the lowest and - highest registers, even though we don't care about the - actual values. Otherwise, we will restore only one - register from the stack, since lowest will == highest in - `pop_failure_point'. 
*/ - unsigned dummy_low_reg, dummy_high_reg; - unsigned char *pdummy; - const char *sdummy; - - DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); - POP_FAILURE_POINT (sdummy, pdummy, - dummy_low_reg, dummy_high_reg, - reg_dummy, reg_dummy, reg_info_dummy); - } - /* Note fall through. */ - - - /* Unconditionally jump (without popping any failure points). */ - case jump: - unconditional_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ - DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); - p += mcnt; /* Do the jump. */ - DEBUG_PRINT2 ("(to 0x%x).\n", p); - break; - - - /* We need this opcode so we can detect where alternatives end - in `group_match_null_string_p' et al. */ - case jump_past_alt: - DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); - goto unconditional_jump; - - - /* Normally, the on_failure_jump pushes a failure point, which - then gets popped at pop_failure_jump. We will end up at - pop_failure_jump, also, and with a pattern of, say, `a+', we - are skipping over the on_failure_jump, so we have to push - something meaningless for pop_failure_jump to pop. */ - case dummy_failure_jump: - DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); - /* It doesn't matter what we push for the string here. What - the code at `fail' tests is the value for the pattern. */ - PUSH_FAILURE_POINT (0, 0, -2); - goto unconditional_jump; - - - /* At the end of an alternative, we need to push a dummy failure - point in case we are followed by a `pop_failure_jump', because - we don't want the failure point for the alternative to be - popped. For example, matching `(a|ab)*' against `aab' - requires that we match the `ab' alternative. */ - case push_dummy_failure: - DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); - /* See comments just above at `dummy_failure_jump' about the - two zeroes. */ - PUSH_FAILURE_POINT (0, 0, -2); - break; - - /* Have to succeed matching what follows at least n times. - After that, handle like `on_failure_jump'. 
*/ - case succeed_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); - - assert (mcnt >= 0); - /* Originally, this is how many times we HAVE to succeed. */ - if (mcnt > 0) - { - mcnt--; - p += 2; - STORE_NUMBER_AND_INCR (p, mcnt); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); - } - else if (mcnt == 0) - { - DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); - p[2] = (unsigned char) no_op; - p[3] = (unsigned char) no_op; - goto on_failure; - } - break; - - case jump_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); - - /* Originally, this is how many times we CAN jump. */ - if (mcnt) - { - mcnt--; - STORE_NUMBER (p + 2, mcnt); - goto unconditional_jump; - } - /* If don't have to jump any more, skip over the rest of command. */ - else - p += 4; - break; - - case set_number_at: - { - DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - p1 = p + mcnt; - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); - STORE_NUMBER (p1, mcnt); - break; - } - - case wordbound: - DEBUG_PRINT1 ("EXECUTING wordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - break; - goto fail; - - case notwordbound: - DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - goto fail; - break; - - case wordbeg: - DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); - if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) - break; - goto fail; - - case wordend: - DEBUG_PRINT1 ("EXECUTING wordend.\n"); - if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) - && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) - break; - goto fail; - -#ifdef emacs - case before_dot: - DEBUG_PRINT1 ("EXECUTING before_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) >= point) - goto fail; - break; - - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) != point) - goto fail; - break; - - case after_dot: - DEBUG_PRINT1 ("EXECUTING 
after_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) <= point) - goto fail; - break; -#if 0 /* not emacs19 */ - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point) - goto fail; - break; -#endif /* not emacs19 */ - - case syntaxspec: - DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchsyntax; - - case wordchar: - DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); - mcnt = (int) Sword; - matchsyntax: - PREFETCH (); - /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ - d++; - if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - - case notsyntaxspec: - DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchnotsyntax; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); - mcnt = (int) Sword; - matchnotsyntax: - PREFETCH (); - /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ - d++; - if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - -#else /* not emacs */ - case wordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); - PREFETCH (); - if (!WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); - PREFETCH (); - if (WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; -#endif /* not emacs */ - - default: - abort (); - } - continue; /* Successfully executed one pattern command; keep going. */ - - - /* We goto here if a matching operation fails. */ - fail: - if (!FAIL_STACK_EMPTY ()) - { /* A restart point is known. Restore to that state. */ - DEBUG_PRINT1 ("\nFAIL:\n"); - POP_FAILURE_POINT (d, p, - lowest_active_reg, highest_active_reg, - regstart, regend, reg_info); - - /* If this failure point is a dummy, try the next one. */ - if (!p) - goto fail; - - /* If we failed to the end of the pattern, don't examine *p. 
*/ - assert (p <= pend); - if (p < pend) - { - boolean is_a_jump_n = false; - - /* If failed to a backwards jump that's part of a repetition - loop, need to pop this failure point and use the next one. */ - switch ((re_opcode_t) *p) - { - case jump_n: - is_a_jump_n = true; - case maybe_pop_jump: - case pop_failure_jump: - case jump: - p1 = p + 1; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - - if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) - || (!is_a_jump_n - && (re_opcode_t) *p1 == on_failure_jump)) - goto fail; - break; - default: - /* do nothing */ ; - } - } - - if (d >= string1 && d <= end1) - dend = end_match_1; - } - else - break; /* Matching at this starting point really fails. */ - } /* for (;;) */ - - if (best_regs_set) - goto restore_best_regs; - - FREE_VARIABLES (); - - return -1; /* Failure to match. */ -} /* re_match_2 */ - -/* Subroutine definitions for re_match_2. */ - - -/* We are passed P pointing to a register number after a start_memory. - - Return true if the pattern up to the corresponding stop_memory can - match the empty string, and false otherwise. - - If we find the matching stop_memory, sets P to point to one past its number. - Otherwise, sets P to an undefined byte less than or equal to END. - - We don't handle duplicates properly (yet). */ - -static boolean -group_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - /* Point to after the args to the start_memory. */ - unsigned char *p1 = *p + 2; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and return true or - false, as appropriate, when we get to one that can't, or to the - matching stop_memory. */ - - switch ((re_opcode_t) *p1) - { - /* Could be either a loop or a series of alternatives. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - /* If the next operation is not a jump backwards in the - pattern. 
*/ - - if (mcnt >= 0) - { - /* Go through the on_failure_jumps of the alternatives, - seeing if any of the alternatives cannot match nothing. - The last alternative starts with only a jump, - whereas the rest start with on_failure_jump and end - with a jump, e.g., here is the pattern for `a|b|c': - - /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 - /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 - /exactn/1/c - - So, we have to first go through the first (n-1) - alternatives and then deal with the last one separately. */ - - - /* Deal with the first (n-1) alternatives, which start - with an on_failure_jump (see above) that jumps to right - past a jump_past_alt. */ - - while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) - { - /* `mcnt' holds how many bytes long the alternative - is, including the ending `jump_past_alt' and - its number. */ - - if (!alt_match_null_string_p (p1, p1 + mcnt - 3, - reg_info)) - return false; - - /* Move to right after this alternative, including the - jump_past_alt. */ - p1 += mcnt; - - /* Break if it's the beginning of an n-th alternative - that doesn't begin with an on_failure_jump. */ - if ((re_opcode_t) *p1 != on_failure_jump) - break; - - /* Still have to check that it's not an n-th - alternative that starts with an on_failure_jump. */ - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) - { - /* Get to the beginning of the n-th alternative. */ - p1 -= 3; - break; - } - } - - /* Deal with the last alternative: go back and get number - of the `jump_past_alt' just before it. `mcnt' contains - the length of the alternative. */ - EXTRACT_NUMBER (mcnt, p1 - 2); - - if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) - return false; - - p1 += mcnt; /* Get past the n-th alternative. 
*/ - } /* if mcnt > 0 */ - break; - - - case stop_memory: - assert (p1[1] == **p); - *p = p1 + 2; - return true; - - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return false; -} /* group_match_null_string_p */ - - -/* Similar to group_match_null_string_p, but doesn't deal with alternatives: - It expects P to be the first byte of a single alternative and END one - byte past the last. The alternative can contain groups. */ - -static boolean -alt_match_null_string_p (p, end, reg_info) - unsigned char *p, *end; - register_info_type *reg_info; -{ - int mcnt; - unsigned char *p1 = p; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and break when we get - to one that can't. */ - - switch ((re_opcode_t) *p1) - { - /* It's a loop. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - break; - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return true; -} /* alt_match_null_string_p */ - - -/* Deals with the ops common to group_match_null_string_p and - alt_match_null_string_p. - - Sets P to one after the op and its arguments, if any. */ - -static boolean -common_op_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - boolean ret; - int reg_no; - unsigned char *p1 = *p; - - switch ((re_opcode_t) *p1++) - { - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbeg: - case wordend: - case wordbound: - case notwordbound: -#ifdef emacs - case before_dot: - case at_dot: - case after_dot: -#endif - break; - - case start_memory: - reg_no = *p1; - assert (reg_no > 0 && reg_no <= MAX_REGNUM); - ret = group_match_null_string_p (&p1, end, reg_info); - - /* Have to set this here in case we're checking a group which - contains a group and a back reference to it. 
*/ - - if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; - - if (!ret) - return false; - break; - - /* If this is an optimized succeed_n for zero times, make the jump. */ - case jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (mcnt >= 0) - p1 += mcnt; - else - return false; - break; - - case succeed_n: - /* Get to the number of times to succeed. */ - p1 += 2; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - if (mcnt == 0) - { - p1 -= 4; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - } - else - return false; - break; - - case duplicate: - if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) - return false; - break; - - case set_number_at: - p1 += 4; - - default: - /* All other opcodes mean we cannot match the empty string. */ - return false; - } - - *p = p1; - return true; -} /* common_op_match_null_string_p */ - - -/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN - bytes; nonzero otherwise. */ - -static int -bcmp_translate (s1, s2, len, translate) - unsigned char *s1, *s2; - register int len; - char *translate; -{ - register unsigned char *p1 = s1, *p2 = s2; - while (len) - { - if (translate[*p1++] != translate[*p2++]) return 1; - len--; - } - return 0; -} - -/* Entry points for GNU code. */ - -/* re_compile_pattern is the GNU regular expression compiler: it - compiles PATTERN (of length SIZE) and puts the result in BUFP. - Returns 0 if the pattern was valid, otherwise an error string. - - Assumes the `allocated' (and perhaps `buffer') and `translate' fields - are set in BUFP on entry. - - We call regex_compile to do the actual compilation. */ - -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - int length; - struct re_pattern_buffer *bufp; -{ - reg_errcode_t ret; - - /* GNU code is written to assume at least RE_NREGS registers will be set - (and at least one extra will be -1). 
*/ - bufp->regs_allocated = REGS_UNALLOCATED; - - /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by - setting no_sub. */ - bufp->no_sub = 0; - - /* Match anchors at newline. */ - bufp->newline_anchor = 1; - - ret = regex_compile (pattern, length, re_syntax_options, bufp); - - return re_error_msg[(int) ret]; -} - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them unless specifically requested. */ - -#ifdef _REGEX_RE_COMP - -/* BSD has one and only one pattern buffer. */ -static struct re_pattern_buffer re_comp_buf; - -char * -re_comp (s) - const char *s; -{ - reg_errcode_t ret; - - if (!s) - { - if (!re_comp_buf.buffer) - return "No previous regular expression"; - return 0; - } - - if (!re_comp_buf.buffer) - { - re_comp_buf.buffer = (unsigned char *) malloc (200); - if (re_comp_buf.buffer == NULL) - return "Memory exhausted"; - re_comp_buf.allocated = 200; - - re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); - if (re_comp_buf.fastmap == NULL) - return "Memory exhausted"; - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - /* Match anchors at newlines. */ - re_comp_buf.newline_anchor = 1; - - ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); - - /* Yes, we're discarding `const' here. */ - return (char *) re_error_msg[(int) ret]; -} - - -int -re_exec (s) - const char *s; -{ - const int len = strlen (s); - return - 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); -} -#endif /* _REGEX_RE_COMP */ - -/* POSIX.2 functions. Don't define these for Emacs. */ - -#ifndef emacs - -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. 
Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' and `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. - - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. - - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for - the return codes and their meanings.) */ - -int -regcomp (preg, pattern, cflags) - regex_t *preg; - const char *pattern; - int cflags; -{ - reg_errcode_t ret; - unsigned syntax - = (cflags & REG_EXTENDED) ? - RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; - - /* regex_compile will allocate the space for the compiled pattern. */ - preg->buffer = 0; - preg->allocated = 0; - preg->used = 0; - - /* Don't bother to use a fastmap when searching. This simplifies the - REG_NEWLINE case: if we used a fastmap, we'd have to put all the - characters after newlines into the fastmap. This way, we just try - every character. */ - preg->fastmap = 0; - - if (cflags & REG_ICASE) - { - unsigned i; - - preg->translate = (char *) malloc (CHAR_SET_SIZE); - if (preg->translate == NULL) - return (int) REG_ESPACE; - - /* Map uppercase characters to corresponding lowercase ones. */ - for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = ISUPPER (i) ? 
tolower (i) : i; - } - else - preg->translate = NULL; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - /* It also changes the matching behavior. */ - preg->newline_anchor = 1; - } - else - preg->newline_anchor = 0; - - preg->no_sub = !!(cflags & REG_NOSUB); - - /* POSIX says a null character in the pattern terminates it, so we - can use strlen here in compiling the pattern. */ - ret = regex_compile (pattern, strlen (pattern), syntax, preg); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) ret = REG_EPAREN; - - return (int) ret; -} - - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *preg; - const char *string; - size_t nmatch; - regmatch_t pmatch[]; - int eflags; -{ - int ret; - struct re_registers regs; - regex_t private_preg; - int len = strlen (string); - boolean want_reg_info = !preg->no_sub && nmatch > 0; - - private_preg = *preg; - - private_preg.not_bol = !!(eflags & REG_NOTBOL); - private_preg.not_eol = !!(eflags & REG_NOTEOL); - - /* The user has told us exactly how many registers to return - information about, via `nmatch'. We have to pass that on to the - matching routines. 
*/ - private_preg.regs_allocated = REGS_FIXED; - - if (want_reg_info) - { - regs.num_regs = nmatch; - regs.start = TALLOC (nmatch, regoff_t); - regs.end = TALLOC (nmatch, regoff_t); - if (regs.start == NULL || regs.end == NULL) - return (int) REG_NOMATCH; - } - - /* Perform the searching operation. */ - ret = re_search (&private_preg, string, len, - /* start: */ 0, /* range: */ len, - want_reg_info ? &regs : (struct re_registers *) 0); - - /* Copy the register information to the POSIX structure. */ - if (want_reg_info) - { - if (ret >= 0) - { - unsigned r; - - for (r = 0; r < nmatch; r++) - { - pmatch[r].rm_so = regs.start[r]; - pmatch[r].rm_eo = regs.end[r]; - } - } - - /* If we needed the temporary register info, free the space now. */ - free (regs.start); - free (regs.end); - } - - /* We want zero return to mean success, unlike `re_search'. */ - return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; -} - - -/* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. We don't use PREG here. */ - -size_t -regerror (errcode, preg, errbuf, errbuf_size) - int errcode; - const regex_t *preg; - char *errbuf; - size_t errbuf_size; -{ - const char *msg; - size_t msg_size; - - if (errcode < 0 - || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) - /* Only error codes returned by the rest of the code should be passed - to this routine. If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = re_error_msg[errcode]; - - /* POSIX doesn't require that we do anything in this case, but why - not be nice. */ - if (! msg) - msg = "Success"; - - msg_size = strlen (msg) + 1; /* Includes the null.
*/ - - if (errbuf_size != 0) - { - if (msg_size > errbuf_size) - { - strncpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; - } - else - strcpy (errbuf, msg); - } - - return msg_size; -} - - -/* Free dynamically allocated space used by PREG. */ - -void -regfree (preg) - regex_t *preg; -{ - if (preg->buffer != NULL) - free (preg->buffer); - preg->buffer = NULL; - - preg->allocated = 0; - preg->used = 0; - - if (preg->fastmap != NULL) - free (preg->fastmap); - preg->fastmap = NULL; - preg->fastmap_accurate = 0; - - if (preg->translate != NULL) - free (preg->translate); - preg->translate = NULL; -} - -#endif /* not emacs */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/regex.h b/regex.h deleted file mode 100644 index c6076c9..0000000 --- a/regex.h +++ /dev/null @@ -1,489 +0,0 @@ -#define _REGEX_RE_COMP - -/* Definitions for data structures and routines for the regular - expression library, version 0.12. - - Copyright (C) 1985, 89, 90, 91, 92, 1993 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#ifndef __REGEXP_LIBRARY_H__ -#define __REGEXP_LIBRARY_H__ - -/* POSIX says that <sys/types.h> must be included (by the caller) before - <regex.h>. */ - -#ifdef VMS -/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it - should be there.
*/ -#include -#endif - - -/* The following bits are used to determine the regexp syntax we - recognize. The set/not-set meanings are chosen so that Emacs syntax - remains the value 0. The bits are given in alphabetical order, and - the definitions shifted by one from the previous bit; thus, when we - add or remove a bit, only one other definition need change. */ -typedef unsigned reg_syntax_t; - -/* If this bit is not set, then \ inside a bracket expression is literal. - If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) - -/* If this bit is not set, then + and ? are operators, and \+ and \? are - literals. - If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) - -/* If this bit is set, then character classes are supported. They are: - [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], - [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. - If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) - -/* If this bit is set, then ^ and $ are always anchors (outside bracket - expressions, of course). - If this bit is not set, then it depends: - ^ is an anchor if it is at the beginning of a regular - expression or after an open-group or an alternation operator; - $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. - - This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because - POSIX draft 11.2 says that * etc. in leading positions is undefined. - We already implemented a previous draft which made those constructs - invalid, though, so we haven't changed the code back. */ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) - -/* If this bit is set, then special characters are always special - regardless of where they are in the pattern. 
- If this bit is not set, then special characters are special only in - some contexts; otherwise they are ordinary. Specifically, - * + ? and intervals are only special when not after the beginning, - open-group, or alternation operator. */ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) - -/* If this bit is set, then *, +, ?, and { cannot be first in an re or - immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) - -/* If this bit is set, then . matches newline. - If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) - -/* If this bit is set, then . doesn't match NUL. - If not set, then it does. */ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) - -/* If this bit is set, nonmatching lists [^...] do not match newline. - If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) - -/* If this bit is set, either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. - If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -/* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) - -/* If this bit is set, newline is an alternation operator. - If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) - -/* If this bit is set, then `{...}' defines an interval, and \{ and \} - are literals. - If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) - -/* If this bit is set, (...) defines a group, and \( and \) are literals. - If not set, \(...\) defines a group, and ( and ) are literals. */ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) - -/* If this bit is set, then \ matches . - If not set, then \ is a back-reference. 
*/ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) - -/* If this bit is set, then | is an alternation operator, and \| is literal. - If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) - -/* If this bit is set, then an ending range point collating higher - than the starting range point, as in [z-a], is invalid. - If not set, then when ending range point collates higher than the - starting range point, the range is ignored. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) - -/* If this bit is set, then an unmatched ) is ordinary. - If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) - -/* This global variable defines the particular regexp syntax to use (for - some interfaces). When a regexp is compiled, the syntax used is - stored in the pattern buffer, so changing this does not affect - already-compiled regexps. */ -extern reg_syntax_t re_syntax_options; - -/* Define combinations of the above bits for the standard possibilities. - (The [[[ comments delimit what gets put into the Texinfo file, so - don't delete them!) */ -/* [[[begin syntaxes]]] */ -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -#define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) - -/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. 
*/ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - -/* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) -/* [[[end syntaxes]]] */ - -/* Maximum number of duplicates an interval can allow. Some systems - (erroneously) define this in other header files, but we want our - value, so remove any previous define. */ -#ifdef RE_DUP_MAX -#undef RE_DUP_MAX -#endif -#define RE_DUP_MAX ((1 << 15) - 1) - - -/* POSIX `cflags' bits (i.e., information for `regcomp'). */ - -/* If this bit is set, then use extended regular expression syntax. - If not set, then use basic regular expression syntax. */ -#define REG_EXTENDED 1 - -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -#define REG_ICASE (REG_EXTENDED << 1) - -/* If this bit is set, then anchors do not match at newline - characters in the string. 
- If not set, then anchors do match at newlines. */ -#define REG_NEWLINE (REG_ICASE << 1) - -/* If this bit is set, then report only success or fail in regexec. - If not set, then returns differ between not matching and errors. */ -#define REG_NOSUB (REG_NEWLINE << 1) - - -/* POSIX `eflags' bits (i.e., information for regexec). */ - -/* If this bit is set, then the beginning-of-line operator doesn't match - the beginning of the string (presumably because it's not the - beginning of a line). - If not set, then the beginning-of-line operator does match the - beginning of the string. */ -#define REG_NOTBOL 1 - -/* Like REG_NOTBOL, except for the end-of-line. */ -#define REG_NOTEOL (1 << 1) - - -/* If any error codes are removed, changed, or added, update the - `re_error_msg' table in regex.c. */ -typedef enum -{ - REG_NOERROR = 0, /* Success. */ - REG_NOMATCH, /* Didn't find a match (for regexec). */ - - /* POSIX regcomp return error codes. (In the order listed in the - standard.) */ - REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Not implemented. */ - REG_ECTYPE, /* Invalid character class name. */ - REG_EESCAPE, /* Trailing backslash. */ - REG_ESUBREG, /* Invalid back reference. */ - REG_EBRACK, /* Unmatched left bracket. */ - REG_EPAREN, /* Parenthesis imbalance. */ - REG_EBRACE, /* Unmatched \{. */ - REG_BADBR, /* Invalid contents of \{\}. */ - REG_ERANGE, /* Invalid range end. */ - REG_ESPACE, /* Ran out of memory. */ - REG_BADRPT, /* No preceding re for repetition op. */ - - /* Error codes we've added. */ - REG_EEND, /* Premature end. */ - REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ - REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ -} reg_errcode_t; - -/* This data structure represents a compiled pattern. Before calling - the pattern compiler, the fields `buffer', `allocated', `fastmap', - `translate', and `no_sub' can be set. After the pattern has been - compiled, the `re_nsub' field is available. 
All other fields are - private to the regex routines. */ - -struct re_pattern_buffer -{ -/* [[[begin pattern_buffer]]] */ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - char *translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. 
*/ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - -/* [[[end pattern_buffer]]] */ -}; - -typedef struct re_pattern_buffer regex_t; - -/* Type for byte offsets within the string. POSIX mandates this. */ -typedef int regoff_t; - - -/* This is the structure we store register match data in. See - regex.texinfo for a full description of what registers match. */ -struct re_registers -{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -}; - - -/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, - `re_match_2' returns information about at least this many registers - the first time a `regs' structure is passed. */ -#ifndef RE_NREGS -#define RE_NREGS 30 -#endif - - -/* POSIX specification for registers. Aside from the different names than - `re_registers', POSIX uses an array of structures, instead of a - structure of arrays. */ -typedef struct -{ - regoff_t rm_so; /* Byte offset from string's start to substring's start. */ - regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ -} regmatch_t; - -/* Declarations for routines. */ - -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -#define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -#define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - -/* Sets the current default syntax to SYNTAX, and return the old syntax. - You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); - -/* Compile the regular expression PATTERN, with length LENGTH - and syntax given by the global `re_syntax_options', into the buffer - BUFFER. 
Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, int length, - struct re_pattern_buffer *buffer)); - - -/* Compile a fastmap for the compiled pattern in BUFFER; used to - accelerate searches. Return 0 if successful and -2 if was an - internal error. */ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); - - -/* Search in the string STRING (with length LENGTH) for the pattern - compiled into BUFFER. Start searching at position START, for RANGE - characters. Return the starting position of the match, -1 for no - match, or -2 for an internal error. Also return register - information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); - - -/* Like `re_search', but search in the concatenation of STRING1 and - STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); - - -/* Like `re_search', but return how many characters in STRING the regexp - in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); - - -/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); - - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using BUFFER and REGS will use this memory - for recording register information. 
STARTS and ENDS must be - allocated with malloc, and must each be at least `NUM_REGS * sizeof - (regoff_t)' bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); - -#ifdef _REGEX_RE_COMP -/* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); -#endif - -/* POSIX compatibility. */ -extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); -extern int regexec - _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags)); -extern size_t regerror - _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, - size_t errbuf_size)); -extern void regfree _RE_ARGS ((regex_t *preg)); - -#endif /* not __REGEXP_LIBRARY_H__ */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/scanners.c b/scanners.c deleted file mode 100644 index f2a5d44..0000000 --- a/scanners.c +++ /dev/null @@ -1,1216 +0,0 @@ -/* scanners.c -- file & directory name manipulations - Copyright (C) 1986, 1995 Greg McGary - VHIL portions Copyright (C) 1988 Tom Horsley - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include -#include -#include - -#include -#include "strxtra.h" -#include "token.h" -#include "alloc.h" -#include "scanners.h" - -extern char const *program_name; - -static char const *get_token_VHIL __P((FILE *input_FILE, int *flags)); -static char const *get_token_c __P((FILE *input_FILE, int *flags)); -static void set_args_c __P((char const *lang_name, int op, char const *arg)); -static void set_ctype_c __P((char const *chars, int type)); -static void clear_ctype_c __P((char const *chars, int type)); -static void usage_c __P((char const *lang_name)); - -static char const *get_token_asm __P((FILE *input_FILE, int *flags)); -static void set_ctype_asm __P((char const *chars, int type)); -static void clear_ctype_asm __P((char const *chars, int type)); -static void usage_asm __P((char const *lang_name)); -static void set_args_asm __P((char const *lang_name, int op, char const *arg)); - -static char const *get_token_text __P((FILE *input_FILE, int *flags)); -static void set_ctype_text __P((char const *chars, int type)); -static void clear_ctype_text __P((char const *chars, int type)); -static void usage_text __P((char const *lang_name)); -static void set_args_text __P((char const *lang_name, int op, char const *arg)); - -/****************************************************************************/ - -typedef void (*set_args_t) __P((char const *lang_name, int op, char const *arg)); - -struct language -{ - char const *lang_name; - get_token_t lang_get_token; - set_args_t lang_set_args; - char const *lang_filter; - struct language *lang_next; -}; - -struct suffix -{ - char const *suff_suffix; - char const *suff_lang_name; - struct language *suff_language; - struct suffix *suff_next; -}; - -static struct suffix *get_suffix_entry (char const *suffix); 
-static struct language *get_lang_entry (char const *lang_name); -static void usage_scan (void); - -struct language languages_0[] = -{ - { "C", get_token_c, set_args_c, NULL }, - { "TeX", get_token_text, set_args_text, NULL }, - { "VHIL", get_token_VHIL, set_args_c, NULL }, - { "asm", get_token_asm, set_args_asm, NULL }, -/*{ "elisp", get_token_elisp, set_args_elisp, NULL },*/ - { "gzip", NULL, NULL, "zcat %s" }, - { "roff", get_token_text, set_args_text, "sed '/^\\.so/d' < %s | deroff" }, - { "text", get_token_text, set_args_text, NULL }, -}; -struct language *languages = languages_0; - -/* - This is a rather incomplete list of default associations - between suffixes and languages. You may add more to the - default list, or you may define them dynamically with the - `-S=' argument to mkid(1) and idx(1). e.g. to - associate a `.ada' suffix with the Ada language, use - `-S.ada=ada' -*/ -struct suffix suffixes_0[] = -{ - { "", "text" }, - { ".1", "roff" }, - { ".2", "roff" }, - { ".3", "roff" }, - { ".4", "roff" }, - { ".5", "roff" }, - { ".6", "roff" }, - { ".7", "roff" }, - { ".8", "roff" }, - { ".C", "C" }, - { ".H", "C" }, - { ".Z", "gzip" }, - { ".c", "C" }, - { ".cc", "C" }, - { ".cpp", "C" }, - { ".cxx", "C" }, - { ".doc", "text" }, -/*{ ".el", "elisp" },*/ - { ".gz", "gzip" }, - { ".h", "C" }, - { ".hh", "C" }, - { ".hpp", "C" }, - { ".hxx", "C" }, - { ".l", "C" }, - { ".lex", "C" }, - { ".ltx", "TeX" }, - { ".p", "pas" }, - { ".pas", "pas" }, - { ".s", "asm" }, - { ".S", "asm" }, - { ".tex", "TeX" }, - { ".x", "VHIL" }, - { ".y", "C" }, - { ".yacc", "C" }, - { ".z", "gzip" }, -}; -struct suffix *suffixes = suffixes_0; - -void -init_scanners (void) -{ - struct language *lang; - struct language *lang_N = &languages_0[(sizeof (languages_0) / sizeof (languages_0[0])) - 1]; - struct suffix *suff; - struct suffix *suff_N = &suffixes_0[(sizeof (suffixes_0) / sizeof (suffixes_0[0])) - 1]; - - for (lang = languages; lang <= lang_N; ++lang) - lang->lang_next = lang + 
1; - lang_N->lang_next = NULL; - - for (suff = suffixes; suff <= suff_N; ++suff) { - lang = get_lang_entry (suff->suff_lang_name); - if (lang) - suff->suff_language = lang; - suff->suff_next = suff + 1; - } - suff_N->suff_next = NULL; -} - -/* Return a suffix table entry for the given suffix. */ -static struct suffix * -get_suffix_entry (char const *suffix) -{ - struct suffix *stp; - - if (suffix == NULL) - suffix = ""; - - for (stp = suffixes; stp; stp = stp->suff_next) - if (strequ (stp->suff_suffix, suffix)) - return stp; - return NULL; -} - -static struct language * -get_lang_entry (char const *lang_name) -{ - struct language *ltp; - - if (lang_name == NULL) - lang_name = ""; - - for (ltp = languages; ltp; ltp = ltp->lang_next) - if (ltp->lang_name == lang_name || strequ (ltp->lang_name, lang_name)) - return ltp; - return ltp; -} - -char const * -get_lang_name (char const *suffix) -{ - struct suffix *stp; - - stp = get_suffix_entry (suffix); - if (stp == NULL) - return NULL; - return stp->suff_language->lang_name; -} - -char const * -get_filter (char const *suffix) -{ - struct suffix *stp; - - stp = get_suffix_entry (suffix); - if (stp == NULL) - return NULL; - return stp->suff_language->lang_filter; -} - -get_token_t -get_scanner (char const *lang) -{ - struct language *ltp; - - ltp = get_lang_entry (lang); - if (ltp == NULL) - return NULL; - return ltp->lang_get_token; -} - -void -set_scan_args (int op, char *arg) -{ - struct language *ltp, *ltp2; - struct suffix *stp; - char *lhs; - char *lhs2; - int count = 0; - - lhs = arg; - while (isalnum (*arg) || *arg == '.') - arg++; - - if (strequ (lhs, "?=?")) - { - for (stp = suffixes; stp->suff_next; stp = stp->suff_next) - { - printf ("%s%s=%s", (count++ > 0) ? 
", " : "", stp->suff_suffix, stp->suff_language->lang_name); - if (stp->suff_language->lang_filter) - printf (" (%s)", stp->suff_language->lang_filter); - } - if (count) - putchar ('\n'); - return; - } - - if (strnequ (lhs, "?=", 2)) - { - lhs += 2; - ltp = get_lang_entry (lhs); - if (ltp == NULL) - { - printf ("No scanner for language `%s'\n", lhs); - return; - } - for (stp = suffixes; stp->suff_next; stp = stp->suff_next) - if (stp->suff_language == ltp) - { - printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, ltp->lang_name); - if (stp->suff_language->lang_filter) - printf (" (%s)", stp->suff_language->lang_filter); - } - if (count) - putchar ('\n'); - return; - } - - if (strequ (arg, "=?")) - { - lhs[strlen (lhs) - 2] = '\0'; - stp = get_suffix_entry (lhs); - if (stp == NULL) - { - printf ("No scanner assigned to suffix `%s'\n", lhs); - return; - } - printf ("%s=%s", stp->suff_suffix, stp->suff_language->lang_name); - if (stp->suff_language->lang_filter) - printf (" (%s)", stp->suff_language->lang_filter); - printf ("\n"); - return; - } - - if (*arg == '=') - { - *arg++ = '\0'; - - ltp = get_lang_entry (arg); - if (ltp == NULL) - { - fprintf (stderr, "%s: Language undefined: %s\n", program_name, arg); - return; - } - stp = get_suffix_entry (lhs); - if (stp == NULL) - { - stp = CALLOC (struct suffix, 1); - stp->suff_suffix = lhs; - stp->suff_language = ltp; - stp->suff_next = suffixes; - suffixes = stp; - } - else if (!strequ (arg, stp->suff_language->lang_name)) - { - fprintf (stderr, "%s: Note: `%s=%s' overrides `%s=%s'\n", program_name, lhs, arg, lhs, stp->suff_language->lang_name); - stp->suff_language = ltp; - } - return; - } - else if (*arg == '/') - { - *arg++ = '\0'; - ltp = get_lang_entry (lhs); - if (ltp->lang_next == NULL) - { - ltp = CALLOC (struct language, 1); - ltp->lang_name = lhs; - ltp->lang_get_token = get_token_text; - ltp->lang_set_args = set_args_text; - ltp->lang_filter = NULL; - ltp->lang_next = languages; - languages = ltp; 
- } - lhs2 = arg; - arg = strchr (arg, '/'); - if (arg == NULL) - ltp2 = ltp; - else - { - *arg++ = '\0'; - ltp2 = get_lang_entry (lhs2); - if (ltp2 == NULL) - { - fprintf (stderr, "%s: language %s not defined.\n", program_name, lhs2); - ltp2 = ltp; - } - } - ltp->lang_get_token = ltp2->lang_get_token; - ltp->lang_set_args = ltp2->lang_set_args; - if (ltp->lang_filter && (!strequ (arg, ltp->lang_filter))) - fprintf (stderr, "%s: Note: `%s/%s' overrides `%s/%s'\n", program_name, lhs, arg, lhs, ltp->lang_filter); - ltp->lang_filter = arg; - return; - } - - if (op == '+') - { - switch (op = *arg++) - { - case '+': - case '-': - case '?': - break; - default: - usage_scan (); - } - for (ltp = languages; ltp->lang_next; ltp = ltp->lang_next) - (*ltp->lang_set_args) (NULL, op, arg); - return; - } - - if (*arg == '-' || *arg == '+' || *arg == '?') - { - op = *arg; - *arg++ = '\0'; - - ltp = get_lang_entry (lhs); - if (ltp == NULL) - { - fprintf (stderr, "%s: Language undefined: %s\n", program_name, lhs); - return; - } - (*ltp->lang_set_args) (lhs, op, arg); - return; - } - - usage_scan (); -} - -static void -usage_scan (void) -{ - fprintf (stderr, "Usage: %s [-S=] [+S(+|-)] [-S(+|-)] [-S//]\n", program_name); - exit (1); -} - -/*************** C & C++ ****************************************************/ - -#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */ -#define DG 0x0002 /* decimal digit [0-9] */ -#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */ -#define C1 0x0008 /* C comment introduction char: / */ -#define C2 0x0010 /* C comment termination char: * */ -#define Q1 0x0020 /* single quote: ' */ -#define Q2 0x0040 /* double quote: " */ -#define ES 0x0080 /* escape char: \ */ -#define NL 0x0100 /* newline: \n */ -#define EF 0x0200 /* EOF */ -#define SK 0x0400 /* Make these chars valid for names within strings */ -#define VH 0x0800 /* VHIL comment introduction char: # */ -#define WS 0x1000 /* White space characters */ - -/* - character 
class membership macros: -*/ -#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */ -#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */ -#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */ -#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */ -#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */ -#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */ -#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */ -/* - The `BORING' classes should be skipped over - until something interesting comes along... -*/ -#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */ -#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */ -#define ISVBORING(c) (!((rct)[c] & (EF|NL))) /* vhil comment fluff */ -#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */ -#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */ - -static unsigned short ctype_c[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, NL, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1, -/*050*/ 0, 0, C2, 0, 0, 0, 0, C1, -/*060*/ DG, DG, DG, DG, DG, DG, DG, DG, -/*070*/ DG, DG, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, -}; - -static int eat_underscore = 1; -static int scan_VHIL = 0; - -static char const * -get_token_VHIL (FILE *input_FILE, int *flags) -{ - if (!scan_VHIL) - set_args_c ("vhil", '+', "v"); - return get_token_c (input_FILE, flags); -} - -/* - Grab the next identifier from the C source - file 
opened with the handle `input_FILE'. - This state machine is built for speed, not elegance. -*/ -static char const * -get_token_c (FILE *input_FILE, int *flags) -{ - static char input_buffer[BUFSIZ]; - static int new_line = 1; - unsigned short *rct = &ctype_c[1]; - int c; - char *id = input_buffer; - -top: - c = getc (input_FILE); - if (new_line) - { - new_line = 0; - if (c == '.') - { - /* Auto-recognize vhil code when you see a '.' in column 1. - also ignore lines that start with a '.' */ - if (!scan_VHIL) - set_args_c ("vhil", '+', "v"); - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - if (c != '#') - goto next; - c = getc (input_FILE); - if (scan_VHIL && ISSPACE (c)) - { - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - while (ISBORING (c)) - c = getc (input_FILE); - if (!ISID1ST (c)) - goto next; - id = input_buffer; - *id++ = c; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - *id = '\0'; - if (strequ (input_buffer, "include")) - { - while (c == ' ' || c == '\t') - c = getc (input_FILE); - if (c == '\n') - { - new_line = 1; - goto top; - } - id = input_buffer; - if (c == '"') - { - c = getc (input_FILE); - while (c != '\n' && c != EOF && c != '"') - { - *id++ = c; - c = getc (input_FILE); - } - *flags = TOK_STRING; - } - else if (c == '<') - { - c = getc (input_FILE); - while (c != '\n' && c != EOF && c != '>') - { - *id++ = c; - c = getc (input_FILE); - } - *flags = TOK_STRING; - } - else if (ISID1ST (c)) - { - *id++ = c; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - *flags = TOK_NAME; - } - else - { - while (c != '\n' && c != EOF) - c = getc (input_FILE); - new_line = 1; - goto top; - } - while (c != '\n' && c != EOF) - c = getc (input_FILE); - new_line = 1; - *id = '\0'; - return input_buffer; - } - if (strnequ (input_buffer, "if", 2) - || strequ (input_buffer, "define") - || strequ (input_buffer, "elif") /* ansi C */ - || (scan_VHIL && strequ (input_buffer, "elsif")) - 
|| strequ (input_buffer, "undef")) - goto next; - while ((c != '\n') && (c != EOF)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - -next: - while (ISBORING (c)) - c = getc (input_FILE); - - switch (c) - { - case '"': - id = input_buffer; - *id++ = c = getc (input_FILE); - for (;;) - { - while (ISQ2BORING (c)) - *id++ = c = getc (input_FILE); - if (c == '\\') - { - *id++ = c = getc (input_FILE); - continue; - } - else if (c != '"') - goto next; - break; - } - *--id = '\0'; - id = input_buffer; - while (ISSTRKEEP (*id)) - id++; - if (*id || id == input_buffer) - { - c = getc (input_FILE); - goto next; - } - *flags = TOK_STRING; - if (eat_underscore && input_buffer[0] == '_' && input_buffer[1]) - return &input_buffer[1]; - else - return input_buffer; - - case '\'': - c = getc (input_FILE); - for (;;) - { - while (ISQ1BORING (c)) - c = getc (input_FILE); - if (c == '\\') - { - c = getc (input_FILE); - continue; - } - else if (c == '\'') - c = getc (input_FILE); - goto next; - } - - case '/': - c = getc (input_FILE); - if (c == '/') - { /* Cope with C++ comment */ - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - else if (c != '*') - goto next; - c = getc (input_FILE); - for (;;) - { - while (ISCBORING (c)) - c = getc (input_FILE); - c = getc (input_FILE); - if (c == '/') - { - c = getc (input_FILE); - goto next; - } - else if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - } - - case '\n': - new_line = 1; - goto top; - - case '#': - if (!scan_VHIL) - { - /* Auto-recognize vhil when find a # in the middle of a line. 
*/ - set_args_c ("vhil", '+', "v"); - } - c = getc (input_FILE); - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - default: - if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - id = input_buffer; - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - } - else if (ISDIGIT (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (input_FILE))) - *id++ = c; - } - else - fprintf (stderr, "junk: `\\%3o'", c); - ungetc (c, input_FILE); - *id = '\0'; - *flags |= TOK_LITERAL; - return input_buffer; - } -} - -static void -set_ctype_c (char const *chars, int type) -{ - unsigned short *rct = &ctype_c[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ctype_c (char const *chars, int type) -{ - unsigned short *rct = &ctype_c[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -usage_c (char const *lang_name) -{ - fprintf (stderr, "Usage: %s does not accept %s scanner arguments\n", program_name, lang_name); - exit (1); -} - -static char document_c[] = "\ -The C scanner arguments take the form -Sc, where \n\ -is one of the following: ( denotes one or more characters)\n\ - (+|-)u . . . . (Do|Don't) strip a leading `_' from ids in strings.\n\ - (+|-)s . . Allow in string ids, and (keep|ignore) those ids.\n\ - -v . . . . . . 
Skip vhil comments."; - -static void -set_args_c (char const *lang_name, int op, char const *arg) -{ - if (op == '?') - { - puts (document_c); - return; - } - switch (*arg++) - { - case 'u': - eat_underscore = (op == '+'); - break; - case 's': - if (op == '+') - set_ctype_c (arg, SK); - else - clear_ctype_c (arg, SK); - break; - case 'v': - set_ctype_c ("$", I1); - set_ctype_c ("#", VH); - set_ctype_c (" \t", WS); - scan_VHIL = 1; - break; - default: - if (lang_name) - usage_c (lang_name); - break; - } -} - -#undef I1 -#undef DG -#undef NM -#undef C1 -#undef C2 -#undef Q1 -#undef Q2 -#undef ES -#undef NL -#undef EF -#undef SK -#undef VH -#undef WS -#undef ISDIGIT -#undef ISNUMBER -#undef ISEOF -#undef ISID1ST -#undef ISIDREST -#undef ISSTRKEEP -#undef ISSPACE -#undef ISBORING -#undef ISCBORING -#undef ISVBORING -#undef ISQ1BORING -#undef ISQ2BORING - -/*************** Assembly ***************************************************/ - -#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ -#define NM 0x02 /* digit [0-9a-fA-FxX] */ -#define NL 0x04 /* newline: \n */ -#define CM 0x08 /* assembler comment char: usually # or | */ -#define IG 0x10 /* ignore `identifiers' with these chars in them */ -#define C1 0x20 /* C comment introduction char: / */ -#define C2 0x40 /* C comment termination char: * */ -#define EF 0x80 /* EOF */ - -/* Assembly Language character classes */ -#define ISID1ST(c) ((rct)[c] & (I1)) -#define ISIDREST(c) ((rct)[c] & (I1|NM)) -#define ISNUMBER(c) ((rct)[c] & (NM)) -#define ISEOF(c) ((rct)[c] & (EF)) -#define ISCOMMENT(c) ((rct)[c] & (CM)) -#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1))) -#define ISCBORING(c) (!((rct)[c] & (EF|NL))) -#define ISCCBORING(c) (!((rct)[c] & (EF|C2))) -#define ISIGNORE(c) ((rct)[c] & (IG)) - -static unsigned char ctype_asm[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, NL, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 
0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*050*/ 0, 0, C2, 0, 0, 0, 0, C1, -/*060*/ NM, NM, NM, NM, NM, NM, NM, NM, -/*070*/ NM, NM, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, - -}; - -static int cpp_on_asm = 1; - -/* - Grab the next identifier the assembly language - source file opened with the handle `input_FILE'. - This state machine is built for speed, not elegance. -*/ -static char const * -get_token_asm (FILE *input_FILE, int *flags) -{ - static char input_buffer[BUFSIZ]; - unsigned char *rct = &ctype_asm[1]; - int c; - char *id = input_buffer; - static int new_line = 1; - -top: - c = getc (input_FILE); - if (cpp_on_asm > 0 && new_line) - { - new_line = 0; - if (c != '#') - goto next; - while (ISBORING (c)) - c = getc (input_FILE); - if (!ISID1ST (c)) - goto next; - id = input_buffer; - *id++ = c; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - *id = '\0'; - if (strequ (input_buffer, "include")) - { - while (c != '"' && c != '<') - c = getc (input_FILE); - id = input_buffer; - *id++ = c = getc (input_FILE); - while ((c = getc (input_FILE)) != '"' && c != '>') - *id++ = c; - *id = '\0'; - *flags = TOK_STRING; - return input_buffer; - } - if (strnequ (input_buffer, "if", 2) - || strequ (input_buffer, "define") - || strequ (input_buffer, "undef")) - goto next; - while (c != '\n') - c = getc (input_FILE); - new_line = 1; - goto top; - } - -next: - while (ISBORING (c)) - c = getc (input_FILE); - - if (ISCOMMENT (c)) - { - while (ISCBORING (c)) - c = getc (input_FILE); - new_line = 1; - } - - if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - - if (c == '\n') - { - new_line = 1; - goto 
top; - } - - if (c == '/') - { - if ((c = getc (input_FILE)) != '*') - goto next; - c = getc (input_FILE); - for (;;) - { - while (ISCCBORING (c)) - c = getc (input_FILE); - c = getc (input_FILE); - if (c == '/') - { - c = getc (input_FILE); - break; - } - else if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - } - goto next; - } - - id = input_buffer; - if (eat_underscore && c == '_' && !ISID1ST (c = getc (input_FILE))) - { - ungetc (c, input_FILE); - return "_"; - } - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - } - else if (ISNUMBER (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (input_FILE))) - *id++ = c; - } - else - { - if (isprint (c)) - fprintf (stderr, "junk: `%c'", c); - else - fprintf (stderr, "junk: `\\%03o'", c); - goto next; - } - - *id = '\0'; - for (id = input_buffer; *id; id++) - if (ISIGNORE (*id)) - goto next; - ungetc (c, input_FILE); - *flags |= TOK_LITERAL; - return input_buffer; -} - -static void -set_ctype_asm (char const *chars, int type) -{ - unsigned char *rct = &ctype_asm[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ctype_asm (char const *chars, int type) -{ - unsigned char *rct = &ctype_asm[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -usage_asm (char const *lang_name) -{ - fprintf (stderr, "Usage: %s -S%s([-c] [-u] [(+|-)a] [(+|-)p] [(+|-)C])\n", program_name, lang_name); - exit (1); -} - -static char document_asm[] = "\ -The Assembler scanner arguments take the form -Sasm, where\n\ - is one of the following: ( denotes one or more characters)\n\ - -c . . . . introduce(s) a comment until end-of-line.\n\ - (+|-)u . . . . (Do|Don't) strip a leading `_' from ids.\n\ - (+|-)a . . Allow in ids, and (keep|ignore) those ids.\n\ - (+|-)p . . . . (Do|Don't) handle C-preprocessor directives.\n\ - (+|-)C . . . . (Do|Don't) handle C-style comments. 
(/* */)"; - -static void -set_args_asm (char const *lang_name, int op, char const *arg) -{ - if (op == '?') - { - puts (document_asm); - return; - } - switch (*arg++) - { - case 'a': - set_ctype_asm (arg, I1 | ((op == '-') ? IG : 0)); - break; - case 'c': - set_ctype_asm (arg, CM); - break; - case 'u': - eat_underscore = (op == '+'); - break; - case 'p': - cpp_on_asm = (op == '+'); - break; - case 'C': - if (op == '+') - { - set_ctype_asm ("/", C1); - set_ctype_asm ("*", C2); - } - else - { - clear_ctype_asm ("/", C1); - clear_ctype_asm ("*", C2); - } - break; - default: - if (lang_name) - usage_asm (lang_name); - break; - } -} - -#undef I1 -#undef NM -#undef NL -#undef CM -#undef IG -#undef C1 -#undef C2 -#undef EF -#undef ISID1ST -#undef ISIDREST -#undef ISNUMBER -#undef ISEOF -#undef ISCOMMENT -#undef ISBORING -#undef ISCBORING -#undef ISCCBORING -#undef ISIGNORE - -/*************** Text *******************************************************/ - -#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ -#define NM 0x02 /* digit [0-9a-fA-FxX] */ -#define SQ 0x04 /* squeeze these out (.,',-) */ -#define EF 0x80 /* EOF */ - -/* Text character classes */ -#define ISID1ST(c) ((rct)[c] & (I1)) -#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ)) -#define ISNUMBER(c) ((rct)[c] & (NM)) -#define ISEOF(c) ((rct)[c] & (EF)) -#define ISBORING(c) (!((rct)[c] & (I1|NM|EF))) -#define ISIDSQUEEZE(c) ((rct)[c] & (SQ)) - -static unsigned char ctype_text[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*050*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*060*/ NM, NM, NM, NM, NM, NM, NM, NM, -/*070*/ NM, NM, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, 0, 0, 
0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, -}; - -/* - Grab the next identifier the text source file opened with the - handle `input_FILE'. This state machine is built for speed, not - elegance. -*/ -static char const * -get_token_text (FILE *input_FILE, int *flags) -{ - static char input_buffer[BUFSIZ]; - unsigned char *rct = &ctype_text[1]; - int c; - char *id = input_buffer; - -top: - c = getc (input_FILE); - while (ISBORING (c)) - c = getc (input_FILE); - if (ISEOF (c)) - return NULL; - id = input_buffer; - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (input_FILE))) - if (!ISIDSQUEEZE (c)) - *id++ = c; - } - else if (ISNUMBER (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (input_FILE))) - *id++ = c; - } - else - { - if (isprint (c)) - fprintf (stderr, "junk: `%c'", c); - else - fprintf (stderr, "junk: `\\%03o'", c); - goto top; - } - - *id = '\0'; - ungetc (c, input_FILE); - *flags |= TOK_LITERAL; - return input_buffer; -} - -static void -set_ctype_text (char const *chars, int type) -{ - unsigned char *rct = &ctype_text[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ctype_text (char const *chars, int type) -{ - unsigned char *rct = &ctype_text[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -usage_text (char const *lang_name) -{ - fprintf (stderr, "Usage: %s -S%s([(+|-)a] [(+|-)s]\n", program_name, lang_name); - exit (1); -} - -static char document_text[] = "\ -The Text scanner arguments take the form -Stext, where\n\ - is one of the following: ( denotes one or more characters)\n\ - (+|-)a . . Include (or exculde) in ids.\n\ - (+|-)s . . 
Squeeze (or don't squeeze) out of ids."; - -static void -set_args_text (char const *lang_name, int op, char const *arg) -{ - if (op == '?') - { - puts (document_text); - return; - } - switch (*arg++) - { - case 'a': - if (op == '+') - set_ctype_text (arg, I1); - else - clear_ctype_text (arg, I1); - break; - case 's': - if (op == '+') - set_ctype_text (arg, SQ); - else - clear_ctype_text (arg, SQ); - break; - default: - if (lang_name) - usage_text (lang_name); - break; - } -} - -#undef I1 -#undef NM -#undef SQ -#undef EF -#undef ISID1ST -#undef ISIDREST -#undef ISNUMBER -#undef ISEOF -#undef ISBORING -#undef ISIDSQUEEZE diff --git a/scanners.h b/scanners.h deleted file mode 100644 index 7f6eb30..0000000 --- a/scanners.h +++ /dev/null @@ -1,30 +0,0 @@ -/* scanners.h -- defs for interface to scanners.c - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _scanners_h_ -#define _scanners_h_ - -typedef char const *(*get_token_t) __P((FILE*, int*)); - -char const *get_lang_name __P((char const *suffix)); -char const *get_filter __P((char const *suffix)); -get_token_t get_scanner __P((char const *lang_name)); -void set_scan_args __P((int op, char *arg)); -void init_scanners __P((void)); - -#endif /* not _scanners_h_ */ diff --git a/stamp-vti b/stamp-vti deleted file mode 100644 index 9788f70..0000000 --- a/stamp-vti +++ /dev/null @@ -1 +0,0 @@ -timestamp diff --git a/strcasecmp.c b/strcasecmp.c deleted file mode 100644 index cd038e3..0000000 --- a/strcasecmp.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 1987 Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms are permitted - * provided that this notice is preserved and that due credit is given - * to the University of California at Berkeley. The name of the University - * may not be used to endorse or promote products derived from this - * software without specific written prior permission. This software - * is provided ``as is'' without express or implied warranty. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)strcasecmp.c 5.5 (Berkeley) 11/24/87"; -#endif /* LIBC_SCCS and not lint */ - -#include -#include - -/* - * This array is designed for mapping upper and lower case letter - * together for a case independent comparison. The mappings are -p * based upon ascii character sequences. 
- */ -static unsigned char charmap[] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', 
'\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', -}; - -int -strcasecmp(char const *s1, char const *s2) -{ - unsigned char u1, u2; - - for (;;) { - u1 = (unsigned char) *s1++; - u2 = (unsigned char) *s2++; - if (charmap[u1] != charmap[u2]) { - return charmap[u1] - charmap[u2]; - } - if (u1 == '\0') { - return 0; - } - } -} - diff --git a/strxtra.h b/strxtra.h deleted file mode 100644 index d992c03..0000000 --- a/strxtra.h +++ /dev/null @@ -1,41 +0,0 @@ -/* strxtra.c -- convenient string manipulation macros - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _strxtra_h_ -#define _strxtra_h_ - -#if HAVE_STDLIB_H -#include -#else /* not HAVE_STDLIB_H */ -#if HAVE_MALLOC_H -#include -#endif /* HAVE_MALLOC_H */ -#endif /* not HAVE_STDLIB_H */ - -#define strequ(s1, s2) (strcmp ((s1), (s2)) == 0) -#define strnequ(s1, s2, n) (strncmp ((s1), (s2), (n)) == 0) -#define strcaseequ(s1, s2) (strcasecmp ((s1), (s2)) == 0) -#define strncaseequ(s1, s2, n) (strncasecmp ((s1), (s2), (n)) == 0) -#ifndef HAVE_STRDUP -#define strdup(s) (strcpy (calloc (1, strlen (s) + 1), (s))) -#else -char *strdup (); -#endif -#define strndup(s, n) (strncpy (calloc (1, (n)+1), (s), (n))) - -#endif /* not _strxtra_h_ */ diff --git a/texinfo.tex b/texinfo.tex deleted file mode 100644 index dfd57a9..0000000 --- a/texinfo.tex +++ /dev/null @@ -1,4421 +0,0 @@ -%% TeX macros to handle texinfo files - -% Copyright (C) 1985, 86, 88, 90, 91, 92, 93, 1994 Free Software Foundation, Inc. - -%This texinfo.tex file is free software; you can redistribute it and/or -%modify it under the terms of the GNU General Public License as -%published by the Free Software Foundation; either version 2, or (at -%your option) any later version. - -%This texinfo.tex file is distributed in the hope that it will be -%useful, but WITHOUT ANY WARRANTY; without even the implied warranty -%of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -%General Public License for more details. - -%You should have received a copy of the GNU General Public License -%along with this texinfo.tex file; see the file COPYING. If not, write -%to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, -%USA. - - -%In other words, you are welcome to use, share and improve this program. -%You are forbidden to forbid anyone else to use, share and improve -%what you give them. Help stamp out software-hoarding! - - -% Send bug reports to bug-texinfo@prep.ai.mit.edu. -% Please include a *precise* test case in each bug report. 
- - -% Make it possible to create a .fmt file just by loading this file: -% if the underlying format is not loaded, start by loading it now. -% Added by gildea November 1993. -\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi - -% This automatically updates the version number based on RCS. -\def\deftexinfoversion$#1: #2 ${\def\texinfoversion{#2}} -\deftexinfoversion$Revision$ -\message{Loading texinfo package [Version \texinfoversion]:} - -% If in a .fmt file, print the version number -% and turn on active characters that we couldn't do earlier because -% they might have appeared in the input file name. -\everyjob{\message{[Texinfo version \texinfoversion]}\message{} - \catcode`+=\active \catcode`\_=\active} - -% Save some parts of plain tex whose names we will redefine. - -\let\ptextilde=\~ -\let\ptexlbrace=\{ -\let\ptexrbrace=\} -\let\ptexdots=\dots -\let\ptexdot=\. -\let\ptexstar=\* -\let\ptexend=\end -\let\ptexbullet=\bullet -\let\ptexb=\b -\let\ptexc=\c -\let\ptexi=\i -\let\ptext=\t -\let\ptexl=\l -\let\ptexL=\L - -% Be sure we're in horizontal mode when doing a tie, since we make space -% equivalent to this in @example-like environments. Otherwise, a space -% at the beginning of a line will start with \penalty -- and -% since \penalty is valid in vertical mode, we'd end up putting the -% penalty on the vertical list instead of in the new paragraph. -{\catcode`@ = 11 - \gdef\tie{\leavevmode\penalty\@M\ } -} -\let\~ = \tie % And make it available as @~. - -\message{Basics,} -\chardef\other=12 - -% If this character appears in an error message or help string, it -% starts a new line in the output. -\newlinechar = `^^J - -% Set up fixed words for English. 
-\ifx\putwordChapter\undefined{\gdef\putwordChapter{Chapter}}\fi% -\def\putwordInfo{Info}% -\ifx\putwordSee\undefined{\gdef\putwordSee{See}}\fi% -\ifx\putwordsee\undefined{\gdef\putwordsee{see}}\fi% -\ifx\putwordfile\undefined{\gdef\putwordfile{file}}\fi% -\ifx\putwordpage\undefined{\gdef\putwordpage{page}}\fi% -\ifx\putwordsection\undefined{\gdef\putwordsection{section}}\fi% -\ifx\putwordSection\undefined{\gdef\putwordSection{Section}}\fi% -\ifx\putwordTableofContents\undefined{\gdef\putwordTableofContents{Table of Contents}}\fi% -\ifx\putwordShortContents\undefined{\gdef\putwordShortContents{Short Contents}}\fi% -\ifx\putwordAppendix\undefined{\gdef\putwordAppendix{Appendix}}\fi% - -% Ignore a token. -% -\def\gobble#1{} - -\hyphenation{ap-pen-dix} -\hyphenation{mini-buf-fer mini-buf-fers} -\hyphenation{eshell} - -% Margin to add to right of even pages, to left of odd pages. -\newdimen \bindingoffset \bindingoffset=0pt -\newdimen \normaloffset \normaloffset=\hoffset -\newdimen\pagewidth \newdimen\pageheight -\pagewidth=\hsize \pageheight=\vsize - -% Sometimes it is convenient to have everything in the transcript file -% and nothing on the terminal. We don't just call \tracingall here, -% since that produces some useless output on the terminal. -% -\def\gloggingall{\begingroup \globaldefs = 1 \loggingall \endgroup}% -\def\loggingall{\tracingcommands2 \tracingstats2 - \tracingpages1 \tracingoutput1 \tracinglostchars1 - \tracingmacros2 \tracingparagraphs1 \tracingrestores1 - \showboxbreadth\maxdimen\showboxdepth\maxdimen -}% - -%---------------------Begin change----------------------- -% -%%%% For @cropmarks command. -% Dimensions to add cropmarks at corners Added by P. A. MacKay, 12 Nov. 
1986 -% -\newdimen\cornerlong \newdimen\cornerthick -\newdimen \topandbottommargin -\newdimen \outerhsize \newdimen \outervsize -\cornerlong=1pc\cornerthick=.3pt % These set size of cropmarks -\outerhsize=7in -%\outervsize=9.5in -% Alternative @smallbook page size is 9.25in -\outervsize=9.25in -\topandbottommargin=.75in -% -%---------------------End change----------------------- - -% \onepageout takes a vbox as an argument. Note that \pagecontents -% does insertions itself, but you have to call it yourself. -\chardef\PAGE=255 \output={\onepageout{\pagecontents\PAGE}} -\def\onepageout#1{\hoffset=\normaloffset -\ifodd\pageno \advance\hoffset by \bindingoffset -\else \advance\hoffset by -\bindingoffset\fi -{\escapechar=`\\\relax % makes sure backslash is used in output files. -\shipout\vbox{{\let\hsize=\pagewidth \makeheadline} \pagebody{#1}% -{\let\hsize=\pagewidth \makefootline}}}% -\advancepageno \ifnum\outputpenalty>-20000 \else\dosupereject\fi} - -%%%% For @cropmarks command %%%% - -% Here is a modification of the main output routine for Near East Publications -% This provides right-angle cropmarks at all four corners. -% The contents of the page are centerlined into the cropmarks, -% and any desired binding offset is added as an \hskip on either -% site of the centerlined box. (P. A. MacKay, 12 November, 1986) -% -\def\croppageout#1{\hoffset=0pt % make sure this doesn't mess things up -{\escapechar=`\\\relax % makes sure backslash is used in output files. 
- \shipout - \vbox to \outervsize{\hsize=\outerhsize - \vbox{\line{\ewtop\hfill\ewtop}} - \nointerlineskip - \line{\vbox{\moveleft\cornerthick\nstop} - \hfill - \vbox{\moveright\cornerthick\nstop}} - \vskip \topandbottommargin - \centerline{\ifodd\pageno\hskip\bindingoffset\fi - \vbox{ - {\let\hsize=\pagewidth \makeheadline} - \pagebody{#1} - {\let\hsize=\pagewidth \makefootline}} - \ifodd\pageno\else\hskip\bindingoffset\fi} - \vskip \topandbottommargin plus1fill minus1fill - \boxmaxdepth\cornerthick - \line{\vbox{\moveleft\cornerthick\nsbot} - \hfill - \vbox{\moveright\cornerthick\nsbot}} - \nointerlineskip - \vbox{\line{\ewbot\hfill\ewbot}} - }} - \advancepageno - \ifnum\outputpenalty>-20000 \else\dosupereject\fi} -% -% Do @cropmarks to get crop marks -\def\cropmarks{\let\onepageout=\croppageout } - -\newinsert\margin \dimen\margin=\maxdimen - -\def\pagebody#1{\vbox to\pageheight{\boxmaxdepth=\maxdepth #1}} -{\catcode`\@ =11 -\gdef\pagecontents#1{\ifvoid\topins\else\unvbox\topins\fi -% marginal hacks, juha@viisa.uucp (Juha Takala) -\ifvoid\margin\else % marginal info is present - \rlap{\kern\hsize\vbox to\z@{\kern1pt\box\margin \vss}}\fi -\dimen@=\dp#1 \unvbox#1 -\ifvoid\footins\else\vskip\skip\footins\footnoterule \unvbox\footins\fi -\ifr@ggedbottom \kern-\dimen@ \vfil \fi} -} - -% -% Here are the rules for the cropmarks. Note that they are -% offset so that the space between them is truly \outerhsize or \outervsize -% (P. A. MacKay, 12 November, 1986) -% -\def\ewtop{\vrule height\cornerthick depth0pt width\cornerlong} -\def\nstop{\vbox - {\hrule height\cornerthick depth\cornerlong width\cornerthick}} -\def\ewbot{\vrule height0pt depth\cornerthick width\cornerlong} -\def\nsbot{\vbox - {\hrule height\cornerlong depth\cornerthick width\cornerthick}} - -% Parse an argument, then pass it to #1. The argument is the rest of -% the input line (except we remove a trailing comment). #1 should be a -% macro which expects an ordinary undelimited TeX argument. 
-% -\def\parsearg#1{% - \let\next = #1% - \begingroup - \obeylines - \futurelet\temp\parseargx -} - -% If the next token is an obeyed space (from an @example environment or -% the like), remove it and recurse. Otherwise, we're done. -\def\parseargx{% - % \obeyedspace is defined far below, after the definition of \sepspaces. - \ifx\obeyedspace\temp - \expandafter\parseargdiscardspace - \else - \expandafter\parseargline - \fi -} - -% Remove a single space (as the delimiter token to the macro call). -{\obeyspaces % - \gdef\parseargdiscardspace {\futurelet\temp\parseargx}} - -{\obeylines % - \gdef\parseargline#1^^M{% - \endgroup % End of the group started in \parsearg. - % - % First remove any @c comment, then any @comment. - % Result of each macro is put in \toks0. - \argremovec #1\c\relax % - \expandafter\argremovecomment \the\toks0 \comment\relax % - % - % Call the caller's macro, saved as \next in \parsearg. - \expandafter\next\expandafter{\the\toks0}% - }% -} - -% Since all \c{,omment} does is throw away the argument, we can let TeX -% do that for us. The \relax here is matched by the \relax in the call -% in \parseargline; it could be more or less anything, its purpose is -% just to delimit the argument to the \c. -\def\argremovec#1\c#2\relax{\toks0 = {#1}} -\def\argremovecomment#1\comment#2\relax{\toks0 = {#1}} - -% \argremovec{,omment} might leave us with trailing spaces, though; e.g., -% @end itemize @c foo -% will have two active spaces as part of the argument with the -% `itemize'. Here we remove all active spaces from #1, and assign the -% result to \toks0. -% -% This loses if there are any *other* active characters besides spaces -% in the argument -- _ ^ +, for example -- since they get expanded. -% Fortunately, Texinfo does not define any such commands. (If it ever -% does, the catcode of the characters in questionwill have to be changed -% here.) 
But this means we cannot call \removeactivespaces as part of -% \argremovec{,omment}, since @c uses \parsearg, and thus the argument -% that \parsearg gets might well have any character at all in it. -% -\def\removeactivespaces#1{% - \begingroup - \ignoreactivespaces - \edef\temp{#1}% - \global\toks0 = \expandafter{\temp}% - \endgroup -} - -% Change the active space to expand to nothing. -% -\begingroup - \obeyspaces - \gdef\ignoreactivespaces{\obeyspaces\let =\empty} -\endgroup - - -\def\flushcr{\ifx\par\lisppar \def\next##1{}\else \let\next=\relax \fi \next} - -%% These are used to keep @begin/@end levels from running away -%% Call \inENV within environments (after a \begingroup) -\newif\ifENV \ENVfalse \def\inENV{\ifENV\relax\else\ENVtrue\fi} -\def\ENVcheck{% -\ifENV\errmessage{Still within an environment. Type Return to continue.} -\endgroup\fi} % This is not perfect, but it should reduce lossage - -% @begin foo is the same as @foo, for now. -\newhelp\EMsimple{Type to continue.} - -\outer\def\begin{\parsearg\beginxxx} - -\def\beginxxx #1{% -\expandafter\ifx\csname #1\endcsname\relax -{\errhelp=\EMsimple \errmessage{Undefined command @begin #1}}\else -\csname #1\endcsname\fi} - -% @end foo executes the definition of \Efoo. -% -\def\end{\parsearg\endxxx} -\def\endxxx #1{% - \removeactivespaces{#1}% - \edef\endthing{\the\toks0}% - % - \expandafter\ifx\csname E\endthing\endcsname\relax - \expandafter\ifx\csname \endthing\endcsname\relax - % There's no \foo, i.e., no ``environment'' foo. - \errhelp = \EMsimple - \errmessage{Undefined command `@end \endthing'}% - \else - \unmatchedenderror\endthing - \fi - \else - % Everything's ok; the right environment has been started. - \csname E\endthing\endcsname - \fi -} - -% There is an environment #1, but it hasn't been started. Give an error. 
-% -\def\unmatchedenderror#1{% - \errhelp = \EMsimple - \errmessage{This `@end #1' doesn't have a matching `@#1'}% -} - -% Define the control sequence \E#1 to give an unmatched @end error. -% -\def\defineunmatchedend#1{% - \expandafter\def\csname E#1\endcsname{\unmatchedenderror{#1}}% -} - - -% Single-spacing is done by various environments (specifically, in -% \nonfillstart and \quotations). -\newskip\singlespaceskip \singlespaceskip = 12.5pt -\def\singlespace{% - % Why was this kern here? It messes up equalizing space above and below - % environments. --karl, 6may93 - %{\advance \baselineskip by -\singlespaceskip - %\kern \baselineskip}% - \setleading \singlespaceskip -} - -%% Simple single-character @ commands - -% @@ prints an @ -% Kludge this until the fonts are right (grr). -\def\@{{\tt \char '100}} - -% This is turned off because it was never documented -% and you can use @w{...} around a quote to suppress ligatures. -%% Define @` and @' to be the same as ` and ' -%% but suppressing ligatures. -%\def\`{{`}} -%\def\'{{'}} - -% Used to generate quoted braces. - -\def\mylbrace {{\tt \char '173}} -\def\myrbrace {{\tt \char '175}} -\let\{=\mylbrace -\let\}=\myrbrace - -% @: forces normal size whitespace following. -\def\:{\spacefactor=1000 } - -% @* forces a line break. -\def\*{\hfil\break\hbox{}\ignorespaces} - -% @. is an end-of-sentence period. -\def\.{.\spacefactor=3000 } - -% @enddots{} is an end-of-sentence ellipsis. -\gdef\enddots{$\mathinner{\ldotp\ldotp\ldotp\ldotp}$\spacefactor=3000} - -% @! is an end-of-sentence bang. -\gdef\!{!\spacefactor=3000 } - -% @? is an end-of-sentence query. -\gdef\?{?\spacefactor=3000 } - -% @w prevents a word break. Without the \leavevmode, @w at the -% beginning of a paragraph, when TeX is still in vertical mode, would -% produce a whole line of output instead of starting the paragraph. -\def\w#1{\leavevmode\hbox{#1}} - -% @group ... @end group forces ... to be all on one page, by enclosing -% it in a TeX vbox. 
We use \vtop instead of \vbox to construct the box -% to keep its height that of a normal line. According to the rules for -% \topskip (p.114 of the TeXbook), the glue inserted is -% max (\topskip - \ht (first item), 0). If that height is large, -% therefore, no glue is inserted, and the space between the headline and -% the text is small, which looks bad. -% -\def\group{\begingroup - \ifnum\catcode13=\active \else - \errhelp = \groupinvalidhelp - \errmessage{@group invalid in context where filling is enabled}% - \fi - % - % The \vtop we start below produces a box with normal height and large - % depth; thus, TeX puts \baselineskip glue before it, and (when the - % next line of text is done) \lineskip glue after it. (See p.82 of - % the TeXbook.) Thus, space below is not quite equal to space - % above. But it's pretty close. - \def\Egroup{% - \egroup % End the \vtop. - \endgroup % End the \group. - }% - % - \vtop\bgroup - % We have to put a strut on the last line in case the @group is in - % the midst of an example, rather than completely enclosing it. - % Otherwise, the interline space between the last line of the group - % and the first line afterwards is too small. But we can't put the - % strut in \Egroup, since there it would be on a line by itself. - % Hence this just inserts a strut at the beginning of each line. - \everypar = {\strut}% - % - % Since we have a strut on every line, we don't need any of TeX's - % normal interline spacing. - \offinterlineskip - % - % OK, but now we have to do something about blank - % lines in the input in @example-like environments, which normally - % just turn into \lisppar, which will insert no space now that we've - % turned off the interline space. Simplest is to make them be an - % empty paragraph. - \ifx\par\lisppar - \edef\par{\leavevmode \par}% - % - % Reset ^^M's definition to new definition of \par. 
- \obeylines - \fi - % - % Do @comment since we are called inside an environment such as - % @example, where each end-of-line in the input causes an - % end-of-line in the output. We don't want the end-of-line after - % the `@group' to put extra space in the output. Since @group - % should appear on a line by itself (according to the Texinfo - % manual), we don't worry about eating any user text. - \comment -} -% -% TeX puts in an \escapechar (i.e., `@') at the beginning of the help -% message, so this ends up printing `@group can only ...'. -% -\newhelp\groupinvalidhelp{% -group can only be used in environments such as @example,^^J% -where each line of input produces a line of output.} - -% @need space-in-mils -% forces a page break if there is not space-in-mils remaining. - -\newdimen\mil \mil=0.001in - -\def\need{\parsearg\needx} - -% Old definition--didn't work. -%\def\needx #1{\par % -%% This method tries to make TeX break the page naturally -%% if the depth of the box does not fit. -%{\baselineskip=0pt% -%\vtop to #1\mil{\vfil}\kern -#1\mil\penalty 10000 -%\prevdepth=-1000pt -%}} - -\def\needx#1{% - % Go into vertical mode, so we don't make a big box in the middle of a - % paragraph. - \par - % - % Don't add any leading before our big empty box, but allow a page - % break, since the best break might be right here. - \allowbreak - \nointerlineskip - \vtop to #1\mil{\vfil}% - % - % TeX does not even consider page breaks if a penalty added to the - % main vertical list is 10000 or more. But in order to see if the - % empty box we just added fits on the page, we must make it consider - % page breaks. On the other hand, we don't want to actually break the - % page after the empty box. So we use a penalty of 9999. - % - % There is an extremely small chance that TeX will actually break the - % page at this \penalty, if there are no other feasible breakpoints in - % sight. 
(If the user is using lots of big @group commands, which - % almost-but-not-quite fill up a page, TeX will have a hard time doing - % good page breaking, for example.) However, I could not construct an - % example where a page broke at this \penalty; if it happens in a real - % document, then we can reconsider our strategy. - \penalty9999 - % - % Back up by the size of the box, whether we did a page break or not. - \kern -#1\mil - % - % Do not allow a page break right after this kern. - \nobreak -} - -% @br forces paragraph break - -\let\br = \par - -% @dots{} output some dots - -\def\dots{$\ldots$} - -% @page forces the start of a new page - -\def\page{\par\vfill\supereject} - -% @exdent text.... -% outputs text on separate line in roman font, starting at standard page margin - -% This records the amount of indent in the innermost environment. -% That's how much \exdent should take out. -\newskip\exdentamount - -% This defn is used inside fill environments such as @defun. -\def\exdent{\parsearg\exdentyyy} -\def\exdentyyy #1{{\hfil\break\hbox{\kern -\exdentamount{\rm#1}}\hfil\break}} - -% This defn is used inside nofill environments such as @example. -\def\nofillexdent{\parsearg\nofillexdentyyy} -\def\nofillexdentyyy #1{{\advance \leftskip by -\exdentamount -\leftline{\hskip\leftskip{\rm#1}}}} - -%\hbox{{\rm#1}}\hfil\break}} - -% @include file insert text of that file as input. - -\def\include{\parsearg\includezzz} -%Use \input\thisfile to avoid blank after \input, which may be an active -%char (in which case the blank would become the \input argument). -%The grouping keeps the value of \thisfile correct even when @include -%is nested. 
-\def\includezzz #1{\begingroup -\def\thisfile{#1}\input\thisfile -\endgroup} - -\def\thisfile{} - -% @center line outputs that line, centered - -\def\center{\parsearg\centerzzz} -\def\centerzzz #1{{\advance\hsize by -\leftskip -\advance\hsize by -\rightskip -\centerline{#1}}} - -% @sp n outputs n lines of vertical space - -\def\sp{\parsearg\spxxx} -\def\spxxx #1{\par \vskip #1\baselineskip} - -% @comment ...line which is ignored... -% @c is the same as @comment -% @ignore ... @end ignore is another way to write a comment - -\def\comment{\catcode 64=\other \catcode 123=\other \catcode 125=\other% -\parsearg \commentxxx} - -\def\commentxxx #1{\catcode 64=0 \catcode 123=1 \catcode 125=2 } - -\let\c=\comment - -% Prevent errors for section commands. -% Used in @ignore and in failing conditionals. -\def\ignoresections{% -\let\chapter=\relax -\let\unnumbered=\relax -\let\top=\relax -\let\unnumberedsec=\relax -\let\unnumberedsection=\relax -\let\unnumberedsubsec=\relax -\let\unnumberedsubsection=\relax -\let\unnumberedsubsubsec=\relax -\let\unnumberedsubsubsection=\relax -\let\section=\relax -\let\subsec=\relax -\let\subsubsec=\relax -\let\subsection=\relax -\let\subsubsection=\relax -\let\appendix=\relax -\let\appendixsec=\relax -\let\appendixsection=\relax -\let\appendixsubsec=\relax -\let\appendixsubsection=\relax -\let\appendixsubsubsec=\relax -\let\appendixsubsubsection=\relax -\let\contents=\relax -\let\smallbook=\relax -\let\titlepage=\relax -} - -% Used in nested conditionals, where we have to parse the Texinfo source -% and so want to turn off most commands, in case they are used -% incorrectly. 
-% -\def\ignoremorecommands{% - \let\defcv = \relax - \let\deffn = \relax - \let\deffnx = \relax - \let\defindex = \relax - \let\defivar = \relax - \let\defmac = \relax - \let\defmethod = \relax - \let\defop = \relax - \let\defopt = \relax - \let\defspec = \relax - \let\deftp = \relax - \let\deftypefn = \relax - \let\deftypefun = \relax - \let\deftypevar = \relax - \let\deftypevr = \relax - \let\defun = \relax - \let\defvar = \relax - \let\defvr = \relax - \let\ref = \relax - \let\xref = \relax - \let\printindex = \relax - \let\pxref = \relax - \let\settitle = \relax - \let\include = \relax - \let\lowersections = \relax - \let\down = \relax - \let\raisesections = \relax - \let\up = \relax - \let\set = \relax - \let\clear = \relax - \let\item = \relax - \let\message = \relax -} - -% Ignore @ignore ... @end ignore. -% -\def\ignore{\doignore{ignore}} - -% Also ignore @ifinfo, @ifhtml, @html, @menu, and @direntry text. -% -\def\ifinfo{\doignore{ifinfo}} -\def\ifhtml{\doignore{ifhtml}} -\def\html{\doignore{html}} -\def\menu{\doignore{menu}} -\def\direntry{\doignore{direntry}} - -% Ignore text until a line `@end #1'. -% -\def\doignore#1{\begingroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define a command to swallow text until we reach `@end #1'. - \long\def\doignoretext##1\end #1{\enddoignore}% - % - % Make sure that spaces turn into tokens that match what \doignoretext wants. - \catcode32 = 10 - % - % And now expand that command. - \doignoretext -} - -% What we do to finish off ignored text. -% -\def\enddoignore{\endgroup\ignorespaces}% - -\newif\ifwarnedobs\warnedobsfalse -\def\obstexwarn{% - \ifwarnedobs\relax\else - % We need to warn folks that they may have trouble with TeX 3.0. - % This uses \immediate\write16 rather than \message to get newlines. 
- \immediate\write16{} - \immediate\write16{***WARNING*** for users of Unix TeX 3.0!} - \immediate\write16{This manual trips a bug in TeX version 3.0 (tex hangs).} - \immediate\write16{If you are running another version of TeX, relax.} - \immediate\write16{If you are running Unix TeX 3.0, kill this TeX process.} - \immediate\write16{ Then upgrade your TeX installation if you can.} - \immediate\write16{If you are stuck with version 3.0, run the} - \immediate\write16{ script ``tex3patch'' from the Texinfo distribution} - \immediate\write16{ to use a workaround.} - \immediate\write16{} - \warnedobstrue - \fi -} - -% **In TeX 3.0, setting text in \nullfont hangs tex. For a -% workaround (which requires the file ``dummy.tfm'' to be installed), -% uncomment the following line: -%%%%%\font\nullfont=dummy\let\obstexwarn=\relax - -% Ignore text, except that we keep track of conditional commands for -% purposes of nesting, up to an `@end #1' command. -% -\def\nestedignore#1{% - \obstexwarn - % We must actually expand the ignored text to look for the @end - % command, so that nested ignore constructs work. Thus, we put the - % text into a \vbox and then do nothing with the result. To minimize - % the change of memory overflow, we follow the approach outlined on - % page 401 of the TeXbook: make the current font be a dummy font. - % - \setbox0 = \vbox\bgroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define `@end #1' to end the box, which will in turn undefine the - % @end command again. - \expandafter\def\csname E#1\endcsname{\egroup\ignorespaces}% - % - % We are going to be parsing Texinfo commands. Most cause no - % trouble when they are used incorrectly, but some commands do - % complicated argument parsing or otherwise get confused, so we - % undefine them. - % - % We can't do anything about stray @-signs, unfortunately; - % they'll produce `undefined control sequence' errors. 
- \ignoremorecommands - % - % Set the current font to be \nullfont, a TeX primitive, and define - % all the font commands to also use \nullfont. We don't use - % dummy.tfm, as suggested in the TeXbook, because not all sites - % might have that installed. Therefore, math mode will still - % produce output, but that should be an extremely small amount of - % stuff compared to the main input. - % - \nullfont - \let\tenrm = \nullfont \let\tenit = \nullfont \let\tensl = \nullfont - \let\tenbf = \nullfont \let\tentt = \nullfont \let\smallcaps = \nullfont - \let\tensf = \nullfont - % Similarly for index fonts (mostly for their use in - % smallexample) - \let\indrm = \nullfont \let\indit = \nullfont \let\indsl = \nullfont - \let\indbf = \nullfont \let\indtt = \nullfont \let\indsc = \nullfont - \let\indsf = \nullfont - % - % Don't complain when characters are missing from the fonts. - \tracinglostchars = 0 - % - % Don't bother to do space factor calculations. - \frenchspacing - % - % Don't report underfull hboxes. - \hbadness = 10000 - % - % Do minimal line-breaking. - \pretolerance = 10000 - % - % Do not execute instructions in @tex - \def\tex{\doignore{tex}} -} - -% @set VAR sets the variable VAR to an empty value. -% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE. -% -% Since we want to separate VAR from REST-OF-LINE (which might be -% empty), we can't just use \parsearg; we have to insert a space of our -% own to delimit the rest of the line, and then take it out again if we -% didn't need it. -% -\def\set{\parsearg\setxxx} -\def\setxxx#1{\setyyy#1 \endsetyyy} -\def\setyyy#1 #2\endsetyyy{% - \def\temp{#2}% - \ifx\temp\empty \global\expandafter\let\csname SET#1\endcsname = \empty - \else \setzzz{#1}#2\endsetzzz % Remove the trailing space \setxxx inserted. - \fi -} -% Can't use \xdef to pre-expand #2 and save some time, since \temp or -% \next or other control sequences that we've defined might get us into -% an infinite loop. Consider `@set foo @cite{bar}'. 
-\def\setzzz#1#2 \endsetzzz{\expandafter\gdef\csname SET#1\endcsname{#2}} - -% @clear VAR clears (i.e., unsets) the variable VAR. -% -\def\clear{\parsearg\clearxxx} -\def\clearxxx#1{\global\expandafter\let\csname SET#1\endcsname=\relax} - -% @value{foo} gets the text saved in variable foo. -% -\def\value#1{\expandafter - \ifx\csname SET#1\endcsname\relax - {\{No value for ``#1''\}} - \else \csname SET#1\endcsname \fi} - -% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined -% with @set. -% -\def\ifset{\parsearg\ifsetxxx} -\def\ifsetxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifsetfail - \else - \expandafter\ifsetsucceed - \fi -} -\def\ifsetsucceed{\conditionalsucceed{ifset}} -\def\ifsetfail{\nestedignore{ifset}} -\defineunmatchedend{ifset} - -% @ifclear VAR ... @end ifclear reads the `...' iff VAR has never been -% defined with @set, or has been undefined with @clear. -% -\def\ifclear{\parsearg\ifclearxxx} -\def\ifclearxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifclearsucceed - \else - \expandafter\ifclearfail - \fi -} -\def\ifclearsucceed{\conditionalsucceed{ifclear}} -\def\ifclearfail{\nestedignore{ifclear}} -\defineunmatchedend{ifclear} - -% @iftex always succeeds; we read the text following, through @end -% iftex). But `@end iftex' should be valid only after an @iftex. -% -\def\iftex{\conditionalsucceed{iftex}} -\defineunmatchedend{iftex} - -% We can't just want to start a group at @iftex (for example) and end it -% at @end iftex, since then @set commands inside the conditional have no -% effect (they'd get reverted at the end of the group). So we must -% define \Eiftex to redefine itself to be its previous value. (We can't -% just define it to fail again with an ``unmatched end'' error, since -% the @ifset might be nested.) -% -\def\conditionalsucceed#1{% - \edef\temp{% - % Remember the current value of \E#1. 
- \let\nece{prevE#1} = \nece{E#1}% - % - % At the `@end #1', redefine \E#1 to be its previous value. - \def\nece{E#1}{\let\nece{E#1} = \nece{prevE#1}}% - }% - \temp -} - -% We need to expand lots of \csname's, but we don't want to expand the -% control sequences after we've constructed them. -% -\def\nece#1{\expandafter\noexpand\csname#1\endcsname} - -% @asis just yields its argument. Used with @table, for example. -% -\def\asis#1{#1} - -% @math means output in math mode. -% We don't use $'s directly in the definition of \math because control -% sequences like \math are expanded when the toc file is written. Then, -% we read the toc file back, the $'s will be normal characters (as they -% should be, according to the definition of Texinfo). So we must use a -% control sequence to switch into and out of math mode. -% -% This isn't quite enough for @math to work properly in indices, but it -% seems unlikely it will ever be needed there. -% -\let\implicitmath = $ -\def\math#1{\implicitmath #1\implicitmath} - -% @bullet and @minus need the same treatment as @math, just above. -\def\bullet{\implicitmath\ptexbullet\implicitmath} -\def\minus{\implicitmath-\implicitmath} - -\def\node{\ENVcheck\parsearg\nodezzz} -\def\nodezzz#1{\nodexxx [#1,]} -\def\nodexxx[#1,#2]{\gdef\lastnode{#1}} -\let\nwnode=\node -\let\lastnode=\relax - -\def\donoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\setref{\lastnode}\fi -\global\let\lastnode=\relax} - -\def\unnumbnoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\unnumbsetref{\lastnode}\fi -\global\let\lastnode=\relax} - -\def\appendixnoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\appendixsetref{\lastnode}\fi -\global\let\lastnode=\relax} - -\let\refill=\relax - -% @setfilename is done at the beginning of every texinfo file. -% So open here the files we need to have open while reading the input. -% This makes it possible to make a .fmt file for texinfo. 
-\def\setfilename{% - \readauxfile - \opencontents - \openindices - \fixbackslash % Turn off hack to swallow `\input texinfo'. - \global\let\setfilename=\comment % Ignore extra @setfilename cmds. - \comment % Ignore the actual filename. -} - -\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend} - -\def\inforef #1{\inforefzzz #1,,,,**} -\def\inforefzzz #1,#2,#3,#4**{\putwordSee{} \putwordInfo{} \putwordfile{} \file{\ignorespaces #3{}}, - node \samp{\ignorespaces#1{}}} - -\message{fonts,} - -% Font-change commands. - -% Texinfo supports the sans serif font style, which plain TeX does not. -% So we set up a \sf analogous to plain's \rm, etc. -\newfam\sffam -\def\sf{\fam=\sffam \tensf} -\let\li = \sf % Sometimes we call it \li, not \sf. - -%% Try out Computer Modern fonts at \magstephalf -\let\mainmagstep=\magstephalf - -% Set the font macro #1 to the font named #2, adding on the -% specified font prefix (normally `cm'). -\def\setfont#1#2{\font#1=\fontprefix#2} - -% Use cm as the default font prefix. -% To specify the font prefix, you must define \fontprefix -% before you read in texinfo.tex. -\ifx\fontprefix\undefined -\def\fontprefix{cm} -\fi - -\ifx\bigger\relax -\let\mainmagstep=\magstep1 -\setfont\textrm{r12} -\setfont\texttt{tt12} -\else -\setfont\textrm{r10 scaled \mainmagstep} -\setfont\texttt{tt10 scaled \mainmagstep} -\fi -% Instead of cmb10, you many want to use cmbx10. -% cmbx10 is a prettier font on its own, but cmb10 -% looks better when embedded in a line with cmr10. -\setfont\textbf{b10 scaled \mainmagstep} -\setfont\textit{ti10 scaled \mainmagstep} -\setfont\textsl{sl10 scaled \mainmagstep} -\setfont\textsf{ss10 scaled \mainmagstep} -\setfont\textsc{csc10 scaled \mainmagstep} -\font\texti=cmmi10 scaled \mainmagstep -\font\textsy=cmsy10 scaled \mainmagstep - -% A few fonts for @defun, etc. 
-\setfont\defbf{bx10 scaled \magstep1} %was 1314 -\setfont\deftt{tt10 scaled \magstep1} -\def\df{\let\tentt=\deftt \let\tenbf = \defbf \bf} - -% Fonts for indices and small examples. -% We actually use the slanted font rather than the italic, -% because texinfo normally uses the slanted fonts for that. -% Do not make many font distinctions in general in the index, since they -% aren't very useful. -\setfont\ninett{tt9} -\setfont\indrm{r9} -\setfont\indit{sl9} -\let\indsl=\indit -\let\indtt=\ninett -\let\indsf=\indrm -\let\indbf=\indrm -\setfont\indsc{csc10 at 9pt} -\font\indi=cmmi9 -\font\indsy=cmsy9 - -% Fonts for headings -\setfont\chaprm{bx12 scaled \magstep2} -\setfont\chapit{ti12 scaled \magstep2} -\setfont\chapsl{sl12 scaled \magstep2} -\setfont\chaptt{tt12 scaled \magstep2} -\setfont\chapsf{ss12 scaled \magstep2} -\let\chapbf=\chaprm -\setfont\chapsc{csc10 scaled\magstep3} -\font\chapi=cmmi12 scaled \magstep2 -\font\chapsy=cmsy10 scaled \magstep3 - -\setfont\secrm{bx12 scaled \magstep1} -\setfont\secit{ti12 scaled \magstep1} -\setfont\secsl{sl12 scaled \magstep1} -\setfont\sectt{tt12 scaled \magstep1} -\setfont\secsf{ss12 scaled \magstep1} -\setfont\secbf{bx12 scaled \magstep1} -\setfont\secsc{csc10 scaled\magstep2} -\font\seci=cmmi12 scaled \magstep1 -\font\secsy=cmsy10 scaled \magstep2 - -% \setfont\ssecrm{bx10 scaled \magstep1} % This size an font looked bad. -% \setfont\ssecit{cmti10 scaled \magstep1} % The letters were too crowded. -% \setfont\ssecsl{sl10 scaled \magstep1} -% \setfont\ssectt{tt10 scaled \magstep1} -% \setfont\ssecsf{ss10 scaled \magstep1} - -%\setfont\ssecrm{b10 scaled 1315} % Note the use of cmb rather than cmbx. -%\setfont\ssecit{ti10 scaled 1315} % Also, the size is a little larger than -%\setfont\ssecsl{sl10 scaled 1315} % being scaled magstep1. 
-%\setfont\ssectt{tt10 scaled 1315} -%\setfont\ssecsf{ss10 scaled 1315} - -%\let\ssecbf=\ssecrm - -\setfont\ssecrm{bx12 scaled \magstephalf} -\setfont\ssecit{ti12 scaled \magstephalf} -\setfont\ssecsl{sl12 scaled \magstephalf} -\setfont\ssectt{tt12 scaled \magstephalf} -\setfont\ssecsf{ss12 scaled \magstephalf} -\setfont\ssecbf{bx12 scaled \magstephalf} -\setfont\ssecsc{csc10 scaled \magstep1} -\font\sseci=cmmi12 scaled \magstephalf -\font\ssecsy=cmsy10 scaled \magstep1 -% The smallcaps and symbol fonts should actually be scaled \magstep1.5, -% but that is not a standard magnification. - -% Fonts for title page: -\setfont\titlerm{bx12 scaled \magstep3} -\let\authorrm = \secrm - -% In order for the font changes to affect most math symbols and letters, -% we have to define the \textfont of the standard families. Since -% texinfo doesn't allow for producing subscripts and superscripts, we -% don't bother to reset \scriptfont and \scriptscriptfont (which would -% also require loading a lot more fonts). -% -\def\resetmathfonts{% - \textfont0 = \tenrm \textfont1 = \teni \textfont2 = \tensy - \textfont\itfam = \tenit \textfont\slfam = \tensl \textfont\bffam = \tenbf - \textfont\ttfam = \tentt \textfont\sffam = \tensf -} - - -% The font-changing commands redefine the meanings of \tenSTYLE, instead -% of just \STYLE. We do this so that font changes will continue to work -% in math mode, where it is the current \fam that is relevant in most -% cases, not the current. Plain TeX does, for example, -% \def\bf{\fam=\bffam \tenbf} By redefining \tenbf, we obviate the need -% to redefine \bf itself. 
-\def\textfonts{% - \let\tenrm=\textrm \let\tenit=\textit \let\tensl=\textsl - \let\tenbf=\textbf \let\tentt=\texttt \let\smallcaps=\textsc - \let\tensf=\textsf \let\teni=\texti \let\tensy=\textsy - \resetmathfonts} -\def\chapfonts{% - \let\tenrm=\chaprm \let\tenit=\chapit \let\tensl=\chapsl - \let\tenbf=\chapbf \let\tentt=\chaptt \let\smallcaps=\chapsc - \let\tensf=\chapsf \let\teni=\chapi \let\tensy=\chapsy - \resetmathfonts} -\def\secfonts{% - \let\tenrm=\secrm \let\tenit=\secit \let\tensl=\secsl - \let\tenbf=\secbf \let\tentt=\sectt \let\smallcaps=\secsc - \let\tensf=\secsf \let\teni=\seci \let\tensy=\secsy - \resetmathfonts} -\def\subsecfonts{% - \let\tenrm=\ssecrm \let\tenit=\ssecit \let\tensl=\ssecsl - \let\tenbf=\ssecbf \let\tentt=\ssectt \let\smallcaps=\ssecsc - \let\tensf=\ssecsf \let\teni=\sseci \let\tensy=\ssecsy - \resetmathfonts} -\def\indexfonts{% - \let\tenrm=\indrm \let\tenit=\indit \let\tensl=\indsl - \let\tenbf=\indbf \let\tentt=\indtt \let\smallcaps=\indsc - \let\tensf=\indsf \let\teni=\indi \let\tensy=\indsy - \resetmathfonts} - -% Set up the default fonts, so we can use them for creating boxes. -% -\textfonts - -% Count depth in font-changes, for error checks -\newcount\fontdepth \fontdepth=0 - -% Fonts for short table of contents. -\setfont\shortcontrm{r12} -\setfont\shortcontbf{bx12} -\setfont\shortcontsl{sl12} - -%% Add scribe-like font environments, plus @l for inline lisp (usually sans -%% serif) and @ii for TeX italic - -% \smartitalic{ARG} outputs arg in italics, followed by an italic correction -% unless the following character is such as not to need one. 
-\def\smartitalicx{\ifx\next,\else\ifx\next-\else\ifx\next.\else\/\fi\fi\fi} -\def\smartitalic#1{{\sl #1}\futurelet\next\smartitalicx} - -\let\i=\smartitalic -\let\var=\smartitalic -\let\dfn=\smartitalic -\let\emph=\smartitalic -\let\cite=\smartitalic - -\def\b#1{{\bf #1}} -\let\strong=\b - -% We can't just use \exhyphenpenalty, because that only has effect at -% the end of a paragraph. Restore normal hyphenation at the end of the -% group within which \nohyphenation is presumably called. -% -\def\nohyphenation{\hyphenchar\font = -1 \aftergroup\restorehyphenation} -\def\restorehyphenation{\hyphenchar\font = `- } - -\def\t#1{% - {\tt \nohyphenation \rawbackslash \frenchspacing #1}% - \null -} -\let\ttfont = \t -%\def\samp #1{`{\tt \rawbackslash \frenchspacing #1}'\null} -\def\samp #1{`\tclose{#1}'\null} -\def\key #1{{\tt \nohyphenation \uppercase{#1}}\null} -\def\ctrl #1{{\tt \rawbackslash \hat}#1} - -\let\file=\samp - -% @code is a modification of @t, -% which makes spaces the same size as normal in the surrounding text. -\def\tclose#1{% - {% - % Change normal interword space to be same as for the current font. - \spaceskip = \fontdimen2\font - % - % Switch to typewriter. - \tt - % - % But `\ ' produces the large typewriter interword space. - \def\ {{\spaceskip = 0pt{} }}% - % - % Turn off hyphenation. - \nohyphenation - % - \rawbackslash - \frenchspacing - #1% - }% - \null -} - -% We *must* turn on hyphenation at `-' and `_' in \code. -% Otherwise, it is too hard to avoid overful hboxes -% in the Emacs manual, the Library manual, etc. - -% Unfortunately, TeX uses one parameter (\hyphenchar) to control -% both hyphenation at - and hyphenation within words. -% We must therefore turn them both off (\tclose does that) -% and arrange explicitly to hyphenate an a dash. -% -- rms. 
-{ -\catcode`\-=\active -\catcode`\_=\active -\global\def\code{\begingroup \catcode`\-=\active \let-\codedash \catcode`\_=\active \let_\codeunder \codex} -% The following is used by \doprintindex to insure that long function names -% wrap around. It is necessary for - and _ to be active before the index is -% read from the file, as \entry parses the arguments long before \code is -% ever called. -- mycroft -\global\def\indexbreaks{\catcode`\-=\active \let-\realdash \catcode`\_=\active \let_\realunder} -} -\def\realdash{-} -\def\realunder{_} -\def\codedash{-\discretionary{}{}{}} -\def\codeunder{\normalunderscore\discretionary{}{}{}} -\def\codex #1{\tclose{#1}\endgroup} - -%\let\exp=\tclose %Was temporary - -% @kbd is like @code, except that if the argument is just one @key command, -% then @kbd has no effect. - -\def\xkey{\key} -\def\kbdfoo#1#2#3\par{\def\one{#1}\def\three{#3}\def\threex{??}% -\ifx\one\xkey\ifx\threex\three \key{#2}% -\else\tclose{\look}\fi -\else\tclose{\look}\fi} - -% Typeset a dimension, e.g., `in' or `pt'. The only reason for the -% argument is to make the input look right: @dmn{pt} instead of -% @dmn{}pt. -% -\def\dmn#1{\thinspace #1} - -\def\kbd#1{\def\look{#1}\expandafter\kbdfoo\look??\par} - -\def\l#1{{\li #1}\null} % - -\def\r#1{{\rm #1}} % roman font -% Use of \lowercase was suggested. -\def\sc#1{{\smallcaps#1}} % smallcaps font -\def\ii#1{{\it #1}} % italic font - -\message{page headings,} - -\newskip\titlepagetopglue \titlepagetopglue = 1.5in -\newskip\titlepagebottomglue \titlepagebottomglue = 2pc - -% First the title page. Must do @settitle before @titlepage. 
-\def\titlefont#1{{\titlerm #1}} - -\newif\ifseenauthor -\newif\iffinishedtitlepage - -\def\shorttitlepage{\parsearg\shorttitlepagezzz} -\def\shorttitlepagezzz #1{\begingroup\hbox{}\vskip 1.5in \chaprm \centerline{#1}% - \endgroup\page\hbox{}\page} - -\def\titlepage{\begingroup \parindent=0pt \textfonts - \let\subtitlerm=\tenrm -% I deinstalled the following change because \cmr12 is undefined. -% This change was not in the ChangeLog anyway. --rms. -% \let\subtitlerm=\cmr12 - \def\subtitlefont{\subtitlerm \normalbaselineskip = 13pt \normalbaselines}% - % - \def\authorfont{\authorrm \normalbaselineskip = 16pt \normalbaselines}% - % - % Leave some space at the very top of the page. - \vglue\titlepagetopglue - % - % Now you can print the title using @title. - \def\title{\parsearg\titlezzz}% - \def\titlezzz##1{\leftline{\titlefont{##1}} - % print a rule at the page bottom also. - \finishedtitlepagefalse - \vskip4pt \hrule height 4pt width \hsize \vskip4pt}% - % No rule at page bottom unless we print one at the top with @title. - \finishedtitlepagetrue - % - % Now you can put text using @subtitle. - \def\subtitle{\parsearg\subtitlezzz}% - \def\subtitlezzz##1{{\subtitlefont \rightline{##1}}}% - % - % @author should come last, but may come many times. - \def\author{\parsearg\authorzzz}% - \def\authorzzz##1{\ifseenauthor\else\vskip 0pt plus 1filll\seenauthortrue\fi - {\authorfont \leftline{##1}}}% - % - % Most title ``pages'' are actually two pages long, with space - % at the top of the second. We don't want the ragged left on the second. - \let\oldpage = \page - \def\page{% - \iffinishedtitlepage\else - \finishtitlepage - \fi - \oldpage - \let\page = \oldpage - \hbox{}}% -% \def\page{\oldpage \hbox{}} -} - -\def\Etitlepage{% - \iffinishedtitlepage\else - \finishtitlepage - \fi - % It is important to do the page break before ending the group, - % because the headline and footline are only empty inside the group. 
- % If we use the new definition of \page, we always get a blank page - % after the title page, which we certainly don't want. - \oldpage - \endgroup - \HEADINGSon -} - -\def\finishtitlepage{% - \vskip4pt \hrule height 2pt width \hsize - \vskip\titlepagebottomglue - \finishedtitlepagetrue -} - -%%% Set up page headings and footings. - -\let\thispage=\folio - -\newtoks \evenheadline % Token sequence for heading line of even pages -\newtoks \oddheadline % Token sequence for heading line of odd pages -\newtoks \evenfootline % Token sequence for footing line of even pages -\newtoks \oddfootline % Token sequence for footing line of odd pages - -% Now make Tex use those variables -\headline={{\textfonts\rm \ifodd\pageno \the\oddheadline - \else \the\evenheadline \fi}} -\footline={{\textfonts\rm \ifodd\pageno \the\oddfootline - \else \the\evenfootline \fi}\HEADINGShook} -\let\HEADINGShook=\relax - -% Commands to set those variables. -% For example, this is what @headings on does -% @evenheading @thistitle|@thispage|@thischapter -% @oddheading @thischapter|@thispage|@thistitle -% @evenfooting @thisfile|| -% @oddfooting ||@thisfile - -\def\evenheading{\parsearg\evenheadingxxx} -\def\oddheading{\parsearg\oddheadingxxx} -\def\everyheading{\parsearg\everyheadingxxx} - -\def\evenfooting{\parsearg\evenfootingxxx} -\def\oddfooting{\parsearg\oddfootingxxx} -\def\everyfooting{\parsearg\everyfootingxxx} - -{\catcode`\@=0 % - -\gdef\evenheadingxxx #1{\evenheadingyyy #1@|@|@|@|\finish} -\gdef\evenheadingyyy #1@|#2@|#3@|#4\finish{% -\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\oddheadingxxx #1{\oddheadingyyy #1@|@|@|@|\finish} -\gdef\oddheadingyyy #1@|#2@|#3@|#4\finish{% -\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\everyheadingxxx #1{\everyheadingyyy #1@|@|@|@|\finish} -\gdef\everyheadingyyy #1@|#2@|#3@|#4\finish{% -\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}} 
-\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\evenfootingxxx #1{\evenfootingyyy #1@|@|@|@|\finish} -\gdef\evenfootingyyy #1@|#2@|#3@|#4\finish{% -\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\oddfootingxxx #1{\oddfootingyyy #1@|@|@|@|\finish} -\gdef\oddfootingyyy #1@|#2@|#3@|#4\finish{% -\global\oddfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\everyfootingxxx #1{\everyfootingyyy #1@|@|@|@|\finish} -\gdef\everyfootingyyy #1@|#2@|#3@|#4\finish{% -\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}} -\global\oddfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} -% -}% unbind the catcode of @. - -% @headings double turns headings on for double-sided printing. -% @headings single turns headings on for single-sided printing. -% @headings off turns them off. -% @headings on same as @headings double, retained for compatibility. -% @headings after turns on double-sided headings after this page. -% @headings doubleafter turns on double-sided headings after this page. -% @headings singleafter turns on single-sided headings after this page. -% By default, they are off. - -\def\headings #1 {\csname HEADINGS#1\endcsname} - -\def\HEADINGSoff{ -\global\evenheadline={\hfil} \global\evenfootline={\hfil} -\global\oddheadline={\hfil} \global\oddfootline={\hfil}} -\HEADINGSoff -% When we turn headings on, set the page number to 1. -% For double-sided printing, put current file name in lower left corner, -% chapter name on inside top of right hand pages, document -% title on inside top of left hand pages, and page numbers on outside top -% edge of all pages. -\def\HEADINGSdouble{ -%\pagealignmacro -\global\pageno=1 -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\folio\hfil\thistitle}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} -% For single-sided printing, chapter title goes across top left of page, -% page number on top right. 
-\def\HEADINGSsingle{ -%\pagealignmacro -\global\pageno=1 -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\thischapter\hfil\folio}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} -\def\HEADINGSon{\HEADINGSdouble} - -\def\HEADINGSafter{\let\HEADINGShook=\HEADINGSdoublex} -\let\HEADINGSdoubleafter=\HEADINGSafter -\def\HEADINGSdoublex{% -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\folio\hfil\thistitle}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} - -\def\HEADINGSsingleafter{\let\HEADINGShook=\HEADINGSsinglex} -\def\HEADINGSsinglex{% -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\thischapter\hfil\folio}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} - -% Subroutines used in generating headings -% Produces Day Month Year style of output. -\def\today{\number\day\space -\ifcase\month\or -January\or February\or March\or April\or May\or June\or -July\or August\or September\or October\or November\or December\fi -\space\number\year} - -% Use this if you want the Month Day, Year style of output. -%\def\today{\ifcase\month\or -%January\or February\or March\or April\or May\or June\or -%July\or August\or September\or October\or November\or December\fi -%\space\number\day, \number\year} - -% @settitle line... specifies the title of the document, for headings -% It generates no output of its own - -\def\thistitle{No Title} -\def\settitle{\parsearg\settitlezzz} -\def\settitlezzz #1{\gdef\thistitle{#1}} - -\message{tables,} - -% @tabs -- simple alignment - -% These don't work. For one thing, \+ is defined as outer. -% So these macros cannot even be defined. - -%\def\tabs{\parsearg\tabszzz} -%\def\tabszzz #1{\settabs\+#1\cr} -%\def\tabline{\parsearg\tablinezzz} -%\def\tablinezzz #1{\+#1\cr} -%\def\&{&} - -% Tables -- @table, @ftable, @vtable, @item(x), @kitem(x), @xitem(x). 
- -% default indentation of table text -\newdimen\tableindent \tableindent=.8in -% default indentation of @itemize and @enumerate text -\newdimen\itemindent \itemindent=.3in -% margin between end of table item and start of table text. -\newdimen\itemmargin \itemmargin=.1in - -% used internally for \itemindent minus \itemmargin -\newdimen\itemmax - -% Note @table, @vtable, and @vtable define @item, @itemx, etc., with -% these defs. -% They also define \itemindex -% to index the item name in whatever manner is desired (perhaps none). - -\newif\ifitemxneedsnegativevskip - -\def\itemxpar{\par\ifitemxneedsnegativevskip\vskip-\parskip\nobreak\fi} - -\def\internalBitem{\smallbreak \parsearg\itemzzz} -\def\internalBitemx{\itemxpar \parsearg\itemzzz} - -\def\internalBxitem "#1"{\def\xitemsubtopix{#1} \smallbreak \parsearg\xitemzzz} -\def\internalBxitemx "#1"{\def\xitemsubtopix{#1} \itemxpar \parsearg\xitemzzz} - -\def\internalBkitem{\smallbreak \parsearg\kitemzzz} -\def\internalBkitemx{\itemxpar \parsearg\kitemzzz} - -\def\kitemzzz #1{\dosubind {kw}{\code{#1}}{for {\bf \lastfunction}}% - \itemzzz {#1}} - -\def\xitemzzz #1{\dosubind {kw}{\code{#1}}{for {\bf \xitemsubtopic}}% - \itemzzz {#1}} - -\def\itemzzz #1{\begingroup % - \advance\hsize by -\rightskip - \advance\hsize by -\tableindent - \setbox0=\hbox{\itemfont{#1}}% - \itemindex{#1}% - \nobreak % This prevents a break before @itemx. - % - % Be sure we are not still in the middle of a paragraph. - %{\parskip = 0in - %\par - %}% - % - % If the item text does not fit in the space we have, put it on a line - % by itself, and do not allow a page break either before or after that - % line. We do not start a paragraph here because then if the next - % command is, e.g., @kindex, the whatsit would get put into the - % horizontal list on a line by itself, resulting in extra blank space. - \ifdim \wd0>\itemmax - % - % Make this a paragraph so we get the \parskip glue and wrapping, - % but leave it ragged-right. 
- \begingroup - \advance\leftskip by-\tableindent - \advance\hsize by\tableindent - \advance\rightskip by0pt plus1fil - \leavevmode\unhbox0\par - \endgroup - % - % We're going to be starting a paragraph, but we don't want the - % \parskip glue -- logically it's part of the @item we just started. - \nobreak \vskip-\parskip - % - % Stop a page break at the \parskip glue coming up. Unfortunately - % we can't prevent a possible page break at the following - % \baselineskip glue. - \nobreak - \endgroup - \itemxneedsnegativevskipfalse - \else - % The item text fits into the space. Start a paragraph, so that the - % following text (if any) will end up on the same line. Since that - % text will be indented by \tableindent, we make the item text be in - % a zero-width box. - \noindent - \rlap{\hskip -\tableindent\box0}\ignorespaces% - \endgroup% - \itemxneedsnegativevskiptrue% - \fi -} - -\def\item{\errmessage{@item while not in a table}} -\def\itemx{\errmessage{@itemx while not in a table}} -\def\kitem{\errmessage{@kitem while not in a table}} -\def\kitemx{\errmessage{@kitemx while not in a table}} -\def\xitem{\errmessage{@xitem while not in a table}} -\def\xitemx{\errmessage{@xitemx while not in a table}} - -%% Contains a kludge to get @end[description] to work -\def\description{\tablez{\dontindex}{1}{}{}{}{}} - -\def\table{\begingroup\inENV\obeylines\obeyspaces\tablex} -{\obeylines\obeyspaces% -\gdef\tablex #1^^M{% -\tabley\dontindex#1 \endtabley}} - -\def\ftable{\begingroup\inENV\obeylines\obeyspaces\ftablex} -{\obeylines\obeyspaces% -\gdef\ftablex #1^^M{% -\tabley\fnitemindex#1 \endtabley -\def\Eftable{\endgraf\afterenvbreak\endgroup}% -\let\Etable=\relax}} - -\def\vtable{\begingroup\inENV\obeylines\obeyspaces\vtablex} -{\obeylines\obeyspaces% -\gdef\vtablex #1^^M{% -\tabley\vritemindex#1 \endtabley -\def\Evtable{\endgraf\afterenvbreak\endgroup}% -\let\Etable=\relax}} - -\def\dontindex #1{} -\def\fnitemindex #1{\doind {fn}{\code{#1}}}% -\def\vritemindex #1{\doind 
{vr}{\code{#1}}}% - -{\obeyspaces % -\gdef\tabley#1#2 #3 #4 #5 #6 #7\endtabley{\endgroup% -\tablez{#1}{#2}{#3}{#4}{#5}{#6}}} - -\def\tablez #1#2#3#4#5#6{% -\aboveenvbreak % -\begingroup % -\def\Edescription{\Etable}% Neccessary kludge. -\let\itemindex=#1% -\ifnum 0#3>0 \advance \leftskip by #3\mil \fi % -\ifnum 0#4>0 \tableindent=#4\mil \fi % -\ifnum 0#5>0 \advance \rightskip by #5\mil \fi % -\def\itemfont{#2}% -\itemmax=\tableindent % -\advance \itemmax by -\itemmargin % -\advance \leftskip by \tableindent % -\exdentamount=\tableindent -\parindent = 0pt -\parskip = \smallskipamount -\ifdim \parskip=0pt \parskip=2pt \fi% -\def\Etable{\endgraf\afterenvbreak\endgroup}% -\let\item = \internalBitem % -\let\itemx = \internalBitemx % -\let\kitem = \internalBkitem % -\let\kitemx = \internalBkitemx % -\let\xitem = \internalBxitem % -\let\xitemx = \internalBxitemx % -} - -% This is the counter used by @enumerate, which is really @itemize - -\newcount \itemno - -\def\itemize{\parsearg\itemizezzz} - -\def\itemizezzz #1{% - \begingroup % ended by the @end itemsize - \itemizey {#1}{\Eitemize} -} - -\def\itemizey #1#2{% -\aboveenvbreak % -\itemmax=\itemindent % -\advance \itemmax by -\itemmargin % -\advance \leftskip by \itemindent % -\exdentamount=\itemindent -\parindent = 0pt % -\parskip = \smallskipamount % -\ifdim \parskip=0pt \parskip=2pt \fi% -\def#2{\endgraf\afterenvbreak\endgroup}% -\def\itemcontents{#1}% -\let\item=\itemizeitem} - -% Set sfcode to normal for the chars that usually have another value. -% These are `.?!:;,' -\def\frenchspacing{\sfcode46=1000 \sfcode63=1000 \sfcode33=1000 - \sfcode58=1000 \sfcode59=1000 \sfcode44=1000 } - -% \splitoff TOKENS\endmark defines \first to be the first token in -% TOKENS, and \rest to be the remainder. -% -\def\splitoff#1#2\endmark{\def\first{#1}\def\rest{#2}}% - -% Allow an optional argument of an uppercase letter, lowercase letter, -% or number, to specify the first label in the enumerated list. 
No -% argument is the same as `1'. -% -\def\enumerate{\parsearg\enumeratezzz} -\def\enumeratezzz #1{\enumeratey #1 \endenumeratey} -\def\enumeratey #1 #2\endenumeratey{% - \begingroup % ended by the @end enumerate - % - % If we were given no argument, pretend we were given `1'. - \def\thearg{#1}% - \ifx\thearg\empty \def\thearg{1}\fi - % - % Detect if the argument is a single token. If so, it might be a - % letter. Otherwise, the only valid thing it can be is a number. - % (We will always have one token, because of the test we just made. - % This is a good thing, since \splitoff doesn't work given nothing at - % all -- the first parameter is undelimited.) - \expandafter\splitoff\thearg\endmark - \ifx\rest\empty - % Only one token in the argument. It could still be anything. - % A ``lowercase letter'' is one whose \lccode is nonzero. - % An ``uppercase letter'' is one whose \lccode is both nonzero, and - % not equal to itself. - % Otherwise, we assume it's a number. - % - % We need the \relax at the end of the \ifnum lines to stop TeX from - % continuing to look for a . - % - \ifnum\lccode\expandafter`\thearg=0\relax - \numericenumerate % a number (we hope) - \else - % It's a letter. - \ifnum\lccode\expandafter`\thearg=\expandafter`\thearg\relax - \lowercaseenumerate % lowercase letter - \else - \uppercaseenumerate % uppercase letter - \fi - \fi - \else - % Multiple tokens in the argument. We hope it's a number. - \numericenumerate - \fi -} - -% An @enumerate whose labels are integers. The starting integer is -% given in \thearg. -% -\def\numericenumerate{% - \itemno = \thearg - \startenumeration{\the\itemno}% -} - -% The starting (lowercase) letter is in \thearg. -\def\lowercaseenumerate{% - \itemno = \expandafter`\thearg - \startenumeration{% - % Be sure we're not beyond the end of the alphabet. 
- \ifnum\itemno=0 - \errmessage{No more lowercase letters in @enumerate; get a bigger - alphabet}% - \fi - \char\lccode\itemno - }% -} - -% The starting (uppercase) letter is in \thearg. -\def\uppercaseenumerate{% - \itemno = \expandafter`\thearg - \startenumeration{% - % Be sure we're not beyond the end of the alphabet. - \ifnum\itemno=0 - \errmessage{No more uppercase letters in @enumerate; get a bigger - alphabet} - \fi - \char\uccode\itemno - }% -} - -% Call itemizey, adding a period to the first argument and supplying the -% common last two arguments. Also subtract one from the initial value in -% \itemno, since @item increments \itemno. -% -\def\startenumeration#1{% - \advance\itemno by -1 - \itemizey{#1.}\Eenumerate\flushcr -} - -% @alphaenumerate and @capsenumerate are abbreviations for giving an arg -% to @enumerate. -% -\def\alphaenumerate{\enumerate{a}} -\def\capsenumerate{\enumerate{A}} -\def\Ealphaenumerate{\Eenumerate} -\def\Ecapsenumerate{\Eenumerate} - -% Definition of @item while inside @itemize. - -\def\itemizeitem{% -\advance\itemno by 1 -{\let\par=\endgraf \smallbreak}% -\ifhmode \errmessage{\in hmode at itemizeitem}\fi -{\parskip=0in \hskip 0pt -\hbox to 0pt{\hss \itemcontents\hskip \itemmargin}% -\vadjust{\penalty 1200}}% -\flushcr} - -% @multitable macros -% Amy Hendrickson, 8/18/94 -% -% @multitable ... @endmultitable will make as many columns as desired. -% Contents of each column will wrap at width given in preamble. Width -% can be specified either with sample text given in a template line, -% or in percent of \hsize, the current width of text on page. - -% Table can continue over pages but will only break between lines. - -% To make preamble: -% -% Either define widths of columns in terms of percent of \hsize: -% @multitable @percentofhsize .2 .3 .5 -% @item ... -% -% Numbers following @percentofhsize are the percent of the total -% current hsize to be used for each column. You may use as many -% columns as desired. 
- -% Or use a template: -% @multitable {Column 1 template} {Column 2 template} {Column 3 template} -% @item ... -% using the widest term desired in each column. - - -% Each new table line starts with @item, each subsequent new column -% starts with @tab. Empty columns may be produced by supplying @tab's -% with nothing between them for as many times as empty columns are needed, -% ie, @tab@tab@tab will produce two empty columns. - -% @item, @tab, @multicolumn or @endmulticolumn do not need to be on their -% own lines, but it will not hurt if they are. - -% Sample multitable: - -% @multitable {Column 1 template} {Column 2 template} {Column 3 template} -% @item first col stuff @tab second col stuff @tab third col -% @item -% first col stuff -% @tab -% second col stuff -% @tab -% third col -% @item first col stuff @tab second col stuff -% @tab Many paragraphs of text may be used in any column. -% -% They will wrap at the width determined by the template. -% @item@tab@tab This will be in third column. -% @endmultitable - -% Default dimensions may be reset by user. -% @intableparskip will set vertical space between paragraphs in table. -% @intableparindent will set paragraph indent in table. -% @spacebetweencols will set horizontal space to be left between columns. -% @spacebetweenlines will set vertical space to be left between lines. 
- -%%%% -% Dimensions - -\newdimen\intableparskip -\newdimen\intableparindent -\newdimen\spacebetweencols -\newdimen\spacebetweenlines -\intableparskip=0pt -\intableparindent=6pt -\spacebetweencols=12pt -\spacebetweenlines=12pt - -%%%% -% Macros used to set up halign preamble: -\let\endsetuptable\relax -\def\xendsetuptable{\endsetuptable} -\let\percentofhsize\relax -\def\xpercentofhsize{\percentofhsize} -\newif\ifsetpercent - -\newcount\colcount -\def\setuptable#1{\def\firstarg{#1}% -\ifx\firstarg\xendsetuptable\let\go\relax% -\else - \ifx\firstarg\xpercentofhsize\global\setpercenttrue% - \else - \ifsetpercent - \if#1.\else% - \global\advance\colcount by1 % - \expandafter\xdef\csname col\the\colcount\endcsname{.#1\hsize}% - \fi - \else - \global\advance\colcount by1 - \setbox0=\hbox{#1}% - \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}% - \fi% - \fi% - \let\go\setuptable% -\fi\go} -%%%% -% multitable syntax -\def\tab{&} - -%%%% -% @multitable ... @endmultitable definitions: - -\def\multitable#1\item{\bgroup -\let\item\cr -\tolerance=9500 -\hbadness=9500 -\parskip=\intableparskip -\parindent=\intableparindent -\overfullrule=0pt -\global\colcount=0\relax% -\def\Emultitable{\global\setpercentfalse\global\everycr{}\cr\egroup\egroup}% - % To parse everything between @multitable and @item : -\def\one{#1}\expandafter\setuptable\one\endsetuptable - % Need to reset this to 0 after \setuptable. -\global\colcount=0\relax% - % - % This preamble sets up a generic column definition, which will - % be used as many times as user calls for columns. - % \vtop will set a single line and will also let text wrap and - % continue for many paragraphs if desired. -\halign\bgroup&\global\advance\colcount by 1\relax% -\vtop{\hsize=\expandafter\csname col\the\colcount\endcsname - % In order to keep entries from bumping into each other - % we will add a \leftskip of \spacebetweencols to all columns after - % the first one. 
- % If a template has been used, we will add \spacebetweencols - % to the width of each template entry. - % If user has set preamble in terms of percent of \hsize - % we will use that dimension as the width of the column, and - % the \leftskip will keep entries from bumping into each other. - % Table will start at left margin and final column will justify at - % right margin. -\ifnum\colcount=1 -\else - \ifsetpercent - \else - % If user has set preamble in terms of percent of \hsize - % we will advance \hsize by \spacebetweencols - \advance\hsize by \spacebetweencols - \fi - % In either case we will make \leftskip=\spacebetweencols: -\leftskip=\spacebetweencols -\fi -\noindent##}\cr% - % \everycr will reset column counter, \colcount, at the end of - % each line. Every column entry will cause \colcount to advance by one. - % The table preamble - % looks at the current \colcount to find the correct column width. -\global\everycr{\noalign{\nointerlineskip\vskip\spacebetweenlines -\filbreak%% keeps underfull box messages off when table breaks over pages. -\global\colcount=0\relax}}} - -\message{indexing,} -% Index generation facilities - -% Define \newwrite to be identical to plain tex's \newwrite -% except not \outer, so it can be used within \newindex. -{\catcode`\@=11 -\gdef\newwrite{\alloc@7\write\chardef\sixt@@n}} - -% \newindex {foo} defines an index named foo. -% It automatically defines \fooindex such that -% \fooindex ...rest of line... puts an entry in the index foo. -% It also defines \fooindfile to be the number of the output channel for -% the file that accumulates this index. The file's extension is foo. -% The name of an index should be no more than 2 characters long -% for the sake of vms. 
- -\def\newindex #1{ -\expandafter\newwrite \csname#1indfile\endcsname% Define number for output file -\openout \csname#1indfile\endcsname \jobname.#1 % Open the file -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\doindex {#1}} -} - -% @defindex foo == \newindex{foo} - -\def\defindex{\parsearg\newindex} - -% Define @defcodeindex, like @defindex except put all entries in @code. - -\def\newcodeindex #1{ -\expandafter\newwrite \csname#1indfile\endcsname% Define number for output file -\openout \csname#1indfile\endcsname \jobname.#1 % Open the file -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\docodeindex {#1}} -} - -\def\defcodeindex{\parsearg\newcodeindex} - -% @synindex foo bar makes index foo feed into index bar. -% Do this instead of @defindex foo if you don't want it as a separate index. -\def\synindex #1 #2 {% -\expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname -\expandafter\let\csname#1indfile\endcsname=\synindexfoo -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\doindex {#2}}% -} - -% @syncodeindex foo bar similar, but put all entries made for index foo -% inside @code. -\def\syncodeindex #1 #2 {% -\expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname -\expandafter\let\csname#1indfile\endcsname=\synindexfoo -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\docodeindex {#2}}% -} - -% Define \doindex, the driver for all \fooindex macros. -% Argument #1 is generated by the calling \fooindex macro, -% and it is "foo", the name of the index. - -% \doindex just uses \parsearg; it calls \doind for the actual work. -% This is because \doind is more useful to call from other macros. - -% There is also \dosubind {index}{topic}{subtopic} -% which makes an entry in a two-level index such as the operation index. 
- -\def\doindex#1{\edef\indexname{#1}\parsearg\singleindexer} -\def\singleindexer #1{\doind{\indexname}{#1}} - -% like the previous two, but they put @code around the argument. -\def\docodeindex#1{\edef\indexname{#1}\parsearg\singlecodeindexer} -\def\singlecodeindexer #1{\doind{\indexname}{\code{#1}}} - -\def\indexdummies{% -% Take care of the plain tex accent commands. -\def\"{\realbackslash "}% -\def\`{\realbackslash `}% -\def\'{\realbackslash '}% -\def\^{\realbackslash ^}% -\def\~{\realbackslash ~}% -\def\={\realbackslash =}% -\def\b{\realbackslash b}% -\def\c{\realbackslash c}% -\def\d{\realbackslash d}% -\def\u{\realbackslash u}% -\def\v{\realbackslash v}% -\def\H{\realbackslash H}% -% Take care of the plain tex special European modified letters. -\def\oe{\realbackslash oe}% -\def\ae{\realbackslash ae}% -\def\aa{\realbackslash aa}% -\def\OE{\realbackslash OE}% -\def\AE{\realbackslash AE}% -\def\AA{\realbackslash AA}% -\def\o{\realbackslash o}% -\def\O{\realbackslash O}% -\def\l{\realbackslash l}% -\def\L{\realbackslash L}% -\def\ss{\realbackslash ss}% -% Take care of texinfo commands likely to appear in an index entry. 
-\def\_{{\realbackslash _}}% -\def\w{\realbackslash w }% -\def\bf{\realbackslash bf }% -\def\rm{\realbackslash rm }% -\def\sl{\realbackslash sl }% -\def\sf{\realbackslash sf}% -\def\tt{\realbackslash tt}% -\def\gtr{\realbackslash gtr}% -\def\less{\realbackslash less}% -\def\hat{\realbackslash hat}% -\def\char{\realbackslash char}% -\def\TeX{\realbackslash TeX}% -\def\dots{\realbackslash dots }% -\def\copyright{\realbackslash copyright }% -\def\tclose##1{\realbackslash tclose {##1}}% -\def\code##1{\realbackslash code {##1}}% -\def\samp##1{\realbackslash samp {##1}}% -\def\t##1{\realbackslash r {##1}}% -\def\r##1{\realbackslash r {##1}}% -\def\i##1{\realbackslash i {##1}}% -\def\b##1{\realbackslash b {##1}}% -\def\cite##1{\realbackslash cite {##1}}% -\def\key##1{\realbackslash key {##1}}% -\def\file##1{\realbackslash file {##1}}% -\def\var##1{\realbackslash var {##1}}% -\def\kbd##1{\realbackslash kbd {##1}}% -\def\dfn##1{\realbackslash dfn {##1}}% -\def\emph##1{\realbackslash emph {##1}}% -} - -% \indexnofonts no-ops all font-change commands. -% This is used when outputting the strings to sort the index by. -\def\indexdummyfont#1{#1} -\def\indexdummytex{TeX} -\def\indexdummydots{...} - -\def\indexnofonts{% -% Just ignore accents. -\let\"=\indexdummyfont -\let\`=\indexdummyfont -\let\'=\indexdummyfont -\let\^=\indexdummyfont -\let\~=\indexdummyfont -\let\==\indexdummyfont -\let\b=\indexdummyfont -\let\c=\indexdummyfont -\let\d=\indexdummyfont -\let\u=\indexdummyfont -\let\v=\indexdummyfont -\let\H=\indexdummyfont -% Take care of the plain tex special European modified letters. 
-\def\oe{oe}% -\def\ae{ae}% -\def\aa{aa}% -\def\OE{OE}% -\def\AE{AE}% -\def\AA{AA}% -\def\o{o}% -\def\O{O}% -\def\l{l}% -\def\L{L}% -\def\ss{ss}% -\let\w=\indexdummyfont -\let\t=\indexdummyfont -\let\r=\indexdummyfont -\let\i=\indexdummyfont -\let\b=\indexdummyfont -\let\emph=\indexdummyfont -\let\strong=\indexdummyfont -\let\cite=\indexdummyfont -\let\sc=\indexdummyfont -%Don't no-op \tt, since it isn't a user-level command -% and is used in the definitions of the active chars like <, >, |... -%\let\tt=\indexdummyfont -\let\tclose=\indexdummyfont -\let\code=\indexdummyfont -\let\file=\indexdummyfont -\let\samp=\indexdummyfont -\let\kbd=\indexdummyfont -\let\key=\indexdummyfont -\let\var=\indexdummyfont -\let\TeX=\indexdummytex -\let\dots=\indexdummydots -} - -% To define \realbackslash, we must make \ not be an escape. -% We must first make another character (@) an escape -% so we do not become unable to do a definition. - -{\catcode`\@=0 \catcode`\\=\other -@gdef@realbackslash{\}} - -\let\indexbackslash=0 %overridden during \printindex. - -\let\SETmarginindex=\relax %initialize! -% workhorse for all \fooindexes -% #1 is name of index, #2 is stuff to put there -\def\doind #1#2{% -% Put the index entry in the margin if desired. -\ifx\SETmarginindex\relax\else% -\insert\margin{\hbox{\vrule height8pt depth3pt width0pt #2}}% -\fi% -{\count10=\lastpenalty % -{\indexdummies % Must do this here, since \bf, etc expand at this stage -\escapechar=`\\% -{\let\folio=0% Expand all macros now EXCEPT \folio -\def\rawbackslashxx{\indexbackslash}% \indexbackslash isn't defined now -% so it will be output as is; and it will print as backslash in the indx. -% -% Now process the index-string once, with all font commands turned off, -% to get the string to sort the index by. -{\indexnofonts -\xdef\temp1{#2}% -}% -% Now produce the complete index entry. We process the index-string again, -% this time with font commands expanded, to get what to print in the index. 
-\edef\temp{% -\write \csname#1indfile\endcsname{% -\realbackslash entry {\temp1}{\folio}{#2}}}% -\temp }% -}\penalty\count10}} - -\def\dosubind #1#2#3{% -{\count10=\lastpenalty % -{\indexdummies % Must do this here, since \bf, etc expand at this stage -\escapechar=`\\% -{\let\folio=0% -\def\rawbackslashxx{\indexbackslash}% -% -% Now process the index-string once, with all font commands turned off, -% to get the string to sort the index by. -{\indexnofonts -\xdef\temp1{#2 #3}% -}% -% Now produce the complete index entry. We process the index-string again, -% this time with font commands expanded, to get what to print in the index. -\edef\temp{% -\write \csname#1indfile\endcsname{% -\realbackslash entry {\temp1}{\folio}{#2}{#3}}}% -\temp }% -}\penalty\count10}} - -% The index entry written in the file actually looks like -% \entry {sortstring}{page}{topic} -% or -% \entry {sortstring}{page}{topic}{subtopic} -% The texindex program reads in these files and writes files -% containing these kinds of lines: -% \initial {c} -% before the first topic whose initial is c -% \entry {topic}{pagelist} -% for a topic that is used without subtopics -% \primary {topic} -% for the beginning of a topic that is used with subtopics -% \secondary {subtopic}{pagelist} -% for each subtopic. - -% Define the user-accessible indexing commands -% @findex, @vindex, @kindex, @cindex. - -\def\findex {\fnindex} -\def\kindex {\kyindex} -\def\cindex {\cpindex} -\def\vindex {\vrindex} -\def\tindex {\tpindex} -\def\pindex {\pgindex} - -\def\cindexsub {\begingroup\obeylines\cindexsub} -{\obeylines % -\gdef\cindexsub "#1" #2^^M{\endgroup % -\dosubind{cp}{#2}{#1}}} - -% Define the macros used in formatting output of the sorted index material. - -% This is what you call to cause a particular index to get printed. 
-% Write -% @unnumbered Function Index -% @printindex fn - -\def\printindex{\parsearg\doprintindex} - -\def\doprintindex#1{% - \tex - \dobreak \chapheadingskip {10000} - \catcode`\%=\other\catcode`\&=\other\catcode`\#=\other - \catcode`\$=\other - \catcode`\~=\other - \indexbreaks - % - % The following don't help, since the chars were translated - % when the raw index was written, and their fonts were discarded - % due to \indexnofonts. - %\catcode`\"=\active - %\catcode`\^=\active - %\catcode`\_=\active - %\catcode`\|=\active - %\catcode`\<=\active - %\catcode`\>=\active - % % - \def\indexbackslash{\rawbackslashxx} - \indexfonts\rm \tolerance=9500 \advance\baselineskip -1pt - \begindoublecolumns - % - % See if the index file exists and is nonempty. - \openin 1 \jobname.#1s - \ifeof 1 - % \enddoublecolumns gets confused if there is no text in the index, - % and it loses the chapter title and the aux file entries for the - % index. The easiest way to prevent this problem is to make sure - % there is some text. - (Index is nonexistent) - \else - % - % If the index file exists but is empty, then \openin leaves \ifeof - % false. We have to make TeX try to read something from the file, so - % it can discover if there is anything in it. - \read 1 to \temp - \ifeof 1 - (Index is empty) - \else - \input \jobname.#1s - \fi - \fi - \closein 1 - \enddoublecolumns - \Etex -} - -% These macros are used by the sorted index file itself. -% Change them to control the appearance of the index. - -% Same as \bigskipamount except no shrink. -% \balancecolumns gets confused if there is any shrink. -\newskip\initialskipamount \initialskipamount 12pt plus4pt - -\def\initial #1{% -{\let\tentt=\sectt \let\tt=\sectt \let\sf=\sectt -\ifdim\lastskip<\initialskipamount -\removelastskip \penalty-200 \vskip \initialskipamount\fi -\line{\secbf#1\hfill}\kern 2pt\penalty10000}} - -% This typesets a paragraph consisting of #1, dot leaders, and then #2 -% flush to the right margin. 
It is used for index and table of contents -% entries. The paragraph is indented by \leftskip. -% -\def\entry #1#2{\begingroup - % - % Start a new paragraph if necessary, so our assignments below can't - % affect previous text. - \par - % - % Do not fill out the last line with white space. - \parfillskip = 0in - % - % No extra space above this paragraph. - \parskip = 0in - % - % Do not prefer a separate line ending with a hyphen to fewer lines. - \finalhyphendemerits = 0 - % - % \hangindent is only relevant when the entry text and page number - % don't both fit on one line. In that case, bob suggests starting the - % dots pretty far over on the line. Unfortunately, a large - % indentation looks wrong when the entry text itself is broken across - % lines. So we use a small indentation and put up with long leaders. - % - % \hangafter is reset to 1 (which is the value we want) at the start - % of each paragraph, so we need not do anything with that. - \hangindent=2em - % - % When the entry text needs to be broken, just fill out the first line - % with blank space. - \rightskip = 0pt plus1fil - % - % Start a ``paragraph'' for the index entry so the line breaking - % parameters we've set above will have an effect. - \noindent - % - % Insert the text of the index entry. TeX will do line-breaking on it. - #1% - % The following is kluged to not output a line of dots in the index if - % there are no page numbers. The next person who breaks this will be - % cursed by a Unix daemon. - \def\tempa{{\rm }}% - \def\tempb{#2}% - \edef\tempc{\tempa}% - \edef\tempd{\tempb}% - \ifx\tempc\tempd\ \else% - % - % If we must, put the page number on a line of its own, and fill out - % this line with blank space. (The \hfil is overwhelmed with the - % fill leaders glue in \indexdotfill if the page number does fit.) - \hfil\penalty50 - \null\nobreak\indexdotfill % Have leaders before the page number. 
- % - % The `\ ' here is removed by the implicit \unskip that TeX does as - % part of (the primitive) \par. Without it, a spurious underfull - % \hbox ensues. - \ #2% The page number ends the paragraph. - \fi% - \par -\endgroup} - -% Like \dotfill except takes at least 1 em. -\def\indexdotfill{\cleaders - \hbox{$\mathsurround=0pt \mkern1.5mu ${\it .}$ \mkern1.5mu$}\hskip 1em plus 1fill} - -\def\primary #1{\line{#1\hfil}} - -\newskip\secondaryindent \secondaryindent=0.5cm - -\def\secondary #1#2{ -{\parfillskip=0in \parskip=0in -\hangindent =1in \hangafter=1 -\noindent\hskip\secondaryindent\hbox{#1}\indexdotfill #2\par -}} - -%% Define two-column mode, which is used in indexes. -%% Adapted from the TeXbook, page 416. -\catcode `\@=11 - -\newbox\partialpage - -\newdimen\doublecolumnhsize - -\def\begindoublecolumns{\begingroup - % Grab any single-column material above us. - \output = {\global\setbox\partialpage - =\vbox{\unvbox255\kern -\topskip \kern \baselineskip}}% - \eject - % - % Now switch to the double-column output routine. - \output={\doublecolumnout}% - % - % Change the page size parameters. We could do this once outside this - % routine, in each of @smallbook, @afourpaper, and the default 8.5x11 - % format, but then we repeat the same computation. Repeating a couple - % of assignments once per index is clearly meaningless for the - % execution time, so we may as well do it once. - % - % First we halve the line length, less a little for the gutter between - % the columns. We compute the gutter based on the line length, so it - % changes automatically with the paper format. The magic constant - % below is chosen so that the gutter has the same value (well, +- < - % 1pt) as it did when we hard-coded it. - % - % We put the result in a separate register, \doublecolumhsize, so we - % can restore it in \pagesofar, after \hsize itself has (potentially) - % been clobbered. 
- % - \doublecolumnhsize = \hsize - \advance\doublecolumnhsize by -.04154\hsize - \divide\doublecolumnhsize by 2 - \hsize = \doublecolumnhsize - % - % Double the \vsize as well. (We don't need a separate register here, - % since nobody clobbers \vsize.) - \vsize = 2\vsize - \doublecolumnpagegoal -} - -\def\enddoublecolumns{\eject \endgroup \pagegoal=\vsize \unvbox\partialpage} - -\def\doublecolumnsplit{\splittopskip=\topskip \splitmaxdepth=\maxdepth - \global\dimen@=\pageheight \global\advance\dimen@ by-\ht\partialpage - \global\setbox1=\vsplit255 to\dimen@ \global\setbox0=\vbox{\unvbox1} - \global\setbox3=\vsplit255 to\dimen@ \global\setbox2=\vbox{\unvbox3} - \ifdim\ht0>\dimen@ \setbox255=\vbox{\unvbox0\unvbox2} \global\setbox255=\copy5 \fi - \ifdim\ht2>\dimen@ \setbox255=\vbox{\unvbox0\unvbox2} \global\setbox255=\copy5 \fi -} -\def\doublecolumnpagegoal{% - \dimen@=\vsize \advance\dimen@ by-2\ht\partialpage \global\pagegoal=\dimen@ -} -\def\pagesofar{\unvbox\partialpage % - \hsize=\doublecolumnhsize % have to restore this since output routine - \wd0=\hsize \wd2=\hsize \hbox to\pagewidth{\box0\hfil\box2}} -\def\doublecolumnout{% - \setbox5=\copy255 - {\vbadness=10000 \doublecolumnsplit} - \ifvbox255 - \setbox0=\vtop to\dimen@{\unvbox0} - \setbox2=\vtop to\dimen@{\unvbox2} - \onepageout\pagesofar \unvbox255 \penalty\outputpenalty - \else - \setbox0=\vbox{\unvbox5} - \ifvbox0 - \dimen@=\ht0 \advance\dimen@ by\topskip \advance\dimen@ by-\baselineskip - \divide\dimen@ by2 \splittopskip=\topskip \splitmaxdepth=\maxdepth - {\vbadness=10000 - \loop \global\setbox5=\copy0 - \setbox1=\vsplit5 to\dimen@ - \setbox3=\vsplit5 to\dimen@ - \ifvbox5 \global\advance\dimen@ by1pt \repeat - \setbox0=\vbox to\dimen@{\unvbox1} - \setbox2=\vbox to\dimen@{\unvbox3} - \global\setbox\partialpage=\vbox{\pagesofar} - \doublecolumnpagegoal - } - \fi - \fi -} - -\catcode `\@=\other -\message{sectioning,} -% Define chapters, sections, etc. 
- -\newcount \chapno -\newcount \secno \secno=0 -\newcount \subsecno \subsecno=0 -\newcount \subsubsecno \subsubsecno=0 - -% This counter is funny since it counts through charcodes of letters A, B, ... -\newcount \appendixno \appendixno = `\@ -\def\appendixletter{\char\the\appendixno} - -\newwrite \contentsfile -% This is called from \setfilename. -\def\opencontents{\openout \contentsfile = \jobname.toc} - -% Each @chapter defines this as the name of the chapter. -% page headings and footings can use it. @section does likewise - -\def\thischapter{} \def\thissection{} -\def\seccheck#1{\if \pageno<0 % -\errmessage{@#1 not allowed after generating table of contents}\fi -% -} - -\def\chapternofonts{% -\let\rawbackslash=\relax% -\let\frenchspacing=\relax% -\def\result{\realbackslash result} -\def\equiv{\realbackslash equiv} -\def\expansion{\realbackslash expansion} -\def\print{\realbackslash print} -\def\TeX{\realbackslash TeX} -\def\dots{\realbackslash dots} -\def\copyright{\realbackslash copyright} -\def\tt{\realbackslash tt} -\def\bf{\realbackslash bf } -\def\w{\realbackslash w} -\def\less{\realbackslash less} -\def\gtr{\realbackslash gtr} -\def\hat{\realbackslash hat} -\def\char{\realbackslash char} -\def\tclose##1{\realbackslash tclose {##1}} -\def\code##1{\realbackslash code {##1}} -\def\samp##1{\realbackslash samp {##1}} -\def\r##1{\realbackslash r {##1}} -\def\b##1{\realbackslash b {##1}} -\def\key##1{\realbackslash key {##1}} -\def\file##1{\realbackslash file {##1}} -\def\kbd##1{\realbackslash kbd {##1}} -% These are redefined because @smartitalic wouldn't work inside xdef. 
-\def\i##1{\realbackslash i {##1}} -\def\cite##1{\realbackslash cite {##1}} -\def\var##1{\realbackslash var {##1}} -\def\emph##1{\realbackslash emph {##1}} -\def\dfn##1{\realbackslash dfn {##1}} -} - -\newcount\absseclevel % used to calculate proper heading level -\newcount\secbase\secbase=0 % @raise/lowersections modify this count - -% @raisesections: treat @section as chapter, @subsection as section, etc. -\def\raisesections{\global\advance\secbase by -1} -\let\up=\raisesections % original BFox name - -% @lowersections: treat @chapter as section, @section as subsection, etc. -\def\lowersections{\global\advance\secbase by 1} -\let\down=\lowersections % original BFox name - -% Choose a numbered-heading macro -% #1 is heading level if unmodified by @raisesections or @lowersections -% #2 is text for heading -\def\numhead#1#2{\absseclevel=\secbase\advance\absseclevel by #1 -\ifcase\absseclevel - \chapterzzz{#2} -\or - \seczzz{#2} -\or - \numberedsubseczzz{#2} -\or - \numberedsubsubseczzz{#2} -\else - \ifnum \absseclevel<0 - \chapterzzz{#2} - \else - \numberedsubsubseczzz{#2} - \fi -\fi -} - -% like \numhead, but chooses appendix heading levels -\def\apphead#1#2{\absseclevel=\secbase\advance\absseclevel by #1 -\ifcase\absseclevel - \appendixzzz{#2} -\or - \appendixsectionzzz{#2} -\or - \appendixsubseczzz{#2} -\or - \appendixsubsubseczzz{#2} -\else - \ifnum \absseclevel<0 - \appendixzzz{#2} - \else - \appendixsubsubseczzz{#2} - \fi -\fi -} - -% like \numhead, but chooses numberless heading levels -\def\unnmhead#1#2{\absseclevel=\secbase\advance\absseclevel by #1 -\ifcase\absseclevel - \unnumberedzzz{#2} -\or - \unnumberedseczzz{#2} -\or - \unnumberedsubseczzz{#2} -\or - \unnumberedsubsubseczzz{#2} -\else - \ifnum \absseclevel<0 - \unnumberedzzz{#2} - \else - \unnumberedsubsubseczzz{#2} - \fi -\fi -} - - -\def\thischaptername{No Chapter Title} -\outer\def\chapter{\parsearg\chapteryyy} -\def\chapteryyy #1{\numhead0{#1}} % normally numhead0 calls chapterzzz 
-\def\chapterzzz #1{\seccheck{chapter}% -\secno=0 \subsecno=0 \subsubsecno=0 -\global\advance \chapno by 1 \message{\putwordChapter \the\chapno}% -\chapmacro {#1}{\the\chapno}% -\gdef\thissection{#1}% -\gdef\thischaptername{#1}% -% We don't substitute the actual chapter name into \thischapter -% because we don't want its macros evaluated now. -\xdef\thischapter{\putwordChapter{} \the\chapno: \noexpand\thischaptername}% -{\chapternofonts% -\edef\temp{{\realbackslash chapentry {#1}{\the\chapno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\global\let\section = \numberedsec -\global\let\subsection = \numberedsubsec -\global\let\subsubsection = \numberedsubsubsec -}} - -\outer\def\appendix{\parsearg\appendixyyy} -\def\appendixyyy #1{\apphead0{#1}} % normally apphead0 calls appendixzzz -\def\appendixzzz #1{\seccheck{appendix}% -\secno=0 \subsecno=0 \subsubsecno=0 -\global\advance \appendixno by 1 \message{Appendix \appendixletter}% -\chapmacro {#1}{\putwordAppendix{} \appendixletter}% -\gdef\thissection{#1}% -\gdef\thischaptername{#1}% -\xdef\thischapter{\putwordAppendix{} \appendixletter: \noexpand\thischaptername}% -{\chapternofonts% -\edef\temp{{\realbackslash chapentry - {#1}{\putwordAppendix{} \appendixletter}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\global\let\section = \appendixsec -\global\let\subsection = \appendixsubsec -\global\let\subsubsection = \appendixsubsubsec -}} - -\outer\def\top{\parsearg\unnumberedyyy} -\outer\def\unnumbered{\parsearg\unnumberedyyy} -\def\unnumberedyyy #1{\unnmhead0{#1}} % normally unnmhead0 calls unnumberedzzz -\def\unnumberedzzz #1{\seccheck{unnumbered}% -\secno=0 \subsecno=0 \subsubsecno=0 -% -% This used to be simply \message{#1}, but TeX fully expands the -% argument to \message. Therefore, if #1 contained @-commands, TeX -% expanded them. 
For example, in `@unnumbered The @cite{Book}', TeX -% expanded @cite (which turns out to cause errors because \cite is meant -% to be executed, not expanded). -% -% Anyway, we don't want the fully-expanded definition of @cite to appear -% as a result of the \message, we just want `@cite' itself. We use -% \the to achieve this: TeX expands \the only once, -% simply yielding the contents of the . -\toks0 = {#1}\message{(\the\toks0)}% -% -\unnumbchapmacro {#1}% -\gdef\thischapter{#1}\gdef\thissection{#1}% -{\chapternofonts% -\edef\temp{{\realbackslash unnumbchapentry {#1}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\global\let\section = \unnumberedsec -\global\let\subsection = \unnumberedsubsec -\global\let\subsubsection = \unnumberedsubsubsec -}} - -\outer\def\numberedsec{\parsearg\secyyy} -\def\secyyy #1{\numhead1{#1}} % normally calls seczzz -\def\seczzz #1{\seccheck{section}% -\subsecno=0 \subsubsecno=0 \global\advance \secno by 1 % -\gdef\thissection{#1}\secheading {#1}{\the\chapno}{\the\secno}% -{\chapternofonts% -\edef\temp{{\realbackslash secentry % -{#1}{\the\chapno}{\the\secno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\penalty 10000 % -}} - -\outer\def\appenixsection{\parsearg\appendixsecyyy} -\outer\def\appendixsec{\parsearg\appendixsecyyy} -\def\appendixsecyyy #1{\apphead1{#1}} % normally calls appendixsectionzzz -\def\appendixsectionzzz #1{\seccheck{appendixsection}% -\subsecno=0 \subsubsecno=0 \global\advance \secno by 1 % -\gdef\thissection{#1}\secheading {#1}{\appendixletter}{\the\secno}% -{\chapternofonts% -\edef\temp{{\realbackslash secentry % -{#1}{\appendixletter}{\the\secno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\penalty 10000 % -}} - -\outer\def\unnumberedsec{\parsearg\unnumberedsecyyy} -\def\unnumberedsecyyy #1{\unnmhead1{#1}} % normally calls unnumberedseczzz -\def\unnumberedseczzz #1{\seccheck{unnumberedsec}% 
-\plainsecheading {#1}\gdef\thissection{#1}% -{\chapternofonts% -\edef\temp{{\realbackslash unnumbsecentry{#1}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\penalty 10000 % -}} - -\outer\def\numberedsubsec{\parsearg\numberedsubsecyyy} -\def\numberedsubsecyyy #1{\numhead2{#1}} % normally calls numberedsubseczzz -\def\numberedsubseczzz #1{\seccheck{subsection}% -\gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 % -\subsecheading {#1}{\the\chapno}{\the\secno}{\the\subsecno}% -{\chapternofonts% -\edef\temp{{\realbackslash subsecentry % -{#1}{\the\chapno}{\the\secno}{\the\subsecno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\penalty 10000 % -}} - -\outer\def\appendixsubsec{\parsearg\appendixsubsecyyy} -\def\appendixsubsecyyy #1{\apphead2{#1}} % normally calls appendixsubseczzz -\def\appendixsubseczzz #1{\seccheck{appendixsubsec}% -\gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 % -\subsecheading {#1}{\appendixletter}{\the\secno}{\the\subsecno}% -{\chapternofonts% -\edef\temp{{\realbackslash subsecentry % -{#1}{\appendixletter}{\the\secno}{\the\subsecno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\penalty 10000 % -}} - -\outer\def\unnumberedsubsec{\parsearg\unnumberedsubsecyyy} -\def\unnumberedsubsecyyy #1{\unnmhead2{#1}} %normally calls unnumberedsubseczzz -\def\unnumberedsubseczzz #1{\seccheck{unnumberedsubsec}% -\plainsecheading {#1}\gdef\thissection{#1}% -{\chapternofonts% -\edef\temp{{\realbackslash unnumbsubsecentry{#1}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\penalty 10000 % -}} - -\outer\def\numberedsubsubsec{\parsearg\numberedsubsubsecyyy} -\def\numberedsubsubsecyyy #1{\numhead3{#1}} % normally numberedsubsubseczzz -\def\numberedsubsubseczzz #1{\seccheck{subsubsection}% -\gdef\thissection{#1}\global\advance \subsubsecno by 1 % -\subsubsecheading {#1} - 
{\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno}% -{\chapternofonts% -\edef\temp{{\realbackslash subsubsecentry % - {#1} - {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno} - {\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\penalty 10000 % -}} - -\outer\def\appendixsubsubsec{\parsearg\appendixsubsubsecyyy} -\def\appendixsubsubsecyyy #1{\apphead3{#1}} % normally appendixsubsubseczzz -\def\appendixsubsubseczzz #1{\seccheck{appendixsubsubsec}% -\gdef\thissection{#1}\global\advance \subsubsecno by 1 % -\subsubsecheading {#1} - {\appendixletter}{\the\secno}{\the\subsecno}{\the\subsubsecno}% -{\chapternofonts% -\edef\temp{{\realbackslash subsubsecentry{#1}% - {\appendixletter} - {\the\secno}{\the\subsecno}{\the\subsubsecno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\penalty 10000 % -}} - -\outer\def\unnumberedsubsubsec{\parsearg\unnumberedsubsubsecyyy} -\def\unnumberedsubsubsecyyy #1{\unnmhead3{#1}} %normally unnumberedsubsubseczzz -\def\unnumberedsubsubseczzz #1{\seccheck{unnumberedsubsubsec}% -\plainsecheading {#1}\gdef\thissection{#1}% -{\chapternofonts% -\edef\temp{{\realbackslash unnumbsubsubsecentry{#1}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\penalty 10000 % -}} - -% These are variants which are not "outer", so they can appear in @ifinfo. -% Actually, they should now be obsolete; ordinary section commands should work. 
-\def\infotop{\parsearg\unnumberedzzz} -\def\infounnumbered{\parsearg\unnumberedzzz} -\def\infounnumberedsec{\parsearg\unnumberedseczzz} -\def\infounnumberedsubsec{\parsearg\unnumberedsubseczzz} -\def\infounnumberedsubsubsec{\parsearg\unnumberedsubsubseczzz} - -\def\infoappendix{\parsearg\appendixzzz} -\def\infoappendixsec{\parsearg\appendixseczzz} -\def\infoappendixsubsec{\parsearg\appendixsubseczzz} -\def\infoappendixsubsubsec{\parsearg\appendixsubsubseczzz} - -\def\infochapter{\parsearg\chapterzzz} -\def\infosection{\parsearg\sectionzzz} -\def\infosubsection{\parsearg\subsectionzzz} -\def\infosubsubsection{\parsearg\subsubsectionzzz} - -% These macros control what the section commands do, according -% to what kind of chapter we are in (ordinary, appendix, or unnumbered). -% Define them by default for a numbered chapter. -\global\let\section = \numberedsec -\global\let\subsection = \numberedsubsec -\global\let\subsubsection = \numberedsubsubsec - -% Define @majorheading, @heading and @subheading - -% NOTE on use of \vbox for chapter headings, section headings, and -% such: -% 1) We use \vbox rather than the earlier \line to permit -% overlong headings to fold. -% 2) \hyphenpenalty is set to 10000 because hyphenation in a -% heading is obnoxious; this forbids it. -% 3) Likewise, headings look best if no \parindent is used, and -% if justification is not attempted. Hence \raggedright. 
- - -\def\majorheading{\parsearg\majorheadingzzz} -\def\majorheadingzzz #1{% -{\advance\chapheadingskip by 10pt \chapbreak }% -{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 200} - -\def\chapheading{\parsearg\chapheadingzzz} -\def\chapheadingzzz #1{\chapbreak % -{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 200} - -\def\heading{\parsearg\secheadingi} - -\def\subheading{\parsearg\subsecheadingi} - -\def\subsubheading{\parsearg\subsubsecheadingi} - -% These macros generate a chapter, section, etc. heading only -% (including whitespace, linebreaking, etc. around it), -% given all the information in convenient, parsed form. - -%%% Args are the skip and penalty (usually negative) -\def\dobreak#1#2{\par\ifdim\lastskip<#1\removelastskip\penalty#2\vskip#1\fi} - -\def\setchapterstyle #1 {\csname CHAPF#1\endcsname} - -%%% Define plain chapter starts, and page on/off switching for it -% Parameter controlling skip before chapter headings (if needed) - -\newskip \chapheadingskip \chapheadingskip = 30pt plus 8pt minus 4pt - -\def\chapbreak{\dobreak \chapheadingskip {-4000}} -\def\chappager{\par\vfill\supereject} -\def\chapoddpage{\chappager \ifodd\pageno \else \hbox to 0pt{} \chappager\fi} - -\def\setchapternewpage #1 {\csname CHAPPAG#1\endcsname} - -\def\CHAPPAGoff{ -\global\let\pchapsepmacro=\chapbreak -\global\let\pagealignmacro=\chappager} - -\def\CHAPPAGon{ -\global\let\pchapsepmacro=\chappager -\global\let\pagealignmacro=\chappager -\global\def\HEADINGSon{\HEADINGSsingle}} - -\def\CHAPPAGodd{ -\global\let\pchapsepmacro=\chapoddpage -\global\let\pagealignmacro=\chapoddpage -\global\def\HEADINGSon{\HEADINGSdouble}} - -\CHAPPAGon - -\def\CHAPFplain{ -\global\let\chapmacro=\chfplain -\global\let\unnumbchapmacro=\unnchfplain} - -\def\chfplain #1#2{% - \pchapsepmacro - {% - \chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - 
\parindent=0pt\raggedright - \rm #2\enspace #1}% - }% - \bigskip - \penalty5000 -} - -\def\unnchfplain #1{% -\pchapsepmacro % -{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 10000 % -} -\CHAPFplain % The default - -\def\unnchfopen #1{% -\chapoddpage {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 10000 % -} - -\def\chfopen #1#2{\chapoddpage {\chapfonts -\vbox to 3in{\vfil \hbox to\hsize{\hfil #2} \hbox to\hsize{\hfil #1} \vfil}}% -\par\penalty 5000 % -} - -\def\CHAPFopen{ -\global\let\chapmacro=\chfopen -\global\let\unnumbchapmacro=\unnchfopen} - -% Parameter controlling skip before section headings. - -\newskip \subsecheadingskip \subsecheadingskip = 17pt plus 8pt minus 4pt -\def\subsecheadingbreak{\dobreak \subsecheadingskip {-500}} - -\newskip \secheadingskip \secheadingskip = 21pt plus 8pt minus 4pt -\def\secheadingbreak{\dobreak \secheadingskip {-1000}} - -% @paragraphindent is defined for the Info formatting commands only. -\let\paragraphindent=\comment - -% Section fonts are the base font at magstep2, which produces -% a size a bit more than 14 points in the default situation. - -\def\secheading #1#2#3{\secheadingi {#2.#3\enspace #1}} -\def\plainsecheading #1{\secheadingi {#1}} -\def\secheadingi #1{{\advance \secheadingskip by \parskip % -\secheadingbreak}% -{\secfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}% -\ifdim \parskip<10pt \kern 10pt\kern -\parskip\fi \penalty 10000 } - - -% Subsection fonts are the base font at magstep1, -% which produces a size of 12 points. 
- -\def\subsecheading #1#2#3#4{\subsecheadingi {#2.#3.#4\enspace #1}} -\def\subsecheadingi #1{{\advance \subsecheadingskip by \parskip % -\subsecheadingbreak}% -{\subsecfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}% -\ifdim \parskip<10pt \kern 10pt\kern -\parskip\fi \penalty 10000 } - -\def\subsubsecfonts{\subsecfonts} % Maybe this should change: - % Perhaps make sssec fonts scaled - % magstep half -\def\subsubsecheading #1#2#3#4#5{\subsubsecheadingi {#2.#3.#4.#5\enspace #1}} -\def\subsubsecheadingi #1{{\advance \subsecheadingskip by \parskip % -\subsecheadingbreak}% -{\subsubsecfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}% -\ifdim \parskip<10pt \kern 10pt\kern -\parskip\fi \penalty 10000} - - -\message{toc printing,} - -% Finish up the main text and prepare to read what we've written -% to \contentsfile. - -\newskip\contentsrightmargin \contentsrightmargin=1in -\def\startcontents#1{% - \pagealignmacro - \immediate\closeout \contentsfile - \ifnum \pageno>0 - \pageno = -1 % Request roman numbered pages. - \fi - % Don't need to put `Contents' or `Short Contents' in the headline. - % It is abundantly clear what they are. - \unnumbchapmacro{#1}\def\thischapter{}% - \begingroup % Set up to handle contents files properly. - \catcode`\\=0 \catcode`\{=1 \catcode`\}=2 \catcode`\@=11 - \catcode`\^=7 % to see ^^e4 as \"a etc. juha@piuha.ydi.vtt.fi - \raggedbottom % Worry more about breakpoints than the bottom. - \advance\hsize by -\contentsrightmargin % Don't use the full line length. -} - - -% Normal (long) toc. -\outer\def\contents{% - \startcontents{\putwordTableofContents}% - \input \jobname.toc - \endgroup - \vfill \eject -} - -% And just the chapters. -\outer\def\summarycontents{% - \startcontents{\putwordShortContents}% - % - \let\chapentry = \shortchapentry - \let\unnumbchapentry = \shortunnumberedentry - % We want a true roman here for the page numbers. 
- \secfonts - \let\rm=\shortcontrm \let\bf=\shortcontbf \let\sl=\shortcontsl - \rm - \advance\baselineskip by 1pt % Open it up a little. - \def\secentry ##1##2##3##4{} - \def\unnumbsecentry ##1##2{} - \def\subsecentry ##1##2##3##4##5{} - \def\unnumbsubsecentry ##1##2{} - \def\subsubsecentry ##1##2##3##4##5##6{} - \def\unnumbsubsubsecentry ##1##2{} - \input \jobname.toc - \endgroup - \vfill \eject -} -\let\shortcontents = \summarycontents - -% These macros generate individual entries in the table of contents. -% The first argument is the chapter or section name. -% The last argument is the page number. -% The arguments in between are the chapter number, section number, ... - -% Chapter-level things, for both the long and short contents. -\def\chapentry#1#2#3{\dochapentry{#2\labelspace#1}{#3}} - -% See comments in \dochapentry re vbox and related settings -\def\shortchapentry#1#2#3{% - \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno{#3}}% -} - -% Typeset the label for a chapter or appendix for the short contents. -% The arg is, e.g. `Appendix A' for an appendix, or `3' for a chapter. -% We could simplify the code here by writing out an \appendixentry -% command in the toc file for appendices, instead of using \chapentry -% for both, but it doesn't seem worth it. -\setbox0 = \hbox{\shortcontrm \putwordAppendix } -\newdimen\shortappendixwidth \shortappendixwidth = \wd0 - -\def\shortchaplabel#1{% - % We typeset #1 in a box of constant width, regardless of the text of - % #1, so the chapter titles will come out aligned. - \setbox0 = \hbox{#1}% - \dimen0 = \ifdim\wd0 > \shortappendixwidth \shortappendixwidth \else 0pt \fi - % - % This space should be plenty, since a single number is .5em, and the - % widest letter (M) is 1em, at least in the Computer Modern fonts. - % (This space doesn't include the extra space that gets added after - % the label; that gets put in in \shortchapentry above.) 
- \advance\dimen0 by 1.1em - \hbox to \dimen0{#1\hfil}% -} - -\def\unnumbchapentry#1#2{\dochapentry{#1}{#2}} -\def\shortunnumberedentry#1#2{\tocentry{#1}{\doshortpageno{#2}}} - -% Sections. -\def\secentry#1#2#3#4{\dosecentry{#2.#3\labelspace#1}{#4}} -\def\unnumbsecentry#1#2{\dosecentry{#1}{#2}} - -% Subsections. -\def\subsecentry#1#2#3#4#5{\dosubsecentry{#2.#3.#4\labelspace#1}{#5}} -\def\unnumbsubsecentry#1#2{\dosubsecentry{#1}{#2}} - -% And subsubsections. -\def\subsubsecentry#1#2#3#4#5#6{% - \dosubsubsecentry{#2.#3.#4.#5\labelspace#1}{#6}} -\def\unnumbsubsubsecentry#1#2{\dosubsubsecentry{#1}{#2}} - - -% This parameter controls the indentation of the various levels. -\newdimen\tocindent \tocindent = 3pc - -% Now for the actual typesetting. In all these, #1 is the text and #2 is the -% page number. -% -% If the toc has to be broken over pages, we would want to be at chapters -% if at all possible; hence the \penalty. -\def\dochapentry#1#2{% - \penalty-300 \vskip\baselineskip - \begingroup - \chapentryfonts - \tocentry{#1}{\dopageno{#2}}% - \endgroup - \nobreak\vskip .25\baselineskip -} - -\def\dosecentry#1#2{\begingroup - \secentryfonts \leftskip=\tocindent - \tocentry{#1}{\dopageno{#2}}% -\endgroup} - -\def\dosubsecentry#1#2{\begingroup - \subsecentryfonts \leftskip=2\tocindent - \tocentry{#1}{\dopageno{#2}}% -\endgroup} - -\def\dosubsubsecentry#1#2{\begingroup - \subsubsecentryfonts \leftskip=3\tocindent - \tocentry{#1}{\dopageno{#2}}% -\endgroup} - -% Final typesetting of a toc entry; we use the same \entry macro as for -% the index entries, but we want to suppress hyphenation here. (We -% can't do that in the \entry macro, since index entries might consist -% of hyphenated-identifiers-that-do-not-fit-on-a-line-and-nothing-else.) -% -\def\tocentry#1#2{\begingroup - \hyphenpenalty = 10000 - \entry{#1}{#2}% -\endgroup} - -% Space between chapter (or whatever) number and the title. 
-\def\labelspace{\hskip1em \relax} - -\def\dopageno#1{{\rm #1}} -\def\doshortpageno#1{{\rm #1}} - -\def\chapentryfonts{\secfonts \rm} -\def\secentryfonts{\textfonts} -\let\subsecentryfonts = \textfonts -\let\subsubsecentryfonts = \textfonts - - -\message{environments,} - -% Since these characters are used in examples, it should be an even number of -% \tt widths. Each \tt character is 1en, so two makes it 1em. -% Furthermore, these definitions must come after we define our fonts. -\newbox\dblarrowbox \newbox\longdblarrowbox -\newbox\pushcharbox \newbox\bullbox -\newbox\equivbox \newbox\errorbox - -\let\ptexequiv = \equiv - -%{\tentt -%\global\setbox\dblarrowbox = \hbox to 1em{\hfil$\Rightarrow$\hfil} -%\global\setbox\longdblarrowbox = \hbox to 1em{\hfil$\mapsto$\hfil} -%\global\setbox\pushcharbox = \hbox to 1em{\hfil$\dashv$\hfil} -%\global\setbox\equivbox = \hbox to 1em{\hfil$\ptexequiv$\hfil} -% Adapted from the manmac format (p.420 of TeXbook) -%\global\setbox\bullbox = \hbox to 1em{\kern.15em\vrule height .75ex width .85ex -% depth .1ex\hfil} -%} - -\def\point{$\star$} - -\def\result{\leavevmode\raise.15ex\hbox to 1em{\hfil$\Rightarrow$\hfil}} -\def\expansion{\leavevmode\raise.1ex\hbox to 1em{\hfil$\mapsto$\hfil}} -\def\print{\leavevmode\lower.1ex\hbox to 1em{\hfil$\dashv$\hfil}} - -\def\equiv{\leavevmode\lower.1ex\hbox to 1em{\hfil$\ptexequiv$\hfil}} - -% Adapted from the TeXbook's \boxit. -{\tentt \global\dimen0 = 3em}% Width of the box. -\dimen2 = .55pt % Thickness of rules -% The text. (`r' is open on the right, `e' somewhat less so on the left.) -\setbox0 = \hbox{\kern-.75pt \tensf error\kern-1.5pt} - -\global\setbox\errorbox=\hbox to \dimen0{\hfil - \hsize = \dimen0 \advance\hsize by -5.8pt % Space to left+right. - \advance\hsize by -2\dimen2 % Rules. - \vbox{ - \hrule height\dimen2 - \hbox{\vrule width\dimen2 \kern3pt % Space to left of text. - \vtop{\kern2.4pt \box0 \kern2.4pt}% Space above/below. - \kern3pt\vrule width\dimen2}% Space to right. 
- \hrule height\dimen2} - \hfil} - -% The @error{} command. -\def\error{\leavevmode\lower.7ex\copy\errorbox} - -% @tex ... @end tex escapes into raw Tex temporarily. -% One exception: @ is still an escape character, so that @end tex works. -% But \@ or @@ will get a plain tex @ character. - -\def\tex{\begingroup -\catcode `\\=0 \catcode `\{=1 \catcode `\}=2 -\catcode `\$=3 \catcode `\&=4 \catcode `\#=6 -\catcode `\^=7 \catcode `\_=8 \catcode `\~=13 \let~=\tie -\catcode `\%=14 -\catcode 43=12 -\catcode`\"=12 -\catcode`\==12 -\catcode`\|=12 -\catcode`\<=12 -\catcode`\>=12 -\escapechar=`\\ -% -\let\~=\ptextilde -\let\{=\ptexlbrace -\let\}=\ptexrbrace -\let\.=\ptexdot -\let\*=\ptexstar -\let\dots=\ptexdots -\def\@{@}% -\let\bullet=\ptexbullet -\let\b=\ptexb \let\c=\ptexc \let\i=\ptexi \let\t=\ptext \let\l=\ptexl -\let\L=\ptexL -% -\let\Etex=\endgroup} - -% Define @lisp ... @endlisp. -% @lisp does a \begingroup so it can rebind things, -% including the definition of @endlisp (which normally is erroneous). - -% Amount to narrow the margins by for @lisp. -\newskip\lispnarrowing \lispnarrowing=0.4in - -% This is the definition that ^^M gets inside @lisp, @example, and other -% such environments. \null is better than a space, since it doesn't -% have any width. -\def\lisppar{\null\endgraf} - -% Make each space character in the input produce a normal interword -% space in the output. Don't allow a line break at this space, as this -% is used only in environments like @example, where each line of input -% should produce a line of output anyway. -% -{\obeyspaces % -\gdef\sepspaces{\obeyspaces\let =\tie}} - -% Define \obeyedspace to be our active space, whatever it is. This is -% for use in \parsearg. -{\sepspaces% -\global\let\obeyedspace= } - -% This space is always present above and below environments. -\newskip\envskipamount \envskipamount = 0pt - -% Make spacing and below environment symmetrical. 
We use \parskip here -% to help in doing that, since in @example-like environments \parskip -% is reset to zero; thus the \afterenvbreak inserts no space -- but the -% start of the next paragraph will insert \parskip -% -\def\aboveenvbreak{{\advance\envskipamount by \parskip -\endgraf \ifdim\lastskip<\envskipamount -\removelastskip \penalty-50 \vskip\envskipamount \fi}} - -\let\afterenvbreak = \aboveenvbreak - -% \nonarrowing is a flag. If "set", @lisp etc don't narrow margins. -\let\nonarrowing=\relax - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \cartouche: draw rectangle w/rounded corners around argument -\font\circle=lcircle10 -\newdimen\circthick -\newdimen\cartouter\newdimen\cartinner -\newskip\normbskip\newskip\normpskip\newskip\normlskip -\circthick=\fontdimen8\circle -% -\def\ctl{{\circle\char'013\hskip -6pt}}% 6pt from pl file: 1/2charwidth -\def\ctr{{\hskip 6pt\circle\char'010}} -\def\cbl{{\circle\char'012\hskip -6pt}} -\def\cbr{{\hskip 6pt\circle\char'011}} -\def\carttop{\hbox to \cartouter{\hskip\lskip - \ctl\leaders\hrule height\circthick\hfil\ctr - \hskip\rskip}} -\def\cartbot{\hbox to \cartouter{\hskip\lskip - \cbl\leaders\hrule height\circthick\hfil\cbr - \hskip\rskip}} -% -\newskip\lskip\newskip\rskip - -\long\def\cartouche{% -\begingroup - \lskip=\leftskip \rskip=\rightskip - \leftskip=0pt\rightskip=0pt %we want these *outside*. - \cartinner=\hsize \advance\cartinner by-\lskip - \advance\cartinner by-\rskip - \cartouter=\hsize - \advance\cartouter by 18pt % allow for 3pt kerns on either -% side, and for 6pt waste from -% each corner char - \normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip - % Flag to tell @lisp, etc., not to narrow margin. 
- \let\nonarrowing=\comment - \vbox\bgroup - \baselineskip=0pt\parskip=0pt\lineskip=0pt - \carttop - \hbox\bgroup - \hskip\lskip - \vrule\kern3pt - \vbox\bgroup - \hsize=\cartinner - \kern3pt - \begingroup - \baselineskip=\normbskip - \lineskip=\normlskip - \parskip=\normpskip - \vskip -\parskip -\def\Ecartouche{% - \endgroup - \kern3pt - \egroup - \kern3pt\vrule - \hskip\rskip - \egroup - \cartbot - \egroup -\endgroup -}} - - -% This macro is called at the beginning of all the @example variants, -% inside a group. -\def\nonfillstart{% - \aboveenvbreak - \inENV % This group ends at the end of the body - \hfuzz = 12pt % Don't be fussy - \sepspaces % Make spaces be word-separators rather than space tokens. - \singlespace - \let\par = \lisppar % don't ignore blank lines - \obeylines % each line of input is a line of output - \parskip = 0pt - \parindent = 0pt - \emergencystretch = 0pt % don't try to avoid overfull boxes - % @cartouche defines \nonarrowing to inhibit narrowing - % at next level down. - \ifx\nonarrowing\relax - \advance \leftskip by \lispnarrowing - \exdentamount=\lispnarrowing - \let\exdent=\nofillexdent - \let\nonarrowing=\relax - \fi -} - -% To ending an @example-like environment, we first end the paragraph -% (via \afterenvbreak's vertical glue), and then the group. That way we -% keep the zero \parskip that the environments set -- \parskip glue -% will be inserted at the beginning of the next paragraph in the -% document, after the environment. -% -\def\nonfillfinish{\afterenvbreak\endgroup}% - -% This macro is -\def\lisp{\begingroup - \nonfillstart - \let\Elisp = \nonfillfinish - \tt - \rawbackslash % have \ input char produce \ char from current font - \gobble -} - -% Define the \E... control sequence only if we are inside the -% environment, so the error checking in \end will work. -% -% We must call \lisp last in the definition, since it reads the -% return following the @example (or whatever) command. 
-% -\def\example{\begingroup \def\Eexample{\nonfillfinish\endgroup}\lisp} -\def\smallexample{\begingroup \def\Esmallexample{\nonfillfinish\endgroup}\lisp} -\def\smalllisp{\begingroup \def\Esmalllisp{\nonfillfinish\endgroup}\lisp} - -% @smallexample and @smalllisp. This is not used unless the @smallbook -% command is given. Originally contributed by Pavel@xerox. -% -\def\smalllispx{\begingroup - \nonfillstart - \let\Esmalllisp = \nonfillfinish - \let\Esmallexample = \nonfillfinish - % - % Smaller interline space and fonts for small examples. - \setleading{10pt}% - \indexfonts \tt - \rawbackslash % make \ output the \ character from the current font (tt) - \gobble -} - -% This is @display; same as @lisp except use roman font. -% -\def\display{\begingroup - \nonfillstart - \let\Edisplay = \nonfillfinish - \gobble -} - -% This is @format; same as @display except don't narrow margins. -% -\def\format{\begingroup - \let\nonarrowing = t - \nonfillstart - \let\Eformat = \nonfillfinish - \gobble -} - -% @flushleft (same as @format) and @flushright. -% -\def\flushleft{\begingroup - \let\nonarrowing = t - \nonfillstart - \let\Eflushleft = \nonfillfinish - \gobble -} -\def\flushright{\begingroup - \let\nonarrowing = t - \nonfillstart - \let\Eflushright = \nonfillfinish - \advance\leftskip by 0pt plus 1fill - \gobble} - -% @quotation does normal linebreaking (hence we can't use \nonfillstart) -% and narrows the margins. -% -\def\quotation{% - \begingroup\inENV %This group ends at the end of the @quotation body - {\parskip=0pt \aboveenvbreak}% because \aboveenvbreak inserts \parskip - \singlespace - \parindent=0pt - % We have retained a nonzero parskip for the environment, since we're - % doing normal filling. So to avoid extra space below the environment... - \def\Equotation{\parskip = 0pt \nonfillfinish}% - % - % @cartouche defines \nonarrowing to inhibit narrowing at next level down. 
- \ifx\nonarrowing\relax - \advance\leftskip by \lispnarrowing - \advance\rightskip by \lispnarrowing - \exdentamount = \lispnarrowing - \let\nonarrowing = \relax - \fi -} - -\message{defuns,} -% Define formatter for defuns -% First, allow user to change definition object font (\df) internally -\def\setdeffont #1 {\csname DEF#1\endcsname} - -\newskip\defbodyindent \defbodyindent=.4in -\newskip\defargsindent \defargsindent=50pt -\newskip\deftypemargin \deftypemargin=12pt -\newskip\deflastargmargin \deflastargmargin=18pt - -\newcount\parencount -% define \functionparens, which makes ( and ) and & do special things. -% \functionparens affects the group it is contained in. -\def\activeparens{% -\catcode`\(=\active \catcode`\)=\active \catcode`\&=\active -\catcode`\[=\active \catcode`\]=\active} - -% Make control sequences which act like normal parenthesis chars. -\let\lparen = ( \let\rparen = ) - -{\activeparens % Now, smart parens don't turn on until &foo (see \amprm) - -% Be sure that we always have a definition for `(', etc. For example, -% if the fn name has parens in it, \boldbrax will not be in effect yet, -% so TeX would otherwise complain about undefined control sequence. -\global\let(=\lparen \global\let)=\rparen -\global\let[=\lbrack \global\let]=\rbrack - -\gdef\functionparens{\boldbrax\let&=\amprm\parencount=0 } -\gdef\boldbrax{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb} -% This is used to turn on special parens -% but make & act ordinary (given that it's active). -\gdef\boldbraxnoamp{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb\let&=\ampnr} - -% Definitions of (, ) and & used in args for functions. -% This is the definition of ( outside of all parentheses. -\gdef\oprm#1 {{\rm\char`\(}#1 \bf \let(=\opnested % -\global\advance\parencount by 1 } -% -% This is the definition of ( when already inside a level of parens. -\gdef\opnested{\char`\(\global\advance\parencount by 1 } -% -\gdef\clrm{% Print a paren in roman if it is taking us back to depth of 0. 
-% also in that case restore the outer-level definition of (. -\ifnum \parencount=1 {\rm \char `\)}\sl \let(=\oprm \else \char `\) \fi -\global\advance \parencount by -1 } -% If we encounter &foo, then turn on ()-hacking afterwards -\gdef\amprm#1 {{\rm\}\let(=\oprm \let)=\clrm\ } -% -\gdef\normalparens{\boldbrax\let&=\ampnr} -} % End of definition inside \activeparens -%% These parens (in \boldbrax) actually are a little bolder than the -%% contained text. This is especially needed for [ and ] -\def\opnr{{\sf\char`\(}} \def\clnr{{\sf\char`\)}} \def\ampnr{\&} -\def\lbrb{{\bf\char`\[}} \def\rbrb{{\bf\char`\]}} - -% First, defname, which formats the header line itself. -% #1 should be the function name. -% #2 should be the type of definition, such as "Function". - -\def\defname #1#2{% -% Get the values of \leftskip and \rightskip as they were -% outside the @def... -\dimen2=\leftskip -\advance\dimen2 by -\defbodyindent -\dimen3=\rightskip -\advance\dimen3 by -\defbodyindent -\noindent % -\setbox0=\hbox{\hskip \deflastargmargin{\rm #2}\hskip \deftypemargin}% -\dimen0=\hsize \advance \dimen0 by -\wd0 % compute size for first line -\dimen1=\hsize \advance \dimen1 by -\defargsindent %size for continuations -\parshape 2 0in \dimen0 \defargsindent \dimen1 % -% Now output arg 2 ("Function" or some such) -% ending at \deftypemargin from the right margin, -% but stuck inside a box of width 0 so it does not interfere with linebreaking -{% Adjust \hsize to exclude the ambient margins, -% so that \rightline will obey them. -\advance \hsize by -\dimen2 \advance \hsize by -\dimen3 -\rlap{\rightline{{\rm #2}\hskip \deftypemargin}}}% -% Make all lines underfull and no complaints: -\tolerance=10000 \hbadness=10000 -\advance\leftskip by -\defbodyindent -\exdentamount=\defbodyindent -{\df #1}\enskip % Generate function name -} - -% Actually process the body of a definition -% #1 should be the terminating control sequence, such as \Edefun. 
-% #2 should be the "another name" control sequence, such as \defunx. -% #3 should be the control sequence that actually processes the header, -% such as \defunheader. - -\def\defparsebody #1#2#3{\begingroup\inENV% Environment for definitionbody -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2{\begingroup\obeylines\activeparens\spacesplit#3}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup % -\catcode 61=\active % 61 is `=' -\obeylines\activeparens\spacesplit#3} - -\def\defmethparsebody #1#2#3#4 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 {\begingroup\obeylines\activeparens\spacesplit{#3{##1}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\activeparens\spacesplit{#3{#4}}} - -\def\defopparsebody #1#2#3#4#5 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 ##2 {\def#4{##1}% -\begingroup\obeylines\activeparens\spacesplit{#3{##2}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\activeparens\spacesplit{#3{#5}}} - -% These parsing functions are similar to the preceding ones -% except that they do not make parens into active characters. -% These are used for "variables" since they have no arguments. - -\def\defvarparsebody #1#2#3{\begingroup\inENV% Environment for definitionbody -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. 
-\def#1{\endgraf\endgroup\medbreak}% -\def#2{\begingroup\obeylines\spacesplit#3}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup % -\catcode 61=\active % -\obeylines\spacesplit#3} - -% This is used for \def{tp,vr}parsebody. It could probably be used for -% some of the others, too, with some judicious conditionals. -% -\def\parsebodycommon#1#2#3{% - \begingroup\inENV % - \medbreak % - % Define the end token that this defining construct specifies - % so that it will exit this group. - \def#1{\endgraf\endgroup\medbreak}% - \def#2##1 {\begingroup\obeylines\spacesplit{#3{##1}}}% - \parindent=0in - \advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent - \exdentamount=\defbodyindent - \begingroup\obeylines -} - -\def\defvrparsebody#1#2#3#4 {% - \parsebodycommon{#1}{#2}{#3}% - \spacesplit{#3{#4}}% -} - -% This loses on `@deftp {Data Type} {struct termios}' -- it thinks the -% type is just `struct', because we lose the braces in `{struct -% termios}' when \spacesplit reads its undelimited argument. Sigh. -% \let\deftpparsebody=\defvrparsebody -% -% So, to get around this, we put \empty in with the type name. That -% way, TeX won't find exactly `{...}' as an undelimited argument, and -% won't strip off the braces. -% -\def\deftpparsebody #1#2#3#4 {% - \parsebodycommon{#1}{#2}{#3}% - \spacesplit{\parsetpheaderline{#3{#4}}}\empty -} - -% Fine, but then we have to eventually remove the \empty *and* the -% braces (if any). That's what this does, putting the result in \tptemp. -% -\def\removeemptybraces\empty#1\relax{\def\tptemp{#1}}% - -% After \spacesplit has done its work, this is called -- #1 is the final -% thing to call, #2 the type name (which starts with \empty), and #3 -% (which might be empty) the arguments. 
-% -\def\parsetpheaderline#1#2#3{% - \removeemptybraces#2\relax - #1{\tptemp}{#3}% -}% - -\def\defopvarparsebody #1#2#3#4#5 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 ##2 {\def#4{##1}% -\begingroup\obeylines\spacesplit{#3{##2}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\spacesplit{#3{#5}}} - -% Split up #2 at the first space token. -% call #1 with two arguments: -% the first is all of #2 before the space token, -% the second is all of #2 after that space token. -% If #2 contains no space token, all of it is passed as the first arg -% and the second is passed as empty. - -{\obeylines -\gdef\spacesplit#1#2^^M{\endgroup\spacesplitfoo{#1}#2 \relax\spacesplitfoo}% -\long\gdef\spacesplitfoo#1#2 #3#4\spacesplitfoo{% -\ifx\relax #3% -#1{#2}{}\else #1{#2}{#3#4}\fi}} - -% So much for the things common to all kinds of definitions. - -% Define @defun. - -% First, define the processing that is wanted for arguments of \defun -% Use this to expand the args and terminate the paragraph they make up - -\def\defunargs #1{\functionparens \sl -% Expand, preventing hyphenation at `-' chars. -% Note that groups don't affect changes in \hyphenchar. -\hyphenchar\tensl=0 -#1% -\hyphenchar\tensl=45 -\ifnum\parencount=0 \else \errmessage{unbalanced parens in @def arguments}\fi% -\interlinepenalty=10000 -\advance\rightskip by 0pt plus 1fil -\endgraf\penalty 10000\vskip -\parskip\penalty 10000% -} - -\def\deftypefunargs #1{% -% Expand, preventing hyphenation at `-' chars. -% Note that groups don't affect changes in \hyphenchar. -% Use \boldbraxnoamp, not \functionparens, so that & is not special. 
-\boldbraxnoamp -\tclose{#1}% avoid \code because of side effects on active chars -\interlinepenalty=10000 -\advance\rightskip by 0pt plus 1fil -\endgraf\penalty 10000\vskip -\parskip\penalty 10000% -} - -% Do complete processing of one @defun or @defunx line already parsed. - -% @deffn Command forward-char nchars - -\def\deffn{\defmethparsebody\Edeffn\deffnx\deffnheader} - -\def\deffnheader #1#2#3{\doind {fn}{\code{#2}}% -\begingroup\defname {#2}{#1}\defunargs{#3}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defun == @deffn Function - -\def\defun{\defparsebody\Edefun\defunx\defunheader} - -\def\defunheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Function}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @deftypefun int foobar (int @var{foo}, float @var{bar}) - -\def\deftypefun{\defparsebody\Edeftypefun\deftypefunx\deftypefunheader} - -% #1 is the data type. #2 is the name and args. -\def\deftypefunheader #1#2{\deftypefunheaderx{#1}#2 \relax} -% #1 is the data type, #2 the name, #3 the args. -\def\deftypefunheaderx #1#2 #3\relax{% -\doind {fn}{\code{#2}}% Make entry in function index -\begingroup\defname {\defheaderxcond#1\relax$$$#2}{Function}% -\deftypefunargs {#3}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @deftypefn {Library Function} int foobar (int @var{foo}, float @var{bar}) - -\def\deftypefn{\defmethparsebody\Edeftypefn\deftypefnx\deftypefnheader} - -% \defheaderxcond#1\relax$$$ -% puts #1 in @code, followed by a space, but does nothing if #1 is null. -\def\defheaderxcond#1#2$$${\ifx#1\relax\else\code{#1#2} \fi} - -% #1 is the classification. #2 is the data type. #3 is the name and args. -\def\deftypefnheader #1#2#3{\deftypefnheaderx{#1}{#2}#3 \relax} -% #1 is the classification, #2 the data type, #3 the name, #4 the args. 
-\def\deftypefnheaderx #1#2#3 #4\relax{% -\doind {fn}{\code{#3}}% Make entry in function index -\begingroup -\normalparens % notably, turn off `&' magic, which prevents -% at least some C++ text from working -\defname {\defheaderxcond#2\relax$$$#3}{#1}% -\deftypefunargs {#4}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defmac == @deffn Macro - -\def\defmac{\defparsebody\Edefmac\defmacx\defmacheader} - -\def\defmacheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Macro}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defspec == @deffn Special Form - -\def\defspec{\defparsebody\Edefspec\defspecx\defspecheader} - -\def\defspecheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Special Form}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% This definition is run if you use @defunx -% anywhere other than immediately after a @defun or @defunx. 
- -\def\deffnx #1 {\errmessage{@deffnx in invalid context}} -\def\defunx #1 {\errmessage{@defunx in invalid context}} -\def\defmacx #1 {\errmessage{@defmacx in invalid context}} -\def\defspecx #1 {\errmessage{@defspecx in invalid context}} -\def\deftypefnx #1 {\errmessage{@deftypefnx in invalid context}} -\def\deftypeunx #1 {\errmessage{@deftypeunx in invalid context}} - -% @defmethod, and so on - -% @defop {Funny Method} foo-class frobnicate argument - -\def\defop #1 {\def\defoptype{#1}% -\defopparsebody\Edefop\defopx\defopheader\defoptype} - -\def\defopheader #1#2#3{% -\dosubind {fn}{\code{#2}}{on #1}% Make entry in function index -\begingroup\defname {#2}{\defoptype{} on #1}% -\defunargs {#3}\endgroup % -} - -% @defmethod == @defop Method - -\def\defmethod{\defmethparsebody\Edefmethod\defmethodx\defmethodheader} - -\def\defmethodheader #1#2#3{% -\dosubind {fn}{\code{#2}}{on #1}% entry in function index -\begingroup\defname {#2}{Method on #1}% -\defunargs {#3}\endgroup % -} - -% @defcv {Class Option} foo-class foo-flag - -\def\defcv #1 {\def\defcvtype{#1}% -\defopvarparsebody\Edefcv\defcvx\defcvarheader\defcvtype} - -\def\defcvarheader #1#2#3{% -\dosubind {vr}{\code{#2}}{of #1}% Make entry in var index -\begingroup\defname {#2}{\defcvtype{} of #1}% -\defvarargs {#3}\endgroup % -} - -% @defivar == @defcv {Instance Variable} - -\def\defivar{\defvrparsebody\Edefivar\defivarx\defivarheader} - -\def\defivarheader #1#2#3{% -\dosubind {vr}{\code{#2}}{of #1}% Make entry in var index -\begingroup\defname {#2}{Instance Variable of #1}% -\defvarargs {#3}\endgroup % -} - -% These definitions are run if you use @defmethodx, etc., -% anywhere other than immediately after a @defmethod, etc. 
- -\def\defopx #1 {\errmessage{@defopx in invalid context}} -\def\defmethodx #1 {\errmessage{@defmethodx in invalid context}} -\def\defcvx #1 {\errmessage{@defcvx in invalid context}} -\def\defivarx #1 {\errmessage{@defivarx in invalid context}} - -% Now @defvar - -% First, define the processing that is wanted for arguments of @defvar. -% This is actually simple: just print them in roman. -% This must expand the args and terminate the paragraph they make up -\def\defvarargs #1{\normalparens #1% -\interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000} - -% @defvr Counter foo-count - -\def\defvr{\defvrparsebody\Edefvr\defvrx\defvrheader} - -\def\defvrheader #1#2#3{\doind {vr}{\code{#2}}% -\begingroup\defname {#2}{#1}\defvarargs{#3}\endgroup} - -% @defvar == @defvr Variable - -\def\defvar{\defvarparsebody\Edefvar\defvarx\defvarheader} - -\def\defvarheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{Variable}% -\defvarargs {#2}\endgroup % -} - -% @defopt == @defvr {User Option} - -\def\defopt{\defvarparsebody\Edefopt\defoptx\defoptheader} - -\def\defoptheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{User Option}% -\defvarargs {#2}\endgroup % -} - -% @deftypevar int foobar - -\def\deftypevar{\defvarparsebody\Edeftypevar\deftypevarx\deftypevarheader} - -% #1 is the data type. #2 is the name. 
-\def\deftypevarheader #1#2{% -\doind {vr}{\code{#2}}% Make entry in variables index -\begingroup\defname {\defheaderxcond#1\relax$$$#2}{Variable}% -\interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000 -\endgroup} - -% @deftypevr {Global Flag} int enable - -\def\deftypevr{\defvrparsebody\Edeftypevr\deftypevrx\deftypevrheader} - -\def\deftypevrheader #1#2#3{\doind {vr}{\code{#3}}% -\begingroup\defname {\defheaderxcond#2\relax$$$#3}{#1} -\interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000 -\endgroup} - -% This definition is run if you use @defvarx -% anywhere other than immediately after a @defvar or @defvarx. - -\def\defvrx #1 {\errmessage{@defvrx in invalid context}} -\def\defvarx #1 {\errmessage{@defvarx in invalid context}} -\def\defoptx #1 {\errmessage{@defoptx in invalid context}} -\def\deftypevarx #1 {\errmessage{@deftypevarx in invalid context}} -\def\deftypevrx #1 {\errmessage{@deftypevrx in invalid context}} - -% Now define @deftp -% Args are printed in bold, a slight difference from @defvar. - -\def\deftpargs #1{\bf \defvarargs{#1}} - -% @deftp Class window height width ... - -\def\deftp{\deftpparsebody\Edeftp\deftpx\deftpheader} - -\def\deftpheader #1#2#3{\doind {tp}{\code{#2}}% -\begingroup\defname {#2}{#1}\deftpargs{#3}\endgroup} - -% This definition is run if you use @deftpx, etc -% anywhere other than immediately after a @deftp, etc. - -\def\deftpx #1 {\errmessage{@deftpx in invalid context}} - -\message{cross reference,} -% Define cross-reference macros -\newwrite \auxfile - -\newif\ifhavexrefs % True if xref values are known. -\newif\ifwarnedxrefs % True if we warned once that they aren't known. - -% \setref{foo} defines a cross-reference point named foo. 
- -\def\setref#1{% -\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Ysectionnumberandtype}} - -\def\unnumbsetref#1{% -\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Ynothing}} - -\def\appendixsetref#1{% -\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Yappendixletterandtype}} - -% \xref, \pxref, and \ref generate cross-references to specified points. -% For \xrefX, #1 is the node name, #2 the name of the Info -% cross-reference, #3 the printed node name, #4 the name of the Info -% file, #5 the name of the printed manual. All but the node name can be -% omitted. -% -\def\pxref#1{\putwordsee{} \xrefX[#1,,,,,,,]} -\def\xref#1{\putwordSee{} \xrefX[#1,,,,,,,]} -\def\ref#1{\xrefX[#1,,,,,,,]} -\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup - \def\printedmanual{\ignorespaces #5}% - \def\printednodename{\ignorespaces #3}% - \setbox1=\hbox{\printedmanual}% - \setbox0=\hbox{\printednodename}% - \ifdim \wd0 = 0pt - % No printed node name was explicitly given. - \ifx\SETxref-automatic-section-title\relax % - % Use the actual chapter/section title appear inside - % the square brackets. Use the real section title if we have it. - \ifdim \wd1>0pt% - % It is in another manual, so we don't have it. - \def\printednodename{\ignorespaces #1}% - \else - \ifhavexrefs - % We know the real title if we have the xref values. - \def\printednodename{\refx{#1-title}}% - \else - % Otherwise just copy the Info node name. - \def\printednodename{\ignorespaces #1}% - \fi% - \fi - \def\printednodename{#1-title}% - \else - % Use the node name inside the square brackets. - \def\printednodename{\ignorespaces #1}% - \fi - \fi - % - % If we use \unhbox0 and \unhbox1 to print the node names, TeX does not - % insert empty discretionaries after hyphens, which means that it will - % not find a line break at a hyphen in a node names. 
Since some manuals - % are best written with fairly long node names, containing hyphens, this - % is a loss. Therefore, we give the text of the node name again, so it - % is as if TeX is seeing it for the first time. - \ifdim \wd1 > 0pt - \putwordsection{} ``\printednodename'' in \cite{\printedmanual}% - \else - % _ (for example) has to be the character _ for the purposes of the - % control sequence corresponding to the node, but it has to expand - % into the usual \leavevmode...\vrule stuff for purposes of - % printing. So we \turnoffactive for the \refx-snt, back on for the - % printing, back off for the \refx-pg. - {\turnoffactive \refx{#1-snt}{}}% - \space [\printednodename],\space - \turnoffactive \putwordpage\tie\refx{#1-pg}{}% - \fi -\endgroup} - -% \dosetq is the interface for calls from other macros - -% Use \turnoffactive so that punctuation chars such as underscore -% work in node names. -\def\dosetq #1#2{{\let\folio=0 \turnoffactive \auxhat% -\edef\next{\write\auxfile{\internalsetq {#1}{#2}}}% -\next}} - -% \internalsetq {foo}{page} expands into -% CHARACTERS 'xrdef {foo}{...expansion of \Ypage...} -% When the aux file is read, ' is the escape character - -\def\internalsetq #1#2{'xrdef {#1}{\csname #2\endcsname}} - -% Things to be expanded by \internalsetq - -\def\Ypagenumber{\folio} - -\def\Ytitle{\thissection} - -\def\Ynothing{} - -\def\Ysectionnumberandtype{% -\ifnum\secno=0 \putwordChapter\xreftie\the\chapno % -\else \ifnum \subsecno=0 \putwordSection\xreftie\the\chapno.\the\secno % -\else \ifnum \subsubsecno=0 % -\putwordSection\xreftie\the\chapno.\the\secno.\the\subsecno % -\else % -\putwordSection\xreftie\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno % -\fi \fi \fi } - -\def\Yappendixletterandtype{% -\ifnum\secno=0 \putwordAppendix\xreftie'char\the\appendixno{}% -\else \ifnum \subsecno=0 \putwordSection\xreftie'char\the\appendixno.\the\secno % -\else \ifnum \subsubsecno=0 % 
-\putwordSection\xreftie'char\the\appendixno.\the\secno.\the\subsecno % -\else % -\putwordSection\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno % -\fi \fi \fi } - -\gdef\xreftie{'tie} - -% Use TeX 3.0's \inputlineno to get the line number, for better error -% messages, but if we're using an old version of TeX, don't do anything. -% -\ifx\inputlineno\thisisundefined - \let\linenumber = \empty % Non-3.0. -\else - \def\linenumber{\the\inputlineno:\space} -\fi - -% Define \refx{NAME}{SUFFIX} to reference a cross-reference string named NAME. -% If its value is nonempty, SUFFIX is output afterward. - -\def\refx#1#2{% - \expandafter\ifx\csname X#1\endcsname\relax - % If not defined, say something at least. - $\langle$un\-de\-fined$\rangle$% - \ifhavexrefs - \message{\linenumber Undefined cross reference `#1'.}% - \else - \ifwarnedxrefs\else - \global\warnedxrefstrue - \message{Cross reference values unknown; you must run TeX again.}% - \fi - \fi - \else - % It's defined, so just use it. - \csname X#1\endcsname - \fi - #2% Output the suffix in any case. -} - -% Read the last existing aux file, if any. No error if none exists. - -% This is the macro invoked by entries in the aux file. 
-\def\xrdef #1#2{ -{\catcode`\'=\other\expandafter \gdef \csname X#1\endcsname {#2}}} - -\def\readauxfile{% -\begingroup -\catcode `\^^@=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\^^C=\other -\catcode `\^^D=\other -\catcode `\^^E=\other -\catcode `\^^F=\other -\catcode `\^^G=\other -\catcode `\^^H=\other -\catcode `\ =\other -\catcode `\^^L=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode `\=\other -\catcode 26=\other -\catcode `\^^[=\other -\catcode `\^^\=\other -\catcode `\^^]=\other -\catcode `\^^^=\other -\catcode `\^^_=\other -\catcode `\@=\other -\catcode `\^=\other -\catcode `\~=\other -\catcode `\[=\other -\catcode `\]=\other -\catcode`\"=\other -\catcode`\_=\other -\catcode`\|=\other -\catcode`\<=\other -\catcode`\>=\other -\catcode `\$=\other -\catcode `\#=\other -\catcode `\&=\other -% `\+ does not work, so use 43. -\catcode 43=\other -% Make the characters 128-255 be printing characters -{% - \count 1=128 - \def\loop{% - \catcode\count 1=\other - \advance\count 1 by 1 - \ifnum \count 1<256 \loop \fi - }% -}% -% the aux file uses ' as the escape. -% Turn off \ as an escape so we do not lose on -% entries which were dumped with control sequences in their names. -% For example, 'xrdef {$\leq $-fun}{page ...} made by @defun ^^ -% Reference to such entries still does not work the way one would wish, -% but at least they do not bomb out when the aux file is read in. -\catcode `\{=1 \catcode `\}=2 -\catcode `\%=\other -\catcode `\'=0 -\catcode`\^=7 % to make ^^e4 etc usable in xref tags -\catcode `\\=\other -\openin 1 \jobname.aux -\ifeof 1 \else \closein 1 \input \jobname.aux \global\havexrefstrue -\global\warnedobstrue -\fi -% Open the new aux file. Tex will close it automatically at exit. -\openout \auxfile=\jobname.aux -\endgroup} - - -% Footnotes. 
- -\newcount \footnoteno - -% The trailing space in the following definition for supereject is -% vital for proper filling; pages come out unaligned when you do a -% pagealignmacro call if that space before the closing brace is -% removed. -\def\supereject{\par\penalty -20000\footnoteno =0 } - -% @footnotestyle is meaningful for info output only.. -\let\footnotestyle=\comment - -\let\ptexfootnote=\footnote - -{\catcode `\@=11 -% -% Auto-number footnotes. Otherwise like plain. -\gdef\footnote{% - \global\advance\footnoteno by \@ne - \edef\thisfootno{$^{\the\footnoteno}$}% - % - % In case the footnote comes at the end of a sentence, preserve the - % extra spacing after we do the footnote number. - \let\@sf\empty - \ifhmode\edef\@sf{\spacefactor\the\spacefactor}\/\fi - % - % Remove inadvertent blank space before typesetting the footnote number. - \unskip - \thisfootno\@sf - \footnotezzz -}% - -% Don't bother with the trickery in plain.tex to not require the -% footnote text as a parameter. Our footnotes don't need to be so general. -% -\long\gdef\footnotezzz#1{\insert\footins{% - % We want to typeset this text as a normal paragraph, even if the - % footnote reference occurs in (for example) a display environment. - % So reset some parameters. - \interlinepenalty\interfootnotelinepenalty - \splittopskip\ht\strutbox % top baseline for broken footnotes - \splitmaxdepth\dp\strutbox - \floatingpenalty\@MM - \leftskip\z@skip - \rightskip\z@skip - \spaceskip\z@skip - \xspaceskip\z@skip - \parindent\defaultparindent - % - % Hang the footnote text off the number. - \hang - \textindent{\thisfootno}% - % - % Don't crash into the line above the footnote text. Since this - % expands into a box, it must come within the paragraph, lest it - % provide a place where TeX can split the footnote. - \footstrut - #1\strut}% -} - -}%end \catcode `\@=11 - -% Set the baselineskip to #1, and the lineskip and strut size -% correspondingly. 
There is no deep meaning behind these magic numbers -% used as factors; they just match (closely enough) what Knuth defined. -% -\def\lineskipfactor{.08333} -\def\strutheightpercent{.70833} -\def\strutdepthpercent {.29167} -% -\def\setleading#1{% - \normalbaselineskip = #1\relax - \normallineskip = \lineskipfactor\normalbaselineskip - \normalbaselines - \setbox\strutbox =\hbox{% - \vrule width0pt height\strutheightpercent\baselineskip - depth \strutdepthpercent \baselineskip - }% -} - -% @| inserts a changebar to the left of the current line. It should -% surround any changed text. This approach does *not* work if the -% change spans more than two lines of output. To handle that, we would -% have adopt a much more difficult approach (putting marks into the main -% vertical list for the beginning and end of each change). -% -\def\|{% - % \vadjust can only be used in horizontal mode. - \leavevmode - % - % Append this vertical mode material after the current line in the output. - \vadjust{% - % We want to insert a rule with the height and depth of the current - % leading; that is exactly what \strutbox is supposed to record. - \vskip-\baselineskip - % - % \vadjust-items are inserted at the left edge of the type. So - % the \llap here moves out into the left-hand margin. - \llap{% - % - % For a thicker or thinner bar, change the `1pt'. - \vrule height\baselineskip width1pt - % - % This is the space between the bar and the text. - \hskip 12pt - }% - }% -} - -% For a final copy, take out the rectangles -% that mark overfull boxes (in case you have decided -% that the text looks ok even though it passes the margin). -% -\def\finalout{\overfullrule=0pt} - - -% End of control word definitions. - -\message{and turning on texinfo input format.} - -\def\openindices{% - \newindex{cp}% - \newcodeindex{fn}% - \newcodeindex{vr}% - \newcodeindex{tp}% - \newcodeindex{ky}% - \newcodeindex{pg}% -} - -% Set some numeric style parameters, for 8.5 x 11 format. 
- -%\hsize = 6.5in -\newdimen\defaultparindent \defaultparindent = 15pt -\parindent = \defaultparindent -\parskip 18pt plus 1pt -\setleading{15pt} -\advance\topskip by 1.2cm - -% Prevent underfull vbox error messages. -\vbadness=10000 - -% Following George Bush, just get rid of widows and orphans. -\widowpenalty=10000 -\clubpenalty=10000 - -% Use TeX 3.0's \emergencystretch to help line breaking, but if we're -% using an old version of TeX, don't do anything. We want the amount of -% stretch added to depend on the line length, hence the dependence on -% \hsize. This makes it come to about 9pt for the 8.5x11 format. -% -\ifx\emergencystretch\thisisundefined - % Allow us to assign to \emergencystretch anyway. - \def\emergencystretch{\dimen0}% -\else - \emergencystretch = \hsize - \divide\emergencystretch by 45 -\fi - -% Use @smallbook to reset parameters for 7x9.5 format (or else 7x9.25) -\def\smallbook{ - -% These values for secheadingskip and subsecheadingskip are -% experiments. RJC 7 Aug 1992 -\global\secheadingskip = 17pt plus 6pt minus 3pt -\global\subsecheadingskip = 14pt plus 6pt minus 3pt - -\global\lispnarrowing = 0.3in -\setleading{12pt} -\advance\topskip by -1cm -\global\parskip 3pt plus 1pt -\global\hsize = 5in -\global\vsize=7.5in -\global\tolerance=700 -\global\hfuzz=1pt -\global\contentsrightmargin=0pt -\global\deftypemargin=0pt -\global\defbodyindent=.5cm - -\global\pagewidth=\hsize -\global\pageheight=\vsize - -\global\let\smalllisp=\smalllispx -\global\let\smallexample=\smalllispx -\global\def\Esmallexample{\Esmalllisp} -} - -% Use @afourpaper to print on European A4 paper. 
-\def\afourpaper{ -\global\tolerance=700 -\global\hfuzz=1pt -\setleading{12pt} -\global\parskip 15pt plus 1pt - -\global\vsize= 53\baselineskip -\advance\vsize by \topskip -%\global\hsize= 5.85in % A4 wide 10pt -\global\hsize= 6.5in -\global\outerhsize=\hsize -\global\advance\outerhsize by 0.5in -\global\outervsize=\vsize -\global\advance\outervsize by 0.6in - -\global\pagewidth=\hsize -\global\pageheight=\vsize -} - -% Allow control of the text dimensions. Parameters in order: textheight; -% textwidth; \voffset; \hoffset (!); binding offset. All require a dimension; -% header is additional; added length extends the bottom of the page. - -\def\changepagesizes#1#2#3#4#5{ - \global\vsize= #1 - \advance\vsize by \topskip - \global\voffset= #3 - \global\hsize= #2 - \global\outerhsize=\hsize - \global\advance\outerhsize by 0.5in - \global\outervsize=\vsize - \global\advance\outervsize by 0.6in - \global\pagewidth=\hsize - \global\pageheight=\vsize - \global\normaloffset= #4 - \global\bindingoffset= #5} - -% This layout is compatible with Latex on A4 paper. - -\def\afourlatex{\changepagesizes{22cm}{15cm}{7mm}{4.6mm}{5mm}} - -% Use @afourwide to print on European A4 paper in wide format. -\def\afourwide{\afourpaper -\changepagesizes{9.5in}{6.5in}{\hoffset}{\normaloffset}{\bindingoffset}} - -% Define macros to output various characters with catcode for normal text. -\catcode`\"=\other -\catcode`\~=\other -\catcode`\^=\other -\catcode`\_=\other -\catcode`\|=\other -\catcode`\<=\other -\catcode`\>=\other -\catcode`\+=\other -\def\normaldoublequote{"} -\def\normaltilde{~} -\def\normalcaret{^} -\def\normalunderscore{_} -\def\normalverticalbar{|} -\def\normalless{<} -\def\normalgreater{>} -\def\normalplus{+} - -% This macro is used to make a character print one way in ttfont -% where it can probably just be output, and another way in other fonts, -% where something hairier probably needs to be done. 
-% -% #1 is what to print if we are indeed using \tt; #2 is what to print -% otherwise. Since all the Computer Modern typewriter fonts have zero -% interword stretch (and shrink), and it is reasonable to expect all -% typewriter fonts to have this, we can check that font parameter. -% -\def\ifusingtt#1#2{\ifdim \fontdimen3\the\font=0pt #1\else #2\fi} - -% Turn off all special characters except @ -% (and those which the user can use as if they were ordinary). -% Most of these we simply print from the \tt font, but for some, we can -% use math or other variants that look better in normal text. - -\catcode`\"=\active -\def\activedoublequote{{\tt \char '042}} -\let"=\activedoublequote -\catcode`\~=\active -\def~{{\tt \char '176}} -\chardef\hat=`\^ -\catcode`\^=\active -\def\auxhat{\def^{'hat}} -\def^{{\tt \hat}} - -\catcode`\_=\active -\def_{\ifusingtt\normalunderscore\_} -% Subroutine for the previous macro. -\def\_{\lvvmode \kern.06em \vbox{\hrule width.3em height.1ex}} - -% \lvvmode is equivalent in function to \leavevmode. -% Using \leavevmode runs into trouble when written out to -% an index file due to the expansion of \leavevmode into ``\unhbox -% \voidb@x'' ---which looks to TeX like ``\unhbox \voidb\x'' due to our -% magic tricks with @. -\def\lvvmode{\vbox to 0pt{}} - -\catcode`\|=\active -\def|{{\tt \char '174}} -\chardef \less=`\< -\catcode`\<=\active -\def<{{\tt \less}} -\chardef \gtr=`\> -\catcode`\>=\active -\def>{{\tt \gtr}} -\catcode`\+=\active -\def+{{\tt \char 43}} -%\catcode 27=\active -%\def^^[{$\diamondsuit$} - -% Set up an active definition for =, but don't enable it most of the time. -{\catcode`\==\active -\global\def={{\tt \char 61}}} - -\catcode`+=\active -\catcode`\_=\active - -% If a .fmt file is being used, characters that might appear in a file -% name cannot be active until we have parsed the command line. -% So turn them off again, and have \everyjob (or @setfilename) turn them on. -% \otherifyactive is called near the end of this file. 
-\def\otherifyactive{\catcode`+=\other \catcode`\_=\other} - -\catcode`\@=0 - -% \rawbackslashxx output one backslash character in current font -\global\chardef\rawbackslashxx=`\\ -%{\catcode`\\=\other -%@gdef@rawbackslashxx{\}} - -% \rawbackslash redefines \ as input to do \rawbackslashxx. -{\catcode`\\=\active -@gdef@rawbackslash{@let\=@rawbackslashxx }} - -% \normalbackslash outputs one backslash in fixed width font. -\def\normalbackslash{{\tt\rawbackslashxx}} - -% Say @foo, not \foo, in error messages. -\escapechar=`\@ - -% \catcode 17=0 % Define control-q -\catcode`\\=\active - -% Used sometimes to turn off (effectively) the active characters -% even after parsing them. -@def@turnoffactive{@let"=@normaldoublequote -@let\=@realbackslash -@let~=@normaltilde -@let^=@normalcaret -@let_=@normalunderscore -@let|=@normalverticalbar -@let<=@normalless -@let>=@normalgreater -@let+=@normalplus} - -@def@normalturnoffactive{@let"=@normaldoublequote -@let\=@normalbackslash -@let~=@normaltilde -@let^=@normalcaret -@let_=@normalunderscore -@let|=@normalverticalbar -@let<=@normalless -@let>=@normalgreater -@let+=@normalplus} - -% Make _ and + \other characters, temporarily. -% This is canceled by @fixbackslash. -@otherifyactive - -% If a .fmt file is being used, we don't want the `\input texinfo' to show up. -% That is what \eatinput is for; after that, the `\' should revert to printing -% a backslash. -% -@gdef@eatinput input texinfo{@fixbackslash} -@global@let\ = @eatinput - -% On the other hand, perhaps the file did not have a `\input texinfo'. Then -% the first `\{ in the file would cause an error. This macro tries to fix -% that, assuming it is called before the first `\' could plausibly occur. -% Also back turn on active characters that might appear in the input -% file name, in case not using a pre-dumped format. 
-% -@gdef@fixbackslash{@ifx\@eatinput @let\ = @normalbackslash @fi - @catcode`+=@active @catcode`@_=@active} - -%% These look ok in all fonts, so just make them not special. The @rm below -%% makes sure that the current font starts out as the newly loaded cmr10 -@catcode`@$=@other @catcode`@%=@other @catcode`@&=@other @catcode`@#=@other - -@textfonts -@rm - -@c Local variables: -@c page-delimiter: "^\\\\message" -@c End: diff --git a/token.c b/token.c deleted file mode 100644 index abcadaa..0000000 --- a/token.c +++ /dev/null @@ -1,50 +0,0 @@ -/* token.c -- misc. access functions for mkid database tokens - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include -#include "token.h" - -unsigned int -tok_flags (char const *buf) -{ - return *(unsigned char const *)&buf[strlen (buf) + 1]; -} - -#define TOK_COUNT_ADDR(buf) ((unsigned char const *)(TOK_FLAGS_ADDR (buf) + 1)) -#define TOK_HITS_ADDR(buf) ((unsigned char const *)(TOK_COUNT_ADDR (buf) + 2)) - -unsigned short -tok_count (char const *buf) -{ - unsigned char const *flags = (unsigned char const *)&buf[strlen (buf) + 1]; - unsigned char const *addr = flags + 1; - unsigned short count = *addr; - if (*flags & TOK_SHORT_COUNT) - count += (*++addr << 8); - return count; -} - -unsigned char const * -tok_hits_addr (char const *buf) -{ - unsigned char const *flags = (unsigned char const *)&buf[strlen (buf) + 1]; - unsigned char const *addr = flags + 2; - if (*flags & TOK_SHORT_COUNT) - addr++; - return addr; -} diff --git a/token.h b/token.h deleted file mode 100644 index f364de6..0000000 --- a/token.h +++ /dev/null @@ -1,40 +0,0 @@ -/* token.h -- defs for interface to token.c - Copyright (C) 1986, 1995 Greg McGary - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _token_h_ -#define _token_h_ - -/* token flags (struct token is in mkid.c) */ -#define TOK_VECTOR 0x01 /* 1 = hits are stored as a vector - 0 = hits are stored as a 8-way tree of bits - mkid chooses whichever is more compact. 
- vector is more compact for tokens with few hits */ -#define TOK_NUMBER 0x02 /* occurs as a number */ -#define TOK_NAME 0x04 /* occurs as a name */ -#define TOK_STRING 0x08 /* occurs in a string */ -#define TOK_LITERAL 0x10 /* occurs as a literal */ -#define TOK_COMMENT 0x20 /* occurs in a comment */ -#define TOK_UNUSED_1 0x40 -#define TOK_SHORT_COUNT 0x80 /* count is two bytes */ - -#define tok_string(buf) (buf) -unsigned int tok_flags __P((char const *buf)); -unsigned short tok_count __P((char const *buf)); -unsigned char const *tok_hits_addr __P((char const *buf)); - -#endif /* not _token_h_ */ diff --git a/version.texi b/version.texi deleted file mode 100644 index 65e414e..0000000 --- a/version.texi +++ /dev/null @@ -1 +0,0 @@ -@set VERSION 3.0.9 -- cgit v1.2.3