diff options
author | Jim Meyering <jim@meyering.net> | 1999-01-28 14:36:12 +0000 |
---|---|---|
committer | Jim Meyering <jim@meyering.net> | 1999-01-28 14:36:12 +0000 |
commit | 9b8fb4cc0b90ad12000e68141e7e816c3f727dd9 (patch) | |
tree | c5441e16e305c1359f20c07cadc19bf52722e03b | |
parent | 8f97ee5a25ba2ed94e701d3015ea2d3426efe033 (diff) | |
download | idutils-9b8fb4cc0b90ad12000e68141e7e816c3f727dd9.tar.gz idutils-9b8fb4cc0b90ad12000e68141e7e816c3f727dd9.tar.bz2 idutils-9b8fb4cc0b90ad12000e68141e7e816c3f727dd9.zip |
.
-rw-r--r-- | lib/scanners.c | 1201 | ||||
-rw-r--r-- | lib/scanners.h | 67 |
2 files changed, 0 insertions, 1268 deletions
diff --git a/lib/scanners.c b/lib/scanners.c deleted file mode 100644 index 4f50245..0000000 --- a/lib/scanners.c +++ /dev/null @@ -1,1201 +0,0 @@ -/* scanners.c -- file & directory name manipulations - Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <getopt.h> - -#include <config.h> -#include "system.h" -#include "error.h" -#include "strxtra.h" -#include "token.h" -#include "alloc.h" -#include "scanners.h" - -#define DEBUG(args) /* printf args */ - -struct obstack lang_args_obstack; -struct lang_args *lang_args_default = 0; -struct lang_args *lang_args_list = 0; -struct obstack tokens_obstack; - -extern void usage __P((void)); -extern char *program_name; - -/****************************************************************************/ - -struct lang_args **parse_language_map_file __P((char const *file_name, struct lang_args **next_ptr)); -char *read_language_map_file __P((char const *file_name)); - -static struct token *get_token_c __P((FILE *in_FILE, void const *args, int *flags)); -static void *parse_args_c __P((char **argv, int argc)); -static void help_me_c __P((void)); - -static struct token *get_token_asm __P((FILE *in_FILE, void const *args, int *flags)); -static void *parse_args_asm __P((char **argv, int argc)); -static void help_me_asm __P((void)); - -static struct token *get_token_text __P((FILE *in_FILE, void const *args, int *flags)); -static void *parse_args_text __P((char **argv, int argc)); -static void help_me_text __P((void)); - -struct language languages_0[] = -{ - { "C", parse_args_c, get_token_c, help_me_c }, - { "asm", parse_args_asm, get_token_asm, help_me_asm }, - { "text", parse_args_text, get_token_text, help_me_text }, -}; -struct language const *languages_N = &languages_0[cardinalityof (languages_0)]; - -void -language_help_me (void) -{ - struct language *lang; - for (lang = languages_0; lang < languages_N; lang++) - { - putchar ('\n'); - (*lang->lg_help_me) (); - } -} - -void -language_save_arg (char *arg) -{ - static char horizontal_space[] = " \t"; - char *lang_name = strtok (arg, ":"); - struct language *lang = get_language (lang_name); - - if (lang == 0) - { - fprintf (stderr, _("unrecognized language: `%s'\n"), lang_name); - usage (); - } - if (lang->lg_argc == 0) - lang->lg_argv[lang->lg_argc++] = program_name; - lang->lg_argv[lang->lg_argc++] = strtok (0, horizontal_space); -} - -void -language_getopt () -{ - struct language *lang; - - for (lang = languages_0; lang < languages_N; lang++) - if (lang->lg_argc) - lang->lg_parse_args (lang->lg_argv, lang->lg_argc); -} - -struct language * -get_language (char const *lang_name) -{ - struct language *lang; - - for (lang = languages_0; lang < languages_N; lang++) - if (strequ (lang_name, lang->lg_name)) - { - DEBUG (("lang=%s", lang_name)); - return lang; - } - DEBUG (("!lang=%s", lang_name)); - return 0; -} - -/****************************************************************************/ - -int lang_args_index = 0; - -void -parse_language_map (char const *file_name) -{ - if (obstack_init (&lang_args_obstack) == 0) - error (1, 0, _("can't allocate language args obstack: memory exhausted")); - if (file_name == 0) - file_name = LANGUAGE_MAP; - parse_language_map_file (file_name, &lang_args_list); -} - -struct lang_args ** -parse_language_map_file (char const *file_name, struct lang_args **next_ptr) -{ - static char white_space[] = " \t\r\n\v\f"; - static char horizontal_space[] = " \t"; - static char vertical_space[] = "\r\n\v\f"; - char *lang_map_buffer; - char *lmp; - - lmp = lang_map_buffer = read_language_map_file (file_name); - for (;;) - { - struct lang_args *new_args; - struct language const *lang; - int pattern_size; - char *lang_name; - int space; - - /* Skip leading white space and full-line comments */ - while (*lmp) - { - lmp += strspn (lmp, white_space); - if (*lmp != '#') - break; - lmp += strcspn (lmp, vertical_space); - } - if (*lmp == '\0') - break; - - pattern_size = strcspn (lmp, white_space); - if (pattern_size == 3 && strnequ (lmp, "***", 3)) - { - lmp += pattern_size; - lmp += strspn (lmp, horizontal_space); - if (isspace (*lmp)) - next_ptr = parse_language_map_file (LANGUAGE_MAP, next_ptr); - else - { - char *end = lmp + strcspn (lmp, white_space); - *end = '\0'; - next_ptr = parse_language_map_file (lmp, next_ptr); - lmp = end + 1; - } - continue; - } - - new_args = OBSTACK_ALLOC (&lang_args_obstack, struct lang_args, 1); - if (new_args == 0) - error (1, 0, _("can't allocate language args: memory exhausted")); - new_args->la_pattern = obstack_copy0 (&lang_args_obstack, lmp, pattern_size); - new_args->la_args_string = 0; - lmp += pattern_size; - lmp += strspn (lmp, horizontal_space); - if (isspace (*lmp)) - { - error (0, 0, _("language name expected following `%s' in file `%s'"), - new_args->la_pattern, file_name); - obstack_free (&lang_args_obstack, new_args); - continue; - } - lang_name = lmp; - lmp += strcspn (lmp, white_space); - space = *lmp; - *lmp++ = '\0'; - lmp += strspn (lmp, horizontal_space); - lang = new_args->la_language = get_language (lang_name); - - if (*lmp == '#') - lmp += strcspn (lmp, vertical_space); - else if (!isspace (*lmp) && (space == ' ' || space == '\t')) - { - int args_size = strcspn (lmp, vertical_space); - new_args->la_args_string = obstack_copy0 (&lang_args_obstack, lmp, args_size); - lmp += args_size; - } - new_args->la_args_digested = (lang - ? lang->lg_parse_args (&new_args->la_args_string, 0) - : 0); - if (pattern_size == 2 && strnequ (new_args->la_pattern, "**", 2)) - { - if (lang_args_default) - { - obstack_free (&lang_args_obstack, new_args); - continue; - } - lang_args_default = new_args; - DEBUG ((", <default>")); - } - else - { - new_args->la_index = lang_args_index++; - *next_ptr = new_args; - next_ptr = &new_args->la_next; - } - DEBUG ((", pat=%s\n", new_args->la_pattern)); - } - free (lang_map_buffer); - return next_ptr; -} - -char * -read_language_map_file (char const *file_name) -{ - int map_fd; - char *lang_map_buffer; - struct stat st; - int bytes; - - map_fd = open (file_name, O_RDONLY); - if (map_fd < 0) - error (1, errno, _("can't open language map file `%s'"), file_name); - if (fstat (map_fd, &st) < 0) - error (1, errno, _("can't get size of map file `%s'"), file_name); - - lang_map_buffer = MALLOC (char, st.st_size + 2); - if (lang_map_buffer == 0) - error (1, 0, _("can't allocate language args: memory exhausted")); - lang_map_buffer[st.st_size] = '\n'; - lang_map_buffer[st.st_size+1] = '\0'; - - bytes = read (map_fd, lang_map_buffer, st.st_size); - if (bytes < 0) - error (1, errno, _("can't read language map file `%s'"), file_name); - /* FIXME: handle interrupted & partial reads */ - if (bytes != st.st_size) - error (1, errno, _("can't read entire language map file `%s'"), file_name); - - close (map_fd); - return lang_map_buffer; -} - -/****************************************************************************/ - -void -tokenize_args_string (char *args_string, int *argcp, char ***argvp) -{ - static char horizontal_space[] = " \t"; - char **argv_0 = MALLOC (char *, strlen (args_string) / 2); - char **argv = argv_0; - char *arg; - - *argv++ = program_name; - arg = strtok (args_string, horizontal_space); - while (arg) - { - *argv++ = arg; - arg = strtok (0, horizontal_space); - } - *argcp = argv - argv_0; - *argvp = REALLOC (argv_0, char *, *argcp); -} - -static void -set_ushort_ctype (unsigned short *ctype, char const *chars, int type) -{ - unsigned short *rct = &ctype[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ushort_ctype (unsigned short *ctype, char const *chars, int type) -{ - unsigned short *rct = &ctype[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -set_uchar_ctype (unsigned char *ctype, char const *chars, int type) -{ - unsigned char *rct = &ctype[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_uchar_ctype (unsigned char *ctype, char const *chars, int type) -{ - unsigned char *rct = &ctype[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -/*************** C & C++ ****************************************************/ - -#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */ -#define DG 0x0002 /* decimal digit [0-9] */ -#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */ -#define C1 0x0008 /* C comment introduction char: / */ -#define C2 0x0010 /* C comment termination char: * */ -#define Q1 0x0020 /* single quote: ' */ -#define Q2 0x0040 /* double quote: " */ -#define ES 0x0080 /* escape char: \ */ -#define NL 0x0100 /* newline: \n */ -#define EF 0x0200 /* EOF */ -#define SK 0x0400 /* Make these chars valid for names within strings */ -#define VH 0x0800 /* VHIL comment introduction char: # */ -#define WS 0x1000 /* White space characters */ - -/* character class membership macros: */ - -#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */ -#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */ -#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */ -#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */ -#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */ -#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */ -#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */ - -/* The `BORING' classes should be skipped over until something - interesting comes along... */ - -#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */ -#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */ -#define ISCCBORING(c) (!((rct)[c] & (EF|NL))) /* C++ // comment fluff */ -#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */ -#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */ - -static unsigned short ctype_c[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, NL, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1, -/*050*/ 0, 0, C2, 0, 0, 0, 0, C1, -/*060*/ DG, DG, DG, DG, DG, DG, DG, DG, -/*070*/ DG, DG, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, - /* FIXME: latin-1 */ -}; - -struct args_c -{ - int strip_underscore; - unsigned short *ctype; -}; - -static struct args_c args_c = { 0, ctype_c }; - -static struct option const long_options_c[] = -{ - { "keep", required_argument, 0, 'k' }, - { "ignore", required_argument, 0, 'i' }, - { "strip-underscore", no_argument, 0, 'u' }, - { 0 } -}; - -static void -help_me_c (void) -{ - printf (_("\ -C language:\n\ - -k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\ - -i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\ - -u,--strip-underscore Strip a leading underscore from single-token strings\n\ -")); -} - -static void * -parse_args_c (char **argv, int argc) -{ - char *tmp_string = 0; - struct args_c *args; - - if (argv == 0 || *argv == 0) - return &args_c; - - if (argc) - args = &args_c; - else - { - tmp_string = strdup (*argv); - tokenize_args_string (tmp_string, &argc, &argv); - args = MALLOC (struct args_c, 1); - args->strip_underscore = 0; - args->ctype = ctype_c; - } - - optind = 0; - for (;;) - { - int optc = getopt_long (argc, argv, "k:i:u", - long_options_c, (int *) 0); - if (optc < 0) - break; - if ((optc == 'k' || optc == 'i') && args->ctype == ctype_c) - args->ctype = CLONE (ctype_c, unsigned short, cardinalityof (ctype_c)); - switch (optc) - { - case 'k': - set_ushort_ctype (args->ctype, optarg, SK); - break; - - case 'i': - clear_ushort_ctype (args->ctype, optarg, SK); - break; - - case 'u': - args->strip_underscore = 1; - break; - - default: - usage (); - } - } - if (tmp_string) - { - free (argv); - free (tmp_string); - } - return args; -} - - -/* Grab the next identifier from the C source file. This state - machine is built for speed, not elegance. */ - -static struct token * -get_token_c (FILE *in_FILE, void const *args, int *flags) -{ -#define ARGS ((struct args_c *) args) - static int new_line = 1; - unsigned short *rct = &ARGS->ctype[1]; - char id_0[BUFSIZ]; - char *id = id_0; - int c; - - obstack_blank (&tokens_obstack, offsetof (struct token, tok_name)); - -top: - c = getc (in_FILE); - if (new_line) - { - new_line = 0; - if (c != '#') - goto next; - c = getc (in_FILE); - while (ISBORING (c)) - c = getc (in_FILE); - if (!ISID1ST (c)) - goto next; - id = id_0; - *id++ = c; - while (ISIDREST (c = getc (in_FILE))) - *id++ = c; - *id = '\0'; - if (strequ (id_0, "include")) - { - while (c == ' ' || c == '\t') - c = getc (in_FILE); - if (c == '\n') - { - new_line = 1; - goto top; - } - id = id_0; - if (c == '"') - { - c = getc (in_FILE); - while (c != '\n' && c != EOF && c != '"') - { - *id++ = c; - c = getc (in_FILE); - } - *flags = TOK_STRING; - } - else if (c == '<') - { - c = getc (in_FILE); - while (c != '\n' && c != EOF && c != '>') - { - *id++ = c; - c = getc (in_FILE); - } - *flags = TOK_STRING; - } - else if (ISID1ST (c)) - { - *id++ = c; - while (ISIDREST (c = getc (in_FILE))) - *id++ = c; - *flags = TOK_NAME; - } - else - { - while (c != '\n' && c != EOF) - c = getc (in_FILE); - new_line = 1; - goto top; - } - while (c != '\n' && c != EOF) - c = getc (in_FILE); - new_line = 1; - obstack_grow0 (&tokens_obstack, id_0, id - id_0); - return obstack_finish (&tokens_obstack); - } - if (strnequ (id_0, "if", 2) - || strequ (id_0, "define") - || strequ (id_0, "elif") /* ansi C */ - || strequ (id_0, "undef")) - goto next; - while ((c != '\n') && (c != EOF)) - c = getc (in_FILE); - new_line = 1; - goto top; - } - -next: - while (ISBORING (c)) - c = getc (in_FILE); - - switch (c) - { - case '"': - id = id_0; - *id++ = c = getc (in_FILE); - for (;;) - { - while (ISQ2BORING (c)) - *id++ = c = getc (in_FILE); - if (c == '\\') - { - *id++ = c = getc (in_FILE); - continue; - } - else if (c != '"') - goto next; - break; - } - *--id = '\0'; - id = id_0; - while (ISSTRKEEP (*id)) - id++; - if (*id || id == id_0) - { - c = getc (in_FILE); - goto next; - } - *flags = TOK_STRING; - if (ARGS->strip_underscore && id_0[0] == '_' && id_0[1]) - obstack_grow0 (&tokens_obstack, id_0 + 1, id - id_0 - 1); - else - obstack_grow0 (&tokens_obstack, id_0, id - id_0); - return obstack_finish (&tokens_obstack); - - case '\'': - c = getc (in_FILE); - for (;;) - { - while (ISQ1BORING (c)) - c = getc (in_FILE); - if (c == '\\') - { - c = getc (in_FILE); - continue; - } - else if (c == '\'') - c = getc (in_FILE); - goto next; - } - - case '/': - c = getc (in_FILE); - if (c == '/') - { /* Cope with C++ comment */ - while (ISCCBORING (c)) - c = getc (in_FILE); - new_line = 1; - goto top; - } - else if (c != '*') - goto next; - c = getc (in_FILE); - for (;;) - { - while (ISCBORING (c)) - c = getc (in_FILE); - c = getc (in_FILE); - if (c == '/') - { - c = getc (in_FILE); - goto next; - } - else if (ISEOF (c)) - { - new_line = 1; - obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); - return 0; - } - } - - case '\n': - new_line = 1; - goto top; - - default: - if (ISEOF (c)) - { - new_line = 1; - obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); - return 0; - } - id = id_0; - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (in_FILE))) - *id++ = c; - } - else if (ISDIGIT (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (in_FILE))) - *id++ = c; - } - else - { - if (isprint (c)) - fprintf (stderr, _("junk: `%c'"), c); - else - fprintf (stderr, _("junk: `\\%03o'"), c); - } - ungetc (c, in_FILE); - *flags |= TOK_LITERAL; - obstack_grow0 (&tokens_obstack, id_0, id - id_0); - return obstack_finish (&tokens_obstack); - } -#undef ARGS -} - -#undef I1 -#undef DG -#undef NM -#undef C1 -#undef C2 -#undef Q1 -#undef Q2 -#undef ES -#undef NL -#undef EF -#undef SK -#undef VH -#undef WS -#undef ISDIGIT -#undef ISNUMBER -#undef ISEOF -#undef ISID1ST -#undef ISIDREST -#undef ISSTRKEEP -#undef ISSPACE -#undef ISBORING -#undef ISCBORING -#undef ISCCBORING -#undef ISQ1BORING -#undef ISQ2BORING - -/*************** Assembly ***************************************************/ - -#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ -#define NM 0x02 /* digit [0-9a-fA-FxX] */ -#define NL 0x04 /* newline: \n */ -#define CM 0x08 /* assembler comment char: usually # or | */ -#define IG 0x10 /* ignore `identifiers' with these chars in them */ -#define C1 0x20 /* C comment introduction char: / */ -#define C2 0x40 /* C comment termination char: * */ -#define EF 0x80 /* EOF */ - -/* Assembly Language character classes */ -#define ISID1ST(c) ((rct)[c] & (I1)) -#define ISIDREST(c) ((rct)[c] & (I1|NM)) -#define ISNUMBER(c) ((rct)[c] & (NM)) -#define ISEOF(c) ((rct)[c] & (EF)) -#define ISCOMMENT(c) ((rct)[c] & (CM)) -#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1))) -#define ISCBORING(c) (!((rct)[c] & (EF|NL))) -#define ISCCBORING(c) (!((rct)[c] & (EF|C2))) -#define ISIGNORE(c) ((rct)[c] & (IG)) - -static unsigned char ctype_asm[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, NL, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*050*/ 0, 0, C2, 0, 0, 0, 0, C1, -/*060*/ NM, NM, NM, NM, NM, NM, NM, NM, -/*070*/ NM, NM, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, - -}; - -struct args_asm -{ - int handle_cpp; - int strip_underscore; - unsigned char *ctype; -}; - -static struct args_asm args_asm = { 1, 0, ctype_asm }; - -static struct option const long_options_asm[] = -{ - { "comment", required_argument, 0, 'c' }, - { "keep", required_argument, 0, 'k' }, - { "ignore", required_argument, 0, 'i' }, - { "strip-underscore", no_argument, 0, 'u' }, - { "no-cpp", no_argument, 0, 'p' }, - { 0 } -}; - -static void -help_me_asm (void) -{ - printf (_("\ -Assembly language:\n\ - -c,--comment=CHARS Any of CHARS starts a comment until end-of-line\n\ - -k,--keep=CHARS Allow CHARS in tokens, and keep the result\n\ - -i,--ignore=CHARS Allow CHARS in tokens, and toss the result\n\ - -u,--strip-underscore Strip a leading underscore from tokens\n\ - -n,--no-cpp Don't handle C pre-processor directives\n\ -")); -} - -static void * -parse_args_asm (char **argv, int argc) -{ - char *tmp_string = 0; - struct args_asm *args; - - if (argv == 0 || *argv == 0) - return &args_asm; - - if (argc) - args = &args_asm; - else - { - tmp_string = strdup (*argv); - tokenize_args_string (tmp_string, &argc, &argv); - args = MALLOC (struct args_asm, 1); - args->strip_underscore = 0; - args->ctype = ctype_asm; - } - - optind = 0; - for (;;) - { - int optc = getopt_long (argc, argv, "c:k:i:un", - long_options_asm, (int *) 0); - if (optc < 0) - break; - if ((optc == 'k' || optc == 'i' || optc == 'c') - && args->ctype == ctype_asm) - args->ctype = CLONE (ctype_asm, unsigned char, cardinalityof (ctype_asm)); - switch (optc) - { - case 'c': - set_uchar_ctype (args->ctype, optarg, CM); - break; - - case 'k': - set_uchar_ctype (args->ctype, optarg, I1); - break; - - case 'i': - set_uchar_ctype (args->ctype, optarg, I1 | IG); - break; - - case 'u': - args->strip_underscore = 1; - break; - - case 'n': - args->handle_cpp = 0; - break; - - default: - usage (); - } - } - if (tmp_string) - { - free (argv); - free (tmp_string); - } - return args; -} - -/* Grab the next identifier the assembly language source file. This - state machine is built for speed, not elegance. */ - -static struct token * -get_token_asm (FILE *in_FILE, void const *args, int *flags) -{ -#define ARGS ((struct args_asm *) args) - static int new_line = 1; - unsigned char *rct = &ARGS->ctype[1]; - char id_0[BUFSIZ]; - char *id = id_0; - int c; - - obstack_blank (&tokens_obstack, offsetof (struct token, tok_name)); - -top: - c = getc (in_FILE); - if (ARGS->handle_cpp > 0 && new_line) - { - new_line = 0; - if (c != '#') - goto next; - while (ISBORING (c)) - c = getc (in_FILE); - if (!ISID1ST (c)) - goto next; - id = id_0; - *id++ = c; - while (ISIDREST (c = getc (in_FILE))) - *id++ = c; - *id = '\0'; - if (strequ (id_0, "include")) - { - while (c != '"' && c != '<') - c = getc (in_FILE); - id = id_0; - *id++ = c = getc (in_FILE); - while ((c = getc (in_FILE)) != '"' && c != '>') - *id++ = c; - *flags = TOK_STRING; - obstack_grow0 (&tokens_obstack, id_0, id - id_0); - return obstack_finish (&tokens_obstack); - } - if (strnequ (id_0, "if", 2) - || strequ (id_0, "define") - || strequ (id_0, "undef")) - goto next; - while (c != '\n') - c = getc (in_FILE); - new_line = 1; - goto top; - } - -next: - while (ISBORING (c)) - c = getc (in_FILE); - - if (ISCOMMENT (c)) - { - while (ISCBORING (c)) - c = getc (in_FILE); - new_line = 1; - } - - if (ISEOF (c)) - { - new_line = 1; - obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); - return 0; - } - - if (c == '\n') - { - new_line = 1; - goto top; - } - - if (c == '/') - { - if ((c = getc (in_FILE)) != '*') - goto next; - c = getc (in_FILE); - for (;;) - { - while (ISCCBORING (c)) - c = getc (in_FILE); - c = getc (in_FILE); - if (c == '/') - { - c = getc (in_FILE); - break; - } - else if (ISEOF (c)) - { - new_line = 1; - obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); - return 0; - } - } - goto next; - } - - id = id_0; - if (ARGS->strip_underscore && c == '_' && !ISID1ST (c = getc (in_FILE))) - { - obstack_grow0 (&tokens_obstack, "_", 1); - return obstack_finish (&tokens_obstack); - } - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (in_FILE))) - *id++ = c; - } - else if (ISNUMBER (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (in_FILE))) - *id++ = c; - } - else - { - if (isprint (c)) - fprintf (stderr, _("junk: `%c'"), c); - else - fprintf (stderr, _("junk: `\\%03o'"), c); - goto next; - } - - *id = '\0'; - for (id = id_0; *id; id++) - if (ISIGNORE (*id)) - goto next; - ungetc (c, in_FILE); - *flags |= TOK_LITERAL; - obstack_grow0 (&tokens_obstack, id_0, id - id_0); - return obstack_finish (&tokens_obstack); -#undef ARGS -} - -#undef I1 -#undef NM -#undef NL -#undef CM -#undef IG -#undef C1 -#undef C2 -#undef EF -#undef ISID1ST -#undef ISIDREST -#undef ISNUMBER -#undef ISEOF -#undef ISCOMMENT -#undef ISBORING -#undef ISCBORING -#undef ISCCBORING -#undef ISIGNORE - -/*************** Text *******************************************************/ - -#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ -#define NM 0x02 /* digit [0-9a-fA-FxX] */ -#define SQ 0x04 /* squeeze these out (.,',-) */ -#define EF 0x80 /* EOF */ - -/* Text character classes */ -#define ISID1ST(c) ((rct)[c] & (I1)) -#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ)) -#define ISNUMBER(c) ((rct)[c] & (NM)) -#define ISEOF(c) ((rct)[c] & (EF)) -#define ISBORING(c) (!((rct)[c] & (I1|NM|EF))) -#define ISIDSQUEEZE(c) ((rct)[c] & (SQ)) - -static unsigned char ctype_text[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*050*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*060*/ NM, NM, NM, NM, NM, NM, NM, NM, -/*070*/ NM, NM, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, -/*200*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*210*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*220*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*230*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*240*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*250*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*260*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*270*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*300*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*310*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*320*/ I1, I1, I1, I1, I1, I1, I1, 0, -/*330*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*340*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*350*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*360*/ I1, I1, I1, I1, I1, I1, I1, 0, -/*370*/ I1, I1, I1, I1, I1, I1, I1, I1, -}; - -struct args_text -{ - unsigned char *ctype; -}; - -static struct args_text args_text = { ctype_text }; - -static struct option const long_options_text[] = -{ - { "include", required_argument, 0, 'i' }, - { "exclude", required_argument, 0, 'x' }, - { 0 } -}; - -static void -help_me_text (void) -{ - printf (_("\ -Text language:\n\ - -i,--include=CHAR-CLASS Include characters from CHAR-CLASS in tokens\n\ - -x,--exclude=CHAR-CLASS Exclude characters from CHAR-CLASS from tokens\n\ -")); -} - -static void * -parse_args_text (char **argv, int argc) -{ - char *tmp_string = 0; - struct args_text *args; - - if (argv == 0 || *argv == 0) - return &args_text; - - if (argc) - args = &args_text; - else - { - tmp_string = strdup (*argv); - tokenize_args_string (tmp_string, &argc, &argv); - args = MALLOC (struct args_text, 1); - args->ctype = ctype_text; - } - - optind = 0; - for (;;) - { - int optc = getopt_long (argc, argv, "i:x:", - long_options_text, (int *) 0); - if (optc < 0) - break; - if ((optc == 'k' || optc == 'i') && args->ctype == ctype_text) - args->ctype = CLONE (ctype_text, unsigned char, cardinalityof (ctype_text)); - switch (optc) - { - case 'i': - set_uchar_ctype (args->ctype, optarg, I1); - break; - - case 'x': - clear_uchar_ctype (args->ctype, optarg, I1); - break; - - default: - usage (); - } - } - if (tmp_string) - { - free (argv); - free (tmp_string); - } - return args; -} - -/* Grab the next identifier the text source file. This state machine - is built for speed, not elegance. */ - -static struct token * -get_token_text (FILE *in_FILE, void const *args, int *flags) -{ -#define ARGS ((struct args_text *) args) - static char id_0[BUFSIZ]; - unsigned char *rct = &ARGS->ctype[1]; - int c; - char *id = id_0; - - obstack_blank (&tokens_obstack, offsetof (struct token, tok_name)); - -top: - c = getc (in_FILE); - while (ISBORING (c)) - c = getc (in_FILE); - if (ISEOF (c)) - { - obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); - return 0; - } - id = id_0; - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (in_FILE))) - if (!ISIDSQUEEZE (c)) - *id++ = c; - } - else if (ISNUMBER (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (in_FILE))) - *id++ = c; - } - else - { - if (isprint (c)) - fprintf (stderr, _("junk: `%c'"), c); - else - fprintf (stderr, _("junk: `\\%03o'"), c); - goto top; - } - - ungetc (c, in_FILE); - *flags |= TOK_LITERAL; - obstack_grow0 (&tokens_obstack, id_0, id - id_0); - return obstack_finish (&tokens_obstack); -#undef ARGS -} - -#undef I1 -#undef NM -#undef SQ -#undef EF -#undef ISID1ST -#undef ISIDREST -#undef ISNUMBER -#undef ISEOF -#undef ISBORING -#undef ISIDSQUEEZE diff --git a/lib/scanners.h b/lib/scanners.h deleted file mode 100644 index 3c65a67..0000000 --- a/lib/scanners.h +++ /dev/null @@ -1,67 +0,0 @@ -/* scanners.h -- defs for interface to scanners.c - Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#ifndef _scanners_h_ -#define _scanners_h_ - -#include <obstack.h> - -#define MAX_LEVELS 5 /* log_8 of the max # of files: log_8 (32768) == 5 */ - -struct token -{ - unsigned short tok_count; - unsigned char tok_flags; - unsigned char tok_hits[MAX_LEVELS]; - char tok_name[1]; -}; - -typedef struct token *(*get_token_func_t) __P((FILE *in_FILE, void const *args, int *flags)); -typedef void *(*parse_args_func_t) __P((char **argv, int argc)); -typedef void (*help_me_func_t) __P((void)); - -struct language -{ - char const *lg_name; - parse_args_func_t lg_parse_args; - get_token_func_t lg_get_token; - help_me_func_t lg_help_me; - int lg_argc; - char *lg_argv[16]; -}; - -struct lang_args -{ - struct language const *la_language; - char const *la_pattern; /* fnmatch(3) pattern */ - char *la_args_string; /* human-readable scanner args */ - void const *la_args_digested; /* pre-parsed scanner args */ - int la_index; - struct lang_args *la_next; -}; - -extern void language_help_me __P((void)); -extern void language_save_arg __P((char *arg)); -extern struct language *get_language __P((char const *lang_name)); -extern void parse_language_map __P((char const *file_name)); - -extern struct lang_args *lang_args_default; -extern struct lang_args *lang_args_list; - -extern struct obstack tokens_obstack; - -#endif /* not _scanners_h_ */ |