diff options
Diffstat (limited to 'scanners.c')
-rw-r--r-- | scanners.c | 1216 |
1 files changed, 0 insertions, 1216 deletions
diff --git a/scanners.c b/scanners.c deleted file mode 100644 index f2a5d44..0000000 --- a/scanners.c +++ /dev/null @@ -1,1216 +0,0 @@ -/* scanners.c -- file & directory name manipulations - Copyright (C) 1986, 1995 Greg McGary - VHIL portions Copyright (C) 1988 Tom Horsley - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include <stdio.h> -#include <string.h> -#include <ctype.h> - -#include <config.h> -#include "strxtra.h" -#include "token.h" -#include "alloc.h" -#include "scanners.h" - -extern char const *program_name; - -static char const *get_token_VHIL __P((FILE *input_FILE, int *flags)); -static char const *get_token_c __P((FILE *input_FILE, int *flags)); -static void set_args_c __P((char const *lang_name, int op, char const *arg)); -static void set_ctype_c __P((char const *chars, int type)); -static void clear_ctype_c __P((char const *chars, int type)); -static void usage_c __P((char const *lang_name)); - -static char const *get_token_asm __P((FILE *input_FILE, int *flags)); -static void set_ctype_asm __P((char const *chars, int type)); -static void clear_ctype_asm __P((char const *chars, int type)); -static void usage_asm __P((char const *lang_name)); -static void set_args_asm __P((char const *lang_name, int op, char const *arg)); - -static char const *get_token_text __P((FILE *input_FILE, int *flags)); -static void set_ctype_text __P((char const *chars, int type)); -static void clear_ctype_text __P((char const *chars, int type)); -static void usage_text __P((char const *lang_name)); -static void set_args_text __P((char const *lang_name, int op, char const *arg)); - -/****************************************************************************/ - -typedef void (*set_args_t) __P((char const *lang_name, int op, char const *arg)); - -struct language -{ - char const *lang_name; - get_token_t lang_get_token; - set_args_t lang_set_args; - char const *lang_filter; - struct language *lang_next; -}; - -struct suffix -{ - char const *suff_suffix; - char const *suff_lang_name; - struct language *suff_language; - struct suffix *suff_next; -}; - -static struct suffix *get_suffix_entry (char const *suffix); -static struct language *get_lang_entry (char const *lang_name); -static void usage_scan (void); - -struct language languages_0[] = -{ - { "C", get_token_c, set_args_c, NULL }, - { "TeX", get_token_text, set_args_text, NULL }, - { "VHIL", get_token_VHIL, set_args_c, NULL }, - { "asm", get_token_asm, set_args_asm, NULL }, -/*{ "elisp", get_token_elisp, set_args_elisp, NULL },*/ - { "gzip", NULL, NULL, "zcat %s" }, - { "roff", get_token_text, set_args_text, "sed '/^\\.so/d' < %s | deroff" }, - { "text", get_token_text, set_args_text, NULL }, -}; -struct language *languages = languages_0; - -/* - This is a rather incomplete list of default associations - between suffixes and languages. You may add more to the - default list, or you may define them dynamically with the - `-S<suff>=<lang>' argument to mkid(1) and idx(1). e.g. to - associate a `.ada' suffix with the Ada language, use - `-S.ada=ada' -*/ -struct suffix suffixes_0[] = -{ - { "", "text" }, - { ".1", "roff" }, - { ".2", "roff" }, - { ".3", "roff" }, - { ".4", "roff" }, - { ".5", "roff" }, - { ".6", "roff" }, - { ".7", "roff" }, - { ".8", "roff" }, - { ".C", "C" }, - { ".H", "C" }, - { ".Z", "gzip" }, - { ".c", "C" }, - { ".cc", "C" }, - { ".cpp", "C" }, - { ".cxx", "C" }, - { ".doc", "text" }, -/*{ ".el", "elisp" },*/ - { ".gz", "gzip" }, - { ".h", "C" }, - { ".hh", "C" }, - { ".hpp", "C" }, - { ".hxx", "C" }, - { ".l", "C" }, - { ".lex", "C" }, - { ".ltx", "TeX" }, - { ".p", "pas" }, - { ".pas", "pas" }, - { ".s", "asm" }, - { ".S", "asm" }, - { ".tex", "TeX" }, - { ".x", "VHIL" }, - { ".y", "C" }, - { ".yacc", "C" }, - { ".z", "gzip" }, -}; -struct suffix *suffixes = suffixes_0; - -void -init_scanners (void) -{ - struct language *lang; - struct language *lang_N = &languages_0[(sizeof (languages_0) / sizeof (languages_0[0])) - 1]; - struct suffix *suff; - struct suffix *suff_N = &suffixes_0[(sizeof (suffixes_0) / sizeof (suffixes_0[0])) - 1]; - - for (lang = languages; lang <= lang_N; ++lang) - lang->lang_next = lang + 1; - lang_N->lang_next = NULL; - - for (suff = suffixes; suff <= suff_N; ++suff) { - lang = get_lang_entry (suff->suff_lang_name); - if (lang) - suff->suff_language = lang; - suff->suff_next = suff + 1; - } - suff_N->suff_next = NULL; -} - -/* Return a suffix table entry for the given suffix. */ -static struct suffix * -get_suffix_entry (char const *suffix) -{ - struct suffix *stp; - - if (suffix == NULL) - suffix = ""; - - for (stp = suffixes; stp; stp = stp->suff_next) - if (strequ (stp->suff_suffix, suffix)) - return stp; - return NULL; -} - -static struct language * -get_lang_entry (char const *lang_name) -{ - struct language *ltp; - - if (lang_name == NULL) - lang_name = ""; - - for (ltp = languages; ltp; ltp = ltp->lang_next) - if (ltp->lang_name == lang_name || strequ (ltp->lang_name, lang_name)) - return ltp; - return ltp; -} - -char const * -get_lang_name (char const *suffix) -{ - struct suffix *stp; - - stp = get_suffix_entry (suffix); - if (stp == NULL) - return NULL; - return stp->suff_language->lang_name; -} - -char const * -get_filter (char const *suffix) -{ - struct suffix *stp; - - stp = get_suffix_entry (suffix); - if (stp == NULL) - return NULL; - return stp->suff_language->lang_filter; -} - -get_token_t -get_scanner (char const *lang) -{ - struct language *ltp; - - ltp = get_lang_entry (lang); - if (ltp == NULL) - return NULL; - return ltp->lang_get_token; -} - -void -set_scan_args (int op, char *arg) -{ - struct language *ltp, *ltp2; - struct suffix *stp; - char *lhs; - char *lhs2; - int count = 0; - - lhs = arg; - while (isalnum (*arg) || *arg == '.') - arg++; - - if (strequ (lhs, "?=?")) - { - for (stp = suffixes; stp->suff_next; stp = stp->suff_next) - { - printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, stp->suff_language->lang_name); - if (stp->suff_language->lang_filter) - printf (" (%s)", stp->suff_language->lang_filter); - } - if (count) - putchar ('\n'); - return; - } - - if (strnequ (lhs, "?=", 2)) - { - lhs += 2; - ltp = get_lang_entry (lhs); - if (ltp == NULL) - { - printf ("No scanner for language `%s'\n", lhs); - return; - } - for (stp = suffixes; stp->suff_next; stp = stp->suff_next) - if (stp->suff_language == ltp) - { - printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, ltp->lang_name); - if (stp->suff_language->lang_filter) - printf (" (%s)", stp->suff_language->lang_filter); - } - if (count) - putchar ('\n'); - return; - } - - if (strequ (arg, "=?")) - { - lhs[strlen (lhs) - 2] = '\0'; - stp = get_suffix_entry (lhs); - if (stp == NULL) - { - printf ("No scanner assigned to suffix `%s'\n", lhs); - return; - } - printf ("%s=%s", stp->suff_suffix, stp->suff_language->lang_name); - if (stp->suff_language->lang_filter) - printf (" (%s)", stp->suff_language->lang_filter); - printf ("\n"); - return; - } - - if (*arg == '=') - { - *arg++ = '\0'; - - ltp = get_lang_entry (arg); - if (ltp == NULL) - { - fprintf (stderr, "%s: Language undefined: %s\n", program_name, arg); - return; - } - stp = get_suffix_entry (lhs); - if (stp == NULL) - { - stp = CALLOC (struct suffix, 1); - stp->suff_suffix = lhs; - stp->suff_language = ltp; - stp->suff_next = suffixes; - suffixes = stp; - } - else if (!strequ (arg, stp->suff_language->lang_name)) - { - fprintf (stderr, "%s: Note: `%s=%s' overrides `%s=%s'\n", program_name, lhs, arg, lhs, stp->suff_language->lang_name); - stp->suff_language = ltp; - } - return; - } - else if (*arg == '/') - { - *arg++ = '\0'; - ltp = get_lang_entry (lhs); - if (ltp->lang_next == NULL) - { - ltp = CALLOC (struct language, 1); - ltp->lang_name = lhs; - ltp->lang_get_token = get_token_text; - ltp->lang_set_args = set_args_text; - ltp->lang_filter = NULL; - ltp->lang_next = languages; - languages = ltp; - } - lhs2 = arg; - arg = strchr (arg, '/'); - if (arg == NULL) - ltp2 = ltp; - else - { - *arg++ = '\0'; - ltp2 = get_lang_entry (lhs2); - if (ltp2 == NULL) - { - fprintf (stderr, "%s: language %s not defined.\n", program_name, lhs2); - ltp2 = ltp; - } - } - ltp->lang_get_token = ltp2->lang_get_token; - ltp->lang_set_args = ltp2->lang_set_args; - if (ltp->lang_filter && (!strequ (arg, ltp->lang_filter))) - fprintf (stderr, "%s: Note: `%s/%s' overrides `%s/%s'\n", program_name, lhs, arg, lhs, ltp->lang_filter); - ltp->lang_filter = arg; - return; - } - - if (op == '+') - { - switch (op = *arg++) - { - case '+': - case '-': - case '?': - break; - default: - usage_scan (); - } - for (ltp = languages; ltp->lang_next; ltp = ltp->lang_next) - (*ltp->lang_set_args) (NULL, op, arg); - return; - } - - if (*arg == '-' || *arg == '+' || *arg == '?') - { - op = *arg; - *arg++ = '\0'; - - ltp = get_lang_entry (lhs); - if (ltp == NULL) - { - fprintf (stderr, "%s: Language undefined: %s\n", program_name, lhs); - return; - } - (*ltp->lang_set_args) (lhs, op, arg); - return; - } - - usage_scan (); -} - -static void -usage_scan (void) -{ - fprintf (stderr, "Usage: %s [-S<suffix>=<lang>] [+S(+|-)<arg>] [-S<lang>(+|-)<arg>] [-S<lang>/<lang>/<filter>]\n", program_name); - exit (1); -} - -/*************** C & C++ ****************************************************/ - -#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */ -#define DG 0x0002 /* decimal digit [0-9] */ -#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */ -#define C1 0x0008 /* C comment introduction char: / */ -#define C2 0x0010 /* C comment termination char: * */ -#define Q1 0x0020 /* single quote: ' */ -#define Q2 0x0040 /* double quote: " */ -#define ES 0x0080 /* escape char: \ */ -#define NL 0x0100 /* newline: \n */ -#define EF 0x0200 /* EOF */ -#define SK 0x0400 /* Make these chars valid for names within strings */ -#define VH 0x0800 /* VHIL comment introduction char: # */ -#define WS 0x1000 /* White space characters */ - -/* - character class membership macros: -*/ -#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */ -#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */ -#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */ -#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */ -#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */ -#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */ -#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */ -/* - The `BORING' classes should be skipped over - until something interesting comes along... -*/ -#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */ -#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */ -#define ISVBORING(c) (!((rct)[c] & (EF|NL))) /* vhil comment fluff */ -#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */ -#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */ - -static unsigned short ctype_c[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, NL, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1, -/*050*/ 0, 0, C2, 0, 0, 0, 0, C1, -/*060*/ DG, DG, DG, DG, DG, DG, DG, DG, -/*070*/ DG, DG, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, -}; - -static int eat_underscore = 1; -static int scan_VHIL = 0; - -static char const * -get_token_VHIL (FILE *input_FILE, int *flags) -{ - if (!scan_VHIL) - set_args_c ("vhil", '+', "v"); - return get_token_c (input_FILE, flags); -} - -/* - Grab the next identifier from the C source - file opened with the handle `input_FILE'. - This state machine is built for speed, not elegance. -*/ -static char const * -get_token_c (FILE *input_FILE, int *flags) -{ - static char input_buffer[BUFSIZ]; - static int new_line = 1; - unsigned short *rct = &ctype_c[1]; - int c; - char *id = input_buffer; - -top: - c = getc (input_FILE); - if (new_line) - { - new_line = 0; - if (c == '.') - { - /* Auto-recognize vhil code when you see a '.' in column 1. - also ignore lines that start with a '.' */ - if (!scan_VHIL) - set_args_c ("vhil", '+', "v"); - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - if (c != '#') - goto next; - c = getc (input_FILE); - if (scan_VHIL && ISSPACE (c)) - { - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - while (ISBORING (c)) - c = getc (input_FILE); - if (!ISID1ST (c)) - goto next; - id = input_buffer; - *id++ = c; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - *id = '\0'; - if (strequ (input_buffer, "include")) - { - while (c == ' ' || c == '\t') - c = getc (input_FILE); - if (c == '\n') - { - new_line = 1; - goto top; - } - id = input_buffer; - if (c == '"') - { - c = getc (input_FILE); - while (c != '\n' && c != EOF && c != '"') - { - *id++ = c; - c = getc (input_FILE); - } - *flags = TOK_STRING; - } - else if (c == '<') - { - c = getc (input_FILE); - while (c != '\n' && c != EOF && c != '>') - { - *id++ = c; - c = getc (input_FILE); - } - *flags = TOK_STRING; - } - else if (ISID1ST (c)) - { - *id++ = c; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - *flags = TOK_NAME; - } - else - { - while (c != '\n' && c != EOF) - c = getc (input_FILE); - new_line = 1; - goto top; - } - while (c != '\n' && c != EOF) - c = getc (input_FILE); - new_line = 1; - *id = '\0'; - return input_buffer; - } - if (strnequ (input_buffer, "if", 2) - || strequ (input_buffer, "define") - || strequ (input_buffer, "elif") /* ansi C */ - || (scan_VHIL && strequ (input_buffer, "elsif")) - || strequ (input_buffer, "undef")) - goto next; - while ((c != '\n') && (c != EOF)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - -next: - while (ISBORING (c)) - c = getc (input_FILE); - - switch (c) - { - case '"': - id = input_buffer; - *id++ = c = getc (input_FILE); - for (;;) - { - while (ISQ2BORING (c)) - *id++ = c = getc (input_FILE); - if (c == '\\') - { - *id++ = c = getc (input_FILE); - continue; - } - else if (c != '"') - goto next; - break; - } - *--id = '\0'; - id = input_buffer; - while (ISSTRKEEP (*id)) - id++; - if (*id || id == input_buffer) - { - c = getc (input_FILE); - goto next; - } - *flags = TOK_STRING; - if (eat_underscore && input_buffer[0] == '_' && input_buffer[1]) - return &input_buffer[1]; - else - return input_buffer; - - case '\'': - c = getc (input_FILE); - for (;;) - { - while (ISQ1BORING (c)) - c = getc (input_FILE); - if (c == '\\') - { - c = getc (input_FILE); - continue; - } - else if (c == '\'') - c = getc (input_FILE); - goto next; - } - - case '/': - c = getc (input_FILE); - if (c == '/') - { /* Cope with C++ comment */ - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - } - else if (c != '*') - goto next; - c = getc (input_FILE); - for (;;) - { - while (ISCBORING (c)) - c = getc (input_FILE); - c = getc (input_FILE); - if (c == '/') - { - c = getc (input_FILE); - goto next; - } - else if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - } - - case '\n': - new_line = 1; - goto top; - - case '#': - if (!scan_VHIL) - { - /* Auto-recognize vhil when find a # in the middle of a line. */ - set_args_c ("vhil", '+', "v"); - } - c = getc (input_FILE); - while (ISVBORING (c)) - c = getc (input_FILE); - new_line = 1; - goto top; - default: - if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - id = input_buffer; - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - } - else if (ISDIGIT (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (input_FILE))) - *id++ = c; - } - else - fprintf (stderr, "junk: `\\%3o'", c); - ungetc (c, input_FILE); - *id = '\0'; - *flags |= TOK_LITERAL; - return input_buffer; - } -} - -static void -set_ctype_c (char const *chars, int type) -{ - unsigned short *rct = &ctype_c[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ctype_c (char const *chars, int type) -{ - unsigned short *rct = &ctype_c[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -usage_c (char const *lang_name) -{ - fprintf (stderr, "Usage: %s does not accept %s scanner arguments\n", program_name, lang_name); - exit (1); -} - -static char document_c[] = "\ -The C scanner arguments take the form -Sc<arg>, where <arg>\n\ -is one of the following: (<cc> denotes one or more characters)\n\ - (+|-)u . . . . (Do|Don't) strip a leading `_' from ids in strings.\n\ - (+|-)s<cc> . . Allow <cc> in string ids, and (keep|ignore) those ids.\n\ - -v . . . . . . Skip vhil comments."; - -static void -set_args_c (char const *lang_name, int op, char const *arg) -{ - if (op == '?') - { - puts (document_c); - return; - } - switch (*arg++) - { - case 'u': - eat_underscore = (op == '+'); - break; - case 's': - if (op == '+') - set_ctype_c (arg, SK); - else - clear_ctype_c (arg, SK); - break; - case 'v': - set_ctype_c ("$", I1); - set_ctype_c ("#", VH); - set_ctype_c (" \t", WS); - scan_VHIL = 1; - break; - default: - if (lang_name) - usage_c (lang_name); - break; - } -} - -#undef I1 -#undef DG -#undef NM -#undef C1 -#undef C2 -#undef Q1 -#undef Q2 -#undef ES -#undef NL -#undef EF -#undef SK -#undef VH -#undef WS -#undef ISDIGIT -#undef ISNUMBER -#undef ISEOF -#undef ISID1ST -#undef ISIDREST -#undef ISSTRKEEP -#undef ISSPACE -#undef ISBORING -#undef ISCBORING -#undef ISVBORING -#undef ISQ1BORING -#undef ISQ2BORING - -/*************** Assembly ***************************************************/ - -#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ -#define NM 0x02 /* digit [0-9a-fA-FxX] */ -#define NL 0x04 /* newline: \n */ -#define CM 0x08 /* assembler comment char: usually # or | */ -#define IG 0x10 /* ignore `identifiers' with these chars in them */ -#define C1 0x20 /* C comment introduction char: / */ -#define C2 0x40 /* C comment termination char: * */ -#define EF 0x80 /* EOF */ - -/* Assembly Language character classes */ -#define ISID1ST(c) ((rct)[c] & (I1)) -#define ISIDREST(c) ((rct)[c] & (I1|NM)) -#define ISNUMBER(c) ((rct)[c] & (NM)) -#define ISEOF(c) ((rct)[c] & (EF)) -#define ISCOMMENT(c) ((rct)[c] & (CM)) -#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1))) -#define ISCBORING(c) (!((rct)[c] & (EF|NL))) -#define ISCCBORING(c) (!((rct)[c] & (EF|C2))) -#define ISIGNORE(c) ((rct)[c] & (IG)) - -static unsigned char ctype_asm[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, NL, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*050*/ 0, 0, C2, 0, 0, 0, 0, C1, -/*060*/ NM, NM, NM, NM, NM, NM, NM, NM, -/*070*/ NM, NM, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, - -}; - -static int cpp_on_asm = 1; - -/* - Grab the next identifier the assembly language - source file opened with the handle `input_FILE'. - This state machine is built for speed, not elegance. -*/ -static char const * -get_token_asm (FILE *input_FILE, int *flags) -{ - static char input_buffer[BUFSIZ]; - unsigned char *rct = &ctype_asm[1]; - int c; - char *id = input_buffer; - static int new_line = 1; - -top: - c = getc (input_FILE); - if (cpp_on_asm > 0 && new_line) - { - new_line = 0; - if (c != '#') - goto next; - while (ISBORING (c)) - c = getc (input_FILE); - if (!ISID1ST (c)) - goto next; - id = input_buffer; - *id++ = c; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - *id = '\0'; - if (strequ (input_buffer, "include")) - { - while (c != '"' && c != '<') - c = getc (input_FILE); - id = input_buffer; - *id++ = c = getc (input_FILE); - while ((c = getc (input_FILE)) != '"' && c != '>') - *id++ = c; - *id = '\0'; - *flags = TOK_STRING; - return input_buffer; - } - if (strnequ (input_buffer, "if", 2) - || strequ (input_buffer, "define") - || strequ (input_buffer, "undef")) - goto next; - while (c != '\n') - c = getc (input_FILE); - new_line = 1; - goto top; - } - -next: - while (ISBORING (c)) - c = getc (input_FILE); - - if (ISCOMMENT (c)) - { - while (ISCBORING (c)) - c = getc (input_FILE); - new_line = 1; - } - - if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - - if (c == '\n') - { - new_line = 1; - goto top; - } - - if (c == '/') - { - if ((c = getc (input_FILE)) != '*') - goto next; - c = getc (input_FILE); - for (;;) - { - while (ISCCBORING (c)) - c = getc (input_FILE); - c = getc (input_FILE); - if (c == '/') - { - c = getc (input_FILE); - break; - } - else if (ISEOF (c)) - { - new_line = 1; - return NULL; - } - } - goto next; - } - - id = input_buffer; - if (eat_underscore && c == '_' && !ISID1ST (c = getc (input_FILE))) - { - ungetc (c, input_FILE); - return "_"; - } - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (input_FILE))) - *id++ = c; - } - else if (ISNUMBER (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (input_FILE))) - *id++ = c; - } - else - { - if (isprint (c)) - fprintf (stderr, "junk: `%c'", c); - else - fprintf (stderr, "junk: `\\%03o'", c); - goto next; - } - - *id = '\0'; - for (id = input_buffer; *id; id++) - if (ISIGNORE (*id)) - goto next; - ungetc (c, input_FILE); - *flags |= TOK_LITERAL; - return input_buffer; -} - -static void -set_ctype_asm (char const *chars, int type) -{ - unsigned char *rct = &ctype_asm[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ctype_asm (char const *chars, int type) -{ - unsigned char *rct = &ctype_asm[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -usage_asm (char const *lang_name) -{ - fprintf (stderr, "Usage: %s -S%s([-c<cc>] [-u] [(+|-)a<cc>] [(+|-)p] [(+|-)C])\n", program_name, lang_name); - exit (1); -} - -static char document_asm[] = "\ -The Assembler scanner arguments take the form -Sasm<arg>, where\n\ -<arg> is one of the following: (<cc> denotes one or more characters)\n\ - -c<cc> . . . . <cc> introduce(s) a comment until end-of-line.\n\ - (+|-)u . . . . (Do|Don't) strip a leading `_' from ids.\n\ - (+|-)a<cc> . . Allow <cc> in ids, and (keep|ignore) those ids.\n\ - (+|-)p . . . . (Do|Don't) handle C-preprocessor directives.\n\ - (+|-)C . . . . (Do|Don't) handle C-style comments. (/* */)"; - -static void -set_args_asm (char const *lang_name, int op, char const *arg) -{ - if (op == '?') - { - puts (document_asm); - return; - } - switch (*arg++) - { - case 'a': - set_ctype_asm (arg, I1 | ((op == '-') ? IG : 0)); - break; - case 'c': - set_ctype_asm (arg, CM); - break; - case 'u': - eat_underscore = (op == '+'); - break; - case 'p': - cpp_on_asm = (op == '+'); - break; - case 'C': - if (op == '+') - { - set_ctype_asm ("/", C1); - set_ctype_asm ("*", C2); - } - else - { - clear_ctype_asm ("/", C1); - clear_ctype_asm ("*", C2); - } - break; - default: - if (lang_name) - usage_asm (lang_name); - break; - } -} - -#undef I1 -#undef NM -#undef NL -#undef CM -#undef IG -#undef C1 -#undef C2 -#undef EF -#undef ISID1ST -#undef ISIDREST -#undef ISNUMBER -#undef ISEOF -#undef ISCOMMENT -#undef ISBORING -#undef ISCBORING -#undef ISCCBORING -#undef ISIGNORE - -/*************** Text *******************************************************/ - -#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ -#define NM 0x02 /* digit [0-9a-fA-FxX] */ -#define SQ 0x04 /* squeeze these out (.,',-) */ -#define EF 0x80 /* EOF */ - -/* Text character classes */ -#define ISID1ST(c) ((rct)[c] & (I1)) -#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ)) -#define ISNUMBER(c) ((rct)[c] & (NM)) -#define ISEOF(c) ((rct)[c] & (EF)) -#define ISBORING(c) (!((rct)[c] & (I1|NM|EF))) -#define ISIDSQUEEZE(c) ((rct)[c] & (SQ)) - -static unsigned char ctype_text[257] = -{ - EF, -/* 0 1 2 3 4 5 6 7 */ -/* ----- ----- ----- ----- ----- ----- ----- ----- */ -/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*010*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*040*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*050*/ 0, 0, 0, 0, 0, 0, 0, 0, -/*060*/ NM, NM, NM, NM, NM, NM, NM, NM, -/*070*/ NM, NM, 0, 0, 0, 0, 0, 0, -/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, -/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, -/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, -/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, -/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, -}; - -/* - Grab the next identifier the text source file opened with the - handle `input_FILE'. This state machine is built for speed, not - elegance. -*/ -static char const * -get_token_text (FILE *input_FILE, int *flags) -{ - static char input_buffer[BUFSIZ]; - unsigned char *rct = &ctype_text[1]; - int c; - char *id = input_buffer; - -top: - c = getc (input_FILE); - while (ISBORING (c)) - c = getc (input_FILE); - if (ISEOF (c)) - return NULL; - id = input_buffer; - *id++ = c; - if (ISID1ST (c)) - { - *flags = TOK_NAME; - while (ISIDREST (c = getc (input_FILE))) - if (!ISIDSQUEEZE (c)) - *id++ = c; - } - else if (ISNUMBER (c)) - { - *flags = TOK_NUMBER; - while (ISNUMBER (c = getc (input_FILE))) - *id++ = c; - } - else - { - if (isprint (c)) - fprintf (stderr, "junk: `%c'", c); - else - fprintf (stderr, "junk: `\\%03o'", c); - goto top; - } - - *id = '\0'; - ungetc (c, input_FILE); - *flags |= TOK_LITERAL; - return input_buffer; -} - -static void -set_ctype_text (char const *chars, int type) -{ - unsigned char *rct = &ctype_text[1]; - - while (*chars) - rct[*chars++] |= type; -} - -static void -clear_ctype_text (char const *chars, int type) -{ - unsigned char *rct = &ctype_text[1]; - - while (*chars) - rct[*chars++] &= ~type; -} - -static void -usage_text (char const *lang_name) -{ - fprintf (stderr, "Usage: %s -S%s([(+|-)a<cc>] [(+|-)s<cc>]\n", program_name, lang_name); - exit (1); -} - -static char document_text[] = "\ -The Text scanner arguments take the form -Stext<arg>, where\n\ -<arg> is one of the following: (<cc> denotes one or more characters)\n\ - (+|-)a<cc> . . Include (or exculde) <cc> in ids.\n\ - (+|-)s<cc> . . Squeeze (or don't squeeze) <cc> out of ids."; - -static void -set_args_text (char const *lang_name, int op, char const *arg) -{ - if (op == '?') - { - puts (document_text); - return; - } - switch (*arg++) - { - case 'a': - if (op == '+') - set_ctype_text (arg, I1); - else - clear_ctype_text (arg, I1); - break; - case 's': - if (op == '+') - set_ctype_text (arg, SQ); - else - clear_ctype_text (arg, SQ); - break; - default: - if (lang_name) - usage_text (lang_name); - break; - } -} - -#undef I1 -#undef NM -#undef SQ -#undef EF -#undef ISID1ST -#undef ISIDREST -#undef ISNUMBER -#undef ISEOF -#undef ISBORING -#undef ISIDSQUEEZE |