/* scanners.c -- file & directory name manipulations Copyright (C) 1986, 1995 Greg McGary VHIL portions Copyright (C) 1988 Tom Horsley This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "config.h" #include #include #include #include "strxtra.h" #include "token.h" #include "alloc.h" #include "scanners.h" extern char const *program_name; static char const *get_token_VHIL (FILE *input_FILE, int *flags); static char const *get_token_c (FILE *input_FILE, int *flags); static void set_args_c (char const *lang_name, int op, char const *arg); static void set_ctype_c (char const *chars, int type); static void clear_ctype_c (char const *chars, int type); static void usage_c (char const *lang_name); static char const *get_token_asm (FILE *input_FILE, int *flags); static void set_ctype_asm (char const *chars, int type); static void clear_ctype_asm (char const *chars, int type); static void usage_asm (char const *lang_name); static void set_args_asm (char const *lang_name, int op, char const *arg); static char const *get_token_text (FILE *input_FILE, int *flags); static void set_ctype_text (char const *chars, int type); static void clear_ctype_text (char const *chars, int type); static void usage_text (char const *lang_name); static void set_args_text (char const *lang_name, int op, char const *arg); /****************************************************************************/ struct language { char const *lang_name; char const *(*lang_get_token) (FILE *input_FILE, int *flags); void (*lang_set_args) (char const *lang_name, int op, char const *arg); char const *lang_filter; struct language *lang_next; }; struct suffix { char const *suff_suffix; char const *suff_lang_name; struct language *suff_language; struct suffix *suff_next; }; static struct suffix *get_suffix_entry (char const *suffix); static struct language *get_lang_entry (char const *lang_name); static void usage_scan (void); struct language languages[] = { /* must be sorted for bsearch(3) */ { "C", get_token_c, set_args_c, NULL }, { "TeX", get_token_text, set_args_text, NULL }, { "VHIL", get_token_VHIL, set_args_c, NULL }, { "asm", get_token_asm, set_args_asm, NULL }, /*{ "elisp", get_token_elisp, set_args_elisp, NULL },*/ { "gzip", NULL, NULL, "zcat %s" }, { "roff", get_token_text, set_args_text, "sed '/^\\.so/d' < %s | deroff" }, { "text", get_token_text, set_args_text, NULL }, }; /* This is a rather incomplete list of default associations between suffixes and languages. You may add more to the default list, or you may define them dynamically with the `-S=' argument to mkid(1) and idx(1). e.g. to associate a `.ada' suffix with the Ada language, use `-S.ada=ada' */ struct suffix suffixes[] = { { "", "text" }, { ".1", "roff" }, { ".2", "roff" }, { ".3", "roff" }, { ".4", "roff" }, { ".5", "roff" }, { ".6", "roff" }, { ".7", "roff" }, { ".8", "roff" }, { ".C", "C" }, { ".H", "C" }, { ".Z", "gzip" }, { ".c", "C" }, { ".cc", "C" }, { ".cpp", "C" }, { ".cxx", "C" }, { ".doc", "text" }, /*{ ".el", "elisp" },*/ { ".gz", "gzip" }, { ".h", "C" }, { ".hh", "C" }, { ".hpp", "C" }, { ".hxx", "C" }, { ".l", "C" }, { ".lex", "C" }, { ".ltx", "TeX" }, { ".p", "pas" }, { ".pas", "pas" }, { ".s", "asm" }, { ".S", "asm" }, { ".tex", "TeX" }, { ".x", "VHIL" }, { ".y", "C" }, { ".yacc", "C" }, { ".z", "gzip" }, }; void init_scanners (void) { struct language *lang; struct language *lang_N = &languages[(sizeof (languages) / sizeof (languages[0])) - 1]; struct suffix *suff; struct suffix *suff_N = &suffixes[(sizeof (suffixes) / sizeof (suffixes[0])) - 1]; for (lang = languages; lang <= lang_N; ++lang) lang->lang_next = lang + 1; lang_N->lang_next = NULL; for (suff = suffixes; suff <= suff_N; ++suff) { lang = get_lang_entry (suff->suff_lang_name); if (lang) suff->suff_language = lang; suff->suff_next = suff + 1; } suff_N->suff_next = NULL; } /* Return a suffix table entry for the given suffix. */ static struct suffix * get_suffix_entry (char const *suffix) { struct suffix *stp; if (suffix == NULL) suffix = ""; for (stp = suffixes; stp; stp = stp->suff_next) if (strequ (stp->suff_suffix, suffix)) return stp; return stp; } static struct language * get_lang_entry (char const *lang_name) { struct language *ltp; if (lang_name == NULL) lang_name = ""; for (ltp = languages; ltp->lang_next; ltp = ltp->lang_next) if (ltp->lang_name == lang_name || strequ (ltp->lang_name, lang_name)) return ltp; return ltp; } char const * get_lang_name (char const *suffix) { struct suffix *stp; stp = get_suffix_entry (suffix); if (stp->suff_next == NULL) return NULL; return stp->suff_language->lang_name; } char const * get_filter (char const *suffix) { struct suffix *stp; stp = get_suffix_entry (suffix); if (stp->suff_next == NULL) return NULL; return stp->suff_language->lang_filter; } char const *(* get_scanner (char const *lang) ) (FILE *input_FILE, int *flags) { struct language *ltp; ltp = get_lang_entry (lang); if (ltp->lang_next == NULL) return NULL; return ltp->lang_get_token; } void set_scan_args (int op, char *arg) { struct language *ltp, *ltp2; struct suffix *stp; char *lhs; char *lhs2; int count = 0; lhs = arg; while (isalnum (*arg) || *arg == '.') arg++; if (strequ (lhs, "?=?")) { for (stp = suffixes; stp->suff_next; stp = stp->suff_next) { printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, stp->suff_language->lang_name); if (stp->suff_language->lang_filter) printf (" (%s)", stp->suff_language->lang_filter); } if (count) putchar ('\n'); return; } if (strnequ (lhs, "?=", 2)) { lhs += 2; ltp = get_lang_entry (lhs); if (ltp->lang_next == NULL) { printf ("No scanner for language `%s'\n", lhs); return; } for (stp = suffixes; stp->suff_next; stp = stp->suff_next) if (stp->suff_language == ltp) { printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, ltp->lang_name); if (stp->suff_language->lang_filter) printf (" (%s)", stp->suff_language->lang_filter); } if (count) putchar ('\n'); return; } if (strequ (arg, "=?")) { lhs[strlen (lhs) - 2] = '\0'; stp = get_suffix_entry (lhs); if (stp->suff_next == NULL) { printf ("No scanner assigned to suffix `%s'\n", lhs); return; } printf ("%s=%s", stp->suff_suffix, stp->suff_language->lang_name); if (stp->suff_language->lang_filter) printf (" (%s)", stp->suff_language->lang_filter); printf ("\n"); return; } if (*arg == '=') { *arg++ = '\0'; ltp = get_lang_entry (arg); if (ltp->lang_next == NULL) { fprintf (stderr, "%s: Language undefined: %s\n", program_name, arg); return; } stp = get_suffix_entry (lhs); if (stp->suff_next == NULL) { stp->suff_suffix = lhs; stp->suff_language = ltp; stp->suff_next = CALLOC (struct suffix, 1); } else if (!strequ (arg, stp->suff_language->lang_name)) { fprintf (stderr, "%s: Note: `%s=%s' overrides `%s=%s'\n", program_name, lhs, arg, lhs, stp->suff_language->lang_name); stp->suff_language = ltp; } return; } else if (*arg == '/') { *arg++ = '\0'; ltp = get_lang_entry (lhs); if (ltp->lang_next == NULL) { ltp->lang_name = lhs; ltp->lang_get_token = get_token_text; ltp->lang_set_args = set_args_text; ltp->lang_filter = NULL; ltp->lang_next = CALLOC (struct language, 1); } lhs2 = arg; arg = strchr (arg, '/'); if (arg == NULL) ltp2 = ltp; else { *arg++ = '\0'; ltp2 = get_lang_entry (lhs2); if (ltp2->lang_next == NULL) { fprintf (stderr, "%s: language %s not defined.\n", program_name, lhs2); ltp2 = ltp; } } ltp->lang_get_token = ltp2->lang_get_token; ltp->lang_set_args = ltp2->lang_set_args; if (ltp->lang_filter && (!strequ (arg, ltp->lang_filter))) fprintf (stderr, "%s: Note: `%s/%s' overrides `%s/%s'\n", program_name, lhs, arg, lhs, ltp->lang_filter); ltp->lang_filter = arg; return; } if (op == '+') { switch (op = *arg++) { case '+': case '-': case '?': break; default: usage_scan (); } for (ltp = languages; ltp->lang_next; ltp = ltp->lang_next) (*ltp->lang_set_args) (NULL, op, arg); return; } if (*arg == '-' || *arg == '+' || *arg == '?') { op = *arg; *arg++ = '\0'; ltp = get_lang_entry (lhs); if (ltp->lang_next == NULL) { fprintf (stderr, "%s: Language undefined: %s\n", program_name, lhs); return; } (*ltp->lang_set_args) (lhs, op, arg); return; } usage_scan (); } static void usage_scan (void) { fprintf (stderr, "Usage: %s [-S=] [+S(+|-)] [-S(+|-)] [-S//]\n", program_name); exit (1); } /*************** C & C++ ****************************************************/ #define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */ #define DG 0x0002 /* decimal digit [0-9] */ #define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */ #define C1 0x0008 /* C comment introduction char: / */ #define C2 0x0010 /* C comment termination char: * */ #define Q1 0x0020 /* single quote: ' */ #define Q2 0x0040 /* double quote: " */ #define ES 0x0080 /* escape char: \ */ #define NL 0x0100 /* newline: \n */ #define EF 0x0200 /* EOF */ #define SK 0x0400 /* Make these chars valid for names within strings */ #define VH 0x0800 /* VHIL comment introduction char: # */ #define WS 0x1000 /* White space characters */ /* character class membership macros: */ #define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */ #define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */ #define ISEOF(c) ((rct)[c] & (EF)) /* EOF */ #define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */ #define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */ #define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */ #define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */ /* The `BORING' classes should be skipped over until something interesting comes along... */ #define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */ #define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */ #define ISVBORING(c) (!((rct)[c] & (EF|NL))) /* vhil comment fluff */ #define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */ #define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */ static short ctype_c[257] = { EF, /* 0 1 2 3 4 5 6 7 */ /* ----- ----- ----- ----- ----- ----- ----- ----- */ /*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /*010*/ 0, 0, NL, 0, 0, 0, 0, 0, /*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1, /*050*/ 0, 0, C2, 0, 0, 0, 0, C1, /*060*/ DG, DG, DG, DG, DG, DG, DG, DG, /*070*/ DG, DG, 0, 0, 0, 0, 0, 0, /*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1, /*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*160*/ I1, I1, I1, I1, I1, I1, I1, I1, /*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, }; static int eat_underscore = 1; static int scan_VHIL = 0; static char const * get_token_VHIL (FILE *input_FILE, int *flags) { if (!scan_VHIL) set_args_c ("vhil", '+', "v"); return get_token_c (input_FILE, flags); } /* Grab the next identifier the C source file opened with the handle `input_FILE'. This state machine is built for speed, not elegance. */ static char const * get_token_c (FILE *input_FILE, int *flags) { static char input_buffer[BUFSIZ]; static int new_line = 1; short *rct = &ctype_c[1]; int c; char *id = input_buffer; top: c = getc (input_FILE); if (new_line) { new_line = 0; if (c == '.') { /* Auto-recognize vhil code when you see a '.' in column 1. also ignore lines that start with a '.' */ if (!scan_VHIL) set_args_c ("vhil", '+', "v"); while (ISVBORING (c)) c = getc (input_FILE); new_line = 1; goto top; } if (c != '#') goto next; c = getc (input_FILE); if (scan_VHIL && ISSPACE (c)) { while (ISVBORING (c)) c = getc (input_FILE); new_line = 1; goto top; } while (ISBORING (c)) c = getc (input_FILE); if (!ISID1ST (c)) goto next; id = input_buffer; *id++ = c; while (ISIDREST (c = getc (input_FILE))) *id++ = c; *id = '\0'; if (strequ (input_buffer, "include")) { while (c == ' ' || c == '\t') c = getc (input_FILE); if (c == '\n') { new_line = 1; goto top; } id = input_buffer; if (c == '"') { c = getc (input_FILE); while (c != '\n' && c != EOF && c != '"') { *id++ = c; c = getc (input_FILE); } *flags = TOK_STRING; } else if (c == '<') { c = getc (input_FILE); while (c != '\n' && c != EOF && c != '>') { *id++ = c; c = getc (input_FILE); } *flags = TOK_STRING; } else if (ISID1ST (c)) { *id++ = c; while (ISIDREST (c = getc (input_FILE))) *id++ = c; *flags = TOK_NAME; } else { while (c != '\n' && c != EOF) c = getc (input_FILE); new_line = 1; goto top; } while (c != '\n' && c != EOF) c = getc (input_FILE); new_line = 1; *id = '\0'; return input_buffer; } if (strnequ (input_buffer, "if", 2) || strequ (input_buffer, "define") || strequ (input_buffer, "elif") /* ansi C */ || (scan_VHIL && strequ (input_buffer, "elsif")) || strequ (input_buffer, "undef")) goto next; while ((c != '\n') && (c != EOF)) c = getc (input_FILE); new_line = 1; goto top; } next: while (ISBORING (c)) c = getc (input_FILE); switch (c) { case '"': id = input_buffer; *id++ = c = getc (input_FILE); for (;;) { while (ISQ2BORING (c)) *id++ = c = getc (input_FILE); if (c == '\\') { *id++ = c = getc (input_FILE); continue; } else if (c != '"') goto next; break; } *--id = '\0'; id = input_buffer; while (ISSTRKEEP (*id)) id++; if (*id || id == input_buffer) { c = getc (input_FILE); goto next; } *flags = TOK_STRING; if (eat_underscore && input_buffer[0] == '_' && input_buffer[1]) return &input_buffer[1]; else return input_buffer; case '\'': c = getc (input_FILE); for (;;) { while (ISQ1BORING (c)) c = getc (input_FILE); if (c == '\\') { c = getc (input_FILE); continue; } else if (c == '\'') c = getc (input_FILE); goto next; } case '/': c = getc (input_FILE); if (c == '/') { /* Cope with C++ comment */ while (ISVBORING (c)) c = getc (input_FILE); new_line = 1; goto top; } else if (c != '*') goto next; c = getc (input_FILE); for (;;) { while (ISCBORING (c)) c = getc (input_FILE); c = getc (input_FILE); if (c == '/') { c = getc (input_FILE); goto next; } else if (ISEOF (c)) { new_line = 1; return NULL; } } case '\n': new_line = 1; goto top; case '#': if (!scan_VHIL) { /* Auto-recognize vhil when find a # in the middle of a line. */ set_args_c ("vhil", '+', "v"); } c = getc (input_FILE); while (ISVBORING (c)) c = getc (input_FILE); new_line = 1; goto top; default: if (ISEOF (c)) { new_line = 1; return NULL; } id = input_buffer; *id++ = c; if (ISID1ST (c)) { *flags = TOK_NAME; while (ISIDREST (c = getc (input_FILE))) *id++ = c; } else if (ISDIGIT (c)) { *flags = TOK_NUMBER; while (ISNUMBER (c = getc (input_FILE))) *id++ = c; } else fprintf (stderr, "junk: `\\%3o'", c); ungetc (c, input_FILE); *id = '\0'; *flags |= TOK_LITERAL; return input_buffer; } } static void set_ctype_c (char const *chars, int type) { short *rct = &ctype_c[1]; while (*chars) rct[*chars++] |= type; } static void clear_ctype_c (char const *chars, int type) { short *rct = &ctype_c[1]; while (*chars) rct[*chars++] &= ~type; } static void usage_c (char const *lang_name) { fprintf (stderr, "Usage: %s does not accept %s scanner arguments\n", program_name, lang_name); exit (1); } static char document_c[] = "\ The C scanner arguments take the form -Sc, where \n\ is one of the following: ( denotes one or more characters)\n\ (+|-)u . . . . (Do|Don't) strip a leading `_' from ids in strings.\n\ (+|-)s . . Allow in string ids, and (keep|ignore) those ids.\n\ -v . . . . . . Skip vhil comments."; static void set_args_c (char const *lang_name, int op, char const *arg) { if (op == '?') { puts (document_c); return; } switch (*arg++) { case 'u': eat_underscore = (op == '+'); break; case 's': if (op == '+') set_ctype_c (arg, SK); else clear_ctype_c (arg, SK); break; case 'v': set_ctype_c ("$", I1); set_ctype_c ("#", VH); set_ctype_c (" \t", WS); scan_VHIL = 1; break; default: if (lang_name) usage_c (lang_name); break; } } #undef I1 #undef DG #undef NM #undef C1 #undef C2 #undef Q1 #undef Q2 #undef ES #undef NL #undef EF #undef SK #undef VH #undef WS #undef ISDIGIT #undef ISNUMBER #undef ISEOF #undef ISID1ST #undef ISIDREST #undef ISSTRKEEP #undef ISSPACE #undef ISBORING #undef ISCBORING #undef ISVBORING #undef ISQ1BORING #undef ISQ2BORING /*************** Assembly ***************************************************/ #define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ #define NM 0x02 /* digit [0-9a-fA-FxX] */ #define NL 0x04 /* newline: \n */ #define CM 0x08 /* assembler comment char: usually # or | */ #define IG 0x10 /* ignore `identifiers' with these chars in them */ #define C1 0x20 /* C comment introduction char: / */ #define C2 0x40 /* C comment termination char: * */ #define EF 0x80 /* EOF */ /* Assembly Language character classes */ #define ISID1ST(c) ((rct)[c] & (I1)) #define ISIDREST(c) ((rct)[c] & (I1|NM)) #define ISNUMBER(c) ((rct)[c] & (NM)) #define ISEOF(c) ((rct)[c] & (EF)) #define ISCOMMENT(c) ((rct)[c] & (CM)) #define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1))) #define ISCBORING(c) (!((rct)[c] & (EF|NL))) #define ISCCBORING(c) (!((rct)[c] & (EF|C2))) #define ISIGNORE(c) ((rct)[c] & (IG)) static char ctype_asm[257] = { EF, /* 0 1 2 3 4 5 6 7 */ /* ----- ----- ----- ----- ----- ----- ----- ----- */ /*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /*010*/ 0, 0, NL, 0, 0, 0, 0, 0, /*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /*040*/ 0, 0, 0, 0, 0, 0, 0, 0, /*050*/ 0, 0, C2, 0, 0, 0, 0, C1, /*060*/ NM, NM, NM, NM, NM, NM, NM, NM, /*070*/ NM, NM, 0, 0, 0, 0, 0, 0, /*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, /*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*160*/ I1, I1, I1, I1, I1, I1, I1, I1, /*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, }; static int cpp_on_asm = 1; /* Grab the next identifier the assembly language source file opened with the handle `input_FILE'. This state machine is built for speed, not elegance. */ static char const * get_token_asm (FILE *input_FILE, int *flags) { static char input_buffer[BUFSIZ]; char *rct = &ctype_asm[1]; int c; char *id = input_buffer; static int new_line = 1; top: c = getc (input_FILE); if (cpp_on_asm > 0 && new_line) { new_line = 0; if (c != '#') goto next; while (ISBORING (c)) c = getc (input_FILE); if (!ISID1ST (c)) goto next; id = input_buffer; *id++ = c; while (ISIDREST (c = getc (input_FILE))) *id++ = c; *id = '\0'; if (strequ (input_buffer, "include")) { while (c != '"' && c != '<') c = getc (input_FILE); id = input_buffer; *id++ = c = getc (input_FILE); while ((c = getc (input_FILE)) != '"' && c != '>') *id++ = c; *id = '\0'; *flags = TOK_STRING; return input_buffer; } if (strnequ (input_buffer, "if", 2) || strequ (input_buffer, "define") || strequ (input_buffer, "undef")) goto next; while (c != '\n') c = getc (input_FILE); new_line = 1; goto top; } next: while (ISBORING (c)) c = getc (input_FILE); if (ISCOMMENT (c)) { while (ISCBORING (c)) c = getc (input_FILE); new_line = 1; } if (ISEOF (c)) { new_line = 1; return NULL; } if (c == '\n') { new_line = 1; goto top; } if (c == '/') { if ((c = getc (input_FILE)) != '*') goto next; c = getc (input_FILE); for (;;) { while (ISCCBORING (c)) c = getc (input_FILE); c = getc (input_FILE); if (c == '/') { c = getc (input_FILE); break; } else if (ISEOF (c)) { new_line = 1; return NULL; } } goto next; } id = input_buffer; if (eat_underscore && c == '_' && !ISID1ST (c = getc (input_FILE))) { ungetc (c, input_FILE); return "_"; } *id++ = c; if (ISID1ST (c)) { *flags = TOK_NAME; while (ISIDREST (c = getc (input_FILE))) *id++ = c; } else if (ISNUMBER (c)) { *flags = TOK_NUMBER; while (ISNUMBER (c = getc (input_FILE))) *id++ = c; } else { if (isprint (c)) fprintf (stderr, "junk: `%c'", c); else fprintf (stderr, "junk: `\\%03o'", c); goto next; } *id = '\0'; for (id = input_buffer; *id; id++) if (ISIGNORE (*id)) goto next; ungetc (c, input_FILE); *flags |= TOK_LITERAL; return input_buffer; } static void set_ctype_asm (char const *chars, int type) { char *rct = &ctype_asm[1]; while (*chars) rct[*chars++] |= type; } static void clear_ctype_asm (char const *chars, int type) { char *rct = &ctype_asm[1]; while (*chars) rct[*chars++] &= ~type; } static void usage_asm (char const *lang_name) { fprintf (stderr, "Usage: %s -S%s([-c] [-u] [(+|-)a] [(+|-)p] [(+|-)C])\n", program_name, lang_name); exit (1); } static char document_asm[] = "\ The Assembler scanner arguments take the form -Sasm, where\n\ is one of the following: ( denotes one or more characters)\n\ -c . . . . introduce(s) a comment until end-of-line.\n\ (+|-)u . . . . (Do|Don't) strip a leading `_' from ids.\n\ (+|-)a . . Allow in ids, and (keep|ignore) those ids.\n\ (+|-)p . . . . (Do|Don't) handle C-preprocessor directives.\n\ (+|-)C . . . . (Do|Don't) handle C-style comments. (/* */)"; static void set_args_asm (char const *lang_name, int op, char const *arg) { if (op == '?') { puts (document_asm); return; } switch (*arg++) { case 'a': set_ctype_asm (arg, I1 | ((op == '-') ? IG : 0)); break; case 'c': set_ctype_asm (arg, CM); break; case 'u': eat_underscore = (op == '+'); break; case 'p': cpp_on_asm = (op == '+'); break; case 'C': if (op == '+') { set_ctype_asm ("/", C1); set_ctype_asm ("*", C2); } else { clear_ctype_asm ("/", C1); clear_ctype_asm ("*", C2); } break; default: if (lang_name) usage_asm (lang_name); break; } } #undef I1 #undef NM #undef NL #undef CM #undef IG #undef C1 #undef C2 #undef EF #undef ISID1ST #undef ISIDREST #undef ISNUMBER #undef ISEOF #undef ISCOMMENT #undef ISBORING #undef ISCBORING #undef ISCCBORING #undef ISIGNORE /*************** Text *******************************************************/ #define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ #define NM 0x02 /* digit [0-9a-fA-FxX] */ #define SQ 0x04 /* squeeze these out (.,',-) */ #define EF 0x80 /* EOF */ /* Text character classes */ #define ISID1ST(c) ((rct)[c] & (I1)) #define ISIDREST(c) ((rct)[c] & (I1|NM|SQ)) #define ISNUMBER(c) ((rct)[c] & (NM)) #define ISEOF(c) ((rct)[c] & (EF)) #define ISBORING(c) (!((rct)[c] & (I1|NM|EF))) #define ISIDSQUEEZE(c) ((rct)[c] & (SQ)) static char ctype_text[257] = { EF, /* 0 1 2 3 4 5 6 7 */ /* ----- ----- ----- ----- ----- ----- ----- ----- */ /*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /*010*/ 0, 0, 0, 0, 0, 0, 0, 0, /*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /*040*/ 0, 0, 0, 0, 0, 0, 0, SQ, /*050*/ 0, 0, 0, 0, 0, SQ, SQ, 0, /*060*/ NM, NM, NM, NM, NM, NM, NM, NM, /*070*/ NM, NM, 0, 0, 0, 0, 0, 0, /*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, /*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*160*/ I1, I1, I1, I1, I1, I1, I1, I1, /*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, }; /* Grab the next identifier the text source file opened with the handle `input_FILE'. This state machine is built for speed, not elegance. */ static char const * get_token_text (FILE *input_FILE, int *flags) { static char input_buffer[BUFSIZ]; char *rct = &ctype_text[1]; int c; char *id = input_buffer; top: c = getc (input_FILE); while (ISBORING (c)) c = getc (input_FILE); if (ISEOF (c)) return NULL; id = input_buffer; *id++ = c; if (ISID1ST (c)) { *flags = TOK_NAME; while (ISIDREST (c = getc (input_FILE))) if (!ISIDSQUEEZE (c)) *id++ = c; } else if (ISNUMBER (c)) { *flags = TOK_NUMBER; while (ISNUMBER (c = getc (input_FILE))) *id++ = c; } else { if (isprint (c)) fprintf (stderr, "junk: `%c'", c); else fprintf (stderr, "junk: `\\%03o'", c); goto top; } *id = '\0'; ungetc (c, input_FILE); *flags |= TOK_LITERAL; return input_buffer; } static void set_ctype_text (char const *chars, int type) { char *rct = &ctype_text[1]; while (*chars) rct[*chars++] |= type; } static void clear_ctype_text (char const *chars, int type) { char *rct = &ctype_text[1]; while (*chars) rct[*chars++] &= ~type; } static void usage_text (char const *lang_name) { fprintf (stderr, "Usage: %s -S%s([(+|-)a] [(+|-)s]\n", program_name, lang_name); exit (1); } static char document_text[] = "\ The Text scanner arguments take the form -Stext, where\n\ is one of the following: ( denotes one or more characters)\n\ (+|-)a . . Include (or exculde) in ids.\n\ (+|-)s . . Squeeze (or don't squeeze) out of ids."; static void set_args_text (char const *lang_name, int op, char const *arg) { if (op == '?') { puts (document_text); return; } switch (*arg++) { case 'a': if (op == '+') set_ctype_text (arg, I1); else clear_ctype_text (arg, I1); break; case 's': if (op == '+') set_ctype_text (arg, SQ); else clear_ctype_text (arg, SQ); break; default: if (lang_name) usage_text (lang_name); break; } } #undef I1 #undef NM #undef SQ #undef EF #undef ISID1ST #undef ISIDREST #undef ISNUMBER #undef ISEOF #undef ISBORING #undef ISIDSQUEEZE