summaryrefslogtreecommitdiffstats
path: root/scanners.c
diff options
context:
space:
mode:
Diffstat (limited to 'scanners.c')
-rw-r--r--scanners.c1209
1 files changed, 1209 insertions, 0 deletions
diff --git a/scanners.c b/scanners.c
new file mode 100644
index 0000000..9020a1c
--- /dev/null
+++ b/scanners.c
@@ -0,0 +1,1209 @@
+/* scanners.c -- language-specific token scanners (C, VHIL, assembler, text)
+ Copyright (C) 1986, 1995 Greg McGary
+ VHIL portions Copyright (C) 1988 Tom Horsley
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include "strxtra.h"
+#include "token.h"
+#include "alloc.h"
+#include "scanners.h"
+
+extern char const *program_name;
+
+static char const *get_token_VHIL (FILE *input_FILE, int *flags);
+static char const *get_token_c (FILE *input_FILE, int *flags);
+static void set_args_c (char const *lang_name, int op, char const *arg);
+static void set_ctype_c (char const *chars, int type);
+static void clear_ctype_c (char const *chars, int type);
+static void usage_c (char const *lang_name);
+
+static char const *get_token_asm (FILE *input_FILE, int *flags);
+static void set_ctype_asm (char const *chars, int type);
+static void clear_ctype_asm (char const *chars, int type);
+static void usage_asm (char const *lang_name);
+static void set_args_asm (char const *lang_name, int op, char const *arg);
+
+static char const *get_token_text (FILE *input_FILE, int *flags);
+static void set_ctype_text (char const *chars, int type);
+static void clear_ctype_text (char const *chars, int type);
+static void usage_text (char const *lang_name);
+static void set_args_text (char const *lang_name, int op, char const *arg);
+
+/****************************************************************************/
+
+/* One entry per language known to the scanner.  Entries are chained
+   through lang_next by init_scanners (); the lookup code treats an entry
+   whose lang_next is NULL as the end-of-list ("not found") marker.  */
+struct language
+{
+ char const *lang_name; /* name used in -S arguments and in the suffix table */
+ char const *(*lang_get_token) (FILE *input_FILE, int *flags); /* token scanner; NULL for the "gzip" entry */
+ void (*lang_set_args) (char const *lang_name, int op, char const *arg); /* per-language argument handler; NULL for the "gzip" entry */
+ char const *lang_filter; /* filter command template containing `%s', or NULL */
+ struct language *lang_next; /* next entry in the list */
+};
+
+/* Association between a file-name suffix and a language.  Entries are
+   chained through suff_next by init_scanners (); callers treat an entry
+   whose suff_next is NULL as the end-of-list ("not found") marker.  */
+struct suffix
+{
+ char const *suff_suffix; /* suffix text including the dot, e.g. ".c"; "" for no suffix */
+ char const *suff_lang_name; /* language name as written in the static table */
+ struct language *suff_language; /* resolved language entry, filled in at run time */
+ struct suffix *suff_next; /* next entry in the list */
+};
+
+static struct suffix *get_suffix_entry (char const *suffix);
+static struct language *get_lang_entry (char const *lang_name);
+static void usage_scan (void);
+
+/* Built-in languages.  Each row gives: name, token scanner, argument
+   handler, filter command.  lang_next is left zero here and is filled in
+   by init_scanners ().
+   NOTE(review): the lookup in this file (get_lang_entry) is a linear list
+   walk, not bsearch(3), so the sort order noted below is not relied upon
+   by the code visible here.  */
+struct language languages[] =
+{
+ /* must be sorted for bsearch(3) */
+ { "C", get_token_c, set_args_c, NULL },
+ { "TeX", get_token_text, set_args_text, NULL },
+ { "VHIL", get_token_VHIL, set_args_c, NULL },
+ { "asm", get_token_asm, set_args_asm, NULL },
+/*{ "elisp", get_token_elisp, set_args_elisp, NULL },*/
+ { "gzip", NULL, NULL, "zcat %s" }, /* filter only: no scanner and no argument handler */
+ { "roff", get_token_text, set_args_text, "sed '/^\\.so/d' < %s | deroff" },
+ { "text", get_token_text, set_args_text, NULL },
+};
+
+/*
+ This is a rather incomplete list of default associations
+ between suffixes and languages. You may add more to the
+ default list, or you may define them dynamically with the
+ `-S<suff>=<lang>' argument to mkid(1) and idx(1). e.g. to
+ associate a `.ada' suffix with the Ada language, use
+ `-S.ada=ada'
+
+ Each row gives the suffix and the language name; suff_language and
+ suff_next are left zero here and are filled in by init_scanners ().
+ NOTE(review): ".p" and ".pas" name the language "pas", which has no
+ entry in languages[].
+*/
+struct suffix suffixes[] =
+{
+ { "", "text" }, /* files with no suffix */
+ { ".1", "roff" },
+ { ".2", "roff" },
+ { ".3", "roff" },
+ { ".4", "roff" },
+ { ".5", "roff" },
+ { ".6", "roff" },
+ { ".7", "roff" },
+ { ".8", "roff" },
+ { ".C", "C" },
+ { ".H", "C" },
+ { ".Z", "gzip" },
+ { ".c", "C" },
+ { ".cc", "C" },
+ { ".cpp", "C" },
+ { ".cxx", "C" },
+ { ".doc", "text" },
+/*{ ".el", "elisp" },*/
+ { ".gz", "gzip" },
+ { ".h", "C" },
+ { ".hh", "C" },
+ { ".hpp", "C" },
+ { ".hxx", "C" },
+ { ".l", "C" },
+ { ".lex", "C" },
+ { ".ltx", "TeX" },
+ { ".p", "pas" },
+ { ".pas", "pas" },
+ { ".s", "asm" },
+ { ".S", "asm" },
+ { ".tex", "TeX" },
+ { ".x", "VHIL" },
+ { ".y", "C" },
+ { ".yacc", "C" },
+ { ".z", "gzip" },
+};
+
+/* Link the static languages[] and suffixes[] tables into singly linked
+   lists and resolve every suffix's language name to a language entry.
+
+   The lookup code in this file treats the entry whose `next' pointer is
+   NULL as the end-of-list marker and never matches it.  The lists are
+   therefore terminated with a separately allocated, zero-filled marker
+   entry, the same way set_scan_args () extends them.  Previously the
+   last real table entry (language "text", suffix ".z") served as the
+   marker and could never be found.  Should the allocation yield NULL,
+   the last table entry simply becomes the marker again.
+
+   A suffix whose language has no entry in languages[] (e.g. "pas") is
+   mapped to the "text" language, which is where such suffixes ended up
+   before this change as well.  */
+void
+init_scanners (void)
+{
+  struct language *lang;
+  struct language *lang_N = &languages[(sizeof (languages) / sizeof (languages[0])) - 1];
+  struct language *lang_text;
+  struct suffix *suff;
+  struct suffix *suff_N = &suffixes[(sizeof (suffixes) / sizeof (suffixes[0])) - 1];
+
+  for (lang = languages; lang < lang_N; ++lang)
+    lang->lang_next = lang + 1;
+  lang_N->lang_next = CALLOC (struct language, 1);
+
+  /* Resolve the fallback once; the language list is complete by now.  */
+  lang_text = get_lang_entry ("text");
+
+  for (suff = suffixes; suff <= suff_N; ++suff)
+    {
+      lang = get_lang_entry (suff->suff_lang_name);
+      if (lang->lang_next == NULL)
+        lang = lang_text;	/* language not defined: scan as plain text */
+      suff->suff_language = lang;
+      suff->suff_next = suff + 1;
+    }
+  suff_N->suff_next = CALLOC (struct suffix, 1);
+}
+
+/* Return the suffix list entry whose suff_suffix equals SUFFIX (a NULL
+   SUFFIX is treated as "").  When nothing matches, return the final list
+   entry, i.e. the one whose suff_next is NULL; callers test
+   `stp->suff_next == NULL' to detect "not found" and set_scan_args ()
+   fills that entry in when defining a new suffix.
+
+   The walk stops at the final entry instead of running off the end of
+   the list: returning NULL here made every caller dereference a null
+   pointer for an unknown suffix, and the final entry of a dynamically
+   extended list has a NULL suff_suffix that must not be compared.  */
+static struct suffix *
+get_suffix_entry (char const *suffix)
+{
+  struct suffix *stp;
+
+  if (suffix == NULL)
+    suffix = "";
+
+  for (stp = suffixes; stp->suff_next; stp = stp->suff_next)
+    if (strequ (stp->suff_suffix, suffix))
+      return stp;
+  return stp;
+}
+
+/* Return the language list entry named LANG_NAME (a NULL name is treated
+   as "").  When no entry matches, the final list entry -- the one whose
+   lang_next is NULL -- is returned; that entry itself is never compared.  */
+static struct language *
+get_lang_entry (char const *lang_name)
+{
+  struct language *entry = languages;
+
+  if (!lang_name)
+    lang_name = "";
+
+  while (entry->lang_next != NULL)
+    {
+      /* Identical pointers are equal strings; otherwise compare the text.  */
+      if (entry->lang_name == lang_name || strequ (entry->lang_name, lang_name))
+        break;
+      entry = entry->lang_next;
+    }
+  return entry;
+}
+
+/* Return the name of the language associated with SUFFIX, or NULL when
+   the suffix lookup yields the end-of-list entry (suff_next == NULL).  */
+char const *
+get_lang_name (char const *suffix)
+{
+  struct suffix *entry = get_suffix_entry (suffix);
+
+  return (entry->suff_next != NULL) ? entry->suff_language->lang_name : NULL;
+}
+
+/* Return the filter command template of the language associated with
+   SUFFIX.  The result is NULL when the suffix lookup yields the
+   end-of-list entry (suff_next == NULL) or the language has no filter.  */
+char const *
+get_filter (char const *suffix)
+{
+  struct suffix *entry = get_suffix_entry (suffix);
+
+  return (entry->suff_next != NULL) ? entry->suff_language->lang_filter : NULL;
+}
+
+/* Return the token-scanning function of the language named LANG, or
+   NULL when the language lookup yields the end-of-list entry
+   (lang_next == NULL).  The function pointer stored in the table may
+   itself be NULL (see the "gzip" entry).  */
+char const *(*
+get_scanner (char const *lang)
+ ) (FILE *input_FILE, int *flags)
+{
+  struct language *entry = get_lang_entry (lang);
+
+  return (entry->lang_next != NULL) ? entry->lang_get_token : NULL;
+}
+
+/* Interpret one scanner argument ARG; OP is '+' when the argument came
+   from a `+S' option (see usage_scan ()).  ARG is modified in place and
+   pointers into it are stored in the suffix and language lists, so it
+   must stay valid afterwards.  Recognised forms:
+
+     ?=?                 list every suffix=language association
+     ?=<lang>            list the suffixes associated with <lang>
+     <suffix>=?          show the language associated with <suffix>
+     <suffix>=<lang>     associate <suffix> with <lang>
+     <lang>/<lang2>/<filter>
+                         define or alter <lang>: scan like <lang2> after
+                         piping the file through <filter>
+     (+|-|?)<arg>        (only with OP '+') hand <arg> to every language
+     <lang>(+|-|?)<arg>  hand <arg> to <lang>'s argument handler
+
+   Anything else prints the usage message and exits.  */
+void
+set_scan_args (int op, char *arg)
+{
+  struct language *ltp, *ltp2;
+  struct suffix *stp;
+  char *lhs;
+  char *lhs2;
+  int count = 0;
+
+  /* Split off the leading word: LHS is its start, ARG ends up pointing at
+     the first char that is neither alphanumeric nor a dot.  The cast keeps
+     isalnum () defined for chars with the high bit set.  */
+  lhs = arg;
+  while (isalnum ((unsigned char) *arg) || *arg == '.')
+    arg++;
+
+  if (strequ (lhs, "?=?"))
+    {
+      for (stp = suffixes; stp->suff_next; stp = stp->suff_next)
+        {
+          printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, stp->suff_language->lang_name);
+          if (stp->suff_language->lang_filter)
+            printf (" (%s)", stp->suff_language->lang_filter);
+        }
+      if (count)
+        putchar ('\n');
+      return;
+    }
+
+  if (strnequ (lhs, "?=", 2))
+    {
+      lhs += 2;
+      ltp = get_lang_entry (lhs);
+      if (ltp->lang_next == NULL)
+        {
+          printf ("No scanner for language `%s'\n", lhs);
+          return;
+        }
+      for (stp = suffixes; stp->suff_next; stp = stp->suff_next)
+        if (stp->suff_language == ltp)
+          {
+            printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, ltp->lang_name);
+            if (stp->suff_language->lang_filter)
+              printf (" (%s)", stp->suff_language->lang_filter);
+          }
+      if (count)
+        putchar ('\n');
+      return;
+    }
+
+  if (strequ (arg, "=?"))
+    {
+      /* ARG is the tail of LHS, so this chops the trailing "=?".  */
+      lhs[strlen (lhs) - 2] = '\0';
+      stp = get_suffix_entry (lhs);
+      if (stp->suff_next == NULL)
+        {
+          printf ("No scanner assigned to suffix `%s'\n", lhs);
+          return;
+        }
+      printf ("%s=%s", stp->suff_suffix, stp->suff_language->lang_name);
+      if (stp->suff_language->lang_filter)
+        printf (" (%s)", stp->suff_language->lang_filter);
+      printf ("\n");
+      return;
+    }
+
+  if (*arg == '=')
+    {
+      *arg++ = '\0';
+
+      ltp = get_lang_entry (arg);
+      if (ltp->lang_next == NULL)
+        {
+          fprintf (stderr, "%s: Language undefined: %s\n", program_name, arg);
+          return;
+        }
+      stp = get_suffix_entry (lhs);
+      if (stp->suff_next == NULL)
+        {
+          /* New suffix: fill in the end-of-list entry and append a fresh,
+             zero-filled one behind it.  */
+          stp->suff_suffix = lhs;
+          stp->suff_language = ltp;
+          stp->suff_next = CALLOC (struct suffix, 1);
+        }
+      else if (!strequ (arg, stp->suff_language->lang_name))
+        {
+          fprintf (stderr, "%s: Note: `%s=%s' overrides `%s=%s'\n", program_name, lhs, arg, lhs, stp->suff_language->lang_name);
+          stp->suff_language = ltp;
+        }
+      return;
+    }
+  else if (*arg == '/')
+    {
+      *arg++ = '\0';
+      ltp = get_lang_entry (lhs);
+      if (ltp->lang_next == NULL)
+        {
+          /* New language: fill in the end-of-list entry, defaulting to the
+             text scanner, and append a fresh end-of-list entry.  */
+          ltp->lang_name = lhs;
+          ltp->lang_get_token = get_token_text;
+          ltp->lang_set_args = set_args_text;
+          ltp->lang_filter = NULL;
+          ltp->lang_next = CALLOC (struct language, 1);
+        }
+      lhs2 = arg;
+      arg = strchr (arg, '/');
+      if (arg == NULL)
+        ltp2 = ltp;
+      else
+        {
+          *arg++ = '\0';
+          ltp2 = get_lang_entry (lhs2);
+          if (ltp2->lang_next == NULL)
+            {
+              fprintf (stderr, "%s: language %s not defined.\n", program_name, lhs2);
+              ltp2 = ltp;
+            }
+        }
+      ltp->lang_get_token = ltp2->lang_get_token;
+      ltp->lang_set_args = ltp2->lang_set_args;
+      /* ARG is NULL when there was no second `/'; it must not reach
+         strequ () or a `%s' conversion in that state.  */
+      if (ltp->lang_filter && (arg == NULL || !strequ (arg, ltp->lang_filter)))
+        fprintf (stderr, "%s: Note: `%s/%s' overrides `%s/%s'\n", program_name, lhs, arg ? arg : "", lhs, ltp->lang_filter);
+      ltp->lang_filter = arg;
+      return;
+    }
+
+  if (op == '+')
+    {
+      switch (op = *arg++)
+        {
+        case '+':
+        case '-':
+        case '?':
+          break;
+        default:
+          usage_scan ();
+        }
+      /* Broadcast to every language that has an argument handler; the
+         "gzip" entry has none and used to be called through NULL.  */
+      for (ltp = languages; ltp->lang_next; ltp = ltp->lang_next)
+        if (ltp->lang_set_args)
+          (*ltp->lang_set_args) (NULL, op, arg);
+      return;
+    }
+
+  if (*arg == '-' || *arg == '+' || *arg == '?')
+    {
+      op = *arg;
+      *arg++ = '\0';
+
+      ltp = get_lang_entry (lhs);
+      if (ltp->lang_next == NULL)
+        {
+          fprintf (stderr, "%s: Language undefined: %s\n", program_name, lhs);
+          return;
+        }
+      if (ltp->lang_set_args == NULL)
+        {
+          fprintf (stderr, "%s: Language %s takes no scanner arguments\n", program_name, lhs);
+          return;
+        }
+      (*ltp->lang_set_args) (lhs, op, arg);
+      return;
+    }
+
+  usage_scan ();
+}
+
+/* Print the synopsis of the scanner options on stderr and terminate the
+   program with exit status 1.  */
+static void
+usage_scan (void)
+{
+  fprintf (stderr,
+           "Usage: %s [-S<suffix>=<lang>] [+S(+|-)<arg>]"
+           " [-S<lang>(+|-)<arg>] [-S<lang>/<lang>/<filter>]\n",
+           program_name);
+  exit (1);
+}
+
+/*************** C & C++ ****************************************************/
+
+/* Character class bits stored in ctype_c[].  A table element may carry
+   several of them.  */
+#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */
+#define DG 0x0002 /* decimal digit [0-9] */
+#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */
+#define C1 0x0008 /* C comment introduction char: / */
+#define C2 0x0010 /* C comment termination char: * */
+#define Q1 0x0020 /* single quote: ' */
+#define Q2 0x0040 /* double quote: " */
+#define ES 0x0080 /* escape char: \ */
+#define NL 0x0100 /* newline: \n */
+#define EF 0x0200 /* EOF */
+#define SK 0x0400 /* Make these chars valid for names within strings */
+#define VH 0x0800 /* VHIL comment introduction char: # */
+#define WS 0x1000 /* White space characters */
+
+/*
+ character class membership macros:
+ Each macro expects a local `rct' that points at element 1 of the class
+ table, so that c == EOF (-1) selects element 0.  c is used as an array
+ index and must therefore not be a negative char value other than EOF.
+*/
+#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */
+#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */
+#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */
+#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */
+#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */
+#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */
+#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */
+/*
+ The `BORING' classes should be skipped over
+ until something interesting comes along...
+ Every BORING macro is false for EOF, so a skip loop always terminates.
+*/
+#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */
+#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */
+#define ISVBORING(c) (!((rct)[c] & (EF|NL))) /* vhil comment fluff */
+#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */
+#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */
+
+/* Character class table for the C scanner.  Element 0 is the class of
+   EOF; element N + 1 is the class of character code N (rows are labelled
+   with the octal code of their first character).  Only codes 0..0177 are
+   initialised explicitly; the remaining elements are zero.  The table is
+   modified at run time by set_ctype_c () and clear_ctype_c ().  */
+static short ctype_c[257] =
+{
+ EF,
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1,
+/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
+/*060*/ DG, DG, DG, DG, DG, DG, DG, DG,
+/*070*/ DG, DG, 0, 0, 0, 0, 0, 0,
+/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1,
+/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
+};
+
+/* Nonzero: strip a leading `_' from ids (C string ids and asm ids); set by the `u' scanner argument.  */
+static int eat_underscore = 1;
+/* Nonzero once VHIL handling has been switched on by the `v' scanner argument.  */
+static int scan_VHIL = 0;
+
+/* Token scanner for VHIL files: make sure VHIL handling is switched on,
+   then delegate to the C scanner.  */
+static char const *
+get_token_VHIL (FILE *input_FILE, int *flags)
+{
+  if (scan_VHIL == 0)
+    {
+      /* First use: apply the `v' argument, which sets scan_VHIL.  */
+      set_args_c ("vhil", '+', "v");
+    }
+
+  return get_token_c (input_FILE, flags);
+}
+
+/*
+  Grab the next identifier from the C source
+  file opened with the handle `input_FILE'.
+  This state machine is built for speed, not elegance.
+
+  Returns a pointer into a static buffer that is overwritten by the next
+  call, or NULL at end of file.  *flags is set to TOK_NAME, TOK_NUMBER or
+  TOK_STRING; names and numbers found in ordinary code additionally get
+  TOK_LITERAL.  Tokens are produced for identifiers and numbers, for
+  string literals made up solely of "keepable" characters, and for the
+  operand of #include.  Comments, character constants and preprocessor
+  lines other than #if*, #elif, #define, #undef and #include are skipped.
+
+  A backslash inside a string or character constant now skips the
+  character it escapes, so an escaped quote no longer ends the constant.
+
+  NOTE(review): nothing bounds the writes into input_buffer; a token
+  longer than BUFSIZ overruns it.
+*/
+static char const *
+get_token_c (FILE *input_FILE, int *flags)
+{
+  static char input_buffer[BUFSIZ];
+  static int new_line = 1;	/* nonzero when the next char starts a line */
+  short *rct = &ctype_c[1];	/* biased by one so that rct[EOF] is valid */
+  int c;
+  char *id = input_buffer;
+
+top:
+  c = getc (input_FILE);
+  if (new_line)
+    {
+      new_line = 0;
+      if (c == '.')
+        {
+          /* Auto-recognize vhil code when you see a '.' in column 1.
+             also ignore lines that start with a '.' */
+          if (!scan_VHIL)
+            set_args_c ("vhil", '+', "v");
+          while (ISVBORING (c))
+            c = getc (input_FILE);
+          new_line = 1;
+          goto top;
+        }
+      if (c != '#')
+        goto next;
+      c = getc (input_FILE);
+      if (scan_VHIL && ISSPACE (c))
+        {
+          /* `#' followed by white space is a VHIL comment line.  */
+          while (ISVBORING (c))
+            c = getc (input_FILE);
+          new_line = 1;
+          goto top;
+        }
+      while (ISBORING (c))
+        c = getc (input_FILE);
+      if (!ISID1ST (c))
+        goto next;
+      /* Collect the directive name.  */
+      id = input_buffer;
+      *id++ = c;
+      while (ISIDREST (c = getc (input_FILE)))
+        *id++ = c;
+      *id = '\0';
+      if (strequ (input_buffer, "include"))
+        {
+          while (c == ' ' || c == '\t')
+            c = getc (input_FILE);
+          if (c == '\n')
+            {
+              new_line = 1;
+              goto top;
+            }
+          id = input_buffer;
+          if (c == '"')
+            {
+              c = getc (input_FILE);
+              while (c != '\n' && c != EOF && c != '"')
+                {
+                  *id++ = c;
+                  c = getc (input_FILE);
+                }
+              *flags = TOK_STRING;
+            }
+          else if (c == '<')
+            {
+              c = getc (input_FILE);
+              while (c != '\n' && c != EOF && c != '>')
+                {
+                  *id++ = c;
+                  c = getc (input_FILE);
+                }
+              *flags = TOK_STRING;
+            }
+          else if (ISID1ST (c))
+            {
+              /* #include MACRO */
+              *id++ = c;
+              while (ISIDREST (c = getc (input_FILE)))
+                *id++ = c;
+              *flags = TOK_NAME;
+            }
+          else
+            {
+              while (c != '\n' && c != EOF)
+                c = getc (input_FILE);
+              new_line = 1;
+              goto top;
+            }
+          /* Discard the rest of the #include line.  */
+          while (c != '\n' && c != EOF)
+            c = getc (input_FILE);
+          new_line = 1;
+          *id = '\0';
+          return input_buffer;
+        }
+      /* These directives are followed by names worth scanning.  */
+      if (strnequ (input_buffer, "if", 2)
+          || strequ (input_buffer, "define")
+          || strequ (input_buffer, "elif") /* ansi C */
+          || (scan_VHIL && strequ (input_buffer, "elsif"))
+          || strequ (input_buffer, "undef"))
+        goto next;
+      /* Any other directive: skip the whole line.  */
+      while ((c != '\n') && (c != EOF))
+        c = getc (input_FILE);
+      new_line = 1;
+      goto top;
+    }
+
+next:
+  while (ISBORING (c))
+    c = getc (input_FILE);
+
+  switch (c)
+    {
+    case '"':
+      id = input_buffer;
+      *id++ = c = getc (input_FILE);
+      for (;;)
+        {
+          while (ISQ2BORING (c))
+            *id++ = c = getc (input_FILE);
+          if (c == '\\')
+            {
+              /* Store the escaped char, then resume with the char after
+                 it, so that \" does not terminate the string.  */
+              *id++ = c = getc (input_FILE);
+              *id++ = c = getc (input_FILE);
+              continue;
+            }
+          else if (c != '"')
+            goto next;	/* newline or EOF inside the string: give up */
+          break;
+        }
+      *--id = '\0';	/* overwrite the closing quote */
+      id = input_buffer;
+      /* The cast keeps the table index non-negative for 8-bit chars.  */
+      while (ISSTRKEEP ((unsigned char) *id))
+        id++;
+      if (*id || id == input_buffer)
+        {
+          /* Empty, or contains a char not allowed in string ids.  */
+          c = getc (input_FILE);
+          goto next;
+        }
+      *flags = TOK_STRING;
+      if (eat_underscore && input_buffer[0] == '_' && input_buffer[1])
+        return &input_buffer[1];
+      else
+        return input_buffer;
+
+    case '\'':
+      c = getc (input_FILE);
+      for (;;)
+        {
+          while (ISQ1BORING (c))
+            c = getc (input_FILE);
+          if (c == '\\')
+            {
+              /* Drop the escaped char and resume with the one after it,
+                 so that '\'' is consumed as a single constant.  */
+              getc (input_FILE);
+              c = getc (input_FILE);
+              continue;
+            }
+          else if (c == '\'')
+            c = getc (input_FILE);
+          goto next;
+        }
+
+    case '/':
+      c = getc (input_FILE);
+      if (c == '/')
+        { /* Cope with C++ comment */
+          while (ISVBORING (c))
+            c = getc (input_FILE);
+          new_line = 1;
+          goto top;
+        }
+      else if (c != '*')
+        goto next;
+      c = getc (input_FILE);
+      for (;;)
+        {
+          /* Skip to the next `*', then look at the char behind it.  */
+          while (ISCBORING (c))
+            c = getc (input_FILE);
+          c = getc (input_FILE);
+          if (c == '/')
+            {
+              c = getc (input_FILE);
+              goto next;
+            }
+          else if (ISEOF (c))
+            {
+              new_line = 1;
+              return NULL;
+            }
+        }
+
+    case '\n':
+      new_line = 1;
+      goto top;
+
+    case '#':
+      if (!scan_VHIL)
+        {
+          /* Auto-recognize vhil when find a # in the middle of a line. */
+          set_args_c ("vhil", '+', "v");
+        }
+      c = getc (input_FILE);
+      while (ISVBORING (c))
+        c = getc (input_FILE);
+      new_line = 1;
+      goto top;
+    default:
+      if (ISEOF (c))
+        {
+          new_line = 1;
+          return NULL;
+        }
+      id = input_buffer;
+      *id++ = c;
+      if (ISID1ST (c))
+        {
+          *flags = TOK_NAME;
+          while (ISIDREST (c = getc (input_FILE)))
+            *id++ = c;
+        }
+      else if (ISDIGIT (c))
+        {
+          *flags = TOK_NUMBER;
+          while (ISNUMBER (c = getc (input_FILE)))
+            *id++ = c;
+        }
+      else
+        fprintf (stderr, "junk: `\\%3o'", c);
+      /* Push back the char that ended the token for the next call.  */
+      ungetc (c, input_FILE);
+      *id = '\0';
+      *flags |= TOK_LITERAL;
+      return input_buffer;
+    }
+}
+
+/* Add the class bits TYPE to every character of the string CHARS in the
+   C scanner's class table.  Each char is converted to unsigned char
+   before it is used as an index, so that characters with the high bit
+   set cannot index in front of the table where plain char is signed.  */
+static void
+set_ctype_c (char const *chars, int type)
+{
+  short *rct = &ctype_c[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] |= type;
+}
+
+/* Remove the class bits TYPE from every character of the string CHARS
+   in the C scanner's class table.  Each char is converted to unsigned
+   char before it is used as an index, so that characters with the high
+   bit set cannot index in front of the table where plain char is signed.  */
+static void
+clear_ctype_c (char const *chars, int type)
+{
+  short *rct = &ctype_c[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] &= ~type;
+}
+
+/* Complain on stderr that LANG_NAME was given a scanner argument the C
+   scanner does not understand, then exit with status 1.  */
+static void
+usage_c (char const *lang_name)
+{
+  fprintf (stderr,
+           "Usage: %s does not accept "
+           "%s scanner arguments\n",
+           program_name, lang_name);
+  exit (1);
+}
+
+/* Help text printed by set_args_c () when its op argument is '?'.  */
+static char document_c[] = "\
+The C scanner arguments take the form -Sc<arg>, where <arg>\n\
+is one of the following: (<cc> denotes one or more characters)\n\
+ (+|-)u . . . . (Do|Don't) strip a leading `_' from ids in strings.\n\
+ (+|-)s<cc> . . Allow <cc> in string ids, and (keep|ignore) those ids.\n\
+ -v . . . . . . Skip vhil comments.";
+
+/* Apply one C-scanner argument.  OP is '+', '-' or '?'; ARG starts with
+   the option letter, followed by that option's characters, if any.
+     ?   print the help text and return
+     u   strip (+) or keep (-) a leading `_' of string ids
+     s   allow (+) or disallow (-) the given chars in string ids
+     v   switch on VHIL handling (OP is not looked at)
+   An unknown letter is reported through usage_c () only when LANG_NAME
+   is non-NULL; otherwise it is silently ignored.  */
+static void
+set_args_c (char const *lang_name, int op, char const *arg)
+{
+  char const *cc;
+  int letter;
+
+  if (op == '?')
+    {
+      puts (document_c);
+      return;
+    }
+
+  letter = *arg;
+  cc = arg + 1;		/* the characters following the option letter */
+
+  if (letter == 'u')
+    eat_underscore = (op == '+');
+  else if (letter == 's')
+    {
+      if (op == '+')
+        set_ctype_c (cc, SK);
+      else
+        clear_ctype_c (cc, SK);
+    }
+  else if (letter == 'v')
+    {
+      set_ctype_c ("$", I1);
+      set_ctype_c ("#", VH);
+      set_ctype_c (" \t", WS);
+      scan_VHIL = 1;
+    }
+  else if (lang_name)
+    usage_c (lang_name);
+}
+
+#undef I1
+#undef DG
+#undef NM
+#undef C1
+#undef C2
+#undef Q1
+#undef Q2
+#undef ES
+#undef NL
+#undef EF
+#undef SK
+#undef VH
+#undef WS
+#undef ISDIGIT
+#undef ISNUMBER
+#undef ISEOF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISSTRKEEP
+#undef ISSPACE
+#undef ISBORING
+#undef ISCBORING
+#undef ISVBORING
+#undef ISQ1BORING
+#undef ISQ2BORING
+
+/*************** Assembly ***************************************************/
+
+/* Character class bits stored in ctype_asm[].  */
+#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
+#define NM 0x02 /* digit [0-9a-fA-FxX] */
+#define NL 0x04 /* newline: \n */
+#define CM 0x08 /* assembler comment char: usually # or | */
+#define IG 0x10 /* ignore `identifiers' with these chars in them */
+#define C1 0x20 /* C comment introduction char: / */
+#define C2 0x40 /* C comment termination char: * */
+#define EF 0x80 /* EOF */
+
+/* Assembly Language character classes */
+/* Each macro expects a local `rct' that points at element 1 of the class
+   table, so that c == EOF (-1) selects element 0.  */
+#define ISID1ST(c) ((rct)[c] & (I1))
+#define ISIDREST(c) ((rct)[c] & (I1|NM))
+#define ISNUMBER(c) ((rct)[c] & (NM))
+#define ISEOF(c) ((rct)[c] & (EF))
+#define ISCOMMENT(c) ((rct)[c] & (CM))
+#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1))) /* fluff between tokens */
+#define ISCBORING(c) (!((rct)[c] & (EF|NL))) /* inside a to-end-of-line comment */
+#define ISCCBORING(c) (!((rct)[c] & (EF|C2))) /* inside a C-style comment */
+#define ISIGNORE(c) ((rct)[c] & (IG))
+
+/* Character class table for the assembler scanner.  Element 0 is the
+   class of EOF; element N + 1 is the class of character code N (rows are
+   labelled with the octal code of their first character).  No character
+   carries CM or IG initially; set_args_asm () adds those on request.  */
+static char ctype_asm[257] =
+{
+ EF,
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
+/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
+/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
+/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
+/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
+
+};
+
+/* Positive: get_token_asm () handles C-preprocessor lines; set by the `p' scanner argument.  */
+static int cpp_on_asm = 1;
+
+/*
+  Grab the next identifier from the assembly language
+  source file opened with the handle `input_FILE'.
+  This state machine is built for speed, not elegance.
+
+  Returns a pointer into a static buffer that is overwritten by the next
+  call (or the constant string "_"), or NULL at end of file.  *flags is
+  set to TOK_NAME or TOK_NUMBER, ORed with TOK_LITERAL, or to TOK_STRING
+  for the operand of #include.
+
+  The loops that look for the #include delimiters and that skip the rest
+  of a preprocessor line now stop at end of file; they used to spin
+  forever on a truncated file.
+
+  NOTE(review): nothing bounds the writes into input_buffer, and the
+  `return "_"' path leaves *flags untouched.
+*/
+static char const *
+get_token_asm (FILE *input_FILE, int *flags)
+{
+  static char input_buffer[BUFSIZ];
+  char *rct = &ctype_asm[1];	/* biased by one so that rct[EOF] is valid */
+  int c;
+  char *id = input_buffer;
+  static int new_line = 1;	/* nonzero when the next char starts a line */
+
+top:
+  c = getc (input_FILE);
+  if (cpp_on_asm > 0 && new_line)
+    {
+      new_line = 0;
+      if (c != '#')
+        goto next;
+      while (ISBORING (c))
+        c = getc (input_FILE);
+      if (!ISID1ST (c))
+        goto next;
+      /* Collect the directive name.  */
+      id = input_buffer;
+      *id++ = c;
+      while (ISIDREST (c = getc (input_FILE)))
+        *id++ = c;
+      *id = '\0';
+      if (strequ (input_buffer, "include"))
+        {
+          while (c != '"' && c != '<' && c != EOF)
+            c = getc (input_FILE);
+          if (c == EOF)
+            {
+              new_line = 1;
+              return NULL;
+            }
+          id = input_buffer;
+          *id++ = c = getc (input_FILE);
+          while ((c = getc (input_FILE)) != '"' && c != '>' && c != EOF)
+            *id++ = c;
+          *id = '\0';
+          *flags = TOK_STRING;
+          return input_buffer;
+        }
+      /* These directives are followed by names worth scanning.  */
+      if (strnequ (input_buffer, "if", 2)
+          || strequ (input_buffer, "define")
+          || strequ (input_buffer, "undef"))
+        goto next;
+      /* Any other directive: skip the whole line.  */
+      while (c != '\n' && c != EOF)
+        c = getc (input_FILE);
+      new_line = 1;
+      goto top;
+    }
+
+next:
+  while (ISBORING (c))
+    c = getc (input_FILE);
+
+  if (ISCOMMENT (c))
+    {
+      /* Assembler comment: runs to the end of the line.  */
+      while (ISCBORING (c))
+        c = getc (input_FILE);
+      new_line = 1;
+    }
+
+  if (ISEOF (c))
+    {
+      new_line = 1;
+      return NULL;
+    }
+
+  if (c == '\n')
+    {
+      new_line = 1;
+      goto top;
+    }
+
+  if (c == '/')
+    {
+      if ((c = getc (input_FILE)) != '*')
+        goto next;
+      c = getc (input_FILE);
+      for (;;)
+        {
+          /* Skip to the next `*', then look at the char behind it.  */
+          while (ISCCBORING (c))
+            c = getc (input_FILE);
+          c = getc (input_FILE);
+          if (c == '/')
+            {
+              c = getc (input_FILE);
+              break;
+            }
+          else if (ISEOF (c))
+            {
+              new_line = 1;
+              return NULL;
+            }
+        }
+      goto next;
+    }
+
+  id = input_buffer;
+  /* With eat_underscore set, a leading `_' is dropped; a `_' that is not
+     followed by an identifier char is returned on its own.  */
+  if (eat_underscore && c == '_' && !ISID1ST (c = getc (input_FILE)))
+    {
+      ungetc (c, input_FILE);
+      return "_";
+    }
+  *id++ = c;
+  if (ISID1ST (c))
+    {
+      *flags = TOK_NAME;
+      while (ISIDREST (c = getc (input_FILE)))
+        *id++ = c;
+    }
+  else if (ISNUMBER (c))
+    {
+      *flags = TOK_NUMBER;
+      while (ISNUMBER (c = getc (input_FILE)))
+        *id++ = c;
+    }
+  else
+    {
+      if (isprint (c))
+        fprintf (stderr, "junk: `%c'", c);
+      else
+        fprintf (stderr, "junk: `\\%03o'", c);
+      goto next;
+    }
+
+  *id = '\0';
+  /* Drop ids that contain a char of class IG.  The cast keeps the table
+     index non-negative for 8-bit chars.  */
+  for (id = input_buffer; *id; id++)
+    if (ISIGNORE ((unsigned char) *id))
+      goto next;
+  /* Push back the char that ended the token for the next call.  */
+  ungetc (c, input_FILE);
+  *flags |= TOK_LITERAL;
+  return input_buffer;
+}
+
+/* Add the class bits TYPE to every character of the string CHARS in the
+   assembler scanner's class table.  Each char is converted to unsigned
+   char before it is used as an index, so that characters with the high
+   bit set cannot index in front of the table where plain char is signed.  */
+static void
+set_ctype_asm (char const *chars, int type)
+{
+  char *rct = &ctype_asm[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] |= type;
+}
+
+/* Remove the class bits TYPE from every character of the string CHARS
+   in the assembler scanner's class table.  Each char is converted to
+   unsigned char before it is used as an index, so that characters with
+   the high bit set cannot index in front of the table where plain char
+   is signed.  */
+static void
+clear_ctype_asm (char const *chars, int type)
+{
+  char *rct = &ctype_asm[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] &= ~type;
+}
+
+/* Print the synopsis of the assembler scanner arguments for LANG_NAME on
+   stderr, then exit with status 1.  */
+static void
+usage_asm (char const *lang_name)
+{
+  fprintf (stderr,
+           "Usage: %s -S%s([-c<cc>] [-u] [(+|-)a<cc>]"
+           " [(+|-)p] [(+|-)C])\n",
+           program_name, lang_name);
+  exit (1);
+}
+
+/* Help text printed by set_args_asm () when its op argument is '?'.  */
+static char document_asm[] = "\
+The Assembler scanner arguments take the form -Sasm<arg>, where\n\
+<arg> is one of the following: (<cc> denotes one or more characters)\n\
+ -c<cc> . . . . <cc> introduce(s) a comment until end-of-line.\n\
+ (+|-)u . . . . (Do|Don't) strip a leading `_' from ids.\n\
+ (+|-)a<cc> . . Allow <cc> in ids, and (keep|ignore) those ids.\n\
+ (+|-)p . . . . (Do|Don't) handle C-preprocessor directives.\n\
+ (+|-)C . . . . (Do|Don't) handle C-style comments. (/* */)";
+
+/* Apply one assembler-scanner argument.  OP is '+', '-' or '?'; ARG
+   starts with the option letter, followed by that option's characters,
+   if any.
+     ?   print the help text and return
+     a   make the given chars identifier chars; with '-' also mark them
+         so that ids containing them are ignored
+     c   make the given chars comment introducers
+     u   strip (+) or keep (-) a leading `_' of ids
+     p   handle (+) or ignore (-) C-preprocessor lines
+     C   handle (+) or ignore (-) C-style comments
+   An unknown letter is reported through usage_asm () only when
+   LANG_NAME is non-NULL; otherwise it is silently ignored.  */
+static void
+set_args_asm (char const *lang_name, int op, char const *arg)
+{
+  char const *cc;
+  int letter;
+  int classes;
+
+  if (op == '?')
+    {
+      puts (document_asm);
+      return;
+    }
+
+  letter = *arg;
+  cc = arg + 1;		/* the characters following the option letter */
+
+  if (letter == 'a')
+    {
+      classes = I1;
+      if (op == '-')
+        classes |= IG;
+      set_ctype_asm (cc, classes);
+    }
+  else if (letter == 'c')
+    set_ctype_asm (cc, CM);
+  else if (letter == 'u')
+    eat_underscore = (op == '+');
+  else if (letter == 'p')
+    cpp_on_asm = (op == '+');
+  else if (letter == 'C')
+    {
+      if (op == '+')
+        {
+          set_ctype_asm ("/", C1);
+          set_ctype_asm ("*", C2);
+        }
+      else
+        {
+          clear_ctype_asm ("/", C1);
+          clear_ctype_asm ("*", C2);
+        }
+    }
+  else if (lang_name)
+    usage_asm (lang_name);
+}
+
+#undef I1
+#undef NM
+#undef NL
+#undef CM
+#undef IG
+#undef C1
+#undef C2
+#undef EF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISNUMBER
+#undef ISEOF
+#undef ISCOMMENT
+#undef ISBORING
+#undef ISCBORING
+#undef ISCCBORING
+#undef ISIGNORE
+
+/*************** Text *******************************************************/
+
+/* Character class bits stored in ctype_text[].  */
+#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
+#define NM 0x02 /* digit [0-9a-fA-FxX] */
+#define SQ 0x04 /* squeeze these out (.,',-) */
+#define EF 0x80 /* EOF */
+
+/* Text character classes */
+/* Each macro expects a local `rct' that points at element 1 of the class
+   table, so that c == EOF (-1) selects element 0.  */
+#define ISID1ST(c) ((rct)[c] & (I1))
+#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ))
+#define ISNUMBER(c) ((rct)[c] & (NM))
+#define ISEOF(c) ((rct)[c] & (EF))
+#define ISBORING(c) (!((rct)[c] & (I1|NM|EF))) /* fluff between tokens */
+#define ISIDSQUEEZE(c) ((rct)[c] & (SQ)) /* accepted inside an id but not stored */
+
+/* Character class table for the text scanner.  Element 0 is the class of
+   EOF; element N + 1 is the class of character code N (rows are labelled
+   with the octal code of their first character).  The table is modified
+   at run time by set_ctype_text () and clear_ctype_text ().  */
+static char ctype_text[257] =
+{
+ EF,
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, 0, 0, 0, 0, 0, 0, SQ,
+/*050*/ 0, 0, 0, 0, 0, SQ, SQ, 0,
+/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
+/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
+/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
+/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
+};
+
+/*
+  Fetch the next word or number from the text file open on
+  `input_FILE'.  The result points into a static buffer that the next
+  call overwrites; NULL is returned at end of file.  *flags receives
+  TOK_NAME or TOK_NUMBER, ORed with TOK_LITERAL.  Characters of the
+  squeeze class are accepted inside a word but are not stored.
+*/
+static char const *
+get_token_text (FILE *input_FILE, int *flags)
+{
+  static char input_buffer[BUFSIZ];
+  char *rct = &ctype_text[1];	/* biased by one so that rct[EOF] is valid */
+  char *out;
+  int ch;
+
+  for (;;)
+    {
+      /* Skip the fluff in front of the next token.  */
+      do
+        ch = getc (input_FILE);
+      while (ISBORING (ch));
+
+      if (ISEOF (ch))
+        return NULL;
+
+      out = input_buffer;
+      *out++ = ch;
+
+      if (ISID1ST (ch))
+        {
+          *flags = TOK_NAME;
+          for (ch = getc (input_FILE); ISIDREST (ch); ch = getc (input_FILE))
+            {
+              if (ISIDSQUEEZE (ch))
+                continue;
+              *out++ = ch;
+            }
+          break;
+        }
+
+      if (ISNUMBER (ch))
+        {
+          *flags = TOK_NUMBER;
+          for (ch = getc (input_FILE); ISNUMBER (ch); ch = getc (input_FILE))
+            *out++ = ch;
+          break;
+        }
+
+      /* Neither a word nor a number: report it and start over.  */
+      if (isprint (ch))
+        fprintf (stderr, "junk: `%c'", ch);
+      else
+        fprintf (stderr, "junk: `\\%03o'", ch);
+    }
+
+  *out = '\0';
+  /* Push back the char that ended the token for the next call.  */
+  ungetc (ch, input_FILE);
+  *flags |= TOK_LITERAL;
+  return input_buffer;
+}
+
+/* Add the class bits TYPE to every character of the string CHARS in the
+   text scanner's class table.  Each char is converted to unsigned char
+   before it is used as an index, so that characters with the high bit
+   set cannot index in front of the table where plain char is signed.  */
+static void
+set_ctype_text (char const *chars, int type)
+{
+  char *rct = &ctype_text[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] |= type;
+}
+
+/* Remove the class bits TYPE from every character of the string CHARS
+   in the text scanner's class table.  Each char is converted to unsigned
+   char before it is used as an index, so that characters with the high
+   bit set cannot index in front of the table where plain char is signed.  */
+static void
+clear_ctype_text (char const *chars, int type)
+{
+  char *rct = &ctype_text[1];
+
+  while (*chars)
+    rct[(unsigned char) *chars++] &= ~type;
+}
+
+/* Print the synopsis of the text scanner arguments for LANG_NAME on
+   stderr, then exit with status 1.  The synopsis now closes the
+   parenthesis it opens after the language name, as usage_asm () does.  */
+static void
+usage_text (char const *lang_name)
+{
+  fprintf (stderr, "Usage: %s -S%s([(+|-)a<cc>] [(+|-)s<cc>])\n", program_name, lang_name);
+  exit (1);
+}
+
+/* Help text printed by set_args_text () when its op argument is '?'.
+   (Spelling of "exclude" corrected.)  */
+static char document_text[] = "\
+The Text scanner arguments take the form -Stext<arg>, where\n\
+<arg> is one of the following: (<cc> denotes one or more characters)\n\
+ (+|-)a<cc> . . Include (or exclude) <cc> in ids.\n\
+ (+|-)s<cc> . . Squeeze (or don't squeeze) <cc> out of ids.";
+
+/* Apply one text-scanner argument.  OP is '+', '-' or '?'; ARG starts
+   with the option letter, followed by the characters it applies to.
+     ?   print the help text and return
+     a   include (+) or exclude (-) the given chars in ids
+     s   squeeze (+) or don't squeeze (-) the given chars out of ids
+   An unknown letter is reported through usage_text () only when
+   LANG_NAME is non-NULL; otherwise it is silently ignored.  */
+static void
+set_args_text (char const *lang_name, int op, char const *arg)
+{
+  char const *cc;
+  int class_bit;
+
+  if (op == '?')
+    {
+      puts (document_text);
+      return;
+    }
+
+  cc = arg + 1;		/* the characters following the option letter */
+
+  if (*arg == 'a')
+    class_bit = I1;
+  else if (*arg == 's')
+    class_bit = SQ;
+  else
+    {
+      if (lang_name)
+        usage_text (lang_name);
+      return;
+    }
+
+  if (op == '+')
+    set_ctype_text (cc, class_bit);
+  else
+    clear_ctype_text (cc, class_bit);
+}
+
+#undef I1
+#undef NM
+#undef SQ
+#undef EF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISNUMBER
+#undef ISEOF
+#undef ISBORING
+#undef ISIDSQUEEZE