1 files changed, 1209 insertions, 0 deletions
diff --git a/scanners.c b/scanners.c
new file mode 100644
index 0000000..9020a1c
--- /dev/null
+++ b/scanners.c
@@ -0,0 +1,1209 @@
+/* scanners.c -- file & directory name manipulations
+   Copyright (C) 1986, 1995 Greg McGary
+   VHIL portions Copyright (C) 1988 Tom Horsley
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+  
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+  
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not, write to the
+   Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include "strxtra.h"
+#include "token.h"
+#include "alloc.h"
+#include "scanners.h"
+
+extern char const *program_name;
+
+static char const *get_token_VHIL (FILE *input_FILE, int *flags);
+static char const *get_token_c (FILE *input_FILE, int *flags);
+static void set_args_c (char const *lang_name, int op, char const *arg);
+static void set_ctype_c (char const *chars, int type);
+static void clear_ctype_c (char const *chars, int type);
+static void usage_c (char const *lang_name);
+
+static char const *get_token_asm (FILE *input_FILE, int *flags);
+static void set_ctype_asm (char const *chars, int type);
+static void clear_ctype_asm (char const *chars, int type);
+static void usage_asm (char const *lang_name);
+static void set_args_asm (char const *lang_name, int op, char const *arg);
+
+static char const *get_token_text (FILE *input_FILE, int *flags);
+static void set_ctype_text (char const *chars, int type);
+static void clear_ctype_text (char const *chars, int type);
+static void usage_text (char const *lang_name);
+static void set_args_text (char const *lang_name, int op, char const *arg);
+
+/****************************************************************************/
+
+struct language
+{
+  char const *lang_name;
+  char const *(*lang_get_token) (FILE *input_FILE, int *flags);
+  void (*lang_set_args) (char const *lang_name, int op, char const *arg);
+  char const *lang_filter;
+  struct language *lang_next;
+};
+
+struct suffix
+{
+  char const *suff_suffix;
+  char const *suff_lang_name;
+  struct language *suff_language;
+  struct suffix *suff_next;
+};
+
+static struct suffix *get_suffix_entry (char const *suffix);
+static struct language *get_lang_entry (char const *lang_name);
+static void usage_scan (void);
+
+struct language languages[] =
+{
+  /* must be sorted for bsearch(3) */
+  { "C", get_token_c, set_args_c, NULL },
+  { "TeX", get_token_text, set_args_text, NULL },
+  { "VHIL", get_token_VHIL, set_args_c, NULL },
+  { "asm", get_token_asm, set_args_asm, NULL },
+/*{ "elisp", get_token_elisp, set_args_elisp, NULL },*/
+  { "gzip", NULL, NULL, "zcat %s" },
+  { "roff", get_token_text, set_args_text, "sed '/^\\.so/d' < %s | deroff" },
+  { "text", get_token_text, set_args_text, NULL },
+};
+
+/*
+	This is a rather incomplete list of default associations
+	between suffixes and languages.  You may add more to the
+	default list, or you may define them dynamically with the
+	`-S<suff>=<lang>' argument to mkid(1) and idx(1).  e.g. to
+	associate a `.ada' suffix with the Ada language, use
+	`-S.ada=ada'
+*/
+struct suffix suffixes[] =
+{
+  { "", "text" },
+  { ".1", "roff" },
+  { ".2", "roff" },
+  { ".3", "roff" },
+  { ".4", "roff" },
+  { ".5", "roff" },
+  { ".6", "roff" },
+  { ".7", "roff" },
+  { ".8", "roff" },
+  { ".C", "C" },
+  { ".H", "C" },
+  { ".Z", "gzip" },
+  { ".c", "C" },
+  { ".cc", "C" },
+  { ".cpp", "C" },
+  { ".cxx", "C" },
+  { ".doc", "text" },
+/*{ ".el", "elisp" },*/
+  { ".gz", "gzip" },
+  { ".h", "C" },
+  { ".hh", "C" },
+  { ".hpp", "C" },
+  { ".hxx", "C" },
+  { ".l", "C" },
+  { ".lex", "C" },
+  { ".ltx", "TeX" },
+  { ".p", "pas" },
+  { ".pas", "pas" },
+  { ".s", "asm" },
+  { ".S", "asm" },
+  { ".tex", "TeX" },
+  { ".x", "VHIL" },
+  { ".y", "C" },
+  { ".yacc", "C" },
+  { ".z", "gzip" },
+};
+
+void
+init_scanners (void)
+{
+  struct language *lang;
+  struct language *lang_N = &languages[(sizeof(languages)/sizeof(languages[0])) - 1];
+  struct suffix *suff;
+  struct suffix *suff_N = &suffixes[(sizeof(suffixes)/sizeof(suffixes[0])) - 1];
+  
+  for (lang = languages; lang <= lang_N; ++lang)
+    lang->lang_next = lang + 1;
+  lang_N->lang_next = NULL;
+
+  for (suff = suffixes; suff <= suff_N; ++suff) {
+    lang = get_lang_entry (suff->suff_lang_name);
+    if (lang)
+      suff->suff_language = lang;
+    suff->suff_next = suff + 1;
+  }
+  suff_N->suff_next = NULL;
+}
+
+/* Return a suffix table entry for the given suffix. */
+static struct suffix *
+get_suffix_entry (char const *suffix)
+{
+  struct suffix *stp;
+
+  if (suffix == NULL)
+    suffix = "";
+
+  for (stp = suffixes; stp; stp = stp->suff_next)
+    if (strequ (stp->suff_suffix, suffix))
+      return stp;
+  return stp;
+}
+
+static struct language *
+get_lang_entry (char const *lang_name)
+{
+  struct language *ltp;
+
+  if (lang_name == NULL)
+    lang_name = "";
+
+  for (ltp = languages; ltp->lang_next; ltp = ltp->lang_next)
+    if (ltp->lang_name == lang_name || strequ (ltp->lang_name, lang_name))
+      return ltp;
+  return ltp;
+}
+
+char const *
+get_lang_name (char const *suffix)
+{
+  struct suffix *stp;
+
+  stp = get_suffix_entry (suffix);
+  if (stp->suff_next == NULL)
+    return NULL;
+  return stp->suff_language->lang_name;
+}
+
+char const *
+get_filter (char const *suffix)
+{
+  struct suffix *stp;
+
+  stp = get_suffix_entry (suffix);
+  if (stp->suff_next == NULL)
+    return NULL;
+  return stp->suff_language->lang_filter;
+}
+
+char const *(*
+get_scanner (char const *lang)
+       ) (FILE *input_FILE, int *flags)
+{
+  struct language *ltp;
+
+  ltp = get_lang_entry (lang);
+  if (ltp->lang_next == NULL)
+    return NULL;
+  return ltp->lang_get_token;
+}
+
+void
+set_scan_args (int op, char *arg)
+{
+  struct language *ltp, *ltp2;
+  struct suffix *stp;
+  char *lhs;
+  char *lhs2;
+  int count = 0;
+
+  lhs = arg;
+  while (isalnum (*arg) || *arg == '.')
+    arg++;
+
+  if (strequ (lhs, "?=?"))
+    {
+      for (stp = suffixes; stp->suff_next; stp = stp->suff_next)
+	{
+	  printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, stp->suff_language->lang_name);
+	  if (stp->suff_language->lang_filter)
+	    printf (" (%s)", stp->suff_language->lang_filter);
+	}
+      if (count)
+	putchar ('\n');
+      return;
+    }
+
+  if (strnequ (lhs, "?=", 2))
+    {
+      lhs += 2;
+      ltp = get_lang_entry (lhs);
+      if (ltp->lang_next == NULL)
+	{
+	  printf ("No scanner for language `%s'\n", lhs);
+	  return;
+	}
+      for (stp = suffixes; stp->suff_next; stp = stp->suff_next)
+	if (stp->suff_language == ltp)
+	  {
+	    printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, ltp->lang_name);
+	    if (stp->suff_language->lang_filter)
+	      printf (" (%s)", stp->suff_language->lang_filter);
+	  }
+      if (count)
+	putchar ('\n');
+      return;
+    }
+
+  if (strequ (arg, "=?"))
+    {
+      lhs[strlen (lhs) - 2] = '\0';
+      stp = get_suffix_entry (lhs);
+      if (stp->suff_next == NULL)
+	{
+	  printf ("No scanner assigned to suffix `%s'\n", lhs);
+	  return;
+	}
+      printf ("%s=%s", stp->suff_suffix, stp->suff_language->lang_name);
+      if (stp->suff_language->lang_filter)
+	printf (" (%s)", stp->suff_language->lang_filter);
+      printf ("\n");
+      return;
+    }
+
+  if (*arg == '=')
+    {
+      *arg++ = '\0';
+
+      ltp = get_lang_entry (arg);
+      if (ltp->lang_next == NULL)
+	{
+	  fprintf (stderr, "%s: Language undefined: %s\n", program_name, arg);
+	  return;
+	}
+      stp = get_suffix_entry (lhs);
+      if (stp->suff_next == NULL)
+	{
+	  stp->suff_suffix = lhs;
+	  stp->suff_language = ltp;
+	  stp->suff_next = CALLOC (struct suffix, 1);
+	}
+      else if (!strequ (arg, stp->suff_language->lang_name))
+	{
+	  fprintf (stderr, "%s: Note: `%s=%s' overrides `%s=%s'\n", program_name, lhs, arg, lhs, stp->suff_language->lang_name);
+	  stp->suff_language = ltp;
+	}
+      return;
+    }
+  else if (*arg == '/')
+    {
+      *arg++ = '\0';
+      ltp = get_lang_entry (lhs);
+      if (ltp->lang_next == NULL)
+	{
+	  ltp->lang_name = lhs;
+	  ltp->lang_get_token = get_token_text;
+	  ltp->lang_set_args = set_args_text;
+	  ltp->lang_filter = NULL;
+	  ltp->lang_next = CALLOC (struct language, 1);
+	}
+      lhs2 = arg;
+      arg = strchr (arg, '/');
+      if (arg == NULL)
+	ltp2 = ltp;
+      else
+	{
+	  *arg++ = '\0';
+	  ltp2 = get_lang_entry (lhs2);
+	  if (ltp2->lang_next == NULL)
+	    {
+	      fprintf (stderr, "%s: language %s not defined.\n", program_name, lhs2);
+	      ltp2 = ltp;
+	    }
+	}
+      ltp->lang_get_token = ltp2->lang_get_token;
+      ltp->lang_set_args = ltp2->lang_set_args;
+      if (ltp->lang_filter && (!strequ (arg, ltp->lang_filter)))
+	fprintf (stderr, "%s: Note: `%s/%s' overrides `%s/%s'\n", program_name, lhs, arg, lhs, ltp->lang_filter);
+      ltp->lang_filter = arg;
+      return;
+    }
+
+  if (op == '+')
+    {
+      switch (op = *arg++)
+	{
+	case '+':
+	case '-':
+	case '?':
+	  break;
+	default:
+	  usage_scan ();
+	}
+      for (ltp = languages; ltp->lang_next; ltp = ltp->lang_next)
+	(*ltp->lang_set_args) (NULL, op, arg);
+      return;
+    }
+
+  if (*arg == '-' || *arg == '+' || *arg == '?')
+    {
+      op = *arg;
+      *arg++ = '\0';
+
+      ltp = get_lang_entry (lhs);
+      if (ltp->lang_next == NULL)
+	{
+	  fprintf (stderr, "%s: Language undefined: %s\n", program_name, lhs);
+	  return;
+	}
+      (*ltp->lang_set_args) (lhs, op, arg);
+      return;
+    }
+
+  usage_scan ();
+}
+
+static void
+usage_scan (void)
+{
+  fprintf (stderr, "Usage: %s [-S<suffix>=<lang>] [+S(+|-)<arg>] [-S<lang>(+|-)<arg>] [-S<lang>/<lang>/<filter>]\n", program_name);
+  exit (1);
+}
+
+/*************** C & C++ ****************************************************/
+
+#define I1	0x0001		/* 1st char of an identifier [a-zA-Z_] */
+#define DG	0x0002		/* decimal digit [0-9] */
+#define NM	0x0004		/* extra chars in a hex or long number [a-fA-FxXlL] */
+#define C1	0x0008		/* C comment introduction char: / */
+#define C2	0x0010		/* C comment termination  char: * */
+#define Q1	0x0020		/* single quote: ' */
+#define Q2	0x0040		/* double quote: " */
+#define ES	0x0080		/* escape char: \ */
+#define NL	0x0100		/* newline: \n */
+#define EF	0x0200		/* EOF */
+#define SK	0x0400		/* Make these chars valid for names within strings */
+#define VH	0x0800		/* VHIL comment introduction char: # */
+#define WS	0x1000		/* White space characters */
+
+/*
+	character class membership macros:
+*/
+#define ISDIGIT(c)	((rct)[c] & (DG))	/* digit */
+#define ISNUMBER(c)	((rct)[c] & (DG|NM))	/* legal in a number */
+#define ISEOF(c)	((rct)[c] & (EF))	/* EOF */
+#define ISID1ST(c)	((rct)[c] & (I1))	/* 1st char of an identifier */
+#define ISIDREST(c)	((rct)[c] & (I1|DG))	/* rest of an identifier */
+#define ISSTRKEEP(c)	((rct)[c] & (I1|DG|SK))	/* keep contents of string */
+#define ISSPACE(c)	((rct)[c] & (WS))	/* white space character */
+/*
+	The `BORING' classes should be skipped over
+	until something interesting comes along...
+*/
+#define ISBORING(c)	(!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH)))	/* fluff */
+#define ISCBORING(c)	(!((rct)[c] & (EF|C2)))	/* comment fluff */
+#define ISVBORING(c)	(!((rct)[c] & (EF|NL)))	/* vhil comment fluff */
+#define ISQ1BORING(c)	(!((rct)[c] & (EF|NL|Q1|ES)))	/* char const fluff */
+#define ISQ2BORING(c)	(!((rct)[c] & (EF|NL|Q2|ES)))	/* quoted str fluff */
+
+static short ctype_c[257] =
+{
+  EF,
+/*      0       1       2       3       4       5       6       7   */
+/*    -----   -----   -----   -----   -----   -----   -----   ----- */
+/*000*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*010*/ 0,	0,	NL,	0,	0,	0,	0,	0,
+/*020*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*030*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*040*/ 0,	0,	Q2,	0,	0,	0,	0,	Q1,
+/*050*/ 0,	0,	C2,	0,	0,	0,	0,	C1,
+/*060*/ DG,	DG,	DG,	DG,	DG,	DG,	DG,	DG,
+/*070*/ DG,	DG,	0,	0,	0,	0,	0,	0,
+/*100*/ 0,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1,
+/*110*/ I1,	I1,	I1,	I1,	I1|NM,	I1,	I1,	I1,
+/*120*/ I1,	I1,	I1,	I1,	I1,	I1,	I1,	I1,
+/*130*/ I1|NM,	I1,	I1,	0,	ES,	0,	0,	I1,
+/*140*/ 0,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1,
+/*150*/ I1,	I1,	I1,	I1,	I1|NM,	I1,	I1,	I1,
+/*160*/ I1,	I1,	I1,	I1,	I1,	I1,	I1,	I1,
+/*170*/ I1|NM,	I1,	I1,	0,	0,	0,	0,	0,
+};
+
+static int eat_underscore = 1;
+static int scan_VHIL = 0;
+
+static char const *
+get_token_VHIL (FILE *input_FILE, int *flags)
+{
+  if (!scan_VHIL)
+    set_args_c ("vhil", '+', "v");
+  return get_token_c (input_FILE, flags);
+}
+
+/*
+	Grab the next identifier the C source
+	file opened with the handle `input_FILE'.
+	This state machine is built for speed, not elegance.
+*/
+static char const *
+get_token_c (FILE *input_FILE, int *flags)
+{
+  static char input_buffer[BUFSIZ];
+  static int new_line = 1;
+  short *rct = &ctype_c[1];
+  int c;
+  char *id = input_buffer;
+
+top:
+  c = getc (input_FILE);
+  if (new_line)
+    {
+      new_line = 0;
+      if (c == '.')
+	{
+	  /* Auto-recognize vhil code when you see a '.' in column 1.
+	     also ignore lines that start with a '.' */
+	  if (!scan_VHIL)
+	    set_args_c ("vhil", '+', "v");
+	  while (ISVBORING (c))
+	    c = getc (input_FILE);
+	  new_line = 1;
+	  goto top;
+	}
+      if (c != '#')
+	goto next;
+      c = getc (input_FILE);
+      if (scan_VHIL && ISSPACE (c))
+	{
+	  while (ISVBORING (c))
+	    c = getc (input_FILE);
+	  new_line = 1;
+	  goto top;
+	}
+      while (ISBORING (c))
+	c = getc (input_FILE);
+      if (!ISID1ST (c))
+	goto next;
+      id = input_buffer;
+      *id++ = c;
+      while (ISIDREST (c = getc (input_FILE)))
+	*id++ = c;
+      *id = '\0';
+      if (strequ (input_buffer, "include"))
+	{
+	  while (c == ' ' || c == '\t')
+	    c = getc (input_FILE);
+	  if (c == '\n')
+	    {
+	      new_line = 1;
+	      goto top;
+	    }
+	  id = input_buffer;
+	  if (c == '"')
+	    {
+	      c = getc (input_FILE);
+	      while (c != '\n' && c != EOF && c != '"')
+		{
+		  *id++ = c;
+		  c = getc (input_FILE);
+		}
+	      *flags = TOK_STRING;
+	    }
+	  else if (c == '<')
+	    {
+	      c = getc (input_FILE);
+	      while (c != '\n' && c != EOF && c != '>')
+		{
+		  *id++ = c;
+		  c = getc (input_FILE);
+		}
+	      *flags = TOK_STRING;
+	    }
+	  else if (ISID1ST (c))
+	    {
+	      *id++ = c;
+	      while (ISIDREST (c = getc (input_FILE)))
+		*id++ = c;
+	      *flags = TOK_NAME;
+	    }
+	  else
+	    {
+	      while (c != '\n' && c != EOF)
+		c = getc (input_FILE);
+	      new_line = 1;
+	      goto top;
+	    }
+	  while (c != '\n' && c != EOF)
+	    c = getc (input_FILE);
+	  new_line = 1;
+	  *id = '\0';
+	  return input_buffer;
+	}
+      if (strnequ (input_buffer, "if", 2)
+	  || strequ (input_buffer, "define")
+	  || strequ (input_buffer, "elif")	/* ansi C */
+	  || (scan_VHIL && strequ (input_buffer, "elsif"))
+	  || strequ (input_buffer, "undef"))
+	goto next;
+      while ((c != '\n') && (c != EOF))
+	c = getc (input_FILE);
+      new_line = 1;
+      goto top;
+    }
+
+next:
+  while (ISBORING (c))
+    c = getc (input_FILE);
+
+  switch (c)
+    {
+    case '"':
+      id = input_buffer;
+      *id++ = c = getc (input_FILE);
+      for (;;)
+	{
+	  while (ISQ2BORING (c))
+	    *id++ = c = getc (input_FILE);
+	  if (c == '\\')
+	    {
+	      *id++ = c = getc (input_FILE);
+	      continue;
+	    }
+	  else if (c != '"')
+	    goto next;
+	  break;
+	}
+      *--id = '\0';
+      id = input_buffer;
+      while (ISSTRKEEP (*id))
+	id++;
+      if (*id || id == input_buffer)
+	{
+	  c = getc (input_FILE);
+	  goto next;
+	}
+      *flags = TOK_STRING;
+      if (eat_underscore && input_buffer[0] == '_' && input_buffer[1])
+	return &input_buffer[1];
+      else
+	return input_buffer;
+
+    case '\'':
+      c = getc (input_FILE);
+      for (;;)
+	{
+	  while (ISQ1BORING (c))
+	    c = getc (input_FILE);
+	  if (c == '\\')
+	    {
+	      c = getc (input_FILE);
+	      continue;
+	    }
+	  else if (c == '\'')
+	    c = getc (input_FILE);
+	  goto next;
+	}
+
+    case '/':
+      c = getc (input_FILE);
+      if (c == '/')
+	{			/* Cope with C++ comment */
+	  while (ISVBORING (c))
+	    c = getc (input_FILE);
+	  new_line = 1;
+	  goto top;
+	}
+      else if (c != '*')
+	goto next;
+      c = getc (input_FILE);
+      for (;;)
+	{
+	  while (ISCBORING (c))
+	    c = getc (input_FILE);
+	  c = getc (input_FILE);
+	  if (c == '/')
+	    {
+	      c = getc (input_FILE);
+	      goto next;
+	    }
+	  else if (ISEOF (c))
+	    {
+	      new_line = 1;
+	      return NULL;
+	    }
+	}
+
+    case '\n':
+      new_line = 1;
+      goto top;
+
+    case '#':
+      if (!scan_VHIL)
+	{
+	  /* Auto-recognize vhil when find a # in the middle of a line. */
+	  set_args_c ("vhil", '+', "v");
+	}
+      c = getc (input_FILE);
+      while (ISVBORING (c))
+	c = getc (input_FILE);
+      new_line = 1;
+      goto top;
+    default:
+      if (ISEOF (c))
+	{
+	  new_line = 1;
+	  return NULL;
+	}
+      id = input_buffer;
+      *id++ = c;
+      if (ISID1ST (c))
+	{
+	  *flags = TOK_NAME;
+	  while (ISIDREST (c = getc (input_FILE)))
+	    *id++ = c;
+	}
+      else if (ISDIGIT (c))
+	{
+	  *flags = TOK_NUMBER;
+	  while (ISNUMBER (c = getc (input_FILE)))
+	    *id++ = c;
+	}
+      else
+	fprintf (stderr, "junk: `\\%3o'", c);
+      ungetc (c, input_FILE);
+      *id = '\0';
+      *flags |= TOK_LITERAL;
+      return input_buffer;
+    }
+}
+
+static void
+set_ctype_c (char const *chars, int type)
+{
+  short *rct = &ctype_c[1];
+
+  while (*chars)
+    rct[*chars++] |= type;
+}
+
+static void
+clear_ctype_c (char const *chars, int type)
+{
+  short *rct = &ctype_c[1];
+
+  while (*chars)
+    rct[*chars++] &= ~type;
+}
+
+static void
+usage_c (char const *lang_name)
+{
+  fprintf (stderr, "Usage: %s does not accept %s scanner arguments\n", program_name, lang_name);
+  exit (1);
+}
+
+static char document_c[] = "\
+The C scanner arguments take the form -Sc<arg>, where <arg>\n\
+is one of the following: (<cc> denotes one or more characters)\n\
+  (+|-)u . . . . (Do|Don't) strip a leading `_' from ids in strings.\n\
+  (+|-)s<cc> . . Allow <cc> in string ids, and (keep|ignore) those ids.\n\
+  -v . . . . . . Skip vhil comments.";
+
+static void
+set_args_c (char const *lang_name, int op, char const *arg)
+{
+  if (op == '?')
+    {
+      puts (document_c);
+      return;
+    }
+  switch (*arg++)
+    {
+    case 'u':
+      eat_underscore = (op == '+');
+      break;
+    case 's':
+      if (op == '+')
+	set_ctype_c (arg, SK);
+      else
+	clear_ctype_c (arg, SK);
+      break;
+    case 'v':
+      set_ctype_c ("$", I1);
+      set_ctype_c ("#", VH);
+      set_ctype_c (" \t", WS);
+      scan_VHIL = 1;
+      break;
+    default:
+      if (lang_name)
+	usage_c (lang_name);
+      break;
+    }
+}
+
+#undef I1
+#undef DG
+#undef NM
+#undef C1
+#undef C2
+#undef Q1
+#undef Q2
+#undef ES
+#undef NL
+#undef EF
+#undef SK
+#undef VH
+#undef WS
+#undef ISDIGIT
+#undef ISNUMBER
+#undef ISEOF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISSTRKEEP
+#undef ISSPACE
+#undef ISBORING
+#undef ISCBORING
+#undef ISVBORING
+#undef ISQ1BORING
+#undef ISQ2BORING
+
+/*************** Assembly ***************************************************/
+
+#define I1	0x01		/* 1st char of an identifier [a-zA-Z_] */
+#define NM	0x02		/* digit [0-9a-fA-FxX] */
+#define NL	0x04		/* newline: \n */
+#define CM	0x08		/* assembler comment char: usually # or | */
+#define IG	0x10		/* ignore `identifiers' with these chars in them */
+#define C1	0x20		/* C comment introduction char: / */
+#define C2	0x40		/* C comment termination  char: * */
+#define EF	0x80		/* EOF */
+
+/* Assembly Language character classes */
+#define ISID1ST(c)	((rct)[c] & (I1))
+#define ISIDREST(c)	((rct)[c] & (I1|NM))
+#define ISNUMBER(c)	((rct)[c] & (NM))
+#define ISEOF(c)	((rct)[c] & (EF))
+#define ISCOMMENT(c)	((rct)[c] & (CM))
+#define ISBORING(c)	(!((rct)[c] & (EF|NL|I1|NM|CM|C1)))
+#define ISCBORING(c)	(!((rct)[c] & (EF|NL)))
+#define ISCCBORING(c)	(!((rct)[c] & (EF|C2)))
+#define ISIGNORE(c)	((rct)[c] & (IG))
+
+static char ctype_asm[257] =
+{
+  EF,
+/*      0       1       2       3       4       5       6       7   */
+/*    -----   -----   -----   -----   -----   -----   -----   ----- */
+/*000*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*010*/ 0,	0,	NL,	0,	0,	0,	0,	0,
+/*020*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*030*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*040*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*050*/ 0,	0,	C2,	0,	0,	0,	0,	C1,
+/*060*/ NM,	NM,	NM,	NM,	NM,	NM,	NM,	NM,
+/*070*/ NM,	NM,	0,	0,	0,	0,	0,	0,
+/*100*/ 0,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1,
+/*110*/ I1,	I1,	I1,	I1,	I1|NM,	I1,	I1,	I1,
+/*120*/ I1,	I1,	I1,	I1,	I1,	I1,	I1,	I1,
+/*130*/ I1|NM,	I1,	I1,	0,	0,	0,	0,	I1,
+/*140*/ 0,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1,
+/*150*/ I1,	I1,	I1,	I1,	I1|NM,	I1,	I1,	I1,
+/*160*/ I1,	I1,	I1,	I1,	I1,	I1,	I1,	I1,
+/*170*/ I1|NM,	I1,	I1,	0,	0,	0,	0,	0,
+
+};
+
+static int cpp_on_asm = 1;
+
+/*
+	Grab the next identifier the assembly language
+	source file opened with the handle `input_FILE'.
+	This state machine is built for speed, not elegance.
+*/
+static char const *
+get_token_asm (FILE *input_FILE, int *flags)
+{
+  static char input_buffer[BUFSIZ];
+  char *rct = &ctype_asm[1];
+  int c;
+  char *id = input_buffer;
+  static int new_line = 1;
+
+top:
+  c = getc (input_FILE);
+  if (cpp_on_asm > 0 && new_line)
+    {
+      new_line = 0;
+      if (c != '#')
+	goto next;
+      while (ISBORING (c))
+	c = getc (input_FILE);
+      if (!ISID1ST (c))
+	goto next;
+      id = input_buffer;
+      *id++ = c;
+      while (ISIDREST (c = getc (input_FILE)))
+	*id++ = c;
+      *id = '\0';
+      if (strequ (input_buffer, "include"))
+	{
+	  while (c != '"' && c != '<')
+	    c = getc (input_FILE);
+	  id = input_buffer;
+	  *id++ = c = getc (input_FILE);
+	  while ((c = getc (input_FILE)) != '"' && c != '>')
+	    *id++ = c;
+	  *id = '\0';
+	  *flags = TOK_STRING;
+	  return input_buffer;
+	}
+      if (strnequ (input_buffer, "if", 2)
+	  || strequ (input_buffer, "define")
+	  || strequ (input_buffer, "undef"))
+	goto next;
+      while (c != '\n')
+	c = getc (input_FILE);
+      new_line = 1;
+      goto top;
+    }
+
+next:
+  while (ISBORING (c))
+    c = getc (input_FILE);
+
+  if (ISCOMMENT (c))
+    {
+      while (ISCBORING (c))
+	c = getc (input_FILE);
+      new_line = 1;
+    }
+
+  if (ISEOF (c))
+    {
+      new_line = 1;
+      return NULL;
+    }
+
+  if (c == '\n')
+    {
+      new_line = 1;
+      goto top;
+    }
+
+  if (c == '/')
+    {
+      if ((c = getc (input_FILE)) != '*')
+	goto next;
+      c = getc (input_FILE);
+      for (;;)
+	{
+	  while (ISCCBORING (c))
+	    c = getc (input_FILE);
+	  c = getc (input_FILE);
+	  if (c == '/')
+	    {
+	      c = getc (input_FILE);
+	      break;
+	    }
+	  else if (ISEOF (c))
+	    {
+	      new_line = 1;
+	      return NULL;
+	    }
+	}
+      goto next;
+    }
+
+  id = input_buffer;
+  if (eat_underscore && c == '_' && !ISID1ST (c = getc (input_FILE)))
+    {
+      ungetc (c, input_FILE);
+      return "_";
+    }
+  *id++ = c;
+  if (ISID1ST (c))
+    {
+      *flags = TOK_NAME;
+      while (ISIDREST (c = getc (input_FILE)))
+	*id++ = c;
+    }
+  else if (ISNUMBER (c))
+    {
+      *flags = TOK_NUMBER;
+      while (ISNUMBER (c = getc (input_FILE)))
+	*id++ = c;
+    }
+  else
+    {
+      if (isprint (c))
+	fprintf (stderr, "junk: `%c'", c);
+      else
+	fprintf (stderr, "junk: `\\%03o'", c);
+      goto next;
+    }
+
+  *id = '\0';
+  for (id = input_buffer; *id; id++)
+    if (ISIGNORE (*id))
+      goto next;
+  ungetc (c, input_FILE);
+  *flags |= TOK_LITERAL;
+  return input_buffer;
+}
+
+static void
+set_ctype_asm (char const *chars, int type)
+{
+  char *rct = &ctype_asm[1];
+
+  while (*chars)
+    rct[*chars++] |= type;
+}
+
+static void
+clear_ctype_asm (char const *chars, int type)
+{
+  char *rct = &ctype_asm[1];
+
+  while (*chars)
+    rct[*chars++] &= ~type;
+}
+
+static void
+usage_asm (char const *lang_name)
+{
+  fprintf (stderr, "Usage: %s -S%s([-c<cc>] [-u] [(+|-)a<cc>] [(+|-)p] [(+|-)C])\n", program_name, lang_name);
+  exit (1);
+}
+
+static char document_asm[] = "\
+The Assembler scanner arguments take the form -Sasm<arg>, where\n\
+<arg> is one of the following: (<cc> denotes one or more characters)\n\
+  -c<cc> . . . . <cc> introduce(s) a comment until end-of-line.\n\
+  (+|-)u . . . . (Do|Don't) strip a leading `_' from ids.\n\
+  (+|-)a<cc> . . Allow <cc> in ids, and (keep|ignore) those ids.\n\
+  (+|-)p . . . . (Do|Don't) handle C-preprocessor directives.\n\
+  (+|-)C . . . . (Do|Don't) handle C-style comments. (/* */)";
+
+static void
+set_args_asm (char const *lang_name, int op, char const *arg)
+{
+  if (op == '?')
+    {
+      puts (document_asm);
+      return;
+    }
+  switch (*arg++)
+    {
+    case 'a':
+      set_ctype_asm (arg, I1 | ((op == '-') ? IG : 0));
+      break;
+    case 'c':
+      set_ctype_asm (arg, CM);
+      break;
+    case 'u':
+      eat_underscore = (op == '+');
+      break;
+    case 'p':
+      cpp_on_asm = (op == '+');
+      break;
+    case 'C':
+      if (op == '+')
+	{
+	  set_ctype_asm ("/", C1);
+	  set_ctype_asm ("*", C2);
+	}
+      else
+	{
+	  clear_ctype_asm ("/", C1);
+	  clear_ctype_asm ("*", C2);
+	}
+      break;
+    default:
+      if (lang_name)
+	usage_asm (lang_name);
+      break;
+    }
+}
+
+#undef I1
+#undef NM
+#undef NL
+#undef CM
+#undef IG
+#undef C1
+#undef C2
+#undef EF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISNUMBER
+#undef ISEOF
+#undef ISCOMMENT
+#undef ISBORING
+#undef ISCBORING
+#undef ISCCBORING
+#undef ISIGNORE
+
+/*************** Text *******************************************************/
+
+#define I1	0x01		/* 1st char of an identifier [a-zA-Z_] */
+#define NM	0x02		/* digit [0-9a-fA-FxX] */
+#define SQ	0x04		/* squeeze these out (.,',-) */
+#define EF	0x80		/* EOF */
+
+/* Text character classes */
+#define ISID1ST(c)	((rct)[c] & (I1))
+#define ISIDREST(c)	((rct)[c] & (I1|NM|SQ))
+#define ISNUMBER(c)	((rct)[c] & (NM))
+#define ISEOF(c)	((rct)[c] & (EF))
+#define ISBORING(c)	(!((rct)[c] & (I1|NM|EF)))
+#define ISIDSQUEEZE(c)	((rct)[c] & (SQ))
+
+static char ctype_text[257] =
+{
+  EF,
+/*      0       1       2       3       4       5       6       7   */
+/*    -----   -----   -----   -----   -----   -----   -----   ----- */
+/*000*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*010*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*020*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*030*/ 0,	0,	0,	0,	0,	0,	0,	0,
+/*040*/ 0,	0,	0,	0,	0,	0,	0,	SQ,
+/*050*/ 0,	0,	0,	0,	0,	SQ,	SQ,	0,
+/*060*/ NM,	NM,	NM,	NM,	NM,	NM,	NM,	NM,
+/*070*/ NM,	NM,	0,	0,	0,	0,	0,	0,
+/*100*/ 0,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1,
+/*110*/ I1,	I1,	I1,	I1,	I1|NM,	I1,	I1,	I1,
+/*120*/ I1,	I1,	I1,	I1,	I1,	I1,	I1,	I1,
+/*130*/ I1|NM,	I1,	I1,	0,	0,	0,	0,	I1,
+/*140*/ 0,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1|NM,	I1,
+/*150*/ I1,	I1,	I1,	I1,	I1|NM,	I1,	I1,	I1,
+/*160*/ I1,	I1,	I1,	I1,	I1,	I1,	I1,	I1,
+/*170*/ I1|NM,	I1,	I1,	0,	0,	0,	0,	0,
+};
+
+/*
+        Grab the next identifier the text source file opened with the
+	handle `input_FILE'.  This state machine is built for speed, not
+	elegance.
+*/
+static char const *
+get_token_text (FILE *input_FILE, int *flags)
+{
+  static char input_buffer[BUFSIZ];
+  char *rct = &ctype_text[1];
+  int c;
+  char *id = input_buffer;
+
+top:
+  c = getc (input_FILE);
+  while (ISBORING (c))
+    c = getc (input_FILE);
+  if (ISEOF (c))
+    return NULL;
+  id = input_buffer;
+  *id++ = c;
+  if (ISID1ST (c))
+    {
+      *flags = TOK_NAME;
+      while (ISIDREST (c = getc (input_FILE)))
+	if (!ISIDSQUEEZE (c))
+	  *id++ = c;
+    }
+  else if (ISNUMBER (c))
+    {
+      *flags = TOK_NUMBER;
+      while (ISNUMBER (c = getc (input_FILE)))
+	*id++ = c;
+    }
+  else
+    {
+      if (isprint (c))
+	fprintf (stderr, "junk: `%c'", c);
+      else
+	fprintf (stderr, "junk: `\\%03o'", c);
+      goto top;
+    }
+
+  *id = '\0';
+  ungetc (c, input_FILE);
+  *flags |= TOK_LITERAL;
+  return input_buffer;
+}
+
+static void
+set_ctype_text (char const *chars, int type)
+{
+  char *rct = &ctype_text[1];
+
+  while (*chars)
+    rct[*chars++] |= type;
+}
+
+static void
+clear_ctype_text (char const *chars, int type)
+{
+  char *rct = &ctype_text[1];
+
+  while (*chars)
+    rct[*chars++] &= ~type;
+}
+
+static void
+usage_text (char const *lang_name)
+{
+  fprintf (stderr, "Usage: %s -S%s([(+|-)a<cc>] [(+|-)s<cc>]\n", program_name, lang_name);
+  exit (1);
+}
+
+static char document_text[] = "\
+The Text scanner arguments take the form -Stext<arg>, where\n\
+<arg> is one of the following: (<cc> denotes one or more characters)\n\
+  (+|-)a<cc> . . Include (or exculde) <cc> in ids.\n\
+  (+|-)s<cc> . . Squeeze (or don't squeeze) <cc> out of ids.";
+
+static void
+set_args_text (char const *lang_name, int op, char const *arg)
+{
+  if (op == '?')
+    {
+      puts (document_text);
+      return;
+    }
+  switch (*arg++)
+    {
+    case 'a':
+      if (op == '+')
+	set_ctype_text (arg, I1);
+      else
+	clear_ctype_text (arg, I1);
+      break;
+    case 's':
+      if (op == '+')
+	set_ctype_text (arg, SQ);
+      else
+	clear_ctype_text (arg, SQ);
+      break;
+    default:
+      if (lang_name)
+	usage_text (lang_name);
+      break;
+    }
+}
+
+#undef I1
+#undef NM
+#undef SQ
+#undef EF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISNUMBER
+#undef ISEOF
+#undef ISBORING
+#undef ISIDSQUEEZE