summaryrefslogtreecommitdiffstats
path: root/scanners.c
diff options
context:
space:
mode:
Diffstat (limited to 'scanners.c')
-rw-r--r--scanners.c1216
1 files changed, 0 insertions, 1216 deletions
diff --git a/scanners.c b/scanners.c
deleted file mode 100644
index f2a5d44..0000000
--- a/scanners.c
+++ /dev/null
@@ -1,1216 +0,0 @@
-/* scanners.c -- file & directory name manipulations
- Copyright (C) 1986, 1995 Greg McGary
- VHIL portions Copyright (C) 1988 Tom Horsley
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-
-#include <config.h>
-#include "strxtra.h"
-#include "token.h"
-#include "alloc.h"
-#include "scanners.h"
-
-extern char const *program_name;
-
-static char const *get_token_VHIL __P((FILE *input_FILE, int *flags));
-static char const *get_token_c __P((FILE *input_FILE, int *flags));
-static void set_args_c __P((char const *lang_name, int op, char const *arg));
-static void set_ctype_c __P((char const *chars, int type));
-static void clear_ctype_c __P((char const *chars, int type));
-static void usage_c __P((char const *lang_name));
-
-static char const *get_token_asm __P((FILE *input_FILE, int *flags));
-static void set_ctype_asm __P((char const *chars, int type));
-static void clear_ctype_asm __P((char const *chars, int type));
-static void usage_asm __P((char const *lang_name));
-static void set_args_asm __P((char const *lang_name, int op, char const *arg));
-
-static char const *get_token_text __P((FILE *input_FILE, int *flags));
-static void set_ctype_text __P((char const *chars, int type));
-static void clear_ctype_text __P((char const *chars, int type));
-static void usage_text __P((char const *lang_name));
-static void set_args_text __P((char const *lang_name, int op, char const *arg));
-
-/****************************************************************************/
-
-typedef void (*set_args_t) __P((char const *lang_name, int op, char const *arg));
-
-struct language
-{
- char const *lang_name;
- get_token_t lang_get_token;
- set_args_t lang_set_args;
- char const *lang_filter;
- struct language *lang_next;
-};
-
-struct suffix
-{
- char const *suff_suffix;
- char const *suff_lang_name;
- struct language *suff_language;
- struct suffix *suff_next;
-};
-
-static struct suffix *get_suffix_entry (char const *suffix);
-static struct language *get_lang_entry (char const *lang_name);
-static void usage_scan (void);
-
-struct language languages_0[] =
-{
- { "C", get_token_c, set_args_c, NULL },
- { "TeX", get_token_text, set_args_text, NULL },
- { "VHIL", get_token_VHIL, set_args_c, NULL },
- { "asm", get_token_asm, set_args_asm, NULL },
-/*{ "elisp", get_token_elisp, set_args_elisp, NULL },*/
- { "gzip", NULL, NULL, "zcat %s" },
- { "roff", get_token_text, set_args_text, "sed '/^\\.so/d' < %s | deroff" },
- { "text", get_token_text, set_args_text, NULL },
-};
-struct language *languages = languages_0;
-
-/*
- This is a rather incomplete list of default associations
- between suffixes and languages. You may add more to the
- default list, or you may define them dynamically with the
- `-S<suff>=<lang>' argument to mkid(1) and idx(1). e.g. to
- associate a `.ada' suffix with the Ada language, use
- `-S.ada=ada'
-*/
-struct suffix suffixes_0[] =
-{
- { "", "text" },
- { ".1", "roff" },
- { ".2", "roff" },
- { ".3", "roff" },
- { ".4", "roff" },
- { ".5", "roff" },
- { ".6", "roff" },
- { ".7", "roff" },
- { ".8", "roff" },
- { ".C", "C" },
- { ".H", "C" },
- { ".Z", "gzip" },
- { ".c", "C" },
- { ".cc", "C" },
- { ".cpp", "C" },
- { ".cxx", "C" },
- { ".doc", "text" },
-/*{ ".el", "elisp" },*/
- { ".gz", "gzip" },
- { ".h", "C" },
- { ".hh", "C" },
- { ".hpp", "C" },
- { ".hxx", "C" },
- { ".l", "C" },
- { ".lex", "C" },
- { ".ltx", "TeX" },
- { ".p", "pas" },
- { ".pas", "pas" },
- { ".s", "asm" },
- { ".S", "asm" },
- { ".tex", "TeX" },
- { ".x", "VHIL" },
- { ".y", "C" },
- { ".yacc", "C" },
- { ".z", "gzip" },
-};
-struct suffix *suffixes = suffixes_0;
-
-void
-init_scanners (void)
-{
- struct language *lang;
- struct language *lang_N = &languages_0[(sizeof (languages_0) / sizeof (languages_0[0])) - 1];
- struct suffix *suff;
- struct suffix *suff_N = &suffixes_0[(sizeof (suffixes_0) / sizeof (suffixes_0[0])) - 1];
-
- for (lang = languages; lang <= lang_N; ++lang)
- lang->lang_next = lang + 1;
- lang_N->lang_next = NULL;
-
- for (suff = suffixes; suff <= suff_N; ++suff) {
- lang = get_lang_entry (suff->suff_lang_name);
- if (lang)
- suff->suff_language = lang;
- suff->suff_next = suff + 1;
- }
- suff_N->suff_next = NULL;
-}
-
-/* Return a suffix table entry for the given suffix. */
-static struct suffix *
-get_suffix_entry (char const *suffix)
-{
- struct suffix *stp;
-
- if (suffix == NULL)
- suffix = "";
-
- for (stp = suffixes; stp; stp = stp->suff_next)
- if (strequ (stp->suff_suffix, suffix))
- return stp;
- return NULL;
-}
-
-static struct language *
-get_lang_entry (char const *lang_name)
-{
- struct language *ltp;
-
- if (lang_name == NULL)
- lang_name = "";
-
- for (ltp = languages; ltp; ltp = ltp->lang_next)
- if (ltp->lang_name == lang_name || strequ (ltp->lang_name, lang_name))
- return ltp;
- return ltp;
-}
-
-char const *
-get_lang_name (char const *suffix)
-{
- struct suffix *stp;
-
- stp = get_suffix_entry (suffix);
- if (stp == NULL)
- return NULL;
- return stp->suff_language->lang_name;
-}
-
-char const *
-get_filter (char const *suffix)
-{
- struct suffix *stp;
-
- stp = get_suffix_entry (suffix);
- if (stp == NULL)
- return NULL;
- return stp->suff_language->lang_filter;
-}
-
-get_token_t
-get_scanner (char const *lang)
-{
- struct language *ltp;
-
- ltp = get_lang_entry (lang);
- if (ltp == NULL)
- return NULL;
- return ltp->lang_get_token;
-}
-
-void
-set_scan_args (int op, char *arg)
-{
- struct language *ltp, *ltp2;
- struct suffix *stp;
- char *lhs;
- char *lhs2;
- int count = 0;
-
- lhs = arg;
- while (isalnum (*arg) || *arg == '.')
- arg++;
-
- if (strequ (lhs, "?=?"))
- {
- for (stp = suffixes; stp->suff_next; stp = stp->suff_next)
- {
- printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, stp->suff_language->lang_name);
- if (stp->suff_language->lang_filter)
- printf (" (%s)", stp->suff_language->lang_filter);
- }
- if (count)
- putchar ('\n');
- return;
- }
-
- if (strnequ (lhs, "?=", 2))
- {
- lhs += 2;
- ltp = get_lang_entry (lhs);
- if (ltp == NULL)
- {
- printf ("No scanner for language `%s'\n", lhs);
- return;
- }
- for (stp = suffixes; stp->suff_next; stp = stp->suff_next)
- if (stp->suff_language == ltp)
- {
- printf ("%s%s=%s", (count++ > 0) ? ", " : "", stp->suff_suffix, ltp->lang_name);
- if (stp->suff_language->lang_filter)
- printf (" (%s)", stp->suff_language->lang_filter);
- }
- if (count)
- putchar ('\n');
- return;
- }
-
- if (strequ (arg, "=?"))
- {
- lhs[strlen (lhs) - 2] = '\0';
- stp = get_suffix_entry (lhs);
- if (stp == NULL)
- {
- printf ("No scanner assigned to suffix `%s'\n", lhs);
- return;
- }
- printf ("%s=%s", stp->suff_suffix, stp->suff_language->lang_name);
- if (stp->suff_language->lang_filter)
- printf (" (%s)", stp->suff_language->lang_filter);
- printf ("\n");
- return;
- }
-
- if (*arg == '=')
- {
- *arg++ = '\0';
-
- ltp = get_lang_entry (arg);
- if (ltp == NULL)
- {
- fprintf (stderr, "%s: Language undefined: %s\n", program_name, arg);
- return;
- }
- stp = get_suffix_entry (lhs);
- if (stp == NULL)
- {
- stp = CALLOC (struct suffix, 1);
- stp->suff_suffix = lhs;
- stp->suff_language = ltp;
- stp->suff_next = suffixes;
- suffixes = stp;
- }
- else if (!strequ (arg, stp->suff_language->lang_name))
- {
- fprintf (stderr, "%s: Note: `%s=%s' overrides `%s=%s'\n", program_name, lhs, arg, lhs, stp->suff_language->lang_name);
- stp->suff_language = ltp;
- }
- return;
- }
- else if (*arg == '/')
- {
- *arg++ = '\0';
- ltp = get_lang_entry (lhs);
- if (ltp->lang_next == NULL)
- {
- ltp = CALLOC (struct language, 1);
- ltp->lang_name = lhs;
- ltp->lang_get_token = get_token_text;
- ltp->lang_set_args = set_args_text;
- ltp->lang_filter = NULL;
- ltp->lang_next = languages;
- languages = ltp;
- }
- lhs2 = arg;
- arg = strchr (arg, '/');
- if (arg == NULL)
- ltp2 = ltp;
- else
- {
- *arg++ = '\0';
- ltp2 = get_lang_entry (lhs2);
- if (ltp2 == NULL)
- {
- fprintf (stderr, "%s: language %s not defined.\n", program_name, lhs2);
- ltp2 = ltp;
- }
- }
- ltp->lang_get_token = ltp2->lang_get_token;
- ltp->lang_set_args = ltp2->lang_set_args;
- if (ltp->lang_filter && (!strequ (arg, ltp->lang_filter)))
- fprintf (stderr, "%s: Note: `%s/%s' overrides `%s/%s'\n", program_name, lhs, arg, lhs, ltp->lang_filter);
- ltp->lang_filter = arg;
- return;
- }
-
- if (op == '+')
- {
- switch (op = *arg++)
- {
- case '+':
- case '-':
- case '?':
- break;
- default:
- usage_scan ();
- }
- for (ltp = languages; ltp->lang_next; ltp = ltp->lang_next)
- (*ltp->lang_set_args) (NULL, op, arg);
- return;
- }
-
- if (*arg == '-' || *arg == '+' || *arg == '?')
- {
- op = *arg;
- *arg++ = '\0';
-
- ltp = get_lang_entry (lhs);
- if (ltp == NULL)
- {
- fprintf (stderr, "%s: Language undefined: %s\n", program_name, lhs);
- return;
- }
- (*ltp->lang_set_args) (lhs, op, arg);
- return;
- }
-
- usage_scan ();
-}
-
-static void
-usage_scan (void)
-{
- fprintf (stderr, "Usage: %s [-S<suffix>=<lang>] [+S(+|-)<arg>] [-S<lang>(+|-)<arg>] [-S<lang>/<lang>/<filter>]\n", program_name);
- exit (1);
-}
-
-/*************** C & C++ ****************************************************/
-
-#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */
-#define DG 0x0002 /* decimal digit [0-9] */
-#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */
-#define C1 0x0008 /* C comment introduction char: / */
-#define C2 0x0010 /* C comment termination char: * */
-#define Q1 0x0020 /* single quote: ' */
-#define Q2 0x0040 /* double quote: " */
-#define ES 0x0080 /* escape char: \ */
-#define NL 0x0100 /* newline: \n */
-#define EF 0x0200 /* EOF */
-#define SK 0x0400 /* Make these chars valid for names within strings */
-#define VH 0x0800 /* VHIL comment introduction char: # */
-#define WS 0x1000 /* White space characters */
-
-/*
- character class membership macros:
-*/
-#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */
-#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */
-#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */
-#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */
-#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */
-#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */
-#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */
-/*
- The `BORING' classes should be skipped over
- until something interesting comes along...
-*/
-#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */
-#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */
-#define ISVBORING(c) (!((rct)[c] & (EF|NL))) /* vhil comment fluff */
-#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */
-#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */
-
-static unsigned short ctype_c[257] =
-{
- EF,
-/* 0 1 2 3 4 5 6 7 */
-/* ----- ----- ----- ----- ----- ----- ----- ----- */
-/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
-/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1,
-/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
-/*060*/ DG, DG, DG, DG, DG, DG, DG, DG,
-/*070*/ DG, DG, 0, 0, 0, 0, 0, 0,
-/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
-/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
-/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
-/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1,
-/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
-/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
-/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
-/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
-};
-
-static int eat_underscore = 1;
-static int scan_VHIL = 0;
-
-static char const *
-get_token_VHIL (FILE *input_FILE, int *flags)
-{
- if (!scan_VHIL)
- set_args_c ("vhil", '+', "v");
- return get_token_c (input_FILE, flags);
-}
-
-/*
- Grab the next identifier from the C source
- file opened with the handle `input_FILE'.
- This state machine is built for speed, not elegance.
-*/
-static char const *
-get_token_c (FILE *input_FILE, int *flags)
-{
- static char input_buffer[BUFSIZ];
- static int new_line = 1;
- unsigned short *rct = &ctype_c[1];
- int c;
- char *id = input_buffer;
-
-top:
- c = getc (input_FILE);
- if (new_line)
- {
- new_line = 0;
- if (c == '.')
- {
- /* Auto-recognize vhil code when you see a '.' in column 1.
- also ignore lines that start with a '.' */
- if (!scan_VHIL)
- set_args_c ("vhil", '+', "v");
- while (ISVBORING (c))
- c = getc (input_FILE);
- new_line = 1;
- goto top;
- }
- if (c != '#')
- goto next;
- c = getc (input_FILE);
- if (scan_VHIL && ISSPACE (c))
- {
- while (ISVBORING (c))
- c = getc (input_FILE);
- new_line = 1;
- goto top;
- }
- while (ISBORING (c))
- c = getc (input_FILE);
- if (!ISID1ST (c))
- goto next;
- id = input_buffer;
- *id++ = c;
- while (ISIDREST (c = getc (input_FILE)))
- *id++ = c;
- *id = '\0';
- if (strequ (input_buffer, "include"))
- {
- while (c == ' ' || c == '\t')
- c = getc (input_FILE);
- if (c == '\n')
- {
- new_line = 1;
- goto top;
- }
- id = input_buffer;
- if (c == '"')
- {
- c = getc (input_FILE);
- while (c != '\n' && c != EOF && c != '"')
- {
- *id++ = c;
- c = getc (input_FILE);
- }
- *flags = TOK_STRING;
- }
- else if (c == '<')
- {
- c = getc (input_FILE);
- while (c != '\n' && c != EOF && c != '>')
- {
- *id++ = c;
- c = getc (input_FILE);
- }
- *flags = TOK_STRING;
- }
- else if (ISID1ST (c))
- {
- *id++ = c;
- while (ISIDREST (c = getc (input_FILE)))
- *id++ = c;
- *flags = TOK_NAME;
- }
- else
- {
- while (c != '\n' && c != EOF)
- c = getc (input_FILE);
- new_line = 1;
- goto top;
- }
- while (c != '\n' && c != EOF)
- c = getc (input_FILE);
- new_line = 1;
- *id = '\0';
- return input_buffer;
- }
- if (strnequ (input_buffer, "if", 2)
- || strequ (input_buffer, "define")
- || strequ (input_buffer, "elif") /* ansi C */
- || (scan_VHIL && strequ (input_buffer, "elsif"))
- || strequ (input_buffer, "undef"))
- goto next;
- while ((c != '\n') && (c != EOF))
- c = getc (input_FILE);
- new_line = 1;
- goto top;
- }
-
-next:
- while (ISBORING (c))
- c = getc (input_FILE);
-
- switch (c)
- {
- case '"':
- id = input_buffer;
- *id++ = c = getc (input_FILE);
- for (;;)
- {
- while (ISQ2BORING (c))
- *id++ = c = getc (input_FILE);
- if (c == '\\')
- {
- *id++ = c = getc (input_FILE);
- continue;
- }
- else if (c != '"')
- goto next;
- break;
- }
- *--id = '\0';
- id = input_buffer;
- while (ISSTRKEEP (*id))
- id++;
- if (*id || id == input_buffer)
- {
- c = getc (input_FILE);
- goto next;
- }
- *flags = TOK_STRING;
- if (eat_underscore && input_buffer[0] == '_' && input_buffer[1])
- return &input_buffer[1];
- else
- return input_buffer;
-
- case '\'':
- c = getc (input_FILE);
- for (;;)
- {
- while (ISQ1BORING (c))
- c = getc (input_FILE);
- if (c == '\\')
- {
- c = getc (input_FILE);
- continue;
- }
- else if (c == '\'')
- c = getc (input_FILE);
- goto next;
- }
-
- case '/':
- c = getc (input_FILE);
- if (c == '/')
- { /* Cope with C++ comment */
- while (ISVBORING (c))
- c = getc (input_FILE);
- new_line = 1;
- goto top;
- }
- else if (c != '*')
- goto next;
- c = getc (input_FILE);
- for (;;)
- {
- while (ISCBORING (c))
- c = getc (input_FILE);
- c = getc (input_FILE);
- if (c == '/')
- {
- c = getc (input_FILE);
- goto next;
- }
- else if (ISEOF (c))
- {
- new_line = 1;
- return NULL;
- }
- }
-
- case '\n':
- new_line = 1;
- goto top;
-
- case '#':
- if (!scan_VHIL)
- {
- /* Auto-recognize vhil when find a # in the middle of a line. */
- set_args_c ("vhil", '+', "v");
- }
- c = getc (input_FILE);
- while (ISVBORING (c))
- c = getc (input_FILE);
- new_line = 1;
- goto top;
- default:
- if (ISEOF (c))
- {
- new_line = 1;
- return NULL;
- }
- id = input_buffer;
- *id++ = c;
- if (ISID1ST (c))
- {
- *flags = TOK_NAME;
- while (ISIDREST (c = getc (input_FILE)))
- *id++ = c;
- }
- else if (ISDIGIT (c))
- {
- *flags = TOK_NUMBER;
- while (ISNUMBER (c = getc (input_FILE)))
- *id++ = c;
- }
- else
- fprintf (stderr, "junk: `\\%3o'", c);
- ungetc (c, input_FILE);
- *id = '\0';
- *flags |= TOK_LITERAL;
- return input_buffer;
- }
-}
-
-static void
-set_ctype_c (char const *chars, int type)
-{
- unsigned short *rct = &ctype_c[1];
-
- while (*chars)
- rct[*chars++] |= type;
-}
-
-static void
-clear_ctype_c (char const *chars, int type)
-{
- unsigned short *rct = &ctype_c[1];
-
- while (*chars)
- rct[*chars++] &= ~type;
-}
-
-static void
-usage_c (char const *lang_name)
-{
- fprintf (stderr, "Usage: %s does not accept %s scanner arguments\n", program_name, lang_name);
- exit (1);
-}
-
-static char document_c[] = "\
-The C scanner arguments take the form -Sc<arg>, where <arg>\n\
-is one of the following: (<cc> denotes one or more characters)\n\
- (+|-)u . . . . (Do|Don't) strip a leading `_' from ids in strings.\n\
- (+|-)s<cc> . . Allow <cc> in string ids, and (keep|ignore) those ids.\n\
- -v . . . . . . Skip vhil comments.";
-
-static void
-set_args_c (char const *lang_name, int op, char const *arg)
-{
- if (op == '?')
- {
- puts (document_c);
- return;
- }
- switch (*arg++)
- {
- case 'u':
- eat_underscore = (op == '+');
- break;
- case 's':
- if (op == '+')
- set_ctype_c (arg, SK);
- else
- clear_ctype_c (arg, SK);
- break;
- case 'v':
- set_ctype_c ("$", I1);
- set_ctype_c ("#", VH);
- set_ctype_c (" \t", WS);
- scan_VHIL = 1;
- break;
- default:
- if (lang_name)
- usage_c (lang_name);
- break;
- }
-}
-
-#undef I1
-#undef DG
-#undef NM
-#undef C1
-#undef C2
-#undef Q1
-#undef Q2
-#undef ES
-#undef NL
-#undef EF
-#undef SK
-#undef VH
-#undef WS
-#undef ISDIGIT
-#undef ISNUMBER
-#undef ISEOF
-#undef ISID1ST
-#undef ISIDREST
-#undef ISSTRKEEP
-#undef ISSPACE
-#undef ISBORING
-#undef ISCBORING
-#undef ISVBORING
-#undef ISQ1BORING
-#undef ISQ2BORING
-
-/*************** Assembly ***************************************************/
-
-#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
-#define NM 0x02 /* digit [0-9a-fA-FxX] */
-#define NL 0x04 /* newline: \n */
-#define CM 0x08 /* assembler comment char: usually # or | */
-#define IG 0x10 /* ignore `identifiers' with these chars in them */
-#define C1 0x20 /* C comment introduction char: / */
-#define C2 0x40 /* C comment termination char: * */
-#define EF 0x80 /* EOF */
-
-/* Assembly Language character classes */
-#define ISID1ST(c) ((rct)[c] & (I1))
-#define ISIDREST(c) ((rct)[c] & (I1|NM))
-#define ISNUMBER(c) ((rct)[c] & (NM))
-#define ISEOF(c) ((rct)[c] & (EF))
-#define ISCOMMENT(c) ((rct)[c] & (CM))
-#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1)))
-#define ISCBORING(c) (!((rct)[c] & (EF|NL)))
-#define ISCCBORING(c) (!((rct)[c] & (EF|C2)))
-#define ISIGNORE(c) ((rct)[c] & (IG))
-
-static unsigned char ctype_asm[257] =
-{
- EF,
-/* 0 1 2 3 4 5 6 7 */
-/* ----- ----- ----- ----- ----- ----- ----- ----- */
-/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
-/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*040*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
-/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
-/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
-/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
-/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
-/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
-/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
-/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
-/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
-/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
-/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
-
-};
-
-static int cpp_on_asm = 1;
-
-/*
- Grab the next identifier the assembly language
- source file opened with the handle `input_FILE'.
- This state machine is built for speed, not elegance.
-*/
-static char const *
-get_token_asm (FILE *input_FILE, int *flags)
-{
- static char input_buffer[BUFSIZ];
- unsigned char *rct = &ctype_asm[1];
- int c;
- char *id = input_buffer;
- static int new_line = 1;
-
-top:
- c = getc (input_FILE);
- if (cpp_on_asm > 0 && new_line)
- {
- new_line = 0;
- if (c != '#')
- goto next;
- while (ISBORING (c))
- c = getc (input_FILE);
- if (!ISID1ST (c))
- goto next;
- id = input_buffer;
- *id++ = c;
- while (ISIDREST (c = getc (input_FILE)))
- *id++ = c;
- *id = '\0';
- if (strequ (input_buffer, "include"))
- {
- while (c != '"' && c != '<')
- c = getc (input_FILE);
- id = input_buffer;
- *id++ = c = getc (input_FILE);
- while ((c = getc (input_FILE)) != '"' && c != '>')
- *id++ = c;
- *id = '\0';
- *flags = TOK_STRING;
- return input_buffer;
- }
- if (strnequ (input_buffer, "if", 2)
- || strequ (input_buffer, "define")
- || strequ (input_buffer, "undef"))
- goto next;
- while (c != '\n')
- c = getc (input_FILE);
- new_line = 1;
- goto top;
- }
-
-next:
- while (ISBORING (c))
- c = getc (input_FILE);
-
- if (ISCOMMENT (c))
- {
- while (ISCBORING (c))
- c = getc (input_FILE);
- new_line = 1;
- }
-
- if (ISEOF (c))
- {
- new_line = 1;
- return NULL;
- }
-
- if (c == '\n')
- {
- new_line = 1;
- goto top;
- }
-
- if (c == '/')
- {
- if ((c = getc (input_FILE)) != '*')
- goto next;
- c = getc (input_FILE);
- for (;;)
- {
- while (ISCCBORING (c))
- c = getc (input_FILE);
- c = getc (input_FILE);
- if (c == '/')
- {
- c = getc (input_FILE);
- break;
- }
- else if (ISEOF (c))
- {
- new_line = 1;
- return NULL;
- }
- }
- goto next;
- }
-
- id = input_buffer;
- if (eat_underscore && c == '_' && !ISID1ST (c = getc (input_FILE)))
- {
- ungetc (c, input_FILE);
- return "_";
- }
- *id++ = c;
- if (ISID1ST (c))
- {
- *flags = TOK_NAME;
- while (ISIDREST (c = getc (input_FILE)))
- *id++ = c;
- }
- else if (ISNUMBER (c))
- {
- *flags = TOK_NUMBER;
- while (ISNUMBER (c = getc (input_FILE)))
- *id++ = c;
- }
- else
- {
- if (isprint (c))
- fprintf (stderr, "junk: `%c'", c);
- else
- fprintf (stderr, "junk: `\\%03o'", c);
- goto next;
- }
-
- *id = '\0';
- for (id = input_buffer; *id; id++)
- if (ISIGNORE (*id))
- goto next;
- ungetc (c, input_FILE);
- *flags |= TOK_LITERAL;
- return input_buffer;
-}
-
-static void
-set_ctype_asm (char const *chars, int type)
-{
- unsigned char *rct = &ctype_asm[1];
-
- while (*chars)
- rct[*chars++] |= type;
-}
-
-static void
-clear_ctype_asm (char const *chars, int type)
-{
- unsigned char *rct = &ctype_asm[1];
-
- while (*chars)
- rct[*chars++] &= ~type;
-}
-
-static void
-usage_asm (char const *lang_name)
-{
- fprintf (stderr, "Usage: %s -S%s([-c<cc>] [-u] [(+|-)a<cc>] [(+|-)p] [(+|-)C])\n", program_name, lang_name);
- exit (1);
-}
-
-static char document_asm[] = "\
-The Assembler scanner arguments take the form -Sasm<arg>, where\n\
-<arg> is one of the following: (<cc> denotes one or more characters)\n\
- -c<cc> . . . . <cc> introduce(s) a comment until end-of-line.\n\
- (+|-)u . . . . (Do|Don't) strip a leading `_' from ids.\n\
- (+|-)a<cc> . . Allow <cc> in ids, and (keep|ignore) those ids.\n\
- (+|-)p . . . . (Do|Don't) handle C-preprocessor directives.\n\
- (+|-)C . . . . (Do|Don't) handle C-style comments. (/* */)";
-
-static void
-set_args_asm (char const *lang_name, int op, char const *arg)
-{
- if (op == '?')
- {
- puts (document_asm);
- return;
- }
- switch (*arg++)
- {
- case 'a':
- set_ctype_asm (arg, I1 | ((op == '-') ? IG : 0));
- break;
- case 'c':
- set_ctype_asm (arg, CM);
- break;
- case 'u':
- eat_underscore = (op == '+');
- break;
- case 'p':
- cpp_on_asm = (op == '+');
- break;
- case 'C':
- if (op == '+')
- {
- set_ctype_asm ("/", C1);
- set_ctype_asm ("*", C2);
- }
- else
- {
- clear_ctype_asm ("/", C1);
- clear_ctype_asm ("*", C2);
- }
- break;
- default:
- if (lang_name)
- usage_asm (lang_name);
- break;
- }
-}
-
-#undef I1
-#undef NM
-#undef NL
-#undef CM
-#undef IG
-#undef C1
-#undef C2
-#undef EF
-#undef ISID1ST
-#undef ISIDREST
-#undef ISNUMBER
-#undef ISEOF
-#undef ISCOMMENT
-#undef ISBORING
-#undef ISCBORING
-#undef ISCCBORING
-#undef ISIGNORE
-
-/*************** Text *******************************************************/
-
-#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
-#define NM 0x02 /* digit [0-9a-fA-FxX] */
-#define SQ 0x04 /* squeeze these out (.,',-) */
-#define EF 0x80 /* EOF */
-
-/* Text character classes */
-#define ISID1ST(c) ((rct)[c] & (I1))
-#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ))
-#define ISNUMBER(c) ((rct)[c] & (NM))
-#define ISEOF(c) ((rct)[c] & (EF))
-#define ISBORING(c) (!((rct)[c] & (I1|NM|EF)))
-#define ISIDSQUEEZE(c) ((rct)[c] & (SQ))
-
-static unsigned char ctype_text[257] =
-{
- EF,
-/* 0 1 2 3 4 5 6 7 */
-/* ----- ----- ----- ----- ----- ----- ----- ----- */
-/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*010*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*040*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*050*/ 0, 0, 0, 0, 0, 0, 0, 0,
-/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
-/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
-/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
-/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
-/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
-/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
-/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
-/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
-/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
-/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
-};
-
-/*
- Grab the next identifier the text source file opened with the
- handle `input_FILE'. This state machine is built for speed, not
- elegance.
-*/
-static char const *
-get_token_text (FILE *input_FILE, int *flags)
-{
- static char input_buffer[BUFSIZ];
- unsigned char *rct = &ctype_text[1];
- int c;
- char *id = input_buffer;
-
-top:
- c = getc (input_FILE);
- while (ISBORING (c))
- c = getc (input_FILE);
- if (ISEOF (c))
- return NULL;
- id = input_buffer;
- *id++ = c;
- if (ISID1ST (c))
- {
- *flags = TOK_NAME;
- while (ISIDREST (c = getc (input_FILE)))
- if (!ISIDSQUEEZE (c))
- *id++ = c;
- }
- else if (ISNUMBER (c))
- {
- *flags = TOK_NUMBER;
- while (ISNUMBER (c = getc (input_FILE)))
- *id++ = c;
- }
- else
- {
- if (isprint (c))
- fprintf (stderr, "junk: `%c'", c);
- else
- fprintf (stderr, "junk: `\\%03o'", c);
- goto top;
- }
-
- *id = '\0';
- ungetc (c, input_FILE);
- *flags |= TOK_LITERAL;
- return input_buffer;
-}
-
-static void
-set_ctype_text (char const *chars, int type)
-{
- unsigned char *rct = &ctype_text[1];
-
- while (*chars)
- rct[*chars++] |= type;
-}
-
-static void
-clear_ctype_text (char const *chars, int type)
-{
- unsigned char *rct = &ctype_text[1];
-
- while (*chars)
- rct[*chars++] &= ~type;
-}
-
-static void
-usage_text (char const *lang_name)
-{
- fprintf (stderr, "Usage: %s -S%s([(+|-)a<cc>] [(+|-)s<cc>]\n", program_name, lang_name);
- exit (1);
-}
-
-static char document_text[] = "\
-The Text scanner arguments take the form -Stext<arg>, where\n\
-<arg> is one of the following: (<cc> denotes one or more characters)\n\
- (+|-)a<cc> . . Include (or exculde) <cc> in ids.\n\
- (+|-)s<cc> . . Squeeze (or don't squeeze) <cc> out of ids.";
-
-static void
-set_args_text (char const *lang_name, int op, char const *arg)
-{
- if (op == '?')
- {
- puts (document_text);
- return;
- }
- switch (*arg++)
- {
- case 'a':
- if (op == '+')
- set_ctype_text (arg, I1);
- else
- clear_ctype_text (arg, I1);
- break;
- case 's':
- if (op == '+')
- set_ctype_text (arg, SQ);
- else
- clear_ctype_text (arg, SQ);
- break;
- default:
- if (lang_name)
- usage_text (lang_name);
- break;
- }
-}
-
-#undef I1
-#undef NM
-#undef SQ
-#undef EF
-#undef ISID1ST
-#undef ISIDREST
-#undef ISNUMBER
-#undef ISEOF
-#undef ISBORING
-#undef ISIDSQUEEZE