summaryrefslogtreecommitdiffstats
path: root/lib/scanners.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/scanners.c')
-rw-r--r--lib/scanners.c1201
1 files changed, 1201 insertions, 0 deletions
diff --git a/lib/scanners.c b/lib/scanners.c
new file mode 100644
index 0000000..4f50245
--- /dev/null
+++ b/lib/scanners.c
@@ -0,0 +1,1201 @@
+/* scanners.c -- file & directory name manipulations
+ Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <getopt.h>
+
+#include <config.h>
+#include "system.h"
+#include "error.h"
+#include "strxtra.h"
+#include "token.h"
+#include "alloc.h"
+#include "scanners.h"
+
+#define DEBUG(args) /* printf args */
+
+struct obstack lang_args_obstack; /* storage for the lang_args records and strings built while parsing language maps */
+struct lang_args *lang_args_default = 0; /* entry for the `**' pattern, once a map supplies one */
+struct lang_args *lang_args_list = 0; /* head of the chain of all other map entries, linked through la_next */
+struct obstack tokens_obstack; /* the get_token_* scanners build the tokens they return here */
+
+extern void usage __P((void)); /* called below when a language name or an option is not recognized */
+extern char *program_name; /* placed in slot 0 of the per-language argument vectors */
+
+/****************************************************************************/
+
+struct lang_args **parse_language_map_file __P((char const *file_name, struct lang_args **next_ptr)); /* appends the entries of one map file at *next_ptr and returns the new tail link */
+char *read_language_map_file __P((char const *file_name)); /* returns the contents of the file with "\n" and a NUL appended */
+
+static struct token *get_token_c __P((FILE *in_FILE, void const *args, int *flags)); /* scanner for language "C" */
+static void *parse_args_c __P((char **argv, int argc)); /* option parser for language "C" */
+static void help_me_c __P((void)); /* option summary for language "C" */
+
+static struct token *get_token_asm __P((FILE *in_FILE, void const *args, int *flags)); /* scanner for language "asm" */
+static void *parse_args_asm __P((char **argv, int argc)); /* option parser for language "asm" */
+static void help_me_asm __P((void)); /* option summary for language "asm" */
+
+static struct token *get_token_text __P((FILE *in_FILE, void const *args, int *flags)); /* scanner for language "text" */
+static void *parse_args_text __P((char **argv, int argc)); /* option parser for language "text" */
+static void help_me_text __P((void)); /* option summary for language "text" */
+
+struct language languages_0[] = /* one row per supported language: name, option parser, scanner, help printer */
+{
+ { "C", parse_args_c, get_token_c, help_me_c },
+ { "asm", parse_args_asm, get_token_asm, help_me_asm },
+ { "text", parse_args_text, get_token_text, help_me_text },
+};
+struct language const *languages_N = &languages_0[cardinalityof (languages_0)]; /* one past the last row; loops run while lang < languages_N */
+
+/* Print the option summary of every language in the table, each one
+   preceded by an empty line.  */
+void
+language_help_me (void)
+{
+  struct language *entry = languages_0;
+
+  while (entry < languages_N)
+    {
+      putchar ('\n');
+      (*entry->lg_help_me) ();
+      entry++;
+    }
+}
+
+/* Remember one per-language option given as "LANG:OPTION".
+
+   ARG is split in place with strtok: the text before the first `:' names
+   the language, and the first blank- or tab-delimited word after it is
+   appended to that language's argument vector (whose slot 0 is set to
+   program_name on first use).  The stored pointer refers into ARG, so ARG
+   must stay valid until language_getopt has run.
+
+   An unknown or missing language name is reported on stderr and usage ()
+   is called.  A missing option word is ignored instead of being stored as
+   a null pointer that the option parser would later dereference.
+
+   NOTE(review): the capacity of lg_argv is declared elsewhere and is not
+   checked here -- confirm it is large enough for the expected option count.  */
+void
+language_save_arg (char *arg)
+{
+  static char horizontal_space[] = " \t";
+  char *lang_name = strtok (arg, ":");
+  struct language *lang = lang_name ? get_language (lang_name) : 0;
+  char *option;
+
+  if (lang == 0)
+    {
+      fprintf (stderr, _("unrecognized language: `%s'\n"),
+               lang_name ? lang_name : "");
+      usage ();
+      /* Do not fall through to the dereferences below if usage returns.  */
+      return;
+    }
+  if (lang->lg_argc == 0)
+    lang->lg_argv[lang->lg_argc++] = program_name;
+  option = strtok (0, horizontal_space);
+  if (option)
+    lang->lg_argv[lang->lg_argc++] = option;
+}
+
+/* Hand the options collected by language_save_arg to the option parser
+   of each language that received any.  */
+void
+language_getopt ()
+{
+  struct language *lang = languages_0;
+
+  for (; lang < languages_N; lang++)
+    {
+      if (lang->lg_argc == 0)
+        continue;
+      lang->lg_parse_args (lang->lg_argv, lang->lg_argc);
+    }
+}
+
+/* Look LANG_NAME up in the language table.  Returns the matching entry,
+   or 0 when no entry has that name.  */
+struct language *
+get_language (char const *lang_name)
+{
+  struct language *found = 0;
+  struct language *lang;
+
+  for (lang = languages_0; found == 0 && lang < languages_N; lang++)
+    if (strequ (lang_name, lang->lg_name))
+      found = lang;
+
+  if (found)
+    {
+      DEBUG (("lang=%s", lang_name));
+    }
+  else
+    {
+      DEBUG (("!lang=%s", lang_name));
+    }
+  return found;
+}
+
+/****************************************************************************/
+
+/* Next value handed out as la_index to a list entry.  */
+int lang_args_index = 0;
+
+/* Set up the obstack for language-map data and parse the map FILE_NAME
+   (LANGUAGE_MAP when FILE_NAME is null) into lang_args_list.  Exits with
+   a diagnostic if the obstack cannot be initialized.  */
+void
+parse_language_map (char const *file_name)
+{
+  char const *map_name = file_name;
+
+  if (obstack_init (&lang_args_obstack) == 0)
+    error (1, 0, _("can't allocate language args obstack: memory exhausted"));
+  if (map_name == 0)
+    map_name = LANGUAGE_MAP;
+  parse_language_map_file (map_name, &lang_args_list);
+}
+
+/* Parse the language map FILE_NAME and append one `struct lang_args' per
+   entry at *NEXT_PTR.  Returns the link where the next entry belongs, so
+   that nested maps keep extending the same chain.
+
+   An entry is a pattern, a language name and optional arguments, separated
+   by blanks or tabs and ended by a line break:
+
+     - a `#' where a pattern is expected, or right after the language name,
+       starts a comment that runs to the end of the line;
+     - the pattern `***' includes another map: the file named on the same
+       line, or LANGUAGE_MAP when the line ends right after the pattern;
+     - the pattern `**' defines lang_args_default; only the first such
+       entry is kept;
+     - every other entry gets the next la_index and is linked into the list.
+
+   The arguments string is digested at once by the language's option
+   parser; an unknown language name leaves la_language and la_args_digested
+   null.
+
+   The scan relies on read_language_map_file ending the buffer with "\n"
+   and a NUL, so every word is followed by at least one white space
+   character.  isspace is given unsigned char values because passing a
+   negative plain char is undefined.  */
+struct lang_args **
+parse_language_map_file (char const *file_name, struct lang_args **next_ptr)
+{
+  static char white_space[] = " \t\r\n\v\f";
+  static char horizontal_space[] = " \t";
+  static char vertical_space[] = "\r\n\v\f";
+  char *lang_map_buffer;
+  char *lmp;
+
+  lmp = lang_map_buffer = read_language_map_file (file_name);
+  for (;;)
+    {
+      struct lang_args *new_args;
+      struct language const *lang;
+      int pattern_size;
+      char *lang_name;
+      int space;
+
+      /* Skip leading white space and full-line comments */
+      while (*lmp)
+        {
+          lmp += strspn (lmp, white_space);
+          if (*lmp != '#')
+            break;
+          lmp += strcspn (lmp, vertical_space);
+        }
+      if (*lmp == '\0')
+        break;
+
+      pattern_size = strcspn (lmp, white_space);
+      if (pattern_size == 3 && strnequ (lmp, "***", 3))
+        {
+          /* Include directive.  */
+          lmp += pattern_size;
+          lmp += strspn (lmp, horizontal_space);
+          if (isspace ((unsigned char) *lmp))
+            next_ptr = parse_language_map_file (LANGUAGE_MAP, next_ptr);
+          else
+            {
+              char *end = lmp + strcspn (lmp, white_space);
+              *end = '\0';
+              next_ptr = parse_language_map_file (lmp, next_ptr);
+              lmp = end + 1;
+            }
+          continue;
+        }
+
+      new_args = OBSTACK_ALLOC (&lang_args_obstack, struct lang_args, 1);
+      if (new_args == 0)
+        error (1, 0, _("can't allocate language args: memory exhausted"));
+      new_args->la_pattern = obstack_copy0 (&lang_args_obstack, lmp, pattern_size);
+      new_args->la_args_string = 0;
+      lmp += pattern_size;
+      lmp += strspn (lmp, horizontal_space);
+      if (isspace ((unsigned char) *lmp))
+        {
+          /* The line ended right after the pattern.  */
+          error (0, 0, _("language name expected following `%s' in file `%s'"),
+                 new_args->la_pattern, file_name);
+          obstack_free (&lang_args_obstack, new_args);
+          continue;
+        }
+      lang_name = lmp;
+      lmp += strcspn (lmp, white_space);
+      /* Remember what ended the name: arguments can only follow a blank
+         or a tab, not a line break.  */
+      space = *lmp;
+      *lmp++ = '\0';
+      lmp += strspn (lmp, horizontal_space);
+      lang = new_args->la_language = get_language (lang_name);
+
+      if (*lmp == '#')
+        lmp += strcspn (lmp, vertical_space);
+      else if (!isspace ((unsigned char) *lmp) && (space == ' ' || space == '\t'))
+        {
+          int args_size = strcspn (lmp, vertical_space);
+          new_args->la_args_string = obstack_copy0 (&lang_args_obstack, lmp, args_size);
+          lmp += args_size;
+        }
+      new_args->la_args_digested = (lang
+                                    ? lang->lg_parse_args (&new_args->la_args_string, 0)
+                                    : 0);
+      if (pattern_size == 2 && strnequ (new_args->la_pattern, "**", 2))
+        {
+          if (lang_args_default)
+            {
+              /* A default is already in place; drop this one.  */
+              obstack_free (&lang_args_obstack, new_args);
+              continue;
+            }
+          lang_args_default = new_args;
+          DEBUG ((", <default>"));
+        }
+      else
+        {
+          new_args->la_index = lang_args_index++;
+          *next_ptr = new_args;
+          next_ptr = &new_args->la_next;
+        }
+      DEBUG ((", pat=%s\n", new_args->la_pattern));
+    }
+  free (lang_map_buffer);
+  return next_ptr;
+}
+
+char *
+read_language_map_file (char const *file_name)
+{
+ int map_fd;
+ char *lang_map_buffer;
+ struct stat st;
+ int bytes;
+
+ map_fd = open (file_name, O_RDONLY);
+ if (map_fd < 0)
+ error (1, errno, _("can't open language map file `%s'"), file_name);
+ if (fstat (map_fd, &st) < 0)
+ error (1, errno, _("can't get size of map file `%s'"), file_name);
+
+ lang_map_buffer = MALLOC (char, st.st_size + 2);
+ if (lang_map_buffer == 0)
+ error (1, 0, _("can't allocate language args: memory exhausted"));
+ lang_map_buffer[st.st_size] = '\n';
+ lang_map_buffer[st.st_size+1] = '\0';
+
+ bytes = read (map_fd, lang_map_buffer, st.st_size);
+ if (bytes < 0)
+ error (1, errno, _("can't read language map file `%s'"), file_name);
+ /* FIXME: handle interrupted & partial reads */
+ if (bytes != st.st_size)
+ error (1, errno, _("can't read entire language map file `%s'"), file_name);
+
+ close (map_fd);
+ return lang_map_buffer;
+}
+
+/****************************************************************************/
+
+/* Split ARGS_STRING in place at blanks and tabs and build an argument
+   vector from the words.  Slot 0 of the vector is program_name and the
+   slot after the last word is a null pointer, as getopt expects.
+
+   *ARGCP receives the number of entries (program_name included, the final
+   null excluded) and *ARGVP the vector, which the caller releases with
+   free.  The words point into ARGS_STRING, so it must outlive the vector.  */
+void
+tokenize_args_string (char *args_string, int *argcp, char ***argvp)
+{
+  static char horizontal_space[] = " \t";
+  /* A string of length N holds at most (N + 1) / 2 words; add one slot
+     for program_name and one for the final null.  N / 2 + 3 covers that
+     for both odd and even N.  */
+  char **argv_0 = MALLOC (char *, strlen (args_string) / 2 + 3);
+  char **argv = argv_0;
+  char *arg;
+
+  *argv++ = program_name;
+  for (arg = strtok (args_string, horizontal_space);
+       arg;
+       arg = strtok (0, horizontal_space))
+    *argv++ = arg;
+  *argv = 0;
+  *argcp = argv - argv_0;
+  *argvp = REALLOC (argv_0, char *, *argcp + 1);
+}
+
+/* Turn on the TYPE bits for every character of the string CHARS in the
+   class table CTYPE.  The entry for character C is CTYPE[C + 1]; the
+   tables in this file keep the class of EOF in slot 0.  */
+static void
+set_ushort_ctype (unsigned short *ctype, char const *chars, int type)
+{
+  unsigned short *rct = &ctype[1];
+
+  /* Subscript with unsigned char: where plain char is signed, a byte
+     above 0x7f would otherwise select an entry in front of the table.  */
+  for (; *chars; chars++)
+    rct[(unsigned char) *chars] |= type;
+}
+
+/* Turn off the TYPE bits for every character of the string CHARS in the
+   class table CTYPE.  The entry for character C is CTYPE[C + 1]; the
+   tables in this file keep the class of EOF in slot 0.  */
+static void
+clear_ushort_ctype (unsigned short *ctype, char const *chars, int type)
+{
+  unsigned short *rct = &ctype[1];
+
+  /* Subscript with unsigned char: where plain char is signed, a byte
+     above 0x7f would otherwise select an entry in front of the table.  */
+  for (; *chars; chars++)
+    rct[(unsigned char) *chars] &= ~type;
+}
+
+/* Turn on the TYPE bits for every character of the string CHARS in the
+   byte-wide class table CTYPE.  The entry for character C is CTYPE[C + 1];
+   the tables in this file keep the class of EOF in slot 0.  */
+static void
+set_uchar_ctype (unsigned char *ctype, char const *chars, int type)
+{
+  unsigned char *rct = &ctype[1];
+
+  /* Subscript with unsigned char: where plain char is signed, a byte
+     above 0x7f would otherwise select an entry in front of the table.  */
+  for (; *chars; chars++)
+    rct[(unsigned char) *chars] |= type;
+}
+
+/* Turn off the TYPE bits for every character of the string CHARS in the
+   byte-wide class table CTYPE.  The entry for character C is CTYPE[C + 1];
+   the tables in this file keep the class of EOF in slot 0.  */
+static void
+clear_uchar_ctype (unsigned char *ctype, char const *chars, int type)
+{
+  unsigned char *rct = &ctype[1];
+
+  /* Subscript with unsigned char: where plain char is signed, a byte
+     above 0x7f would otherwise select an entry in front of the table.  */
+  for (; *chars; chars++)
+    rct[(unsigned char) *chars] &= ~type;
+}
+
+/*************** C & C++ ****************************************************/
+
+#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */
+#define DG 0x0002 /* decimal digit [0-9] */
+#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */
+#define C1 0x0008 /* C comment introduction char: / */
+#define C2 0x0010 /* C comment termination char: * */
+#define Q1 0x0020 /* single quote: ' */
+#define Q2 0x0040 /* double quote: " */
+#define ES 0x0080 /* escape char: \ */
+#define NL 0x0100 /* newline: \n */
+#define EF 0x0200 /* EOF */
+#define SK 0x0400 /* Make these chars valid for names within strings */
+#define VH 0x0800 /* VHIL comment introduction char: # */
+#define WS 0x1000 /* White space characters */
+
+/* character class membership macros: */
+
+#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */
+#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */
+#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */
+#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */
+#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */
+#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */
+#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */
+
+/* The `BORING' classes should be skipped over until something
+ interesting comes along... */
+
+#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */
+#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */
+#define ISCCBORING(c) (!((rct)[c] & (EF|NL))) /* C++ // comment fluff */
+#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */
+#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */
+
+static unsigned short ctype_c[257] = /* character classes for the C scanner; the scanner reads it through rct = &ctype[1] */
+{
+ EF, /* slot 0 is what rct[EOF] selects (assumes EOF == -1) */
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1,
+/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
+/*060*/ DG, DG, DG, DG, DG, DG, DG, DG,
+/*070*/ DG, DG, 0, 0, 0, 0, 0, 0,
+/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1,
+/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
+ /* FIXME: latin-1 */ /* codes 0200-0377 have no initializer here and so are class 0 */
+};
+
+struct args_c /* settings of the C scanner, as produced by parse_args_c */
+{
+ int strip_underscore; /* nonzero: drop a leading `_' from single-token strings (-u) */
+ unsigned short *ctype; /* class table in use: ctype_c itself, or a clone changed by -k / -i */
+};
+
+static struct args_c args_c = { 0, ctype_c }; /* shared defaults: keep underscores, stock table */
+
+static struct option const long_options_c[] = /* long names for the short options "k:i:u" */
+{
+ { "keep", required_argument, 0, 'k' },
+ { "ignore", required_argument, 0, 'i' },
+ { "strip-underscore", no_argument, 0, 'u' },
+ { 0 }
+};
+
+/* Print the summary of the C scanner's options on stdout.  */
+static void
+help_me_c (void)
+{
+  printf (_("C language:\n"
+            " -k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n"
+            " -i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n"
+            " -u,--strip-underscore Strip a leading underscore from single-token strings\n"));
+}
+
+/* Digest the options of the C scanner and return the resulting
+   `struct args_c'.
+
+   A null ARGV or a null *ARGV returns the shared defaults untouched.  With
+   a nonzero ARGC, ARGV is a ready argument vector and the options change
+   the shared defaults.  With ARGC == 0, *ARGV is one string holding all
+   options: it is copied, split into words, and applied to a newly
+   allocated record that starts out with the stock settings.
+
+   The first -k or -i replaces the stock class table with a private clone
+   so that ctype_c itself is never modified.  An unrecognized option calls
+   usage ().  */
+static void *
+parse_args_c (char **argv, int argc)
+{
+  char *tmp_string = 0;
+  struct args_c *args;
+
+  if (argv == 0 || *argv == 0)
+    return &args_c;
+
+  if (argc)
+    args = &args_c;
+  else
+    {
+      tmp_string = strdup (*argv);
+      if (tmp_string == 0)
+        error (1, 0, _("can't allocate language args: memory exhausted"));
+      tokenize_args_string (tmp_string, &argc, &argv);
+      args = MALLOC (struct args_c, 1);
+      args->strip_underscore = 0;
+      args->ctype = ctype_c;
+    }
+
+  /* Restart getopt: it may already have scanned another vector.  */
+  optind = 0;
+  for (;;)
+    {
+      int optc = getopt_long (argc, argv, "k:i:u",
+                              long_options_c, (int *) 0);
+      if (optc < 0)
+        break;
+      if ((optc == 'k' || optc == 'i') && args->ctype == ctype_c)
+        args->ctype = CLONE (ctype_c, unsigned short, cardinalityof (ctype_c));
+      switch (optc)
+        {
+        case 'k':
+          set_ushort_ctype (args->ctype, optarg, SK);
+          break;
+
+        case 'i':
+          clear_ushort_ctype (args->ctype, optarg, SK);
+          break;
+
+        case 'u':
+          args->strip_underscore = 1;
+          break;
+
+        default:
+          usage ();
+        }
+    }
+  if (tmp_string)
+    {
+      /* Both were created above for the string form only.  */
+      free (argv);
+      free (tmp_string);
+    }
+  return args;
+}
+
+
+/* Grab the next identifier from the C source file.  This state
+   machine is built for speed, not elegance.
+
+   IN_FILE is the stream to scan and ARGS the `struct args_c' made by
+   parse_args_c.  The token is built on tokens_obstack and returned; 0 is
+   returned at end of file.  *FLAGS is set to TOK_STRING for the file name
+   of an #include and for a string literal that consists only of
+   keepable characters, and to TOK_NAME or TOK_NUMBER (with TOK_LITERAL
+   or'ed in) for identifiers and numbers in ordinary code.
+
+   Comments, character constants and all other strings are skipped.  On a
+   preprocessor line only #include, #if..., #define, #elif and #undef are
+   looked into; any other directive is skipped to the end of its line.
+
+   The line-start state is kept in a static variable between calls; it is
+   set back to 1 whenever 0 is returned for end of file.
+
+   Text is collected in a BUFSIZ scratch buffer.  Identifiers, numbers and
+   #include names longer than that are cut off, and longer strings are not
+   reported, rather than written past the end of the buffer.  */
+
+static struct token *
+get_token_c (FILE *in_FILE, void const *args, int *flags)
+{
+#define ARGS ((struct args_c *) args)
+/* Append CH to the scratch buffer unless it is full.  CH is evaluated
+   exactly once, so it may contain a getc call.  */
+#define STORE(ch) do { int ch_ = (ch); if (id < id_N) *id++ = ch_; } while (0)
+  static int new_line = 1;
+  unsigned short *rct = &ARGS->ctype[1];
+  char id_0[BUFSIZ];
+  char *const id_N = &id_0[BUFSIZ - 1];   /* last slot, kept for the NUL */
+  char *id = id_0;
+  int c;
+
+  obstack_blank (&tokens_obstack, offsetof (struct token, tok_name));
+
+top:
+  c = getc (in_FILE);
+  if (new_line)
+    {
+      /* First character of a line: look for a preprocessor directive.  */
+      new_line = 0;
+      if (c != '#')
+        goto next;
+      c = getc (in_FILE);
+      while (ISBORING (c))
+        c = getc (in_FILE);
+      if (!ISID1ST (c))
+        goto next;
+      id = id_0;
+      STORE (c);
+      while (ISIDREST (c = getc (in_FILE)))
+        STORE (c);
+      *id = '\0';
+      if (strequ (id_0, "include"))
+        {
+          while (c == ' ' || c == '\t')
+            c = getc (in_FILE);
+          if (c == '\n')
+            {
+              new_line = 1;
+              goto top;
+            }
+          id = id_0;
+          if (c == '"')
+            {
+              c = getc (in_FILE);
+              while (c != '\n' && c != EOF && c != '"')
+                {
+                  STORE (c);
+                  c = getc (in_FILE);
+                }
+              *flags = TOK_STRING;
+            }
+          else if (c == '<')
+            {
+              c = getc (in_FILE);
+              while (c != '\n' && c != EOF && c != '>')
+                {
+                  STORE (c);
+                  c = getc (in_FILE);
+                }
+              *flags = TOK_STRING;
+            }
+          else if (ISID1ST (c))
+            {
+              STORE (c);
+              while (ISIDREST (c = getc (in_FILE)))
+                STORE (c);
+              *flags = TOK_NAME;
+            }
+          else
+            {
+              while (c != '\n' && c != EOF)
+                c = getc (in_FILE);
+              new_line = 1;
+              goto top;
+            }
+          /* Nothing else on an #include line is of interest.  */
+          while (c != '\n' && c != EOF)
+            c = getc (in_FILE);
+          new_line = 1;
+          obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+          return obstack_finish (&tokens_obstack);
+        }
+      if (strnequ (id_0, "if", 2)
+          || strequ (id_0, "define")
+          || strequ (id_0, "elif") /* ansi C */
+          || strequ (id_0, "undef"))
+        goto next;
+      while ((c != '\n') && (c != EOF))
+        c = getc (in_FILE);
+      new_line = 1;
+      goto top;
+    }
+
+next:
+  while (ISBORING (c))
+    c = getc (in_FILE);
+
+  switch (c)
+    {
+    case '"':
+      id = id_0;
+      STORE (c = getc (in_FILE));
+      for (;;)
+        {
+          while (ISQ2BORING (c))
+            STORE (c = getc (in_FILE));
+          if (c == '\\')
+            {
+              /* Take the escaped character and move on to the one after
+                 it, so that \" does not end the string.  */
+              STORE (c = getc (in_FILE));
+              if (c != EOF)
+                STORE (c = getc (in_FILE));
+              continue;
+            }
+          else if (c != '"')
+            goto next;
+          break;
+        }
+      if (id == id_N)
+        {
+          /* The buffer filled up: too long to be a single-token string.  */
+          c = getc (in_FILE);
+          goto next;
+        }
+      /* Overwrite the closing quote.  */
+      *--id = '\0';
+      id = id_0;
+      while (ISSTRKEEP ((unsigned char) *id))
+        id++;
+      if (*id || id == id_0)
+        {
+          /* Empty, or it holds a character that is not to be kept.  */
+          c = getc (in_FILE);
+          goto next;
+        }
+      *flags = TOK_STRING;
+      if (ARGS->strip_underscore && id_0[0] == '_' && id_0[1])
+        obstack_grow0 (&tokens_obstack, id_0 + 1, id - id_0 - 1);
+      else
+        obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+      return obstack_finish (&tokens_obstack);
+
+    case '\'':
+      c = getc (in_FILE);
+      for (;;)
+        {
+          while (ISQ1BORING (c))
+            c = getc (in_FILE);
+          if (c == '\\')
+            {
+              /* Skip the escaped character, so that \' does not end the
+                 constant.  */
+              c = getc (in_FILE);
+              if (c != EOF)
+                c = getc (in_FILE);
+              continue;
+            }
+          else if (c == '\'')
+            c = getc (in_FILE);
+          goto next;
+        }
+
+    case '/':
+      c = getc (in_FILE);
+      if (c == '/')
+        { /* Cope with C++ comment */
+          while (ISCCBORING (c))
+            c = getc (in_FILE);
+          new_line = 1;
+          goto top;
+        }
+      else if (c != '*')
+        goto next;
+      c = getc (in_FILE);
+      for (;;)
+        {
+          /* Run up to the next `*' and see whether a `/' follows it.  */
+          while (ISCBORING (c))
+            c = getc (in_FILE);
+          c = getc (in_FILE);
+          if (c == '/')
+            {
+              c = getc (in_FILE);
+              goto next;
+            }
+          else if (ISEOF (c))
+            {
+              new_line = 1;
+              obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+              return 0;
+            }
+        }
+
+    case '\n':
+      new_line = 1;
+      goto top;
+
+    default:
+      if (ISEOF (c))
+        {
+          new_line = 1;
+          obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+          return 0;
+        }
+      id = id_0;
+      STORE (c);
+      if (ISID1ST (c))
+        {
+          *flags = TOK_NAME;
+          while (ISIDREST (c = getc (in_FILE)))
+            STORE (c);
+        }
+      else if (ISDIGIT (c))
+        {
+          *flags = TOK_NUMBER;
+          while (ISNUMBER (c = getc (in_FILE)))
+            STORE (c);
+        }
+      else
+        {
+          if (isprint (c))
+            fprintf (stderr, _("junk: `%c'"), c);
+          else
+            fprintf (stderr, _("junk: `\\%03o'"), c);
+        }
+      /* C is the first character after the token; give it back.  */
+      ungetc (c, in_FILE);
+      *flags |= TOK_LITERAL;
+      obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+      return obstack_finish (&tokens_obstack);
+    }
+#undef STORE
+#undef ARGS
+}
+
+#undef I1
+#undef DG
+#undef NM
+#undef C1
+#undef C2
+#undef Q1
+#undef Q2
+#undef ES
+#undef NL
+#undef EF
+#undef SK
+#undef VH
+#undef WS
+#undef ISDIGIT
+#undef ISNUMBER
+#undef ISEOF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISSTRKEEP
+#undef ISSPACE
+#undef ISBORING
+#undef ISCBORING
+#undef ISCCBORING
+#undef ISQ1BORING
+#undef ISQ2BORING
+
+/*************** Assembly ***************************************************/
+
+#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
+#define NM 0x02 /* digit [0-9a-fA-FxX] */
+#define NL 0x04 /* newline: \n */
+#define CM 0x08 /* assembler comment char: usually # or | */
+#define IG 0x10 /* ignore `identifiers' with these chars in them */
+#define C1 0x20 /* C comment introduction char: / */
+#define C2 0x40 /* C comment termination char: * */
+#define EF 0x80 /* EOF */
+
+/* Assembly Language character classes */
+#define ISID1ST(c) ((rct)[c] & (I1))
+#define ISIDREST(c) ((rct)[c] & (I1|NM))
+#define ISNUMBER(c) ((rct)[c] & (NM))
+#define ISEOF(c) ((rct)[c] & (EF))
+#define ISCOMMENT(c) ((rct)[c] & (CM))
+#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1)))
+#define ISCBORING(c) (!((rct)[c] & (EF|NL)))
+#define ISCCBORING(c) (!((rct)[c] & (EF|C2)))
+#define ISIGNORE(c) ((rct)[c] & (IG))
+
+static unsigned char ctype_asm[257] = /* character classes for the assembly scanner; the scanner reads it through rct = &ctype[1] */
+{
+ EF, /* slot 0 is what rct[EOF] selects (assumes EOF == -1) */
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
+/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
+/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
+/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
+/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, /* codes 0200-0377 have no initializer and so are class 0; no character starts out as CM or IG */
+
+};
+
+struct args_asm /* settings of the assembly scanner, as produced by parse_args_asm */
+{
+ int handle_cpp; /* greater than zero: look for C preprocessor directives at the start of a line */
+ int strip_underscore; /* nonzero: drop a leading `_' from tokens (-u) */
+ unsigned char *ctype; /* class table in use: ctype_asm itself, or a clone changed by -c / -k / -i */
+};
+
+static struct args_asm args_asm = { 1, 0, ctype_asm }; /* shared defaults: handle cpp, keep underscores, stock table */
+
+/* Long names for the short options "c:k:i:un" of the assembly scanner.
+   Each long option returns the letter of its short form; --no-cpp must
+   return 'n', the letter parse_args_asm handles and help_me_asm shows.  */
+static struct option const long_options_asm[] =
+{
+  { "comment", required_argument, 0, 'c' },
+  { "keep", required_argument, 0, 'k' },
+  { "ignore", required_argument, 0, 'i' },
+  { "strip-underscore", no_argument, 0, 'u' },
+  { "no-cpp", no_argument, 0, 'n' },
+  { 0 }
+};
+
+/* Print the summary of the assembly scanner's options on stdout.  */
+static void
+help_me_asm (void)
+{
+  printf (_("Assembly language:\n"
+            " -c,--comment=CHARS Any of CHARS starts a comment until end-of-line\n"
+            " -k,--keep=CHARS Allow CHARS in tokens, and keep the result\n"
+            " -i,--ignore=CHARS Allow CHARS in tokens, and toss the result\n"
+            " -u,--strip-underscore Strip a leading underscore from tokens\n"
+            " -n,--no-cpp Don't handle C pre-processor directives\n"));
+}
+
+/* Digest the options of the assembly scanner and return the resulting
+   `struct args_asm'.
+
+   A null ARGV or a null *ARGV returns the shared defaults untouched.  With
+   a nonzero ARGC, ARGV is a ready argument vector and the options change
+   the shared defaults.  With ARGC == 0, *ARGV is one string holding all
+   options: it is copied, split into words, and applied to a newly
+   allocated record that starts out with the same values as the stock
+   defaults (cpp handling on, underscores kept, stock class table).
+
+   The first -c, -k or -i replaces the stock class table with a private
+   clone so that ctype_asm itself is never modified.  An unrecognized
+   option calls usage ().  */
+static void *
+parse_args_asm (char **argv, int argc)
+{
+  char *tmp_string = 0;
+  struct args_asm *args;
+
+  if (argv == 0 || *argv == 0)
+    return &args_asm;
+
+  if (argc)
+    args = &args_asm;
+  else
+    {
+      tmp_string = strdup (*argv);
+      if (tmp_string == 0)
+        error (1, 0, _("can't allocate language args: memory exhausted"));
+      tokenize_args_string (tmp_string, &argc, &argv);
+      args = MALLOC (struct args_asm, 1);
+      /* Every member must be set: the scanner reads all three.  */
+      args->handle_cpp = 1;
+      args->strip_underscore = 0;
+      args->ctype = ctype_asm;
+    }
+
+  /* Restart getopt: it may already have scanned another vector.  */
+  optind = 0;
+  for (;;)
+    {
+      int optc = getopt_long (argc, argv, "c:k:i:un",
+                              long_options_asm, (int *) 0);
+      if (optc < 0)
+        break;
+      if ((optc == 'k' || optc == 'i' || optc == 'c')
+          && args->ctype == ctype_asm)
+        args->ctype = CLONE (ctype_asm, unsigned char, cardinalityof (ctype_asm));
+      switch (optc)
+        {
+        case 'c':
+          set_uchar_ctype (args->ctype, optarg, CM);
+          break;
+
+        case 'k':
+          set_uchar_ctype (args->ctype, optarg, I1);
+          break;
+
+        case 'i':
+          set_uchar_ctype (args->ctype, optarg, I1 | IG);
+          break;
+
+        case 'u':
+          args->strip_underscore = 1;
+          break;
+
+        case 'n':
+          args->handle_cpp = 0;
+          break;
+
+        default:
+          usage ();
+        }
+    }
+  if (tmp_string)
+    {
+      /* Both were created above for the string form only.  */
+      free (argv);
+      free (tmp_string);
+    }
+  return args;
+}
+
+/* Grab the next identifier from the assembly language source file.  This
+   state machine is built for speed, not elegance.
+
+   IN_FILE is the stream to scan and ARGS the `struct args_asm' made by
+   parse_args_asm.  The token is built on tokens_obstack and returned; 0 is
+   returned at end of file.  *FLAGS is set to TOK_STRING for the file name
+   of an #include, and to TOK_NAME or TOK_NUMBER (with TOK_LITERAL or'ed
+   in) for identifiers and numbers.
+
+   Comments introduced by a character of class CM run to the end of the
+   line, C comments are skipped, and tokens containing a character of
+   class IG are dropped.  When cpp handling is on, only #include, #if...,
+   #define and #undef lines are looked into; any other directive is skipped
+   to the end of its line.
+
+   Every loop that waits for a particular character also stops at end of
+   file, and text longer than the BUFSIZ scratch buffer is cut off rather
+   than written past its end.  */
+
+static struct token *
+get_token_asm (FILE *in_FILE, void const *args, int *flags)
+{
+#define ARGS ((struct args_asm *) args)
+/* Append CH to the scratch buffer unless it is full.  CH is evaluated
+   exactly once.  */
+#define STORE(ch) do { int ch_ = (ch); if (id < id_N) *id++ = ch_; } while (0)
+  static int new_line = 1;
+  unsigned char *rct = &ARGS->ctype[1];
+  char id_0[BUFSIZ];
+  char *const id_N = &id_0[BUFSIZ - 1];   /* last slot, kept for the NUL */
+  char *id = id_0;
+  int c;
+
+  obstack_blank (&tokens_obstack, offsetof (struct token, tok_name));
+
+top:
+  c = getc (in_FILE);
+  if (ARGS->handle_cpp > 0 && new_line)
+    {
+      /* First character of a line: look for a preprocessor directive.  */
+      new_line = 0;
+      if (c != '#')
+        goto next;
+      while (ISBORING (c))
+        c = getc (in_FILE);
+      if (!ISID1ST (c))
+        goto next;
+      id = id_0;
+      STORE (c);
+      while (ISIDREST (c = getc (in_FILE)))
+        STORE (c);
+      *id = '\0';
+      if (strequ (id_0, "include"))
+        {
+          while (c != '"' && c != '<' && c != EOF)
+            c = getc (in_FILE);
+          if (c == EOF)
+            goto hit_eof;
+          id = id_0;
+          c = getc (in_FILE);
+          if (c == EOF)
+            goto hit_eof;
+          STORE (c);
+          while ((c = getc (in_FILE)) != '"' && c != '>' && c != EOF)
+            STORE (c);
+          *flags = TOK_STRING;
+          obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+          return obstack_finish (&tokens_obstack);
+        }
+      if (strnequ (id_0, "if", 2)
+          || strequ (id_0, "define")
+          || strequ (id_0, "undef"))
+        goto next;
+      while (c != '\n' && c != EOF)
+        c = getc (in_FILE);
+      new_line = 1;
+      goto top;
+    }
+
+next:
+  while (ISBORING (c))
+    c = getc (in_FILE);
+
+  if (ISCOMMENT (c))
+    {
+      /* Here ISCBORING is everything up to the end of the line.  */
+      while (ISCBORING (c))
+        c = getc (in_FILE);
+      new_line = 1;
+    }
+
+  if (ISEOF (c))
+    goto hit_eof;
+
+  if (c == '\n')
+    {
+      new_line = 1;
+      goto top;
+    }
+
+  if (c == '/')
+    {
+      if ((c = getc (in_FILE)) != '*')
+        goto next;
+      c = getc (in_FILE);
+      for (;;)
+        {
+          /* Run up to the next `*' and see whether a `/' follows it.  */
+          while (ISCCBORING (c))
+            c = getc (in_FILE);
+          c = getc (in_FILE);
+          if (c == '/')
+            {
+              c = getc (in_FILE);
+              break;
+            }
+          else if (ISEOF (c))
+            goto hit_eof;
+        }
+      goto next;
+    }
+
+  id = id_0;
+  if (ARGS->strip_underscore && c == '_' && !ISID1ST (c = getc (in_FILE)))
+    {
+      /* A lone underscore is a token by itself.  Give back the character
+         read after it so it is scanned on the next call.  */
+      ungetc (c, in_FILE);
+      *flags = TOK_NAME | TOK_LITERAL;
+      obstack_grow0 (&tokens_obstack, "_", 1);
+      return obstack_finish (&tokens_obstack);
+    }
+  STORE (c);
+  if (ISID1ST (c))
+    {
+      *flags = TOK_NAME;
+      while (ISIDREST (c = getc (in_FILE)))
+        STORE (c);
+    }
+  else if (ISNUMBER (c))
+    {
+      *flags = TOK_NUMBER;
+      while (ISNUMBER (c = getc (in_FILE)))
+        STORE (c);
+    }
+  else
+    {
+      if (isprint (c))
+        fprintf (stderr, _("junk: `%c'"), c);
+      else
+        fprintf (stderr, _("junk: `\\%03o'"), c);
+      /* Move on, or the same character would be examined forever.  */
+      c = getc (in_FILE);
+      goto next;
+    }
+
+  *id = '\0';
+  for (id = id_0; *id; id++)
+    if (ISIGNORE ((unsigned char) *id))
+      goto next;
+  /* C is the first character after the token; give it back.  */
+  ungetc (c, in_FILE);
+  *flags |= TOK_LITERAL;
+  obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+  return obstack_finish (&tokens_obstack);
+
+hit_eof:
+  /* Drop the partly built token and start afresh for the next file.  */
+  new_line = 1;
+  obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+  return 0;
+#undef STORE
+#undef ARGS
+}
+
+#undef I1
+#undef NM
+#undef NL
+#undef CM
+#undef IG
+#undef C1
+#undef C2
+#undef EF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISNUMBER
+#undef ISEOF
+#undef ISCOMMENT
+#undef ISBORING
+#undef ISCBORING
+#undef ISCCBORING
+#undef ISIGNORE
+
+/*************** Text *******************************************************/
+
+#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
+#define NM 0x02 /* digit [0-9a-fA-FxX] */
+#define SQ 0x04 /* squeeze these out (.,',-) */
+#define EF 0x80 /* EOF */
+
+/* Text character classes */
+#define ISID1ST(c) ((rct)[c] & (I1))
+#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ))
+#define ISNUMBER(c) ((rct)[c] & (NM))
+#define ISEOF(c) ((rct)[c] & (EF))
+#define ISBORING(c) (!((rct)[c] & (I1|NM|EF)))
+#define ISIDSQUEEZE(c) ((rct)[c] & (SQ))
+
+static unsigned char ctype_text[257] = /* character classes for the text scanner; the scanner reads it through rct = &ctype[1] */
+{
+ EF, /* slot 0 is what rct[EOF] selects (assumes EOF == -1) */
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*050*/ 0, 0, 0, 0, 0, 0, 0, 0, /* NOTE(review): no entry of this table carries SQ, so ISIDSQUEEZE is never true with the stock table */
+/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
+/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
+/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
+/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
+/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
+/*200*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*210*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*220*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*230*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*240*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*250*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*260*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*270*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*300*/ I1, I1, I1, I1, I1, I1, I1, I1, /* codes 0300-0377 start identifiers, except 0327 and 0367 */
+/*310*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*320*/ I1, I1, I1, I1, I1, I1, I1, 0,
+/*330*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*340*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*350*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*360*/ I1, I1, I1, I1, I1, I1, I1, 0,
+/*370*/ I1, I1, I1, I1, I1, I1, I1, I1,
+};
+
+struct args_text /* settings of the text scanner, as produced by parse_args_text */
+{
+ unsigned char *ctype; /* class table in use: ctype_text itself, or a clone changed by the options */
+};
+
+static struct args_text args_text = { ctype_text }; /* shared defaults: stock table */
+
+static struct option const long_options_text[] = /* long names for the short options "i:x:" */
+{
+ { "include", required_argument, 0, 'i' },
+ { "exclude", required_argument, 0, 'x' },
+ { 0 }
+};
+
+/* Print the summary of the text scanner's options on stdout.  */
+static void
+help_me_text (void)
+{
+  printf (_("Text language:\n"
+            " -i,--include=CHAR-CLASS Include characters from CHAR-CLASS in tokens\n"
+            " -x,--exclude=CHAR-CLASS Exclude characters from CHAR-CLASS from tokens\n"));
+}
+
+/* Digest the options of the text scanner and return the resulting
+   `struct args_text'.
+
+   A null ARGV or a null *ARGV returns the shared defaults untouched.  With
+   a nonzero ARGC, ARGV is a ready argument vector and the options change
+   the shared defaults.  With ARGC == 0, *ARGV is one string holding all
+   options: it is copied, split into words, and applied to a newly
+   allocated record that starts out with the stock class table.
+
+   The first -i or -x replaces the stock class table with a private clone;
+   both options write to the table, so neither may touch ctype_text
+   itself.  An unrecognized option calls usage ().  */
+static void *
+parse_args_text (char **argv, int argc)
+{
+  char *tmp_string = 0;
+  struct args_text *args;
+
+  if (argv == 0 || *argv == 0)
+    return &args_text;
+
+  if (argc)
+    args = &args_text;
+  else
+    {
+      tmp_string = strdup (*argv);
+      if (tmp_string == 0)
+        error (1, 0, _("can't allocate language args: memory exhausted"));
+      tokenize_args_string (tmp_string, &argc, &argv);
+      args = MALLOC (struct args_text, 1);
+      args->ctype = ctype_text;
+    }
+
+  /* Restart getopt: it may already have scanned another vector.  */
+  optind = 0;
+  for (;;)
+    {
+      int optc = getopt_long (argc, argv, "i:x:",
+                              long_options_text, (int *) 0);
+      if (optc < 0)
+        break;
+      if ((optc == 'i' || optc == 'x') && args->ctype == ctype_text)
+        args->ctype = CLONE (ctype_text, unsigned char, cardinalityof (ctype_text));
+      switch (optc)
+        {
+        case 'i':
+          set_uchar_ctype (args->ctype, optarg, I1);
+          break;
+
+        case 'x':
+          clear_uchar_ctype (args->ctype, optarg, I1);
+          break;
+
+        default:
+          usage ();
+        }
+    }
+  if (tmp_string)
+    {
+      /* Both were created above for the string form only.  */
+      free (argv);
+      free (tmp_string);
+    }
+  return args;
+}
+
+/* Grab the next identifier from the text source file.  This state machine
+   is built for speed, not elegance.
+
+   IN_FILE is the stream to scan and ARGS the `struct args_text' made by
+   parse_args_text.  The token is built on tokens_obstack and returned; 0
+   is returned at end of file.  *FLAGS is set to TOK_NAME for a word and
+   to TOK_NUMBER for a number, with TOK_LITERAL or'ed in.
+
+   Inside a word, characters of class SQ are accepted but left out of the
+   token.  Text longer than the BUFSIZ scratch buffer is cut off rather
+   than written past its end.  */
+
+static struct token *
+get_token_text (FILE *in_FILE, void const *args, int *flags)
+{
+#define ARGS ((struct args_text *) args)
+/* Append CH to the scratch buffer unless it is full.  CH is evaluated
+   exactly once.  */
+#define STORE(ch) do { int ch_ = (ch); if (id < id_N) *id++ = ch_; } while (0)
+  static char id_0[BUFSIZ];
+  char *const id_N = &id_0[BUFSIZ - 1];
+  unsigned char *rct = &ARGS->ctype[1];
+  int c;
+  char *id = id_0;
+
+  obstack_blank (&tokens_obstack, offsetof (struct token, tok_name));
+
+top:
+  c = getc (in_FILE);
+  while (ISBORING (c))
+    c = getc (in_FILE);
+  if (ISEOF (c))
+    {
+      obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+      return 0;
+    }
+  id = id_0;
+  STORE (c);
+  if (ISID1ST (c))
+    {
+      *flags = TOK_NAME;
+      while (ISIDREST (c = getc (in_FILE)))
+        if (!ISIDSQUEEZE (c))
+          STORE (c);
+    }
+  else if (ISNUMBER (c))
+    {
+      *flags = TOK_NUMBER;
+      while (ISNUMBER (c = getc (in_FILE)))
+        STORE (c);
+    }
+  else
+    {
+      if (isprint (c))
+        fprintf (stderr, _("junk: `%c'"), c);
+      else
+        fprintf (stderr, _("junk: `\\%03o'"), c);
+      goto top;
+    }
+
+  /* C is the first character after the token; give it back.  */
+  ungetc (c, in_FILE);
+  *flags |= TOK_LITERAL;
+  obstack_grow0 (&tokens_obstack, id_0, id - id_0);
+  return obstack_finish (&tokens_obstack);
+#undef STORE
+#undef ARGS
+}
+
+#undef I1
+#undef NM
+#undef SQ
+#undef EF
+#undef ISID1ST
+#undef ISIDREST
+#undef ISNUMBER
+#undef ISEOF
+#undef ISBORING
+#undef ISIDSQUEEZE