/* scanners.c -- file & directory name manipulations Copyright (C) 1986, 1995, 1996 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include #include #include #include #include #include #include "system.h" #include "error.h" #include "strxtra.h" #include "token.h" #include "alloc.h" #include "scanners.h" #define DEBUG(args) /* printf args */ struct obstack lang_args_obstack; struct lang_args *lang_args_default = 0; struct lang_args *lang_args_list = 0; struct obstack tokens_obstack; extern void usage __P((void)); extern char *program_name; /****************************************************************************/ struct lang_args **parse_language_map_file __P((char const *file_name, struct lang_args **next_ptr)); char *read_language_map_file __P((char const *file_name)); static struct token *get_token_c __P((FILE *in_FILE, void const *args, int *flags)); static void *parse_args_c __P((char **argv, int argc)); static void help_me_c __P((void)); static struct token *get_token_asm __P((FILE *in_FILE, void const *args, int *flags)); static void *parse_args_asm __P((char **argv, int argc)); static void help_me_asm __P((void)); static struct token *get_token_text __P((FILE *in_FILE, void const *args, int *flags)); static void *parse_args_text __P((char **argv, int argc)); static void help_me_text __P((void)); struct 
language languages_0[] = { { "C", parse_args_c, get_token_c, help_me_c }, { "asm", parse_args_asm, get_token_asm, help_me_asm }, { "text", parse_args_text, get_token_text, help_me_text }, }; struct language const *languages_N = &languages_0[cardinalityof (languages_0)]; void language_help_me (void) { struct language *lang; for (lang = languages_0; lang < languages_N; lang++) { putchar ('\n'); (*lang->lg_help_me) (); } } void language_save_arg (char *arg) { static char horizontal_space[] = " \t"; char *lang_name = strtok (arg, ":"); struct language *lang = get_language (lang_name); if (lang == 0) { fprintf (stderr, _("unrecognized language: `%s'\n"), lang_name); usage (); } if (lang->lg_argc == 0) lang->lg_argv[lang->lg_argc++] = program_name; lang->lg_argv[lang->lg_argc++] = strtok (0, horizontal_space); } void language_getopt () { struct language *lang; for (lang = languages_0; lang < languages_N; lang++) if (lang->lg_argc) lang->lg_parse_args (lang->lg_argv, lang->lg_argc); } struct language * get_language (char const *lang_name) { struct language *lang; for (lang = languages_0; lang < languages_N; lang++) if (strequ (lang_name, lang->lg_name)) { DEBUG (("lang=%s", lang_name)); return lang; } DEBUG (("!lang=%s", lang_name)); return 0; } /****************************************************************************/ int lang_args_index = 0; void parse_language_map (char const *file_name) { if (obstack_init (&lang_args_obstack) == 0) error (1, 0, _("can't allocate language args obstack: memory exhausted")); if (file_name == 0) file_name = LANGUAGE_MAP; parse_language_map_file (file_name, &lang_args_list); } struct lang_args ** parse_language_map_file (char const *file_name, struct lang_args **next_ptr) { static char white_space[] = " \t\r\n\v\f"; static char horizontal_space[] = " \t"; static char vertical_space[] = "\r\n\v\f"; char *lang_map_buffer; char *lmp; lmp = lang_map_buffer = read_language_map_file (file_name); for (;;) { struct lang_args *new_args; struct 
language const *lang; int pattern_size; char *lang_name; int space; /* Skip leading white space and full-line comments */ while (*lmp) { lmp += strspn (lmp, white_space); if (*lmp != '#') break; lmp += strcspn (lmp, vertical_space); } if (*lmp == '\0') break; pattern_size = strcspn (lmp, white_space); if (pattern_size == 3 && strnequ (lmp, "***", 3)) { lmp += pattern_size; lmp += strspn (lmp, horizontal_space); if (isspace (*lmp)) next_ptr = parse_language_map_file (LANGUAGE_MAP, next_ptr); else { char *end = lmp + strcspn (lmp, white_space); *end = '\0'; next_ptr = parse_language_map_file (lmp, next_ptr); lmp = end + 1; } continue; } new_args = OBSTACK_ALLOC (&lang_args_obstack, struct lang_args, 1); if (new_args == 0) error (1, 0, _("can't allocate language args: memory exhausted")); new_args->la_pattern = obstack_copy0 (&lang_args_obstack, lmp, pattern_size); new_args->la_args_string = 0; lmp += pattern_size; lmp += strspn (lmp, horizontal_space); if (isspace (*lmp)) { error (0, 0, _("language name expected following `%s' in file `%s'"), new_args->la_pattern, file_name); obstack_free (&lang_args_obstack, new_args); continue; } lang_name = lmp; lmp += strcspn (lmp, white_space); space = *lmp; *lmp++ = '\0'; lmp += strspn (lmp, horizontal_space); lang = new_args->la_language = get_language (lang_name); if (*lmp == '#') lmp += strcspn (lmp, vertical_space); else if (!isspace (*lmp) && (space == ' ' || space == '\t')) { int args_size = strcspn (lmp, vertical_space); new_args->la_args_string = obstack_copy0 (&lang_args_obstack, lmp, args_size); lmp += args_size; } new_args->la_args_digested = (lang ? 
lang->lg_parse_args (&new_args->la_args_string, 0) : 0); if (pattern_size == 2 && strnequ (new_args->la_pattern, "**", 2)) { if (lang_args_default) { obstack_free (&lang_args_obstack, new_args); continue; } lang_args_default = new_args; DEBUG ((", ")); } else { new_args->la_index = lang_args_index++; *next_ptr = new_args; next_ptr = &new_args->la_next; } DEBUG ((", pat=%s\n", new_args->la_pattern)); } free (lang_map_buffer); return next_ptr; } char * read_language_map_file (char const *file_name) { int map_fd; char *lang_map_buffer; struct stat st; int bytes; map_fd = open (file_name, O_RDONLY); if (map_fd < 0) error (1, errno, _("can't open language map file `%s'"), file_name); if (fstat (map_fd, &st) < 0) error (1, errno, _("can't get size of map file `%s'"), file_name); lang_map_buffer = MALLOC (char, st.st_size + 2); if (lang_map_buffer == 0) error (1, 0, _("can't allocate language args: memory exhausted")); lang_map_buffer[st.st_size] = '\n'; lang_map_buffer[st.st_size+1] = '\0'; bytes = read (map_fd, lang_map_buffer, st.st_size); if (bytes < 0) error (1, errno, _("can't read language map file `%s'"), file_name); /* FIXME: handle interrupted & partial reads */ if (bytes != st.st_size) error (1, errno, _("can't read entire language map file `%s'"), file_name); close (map_fd); return lang_map_buffer; } /****************************************************************************/ void tokenize_args_string (char *args_string, int *argcp, char ***argvp) { static char horizontal_space[] = " \t"; char **argv_0 = MALLOC (char *, strlen (args_string) / 2); char **argv = argv_0; char *arg; *argv++ = program_name; arg = strtok (args_string, horizontal_space); while (arg) { *argv++ = arg; arg = strtok (0, horizontal_space); } *argcp = argv - argv_0; *argvp = REALLOC (argv_0, char *, *argcp); } static void set_ushort_ctype (unsigned short *ctype, char const *chars, int type) { unsigned short *rct = &ctype[1]; while (*chars) rct[*chars++] |= type; } static void 
clear_ushort_ctype (unsigned short *ctype, char const *chars, int type) { unsigned short *rct = &ctype[1]; while (*chars) rct[*chars++] &= ~type; } static void set_uchar_ctype (unsigned char *ctype, char const *chars, int type) { unsigned char *rct = &ctype[1]; while (*chars) rct[*chars++] |= type; } static void clear_uchar_ctype (unsigned char *ctype, char const *chars, int type) { unsigned char *rct = &ctype[1]; while (*chars) rct[*chars++] &= ~type; } /*************** C & C++ ****************************************************/ #define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */ #define DG 0x0002 /* decimal digit [0-9] */ #define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */ #define C1 0x0008 /* C comment introduction char: / */ #define C2 0x0010 /* C comment termination char: * */ #define Q1 0x0020 /* single quote: ' */ #define Q2 0x0040 /* double quote: " */ #define ES 0x0080 /* escape char: \ */ #define NL 0x0100 /* newline: \n */ #define EF 0x0200 /* EOF */ #define SK 0x0400 /* Make these chars valid for names within strings */ #define VH 0x0800 /* VHIL comment introduction char: # */ #define WS 0x1000 /* White space characters */ /* character class membership macros: */ #define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */ #define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */ #define ISEOF(c) ((rct)[c] & (EF)) /* EOF */ #define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */ #define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */ #define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */ #define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */ /* The `BORING' classes should be skipped over until something interesting comes along... 
*/ #define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */ #define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */ #define ISCCBORING(c) (!((rct)[c] & (EF|NL))) /* C++ // comment fluff */ #define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */ #define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */ static unsigned short ctype_c[257] = { EF, /* 0 1 2 3 4 5 6 7 */ /* ----- ----- ----- ----- ----- ----- ----- ----- */ /*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /*010*/ 0, 0, NL, 0, 0, 0, 0, 0, /*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1, /*050*/ 0, 0, C2, 0, 0, 0, 0, C1, /*060*/ DG, DG, DG, DG, DG, DG, DG, DG, /*070*/ DG, DG, 0, 0, 0, 0, 0, 0, /*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1, /*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*160*/ I1, I1, I1, I1, I1, I1, I1, I1, /*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, /* FIXME: latin-1 */ }; struct args_c { int strip_underscore; unsigned short *ctype; }; static struct args_c args_c = { 0, ctype_c }; static struct option const long_options_c[] = { { "keep", required_argument, 0, 'k' }, { "ignore", required_argument, 0, 'i' }, { "strip-underscore", no_argument, 0, 'u' }, { 0 } }; static void help_me_c (void) { printf (_("\ C language:\n\ -k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\ -i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\ -u,--strip-underscore Strip a leading underscore from single-token strings\n\ ")); } static void * parse_args_c (char **argv, int argc) { char *tmp_string = 0; struct args_c *args; if (argv == 0 || *argv == 0) return &args_c; if (argc) args = &args_c; else { tmp_string = strdup (*argv); tokenize_args_string (tmp_string, &argc, &argv); args = MALLOC (struct 
args_c, 1); args->strip_underscore = 0; args->ctype = ctype_c; } optind = 0; for (;;) { int optc = getopt_long (argc, argv, "k:i:u", long_options_c, (int *) 0); if (optc < 0) break; if ((optc == 'k' || optc == 'i') && args->ctype == ctype_c) args->ctype = CLONE (ctype_c, unsigned short, cardinalityof (ctype_c)); switch (optc) { case 'k': set_ushort_ctype (args->ctype, optarg, SK); break; case 'i': clear_ushort_ctype (args->ctype, optarg, SK); break; case 'u': args->strip_underscore = 1; break; default: usage (); } } if (tmp_string) { free (argv); free (tmp_string); } return args; } /* Grab the next identifier from the C source file. This state machine is built for speed, not elegance. */ static struct token * get_token_c (FILE *in_FILE, void const *args, int *flags) { #define ARGS ((struct args_c *) args) static int new_line = 1; unsigned short *rct = &ARGS->ctype[1]; char id_0[BUFSIZ]; char *id = id_0; int c; obstack_blank (&tokens_obstack, offsetof (struct token, tok_name)); top: c = getc (in_FILE); if (new_line) { new_line = 0; if (c != '#') goto next; c = getc (in_FILE); while (ISBORING (c)) c = getc (in_FILE); if (!ISID1ST (c)) goto next; id = id_0; *id++ = c; while (ISIDREST (c = getc (in_FILE))) *id++ = c; *id = '\0'; if (strequ (id_0, "include")) { while (c == ' ' || c == '\t') c = getc (in_FILE); if (c == '\n') { new_line = 1; goto top; } id = id_0; if (c == '"') { c = getc (in_FILE); while (c != '\n' && c != EOF && c != '"') { *id++ = c; c = getc (in_FILE); } *flags = TOK_STRING; } else if (c == '<') { c = getc (in_FILE); while (c != '\n' && c != EOF && c != '>') { *id++ = c; c = getc (in_FILE); } *flags = TOK_STRING; } else if (ISID1ST (c)) { *id++ = c; while (ISIDREST (c = getc (in_FILE))) *id++ = c; *flags = TOK_NAME; } else { while (c != '\n' && c != EOF) c = getc (in_FILE); new_line = 1; goto top; } while (c != '\n' && c != EOF) c = getc (in_FILE); new_line = 1; obstack_grow0 (&tokens_obstack, id_0, id - id_0); return obstack_finish 
(&tokens_obstack); } if (strnequ (id_0, "if", 2) || strequ (id_0, "define") || strequ (id_0, "elif") /* ansi C */ || strequ (id_0, "undef")) goto next; while ((c != '\n') && (c != EOF)) c = getc (in_FILE); new_line = 1; goto top; } next: while (ISBORING (c)) c = getc (in_FILE); switch (c) { case '"': id = id_0; *id++ = c = getc (in_FILE); for (;;) { while (ISQ2BORING (c)) *id++ = c = getc (in_FILE); if (c == '\\') { *id++ = c = getc (in_FILE); continue; } else if (c != '"') goto next; break; } *--id = '\0'; id = id_0; while (ISSTRKEEP (*id)) id++; if (*id || id == id_0) { c = getc (in_FILE); goto next; } *flags = TOK_STRING; if (ARGS->strip_underscore && id_0[0] == '_' && id_0[1]) obstack_grow0 (&tokens_obstack, id_0 + 1, id - id_0 - 1); else obstack_grow0 (&tokens_obstack, id_0, id - id_0); return obstack_finish (&tokens_obstack); case '\'': c = getc (in_FILE); for (;;) { while (ISQ1BORING (c)) c = getc (in_FILE); if (c == '\\') { c = getc (in_FILE); continue; } else if (c == '\'') c = getc (in_FILE); goto next; } case '/': c = getc (in_FILE); if (c == '/') { /* Cope with C++ comment */ while (ISCCBORING (c)) c = getc (in_FILE); new_line = 1; goto top; } else if (c != '*') goto next; c = getc (in_FILE); for (;;) { while (ISCBORING (c)) c = getc (in_FILE); c = getc (in_FILE); if (c == '/') { c = getc (in_FILE); goto next; } else if (ISEOF (c)) { new_line = 1; obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); return 0; } } case '\n': new_line = 1; goto top; default: if (ISEOF (c)) { new_line = 1; obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); return 0; } id = id_0; *id++ = c; if (ISID1ST (c)) { *flags = TOK_NAME; while (ISIDREST (c = getc (in_FILE))) *id++ = c; } else if (ISDIGIT (c)) { *flags = TOK_NUMBER; while (ISNUMBER (c = getc (in_FILE))) *id++ = c; } else { if (isprint (c)) fprintf (stderr, _("junk: `%c'"), c); else fprintf (stderr, _("junk: `\\%03o'"), c); } ungetc (c, in_FILE); *flags |= TOK_LITERAL; obstack_grow0 
(&tokens_obstack, id_0, id - id_0); return obstack_finish (&tokens_obstack); } #undef ARGS } #undef I1 #undef DG #undef NM #undef C1 #undef C2 #undef Q1 #undef Q2 #undef ES #undef NL #undef EF #undef SK #undef VH #undef WS #undef ISDIGIT #undef ISNUMBER #undef ISEOF #undef ISID1ST #undef ISIDREST #undef ISSTRKEEP #undef ISSPACE #undef ISBORING #undef ISCBORING #undef ISCCBORING #undef ISQ1BORING #undef ISQ2BORING /*************** Assembly ***************************************************/ #define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ #define NM 0x02 /* digit [0-9a-fA-FxX] */ #define NL 0x04 /* newline: \n */ #define CM 0x08 /* assembler comment char: usually # or | */ #define IG 0x10 /* ignore `identifiers' with these chars in them */ #define C1 0x20 /* C comment introduction char: / */ #define C2 0x40 /* C comment termination char: * */ #define EF 0x80 /* EOF */ /* Assembly Language character classes */ #define ISID1ST(c) ((rct)[c] & (I1)) #define ISIDREST(c) ((rct)[c] & (I1|NM)) #define ISNUMBER(c) ((rct)[c] & (NM)) #define ISEOF(c) ((rct)[c] & (EF)) #define ISCOMMENT(c) ((rct)[c] & (CM)) #define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1))) #define ISCBORING(c) (!((rct)[c] & (EF|NL))) #define ISCCBORING(c) (!((rct)[c] & (EF|C2))) #define ISIGNORE(c) ((rct)[c] & (IG)) static unsigned char ctype_asm[257] = { EF, /* 0 1 2 3 4 5 6 7 */ /* ----- ----- ----- ----- ----- ----- ----- ----- */ /*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /*010*/ 0, 0, NL, 0, 0, 0, 0, 0, /*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /*040*/ 0, 0, 0, 0, 0, 0, 0, 0, /*050*/ 0, 0, C2, 0, 0, 0, 0, C1, /*060*/ NM, NM, NM, NM, NM, NM, NM, NM, /*070*/ NM, NM, 0, 0, 0, 0, 0, 0, /*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, /*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*160*/ I1, I1, I1, I1, 
I1, I1, I1, I1, /*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, }; struct args_asm { int handle_cpp; int strip_underscore; unsigned char *ctype; }; static struct args_asm args_asm = { 1, 0, ctype_asm }; static struct option const long_options_asm[] = { { "comment", required_argument, 0, 'c' }, { "keep", required_argument, 0, 'k' }, { "ignore", required_argument, 0, 'i' }, { "strip-underscore", no_argument, 0, 'u' }, { "no-cpp", no_argument, 0, 'p' }, { 0 } }; static void help_me_asm (void) { printf (_("\ Assembly language:\n\ -c,--comment=CHARS Any of CHARS starts a comment until end-of-line\n\ -k,--keep=CHARS Allow CHARS in tokens, and keep the result\n\ -i,--ignore=CHARS Allow CHARS in tokens, and toss the result\n\ -u,--strip-underscore Strip a leading underscore from tokens\n\ -n,--no-cpp Don't handle C pre-processor directives\n\ ")); } static void * parse_args_asm (char **argv, int argc) { char *tmp_string = 0; struct args_asm *args; if (argv == 0 || *argv == 0) return &args_asm; if (argc) args = &args_asm; else { tmp_string = strdup (*argv); tokenize_args_string (tmp_string, &argc, &argv); args = MALLOC (struct args_asm, 1); args->strip_underscore = 0; args->ctype = ctype_asm; } optind = 0; for (;;) { int optc = getopt_long (argc, argv, "c:k:i:un", long_options_asm, (int *) 0); if (optc < 0) break; if ((optc == 'k' || optc == 'i' || optc == 'c') && args->ctype == ctype_asm) args->ctype = CLONE (ctype_asm, unsigned char, cardinalityof (ctype_asm)); switch (optc) { case 'c': set_uchar_ctype (args->ctype, optarg, CM); break; case 'k': set_uchar_ctype (args->ctype, optarg, I1); break; case 'i': set_uchar_ctype (args->ctype, optarg, I1 | IG); break; case 'u': args->strip_underscore = 1; break; case 'n': args->handle_cpp = 0; break; default: usage (); } } if (tmp_string) { free (argv); free (tmp_string); } return args; } /* Grab the next identifier the assembly language source file. This state machine is built for speed, not elegance. 
*/ static struct token * get_token_asm (FILE *in_FILE, void const *args, int *flags) { #define ARGS ((struct args_asm *) args) static int new_line = 1; unsigned char *rct = &ARGS->ctype[1]; char id_0[BUFSIZ]; char *id = id_0; int c; obstack_blank (&tokens_obstack, offsetof (struct token, tok_name)); top: c = getc (in_FILE); if (ARGS->handle_cpp > 0 && new_line) { new_line = 0; if (c != '#') goto next; while (ISBORING (c)) c = getc (in_FILE); if (!ISID1ST (c)) goto next; id = id_0; *id++ = c; while (ISIDREST (c = getc (in_FILE))) *id++ = c; *id = '\0'; if (strequ (id_0, "include")) { while (c != '"' && c != '<') c = getc (in_FILE); id = id_0; *id++ = c = getc (in_FILE); while ((c = getc (in_FILE)) != '"' && c != '>') *id++ = c; *flags = TOK_STRING; obstack_grow0 (&tokens_obstack, id_0, id - id_0); return obstack_finish (&tokens_obstack); } if (strnequ (id_0, "if", 2) || strequ (id_0, "define") || strequ (id_0, "undef")) goto next; while (c != '\n') c = getc (in_FILE); new_line = 1; goto top; } next: while (ISBORING (c)) c = getc (in_FILE); if (ISCOMMENT (c)) { while (ISCBORING (c)) c = getc (in_FILE); new_line = 1; } if (ISEOF (c)) { new_line = 1; obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); return 0; } if (c == '\n') { new_line = 1; goto top; } if (c == '/') { if ((c = getc (in_FILE)) != '*') goto next; c = getc (in_FILE); for (;;) { while (ISCCBORING (c)) c = getc (in_FILE); c = getc (in_FILE); if (c == '/') { c = getc (in_FILE); break; } else if (ISEOF (c)) { new_line = 1; obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); return 0; } } goto next; } id = id_0; if (ARGS->strip_underscore && c == '_' && !ISID1ST (c = getc (in_FILE))) { obstack_grow0 (&tokens_obstack, "_", 1); return obstack_finish (&tokens_obstack); } *id++ = c; if (ISID1ST (c)) { *flags = TOK_NAME; while (ISIDREST (c = getc (in_FILE))) *id++ = c; } else if (ISNUMBER (c)) { *flags = TOK_NUMBER; while (ISNUMBER (c = getc (in_FILE))) *id++ = c; } else { if (isprint 
(c)) fprintf (stderr, _("junk: `%c'"), c); else fprintf (stderr, _("junk: `\\%03o'"), c); goto next; } *id = '\0'; for (id = id_0; *id; id++) if (ISIGNORE (*id)) goto next; ungetc (c, in_FILE); *flags |= TOK_LITERAL; obstack_grow0 (&tokens_obstack, id_0, id - id_0); return obstack_finish (&tokens_obstack); #undef ARGS } #undef I1 #undef NM #undef NL #undef CM #undef IG #undef C1 #undef C2 #undef EF #undef ISID1ST #undef ISIDREST #undef ISNUMBER #undef ISEOF #undef ISCOMMENT #undef ISBORING #undef ISCBORING #undef ISCCBORING #undef ISIGNORE /*************** Text *******************************************************/ #define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ #define NM 0x02 /* digit [0-9a-fA-FxX] */ #define SQ 0x04 /* squeeze these out (.,',-) */ #define EF 0x80 /* EOF */ /* Text character classes */ #define ISID1ST(c) ((rct)[c] & (I1)) #define ISIDREST(c) ((rct)[c] & (I1|NM|SQ)) #define ISNUMBER(c) ((rct)[c] & (NM)) #define ISEOF(c) ((rct)[c] & (EF)) #define ISBORING(c) (!((rct)[c] & (I1|NM|EF))) #define ISIDSQUEEZE(c) ((rct)[c] & (SQ)) static unsigned char ctype_text[257] = { EF, /* 0 1 2 3 4 5 6 7 */ /* ----- ----- ----- ----- ----- ----- ----- ----- */ /*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /*010*/ 0, 0, 0, 0, 0, 0, 0, 0, /*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /*040*/ 0, 0, 0, 0, 0, 0, 0, 0, /*050*/ 0, 0, 0, 0, 0, 0, 0, 0, /*060*/ NM, NM, NM, NM, NM, NM, NM, NM, /*070*/ NM, NM, 0, 0, 0, 0, 0, 0, /*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, /*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*160*/ I1, I1, I1, I1, I1, I1, I1, I1, /*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, /*200*/ 0, 0, 0, 0, 0, 0, 0, 0, /*210*/ 0, 0, 0, 0, 0, 0, 0, 0, /*220*/ 0, 0, 0, 0, 0, 0, 0, 0, /*230*/ 0, 0, 0, 0, 0, 0, 0, 0, /*240*/ 0, 0, 0, 0, 0, 0, 0, 0, /*250*/ 0, 0, 0, 0, 0, 0, 
0, 0, /*260*/ 0, 0, 0, 0, 0, 0, 0, 0, /*270*/ 0, 0, 0, 0, 0, 0, 0, 0, /*300*/ I1, I1, I1, I1, I1, I1, I1, I1, /*310*/ I1, I1, I1, I1, I1, I1, I1, I1, /*320*/ I1, I1, I1, I1, I1, I1, I1, 0, /*330*/ I1, I1, I1, I1, I1, I1, I1, I1, /*340*/ I1, I1, I1, I1, I1, I1, I1, I1, /*350*/ I1, I1, I1, I1, I1, I1, I1, I1, /*360*/ I1, I1, I1, I1, I1, I1, I1, 0, /*370*/ I1, I1, I1, I1, I1, I1, I1, I1, }; struct args_text { unsigned char *ctype; }; static struct args_text args_text = { ctype_text }; static struct option const long_options_text[] = { { "include", required_argument, 0, 'i' }, { "exclude", required_argument, 0, 'x' }, { 0 } }; static void help_me_text (void) { printf (_("\ Text language:\n\ -i,--include=CHAR-CLASS Include characters from CHAR-CLASS in tokens\n\ -x,--exclude=CHAR-CLASS Exclude characters from CHAR-CLASS from tokens\n\ ")); } static void * parse_args_text (char **argv, int argc) { char *tmp_string = 0; struct args_text *args; if (argv == 0 || *argv == 0) return &args_text; if (argc) args = &args_text; else { tmp_string = strdup (*argv); tokenize_args_string (tmp_string, &argc, &argv); args = MALLOC (struct args_text, 1); args->ctype = ctype_text; } optind = 0; for (;;) { int optc = getopt_long (argc, argv, "i:x:", long_options_text, (int *) 0); if (optc < 0) break; if ((optc == 'k' || optc == 'i') && args->ctype == ctype_text) args->ctype = CLONE (ctype_text, unsigned char, cardinalityof (ctype_text)); switch (optc) { case 'i': set_uchar_ctype (args->ctype, optarg, I1); break; case 'x': clear_uchar_ctype (args->ctype, optarg, I1); break; default: usage (); } } if (tmp_string) { free (argv); free (tmp_string); } return args; } /* Grab the next identifier the text source file. This state machine is built for speed, not elegance. 
*/

/* ARGS is a `struct args_text *'.  The token is built on tokens_obstack
   and returned; *FLAGS receives its kind.  At end of file the partial
   object is released and 0 is returned.  A token is a run of ISIDREST
   characters that starts with an ISID1ST character (SQ-class characters
   inside it are accepted but not copied), or a run of ISNUMBER
   characters.

   Token text is collected in a BUFSIZ buffer; characters that do not fit
   are dropped.  */

static struct token *
get_token_text (FILE *in_FILE, void const *args, int *flags)
{
#define ARGS ((struct args_text *) args)
/* Append CH to the buffer if there is still room for it and a final NUL.
   CH must have no side effects: it is not evaluated when the buffer is
   full.  */
#define STORE(ch) do { if (id < id_end) *id++ = (ch); } while (0)

  static char id_0[BUFSIZ];
  char *const id_end = id_0 + sizeof (id_0) - 1;
  unsigned char *rct = &ARGS->ctype[1];
  int c;
  char *id = id_0;

  obstack_blank (&tokens_obstack, offsetof (struct token, tok_name));

top:
  c = getc (in_FILE);
  while (ISBORING (c))
    c = getc (in_FILE);
  if (ISEOF (c))
    {
      obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
      return 0;
    }
  id = id_0;
  STORE (c);
  if (ISID1ST (c))
    {
      *flags = TOK_NAME;
      while (ISIDREST (c = getc (in_FILE)))
	if (!ISIDSQUEEZE (c))
	  STORE (c);
    }
  else if (ISNUMBER (c))
    {
      *flags = TOK_NUMBER;
      while (ISNUMBER (c = getc (in_FILE)))
	STORE (c);
    }
  else
    {
      if (isprint (c))
	fprintf (stderr, _("junk: `%c'"), c);
      else
	fprintf (stderr, _("junk: `\\%03o'"), c);
      goto top;
    }

  /* C is the character that ended the token; let the next call see it.  */
  ungetc (c, in_FILE);
  *flags |= TOK_LITERAL;
  obstack_grow0 (&tokens_obstack, id_0, id - id_0);
  return obstack_finish (&tokens_obstack);
#undef STORE
#undef ARGS
}

#undef I1
#undef NM
#undef SQ
#undef EF
#undef ISID1ST
#undef ISIDREST
#undef ISNUMBER
#undef ISEOF
#undef ISBORING
#undef ISIDSQUEEZE