/* scanners.c -- file & directory name manipulations Copyright (C) 1986-2014 Free Software Foundation, Inc. Written by Greg McGary This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include #include #include "xnls.h" #include "scanners.h" #include "tokflags.h" #include "iduglobal.h" static struct obstack lang_args_obstack; struct lang_args *lang_args_default = 0; struct lang_args *lang_args_list = 0; struct obstack tokens_obstack; size_t log_8_member_files = 0; extern void usage (void) __attribute__((__noreturn__)); extern char *program_name; /****************************************************************************/ static struct lang_args **parse_language_map_file (char const *file_name, struct lang_args **next_ptr); static char *read_language_map_file (char const *file_name); static void tokenize_args_string (char *args_string, int *argcp, char ***argvp); static struct token *get_token_c (FILE *in_FILE, void const *args, int *flags); static void *parse_args_c (char **argv, int argc); static void help_me_c (void); static void help_me_cpp (void); static void help_me_java (void); static struct token *get_token_asm (FILE *in_FILE, void const *args, int *flags); static void *parse_args_asm (char **argv, int argc); static void help_me_asm (void); static struct token *get_token_text (FILE *in_FILE, void const *args, int *flags); static void *parse_args_text (char **argv, 
int argc); static void help_me_text (void); static struct token *get_token_perl (FILE *in_FILE, void const *args, int *flags); static void *parse_args_perl (char **argv, int argc); static void help_me_perl (void); static struct token *get_token_lisp (FILE *in_FILE, void const *args, int *flags); static void *parse_args_lisp (char **argv, int argc); static void help_me_lisp (void); static struct language languages_0[] = { { "C", parse_args_c, get_token_c, help_me_c }, { "C++", parse_args_c, get_token_c, help_me_cpp }, { "Java", parse_args_c, get_token_c, help_me_java }, { "asm", parse_args_asm, get_token_asm, help_me_asm }, { "text", parse_args_text, get_token_text, help_me_text }, { "perl", parse_args_perl, get_token_perl, help_me_perl }, { "lisp", parse_args_lisp, get_token_lisp, help_me_lisp } }; static struct language const *languages_N = &languages_0[cardinalityof (languages_0)]; void language_help_me (void) { const struct language *lang; for (lang = languages_0; lang < languages_N; lang++) { putchar ('\n'); (*lang->lg_help_me) (); } } void language_save_arg (char *arg) { char *lang_name = strsep (&arg, ":"); struct language *lang = get_language (lang_name); if (lang == NULL) { error (0, 0, _("unrecognized language: `%s'"), lang_name); usage (); } if (lang->lg_argc == 0) lang->lg_argv[lang->lg_argc++] = program_name; lang->lg_argv[lang->lg_argc++] = arg; } void language_getopt (void) { const struct language *lang; for (lang = languages_0; lang < languages_N; lang++) if (lang->lg_argc) lang->lg_parse_args ((char**)(lang->lg_argv), lang->lg_argc); } struct language * get_language (char const *lang_name) { const struct language *lang; for (lang = languages_0; lang < languages_N; lang++) if (strequ (lang_name, lang->lg_name)) { DEBUG (("lang=%s", lang_name)); return (struct language *) lang; } DEBUG (("!lang=%s", lang_name)); return 0; } /****************************************************************************/ static int lang_args_index = 0; void 
set_default_language (char const *lang_name) { struct lang_args *new_args; const struct language *lang; obstack_init (&lang_args_obstack); new_args = obstack_alloc (&lang_args_obstack, sizeof *new_args); new_args->la_pattern = obstack_copy0 (&lang_args_obstack, lang_name, strlen(lang_name)+1); new_args->la_args_string = 0; new_args->la_next = 0; lang = new_args->la_language = get_language (lang_name); new_args->la_args_digested = (lang ? lang->lg_parse_args (&new_args->la_args_string, 0) : 0); lang_args_default = new_args; DEBUG ((", \n")); } void parse_language_map (char const *file_name) { if (obstack_init (&lang_args_obstack) == 0) error (EXIT_FAILURE, 0, _("can't allocate language args obstack: memory exhausted")); if (file_name == 0) file_name = LANGUAGE_MAP_FILE; parse_language_map_file (file_name, &lang_args_list); } static struct lang_args ** parse_language_map_file (char const *file_name, struct lang_args **next_ptr) { static char white_space[] = " \t\r\n\v\f"; static char horizontal_space[] = " \t"; static char vertical_space[] = "\r\n\v\f"; char *lang_map_buffer; char *lmp; lmp = lang_map_buffer = read_language_map_file (file_name); for (;;) { struct lang_args *new_args; struct language const *lang; int pattern_size; char *lang_name; int space; /* Skip leading white space and full-line comments */ while (*lmp) { lmp += strspn (lmp, white_space); if (*lmp != '#') break; lmp += strcspn (lmp, vertical_space); } if (*lmp == '\0') break; pattern_size = strcspn (lmp, white_space); if (pattern_size == 3 && strnequ (lmp, "***", 3)) { lmp += pattern_size; lmp += strspn (lmp, horizontal_space); if (isspace (*lmp)) next_ptr = parse_language_map_file (LANGUAGE_MAP_FILE, next_ptr); else { char *end = lmp + strcspn (lmp, white_space); *end = '\0'; next_ptr = parse_language_map_file (lmp, next_ptr); lmp = end + 1; } continue; } new_args = obstack_alloc (&lang_args_obstack, sizeof *new_args); if (new_args == 0) error (EXIT_FAILURE, 0, _("can't allocate language args: 
memory exhausted")); new_args->la_pattern = obstack_copy0 (&lang_args_obstack, lmp, pattern_size); new_args->la_args_string = 0; new_args->la_next = 0; lmp += pattern_size; lmp += strspn (lmp, horizontal_space); if (isspace (*lmp)) { error (0, 0, _("language name expected following `%s' in file `%s'"), new_args->la_pattern, file_name); obstack_free (&lang_args_obstack, new_args); continue; } lang_name = lmp; lmp += strcspn (lmp, white_space); space = *lmp; *lmp++ = '\0'; lmp += strspn (lmp, horizontal_space); lang = new_args->la_language = get_language (lang_name); if (*lmp == '#') lmp += strcspn (lmp, vertical_space); else if (!isspace (*lmp) && (space == ' ' || space == '\t')) { int args_size = strcspn (lmp, vertical_space); new_args->la_args_string = obstack_copy0 (&lang_args_obstack, lmp, args_size); lmp += args_size; } new_args->la_args_digested = (lang ? lang->lg_parse_args (&new_args->la_args_string, 0) : 0); if (pattern_size == 2 && strnequ (new_args->la_pattern, "**", 2)) { if (lang_args_default) { obstack_free (&lang_args_obstack, new_args); continue; } lang_args_default = new_args; DEBUG ((", ")); } else { new_args->la_index = lang_args_index++; *next_ptr = new_args; next_ptr = &new_args->la_next; } DEBUG ((", pat=%s\n", new_args->la_pattern)); } free (lang_map_buffer); return next_ptr; } static char * read_language_map_file (char const *file_name) { int map_fd; char *lang_map_buffer; struct stat st; int bytes; map_fd = open (file_name, O_RDONLY); if (map_fd < 0) error (EXIT_FAILURE, errno, _("can't open language map file `%s'"), file_name); if (fstat (map_fd, &st) < 0) error (EXIT_FAILURE, errno, _("can't get size of map file `%s'"), file_name); lang_map_buffer = xmalloc (sizeof(char) * (st.st_size + 2)); if (lang_map_buffer == 0) error (EXIT_FAILURE, 0, _("can't allocate language args: memory exhausted")); lang_map_buffer[st.st_size] = '\n'; lang_map_buffer[st.st_size+1] = '\0'; bytes = read (map_fd, lang_map_buffer, st.st_size); if (bytes < 0) error 
(EXIT_FAILURE, errno, _("can't read language map file `%s'"), file_name); /* FIXME: handle interrupted & partial reads */ if (bytes != st.st_size) error (EXIT_FAILURE, errno, _("can't read entire language map file `%s'"), file_name); close (map_fd); return lang_map_buffer; } /****************************************************************************/ static void tokenize_args_string (char *args_string, int *argcp, char ***argvp) { static char horizontal_space[] = " \t"; char **argv_0 = xmalloc (sizeof(char *) * strlen (args_string) / 2); char **argv = argv_0; char *arg; *argv++ = program_name; arg = strsep (&args_string, horizontal_space); while (arg) { *argv++ = arg; arg = strsep (&args_string, horizontal_space); } *argcp = argv - argv_0; *argvp = xnrealloc (argv_0, *argcp, sizeof **argvp); } static void set_ushort_ctype (unsigned short *ctype, char const *chars, int type) { unsigned short *rct = &ctype[1]; unsigned char const *uc = (unsigned char const *) chars; while (*uc) rct[*uc++] |= type; } static void clear_ushort_ctype (unsigned short *ctype, char const *chars, int type) { unsigned short *rct = &ctype[1]; unsigned char const *uc = (unsigned char const *) chars; while (*uc) rct[*uc++] &= ~type; } static void set_uchar_ctype (unsigned char *ctype, char const *chars, int type) { unsigned char *rct; unsigned char const *uc; rct = &ctype[1]; uc = (unsigned char const *) chars; while (*uc) rct[*uc++] |= type; } static void clear_uchar_ctype (unsigned char *ctype, char const *chars, int type) { unsigned char *rct; unsigned char const *uc; rct = &ctype[1]; uc = (unsigned char const *) chars; while (*uc) rct[*uc++] &= ~type; } /*************** C, C++ & Java ***********************************************/ #define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */ #define DG 0x0002 /* decimal digit [0-9] */ #define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlLuU.] 
*/
#define C1 0x0008 /* C comment introduction char: / */
#define C2 0x0010 /* C comment termination char: * */
#define Q1 0x0020 /* single quote: ' */
#define Q2 0x0040 /* double quote: " */
#define ES 0x0080 /* escape char: \ */
#define NL 0x0100 /* newline: \n */
#define EF 0x0200 /* EOF */
#define SK 0x0400 /* Make these chars valid for names within strings */
#define VH 0x0800 /* VHIL comment introduction char: # */

/* character class membership macros: */
/* All of them index through a local pointer named `rct', which the
   scanners set to &ctype[1].  */
#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */
#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */
#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */
#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */
#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */
#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */

/* The `BORING' classes should be skipped over until something
   interesting comes along... */
#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /* fluff */
#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */
#define ISCCBORING(c) (!((rct)[c] & (EF|NL))) /* C++ // comment fluff */
#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */
#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */

/* Character classes for the C scanner.  The first element carries EF and
   is followed by one element per character code, laid out in rows of
   eight labelled with the octal code of the row's first character.  Only
   the rows for codes 000-177 are initialised explicitly.  */
static unsigned short ctype_c[257] =
{
  EF,
/*        0      1      2      3      4      5      6      7   */
/*      -----  -----  -----  -----  -----  -----  -----  ----- */
/*000*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*010*/ 0,     0,     NL,    0,     0,     0,     0,     0,
/*020*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*030*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*040*/ 0,     0,     Q2,    0,     0,     0,     0,     Q1,
/*050*/ 0,     0,     C2,    0,     0,     0,     NM,    C1,
/*060*/ DG,    DG,    DG,    DG,    DG,    DG,    DG,    DG,
/*070*/ DG,    DG,    0,     0,     0,     0,     0,     0,
/*100*/ 0,     I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*110*/ I1,    I1,    I1,    I1,    I1|NM, I1,    I1,    I1,
/*120*/ I1,    I1,    I1,    I1,    I1,    I1|NM, I1,    I1,
/*130*/ I1|NM, I1,    I1,    0,     ES,    0,     0,     I1,
/*140*/ 0,     I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*150*/ I1,    I1,    I1,    I1,    I1|NM, I1,    I1,    I1,
/*160*/ I1,    I1,    I1,    I1,    I1,    I1|NM, I1,    I1,
/*170*/ I1|NM, I1,    I1,    0,     0,     0,     0,     0,
  /* FIXME: latin-1 */
};

/* Digested options of the C/C++/Java scanner.  */
struct args_c
{
  int strip_underscore;         /* set by -u */
  unsigned short *ctype;        /* class table; ctype_c or a private clone */
};

/* Defaults, also returned when no option string is given.  */
static struct args_c args_c = { 0, ctype_c };

static struct option const long_options_c[] =
{
  { "keep", required_argument, 0, 'k' },
  { "ignore", required_argument, 0, 'i' },
  { "strip-underscore", no_argument, 0, 'u' },
  {NULL, 0, NULL, 0}
};

/* NOTE(review): the `\ ' sequences inside the three help literals below
   look like backslash-newline continuations that were flattened in this
   copy of the file.  The literals are reproduced unchanged -- confirm the
   intended layout against the upstream source.  */

/* Print the option summary for the C scanner.  */
static void
help_me_c (void)
{
  printf (_("\ C language:\n\ -k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\ -i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\ -u,--strip-underscore Strip a leading underscore from single-token strings\n\ "));
}

/* Print the option summary for the C++ scanner.  */
static void
help_me_cpp (void)
{
  printf (_("\ C++ language:\n\ -k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\ -i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\ -u,--strip-underscore Strip a leading underscore from single-token strings\n\ "));
}

/* Print the option summary for the Java scanner.  */
static void
help_me_java (void)
{
  printf (_("\ Java language:\n\ -k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\ -i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\ -u,--strip-underscore Strip a leading underscore from single-token strings\n\ "));
}

/* Digest the options of the C scanner and return a struct args_c.

   - ARGV null, or ARGV[0] null: return the shared defaults.
   - ARGC nonzero: ARGV is a ready-made argument vector and the options
     are applied to the shared defaults.
   - ARGC zero: ARGV[0] is one option string; it is duplicated, split by
     tokenize_args_string and applied to a newly allocated struct.

   The class table is cloned before the first -k or -i modifies it, so
   ctype_c itself is never changed.  An unknown option calls usage ().  */
static void *
parse_args_c (char **argv, int argc)
{
  char *tmp_string = 0;
  struct args_c *args;

  if (argv == 0 || *argv == 0)
    return &args_c;

  if (argc)
    args = &args_c;
  else
    {
      tmp_string = xstrdup (*argv);
      tokenize_args_string (tmp_string, &argc, &argv);
      args = xmalloc (sizeof *args);
      args->strip_underscore = 0;
      args->ctype = ctype_c;
    }

  /* Restart option scanning from the beginning of ARGV.  */
  optind = 0;
  for (;;)
    {
      int optc = getopt_long (argc, argv, "k:i:u",
                              long_options_c, (int *) 0);
      if (optc < 0)
        break;
      if ((optc == 'k' || optc == 'i') && args->ctype == ctype_c)
        args->ctype = CLONE (ctype_c, unsigned short, cardinalityof (ctype_c));
      switch (optc)
        {
        case 'k':
          set_ushort_ctype (args->ctype, optarg, SK);
          break;

        case 'i':
          clear_ushort_ctype (args->ctype, optarg, SK);
          break;

        case 'u':
          args->strip_underscore = 1;
          break;

        default:
          usage ();
        }
    }
  /* Only the vector and string created above are ours to release.  */
  if (tmp_string)
    {
      free (argv);
      free (tmp_string);
    }
  return args;
}

/* Scratch area in which the scanners assemble each token.  It is not
   allocated in the part of the file visible here.  */
unsigned char *scanner_buffer;

/* Handle a possible preprocessor directive at the start of a line.

   The macro expands inside a scanner and relies on that function's
   locals `c' (current character), `id', `flags', `in_FILE', `new_line'
   and `rct', and on its labels `top' and `next'.  It leaves by one of:
     - `goto top' after consuming through the end of the line;
     - `goto next' with `c' still to be scanned (no `#', or an `if...',
       `define', `elif' or `undef' directive whose operands are scanned
       as ordinary code);
     - returning the operand of `#include' as a token.  */
#define SCAN_CPP_DIRECTIVE \
do \
  { \
    new_line = 0; \
    /* Cope with leading whitespace before CPP lines */ \
    while (c == ' ' || c == '\t') \
      c = getc (in_FILE); \
    if (c == '\n') \
      { \
        new_line = 1; \
        goto top; \
      } \
    else if (c != '#') \
      goto next; \
    c = getc (in_FILE); \
    while (ISBORING (c)) \
      c = getc (in_FILE); \
    if (!ISID1ST (c)) \
      goto next; \
    id = scanner_buffer; \
    *id++ = c; \
    while (ISIDREST (c = getc (in_FILE))) \
      *id++ = c; \
    *id = '\0'; \
    if (strequ (scanner_buffer, "include")) \
      { \
        while (c == ' ' || c == '\t') \
          c = getc (in_FILE); \
        if (c == '\n') \
          { \
            new_line = 1; \
            goto top; \
          } \
        id = scanner_buffer; \
        if (c == '"') \
          { \
            c = getc (in_FILE); \
            while (c != '\n' && c != '"' && c != EOF) \
              { \
                *id++ = c; \
                c = getc (in_FILE); \
              } \
            *flags = TOK_STRING; \
          } \
        else if (c == '<') \
          { \
            c = getc (in_FILE); \
            while (c != '\n' && c != '>' && c != EOF) \
              { \
                *id++ = c; \
                c = getc (in_FILE); \
              } \
            *flags = TOK_STRING; \
          } \
        else if (ISID1ST (c)) \
          { \
            *id++ = c; \
            while (ISIDREST (c = getc (in_FILE))) \
              *id++ = c; \
            *flags = TOK_NAME; \
          } \
        else \
          { \
            while (c != '\n' && c != EOF) \
              c = getc (in_FILE); \
            new_line = 1; \
            goto top; \
          } \
        while (c != '\n' && c != EOF) \
          c = getc (in_FILE); \
        new_line = 1; \
        obstack_grow0 (&tokens_obstack, scanner_buffer, \
                       id - scanner_buffer); \
        return (struct token *) obstack_finish (&tokens_obstack); \
      } \
    if (strnequ (scanner_buffer, "if", 2) \
        || strequ (scanner_buffer, "define") \
        || strequ (scanner_buffer, "elif") /* ansi C */ \
        || strequ (scanner_buffer, "undef")) \
      goto next; \
    while ((c != '\n') && (c != EOF)) \
      c = getc (in_FILE); \
    new_line = 1; \
    goto top; \
  } while (0)

/* Grab the next identifier from the C source file. This state machine is built for speed, not elegance.
*/ static struct token * get_token_c (FILE *in_FILE, void const *args, int *flags) { #define ARGS ((struct args_c const *) args) static int new_line = 1; unsigned short const *rct = &ARGS->ctype[1]; unsigned char *id = scanner_buffer; int c; int d; obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME); top: c = getc (in_FILE); if (new_line) SCAN_CPP_DIRECTIVE; next: while (ISBORING (c)) c = getc (in_FILE); switch (c) { case '"': id = scanner_buffer; *id++ = c = getc (in_FILE); for (;;) { while (ISQ2BORING (c)) *id++ = c = getc (in_FILE); if (c == '\\') { *id++ = c = getc (in_FILE); continue; } else if (c != '"') goto next; break; } *--id = '\0'; id = scanner_buffer; while (ISSTRKEEP (d = *id)) id++; if (*id || id == scanner_buffer) { c = getc (in_FILE); goto next; } *flags = TOK_STRING; if (ARGS->strip_underscore && scanner_buffer[0] == '_' && scanner_buffer[1]) obstack_grow0 (&tokens_obstack, scanner_buffer + 1, id - scanner_buffer - 1); else obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer); return (struct token *) obstack_finish (&tokens_obstack); case '\'': c = getc (in_FILE); for (;;) { while (ISQ1BORING (c)) c = getc (in_FILE); if (c == '\\') { c = getc (in_FILE); continue; } else if (c == '\'') c = getc (in_FILE); goto next; } case '/': c = getc (in_FILE); if (c == '/') { /* Cope with C++ comment */ while (ISCCBORING (c)) c = getc (in_FILE); new_line = 1; goto top; } else if (c != '*') goto next; c = getc (in_FILE); for (;;) { while (ISCBORING (c)) c = getc (in_FILE); c = getc (in_FILE); if (c == '/') { c = getc (in_FILE); goto next; } else if (ISEOF (c)) { new_line = 1; obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); return 0; } } case '\n': new_line = 1; goto top; default: if (ISEOF (c)) { new_line = 1; obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); return 0; } id = scanner_buffer; *id++ = c; if (ISID1ST (c)) { *flags = TOK_NAME; while (ISIDREST (c = getc (in_FILE))) *id++ = c; } else if (ISDIGIT (c)) 
{ *flags = TOK_NUMBER; while (ISNUMBER (c = getc (in_FILE))) *id++ = c; } else { if (isprint (c)) fprintf (stderr, _("junk: `%c'"), c); else fprintf (stderr, _("junk: `\\%03o'"), c); } ungetc (c, in_FILE); *flags |= TOK_LITERAL; obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer); return (struct token *) obstack_finish (&tokens_obstack); } #undef ARGS } #undef I1 #undef DG #undef NM #undef C1 #undef C2 #undef Q1 #undef Q2 #undef ES #undef NL #undef EF #undef SK #undef VH #undef ISDIGIT #undef ISNUMBER #undef ISEOF #undef ISID1ST #undef ISIDREST #undef ISSTRKEEP #undef ISBORING #undef ISCBORING #undef ISCCBORING #undef ISQ1BORING #undef ISQ2BORING /*************** Assembly ***************************************************/ #define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */ #define NM 0x02 /* digit [0-9a-fA-FxX] */ #define NL 0x04 /* newline: \n */ #define CM 0x08 /* assembler comment char: usually # or | */ #define IG 0x10 /* ignore `identifiers' with these chars in them */ #define C1 0x20 /* C comment introduction char: / */ #define C2 0x40 /* C comment termination char: * */ #define EF 0x80 /* EOF */ /* Assembly Language character classes */ #define ISID1ST(c) ((rct)[c] & (I1)) #define ISIDREST(c) ((rct)[c] & (I1|NM)) #define ISNUMBER(c) ((rct)[c] & (NM)) #define ISEOF(c) ((rct)[c] & (EF)) #define ISCOMMENT(c) ((rct)[c] & (CM)) #define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1))) #define ISCBORING(c) (!((rct)[c] & (EF|NL))) #define ISCCBORING(c) (!((rct)[c] & (EF|C2))) #define ISIGNORE(c) ((rct)[c] & (IG)) static unsigned char ctype_asm[257] = { EF, /* 0 1 2 3 4 5 6 7 */ /* ----- ----- ----- ----- ----- ----- ----- ----- */ /*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /*010*/ 0, 0, NL, 0, 0, 0, 0, 0, /*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /*040*/ 0, 0, 0, 0, 0, 0, 0, 0, /*050*/ 0, 0, C2, 0, 0, 0, 0, C1, /*060*/ NM, NM, NM, NM, NM, NM, NM, NM, /*070*/ NM, NM, 0, 0, 0, 0, 0, 0, /*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, 
I1|NM, I1, /*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1, /*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*160*/ I1, I1, I1, I1, I1, I1, I1, I1, /*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, }; struct args_asm { int handle_cpp; int strip_underscore; unsigned char *ctype; }; static struct args_asm args_asm = { 1, 0, ctype_asm }; static struct option const long_options_asm[] = { { "comment", required_argument, 0, 'c' }, { "keep", required_argument, 0, 'k' }, { "ignore", required_argument, 0, 'i' }, { "strip-underscore", no_argument, 0, 'u' }, { "no-cpp", no_argument, 0, 'n' }, {NULL, 0, NULL, 0} }; static void help_me_asm (void) { printf (_("\ Assembly language:\n\ -c,--comment=CHARS Any of CHARS starts a comment until end-of-line\n\ -k,--keep=CHARS Allow CHARS in tokens, and keep the result\n\ -i,--ignore=CHARS Allow CHARS in tokens, and toss the result\n\ -u,--strip-underscore Strip a leading underscore from tokens\n\ -n,--no-cpp Don't handle C pre-processor directives\n\ ")); } static void * parse_args_asm (char **argv, int argc) { char *tmp_string = 0; struct args_asm *args; if (argv == 0 || *argv == 0) return &args_asm; if (argc) args = &args_asm; else { tmp_string = xstrdup (*argv); tokenize_args_string (tmp_string, &argc, &argv); args = xmalloc (sizeof *args); args->strip_underscore = 0; args->ctype = ctype_asm; args->handle_cpp = 1; } optind = 0; for (;;) { int optc = getopt_long (argc, argv, "c:k:i:un", long_options_asm, (int *) 0); if (optc < 0) break; if ((optc == 'k' || optc == 'i' || optc == 'c') && args->ctype == ctype_asm) args->ctype = CLONE (ctype_asm, unsigned char, cardinalityof (ctype_asm)); switch (optc) { case 'c': set_uchar_ctype (args->ctype, optarg, CM); break; case 'k': set_uchar_ctype (args->ctype, optarg, I1); break; case 'i': set_uchar_ctype (args->ctype, optarg, I1 | IG); break; case 'u': args->strip_underscore = 1; 
break; case 'n': args->handle_cpp = 0; break; default: usage (); } } if (tmp_string) { free (argv); free (tmp_string); } return args; } /* Grab the next identifier the assembly language source file. This state machine is built for speed, not elegance. */ static struct token * get_token_asm (FILE *in_FILE, void const *args, int *flags) { #define ARGS ((struct args_asm const *) args) static int new_line = 1; unsigned char const *rct = &ARGS->ctype[1]; unsigned char *id = scanner_buffer; int c, d; obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME); top: c = getc (in_FILE); if (ARGS->handle_cpp > 0 && new_line) SCAN_CPP_DIRECTIVE; next: while (ISBORING (c)) c = getc (in_FILE); if (ISCOMMENT (c)) { while (ISCBORING (c)) c = getc (in_FILE); new_line = 1; } if (ISEOF (c)) { new_line = 1; obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); return 0; } if (c == '\n') { new_line = 1; goto top; } if (c == '/') { if ((c = getc (in_FILE)) != '*') goto next; c = getc (in_FILE); for (;;) { while (ISCCBORING (c)) c = getc (in_FILE); c = getc (in_FILE); if (c == '/') { c = getc (in_FILE); break; } else if (ISEOF (c)) { new_line = 1; obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack)); return 0; } } goto next; } id = scanner_buffer; if (ARGS->strip_underscore && c == '_' && !ISID1ST (c = getc (in_FILE))) { obstack_grow0 (&tokens_obstack, "_", 1); return (struct token *) obstack_finish (&tokens_obstack); } *id++ = c; if (ISID1ST (c)) { *flags = TOK_NAME; while (ISIDREST (c = getc (in_FILE))) *id++ = c; } else if (ISNUMBER (c)) { *flags = TOK_NUMBER; while (ISNUMBER (c = getc (in_FILE))) *id++ = c; } else { if (isprint (c)) fprintf (stderr, _("junk: `%c'"), c); else fprintf (stderr, _("junk: `\\%03o'"), c); goto next; } *id = '\0'; for (id = scanner_buffer; *id; id++) if (ISIGNORE (d = *id)) goto next; ungetc (c, in_FILE); *flags |= TOK_LITERAL; obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer); return (struct token *) obstack_finish 
(&tokens_obstack);
#undef ARGS
}

#undef I1
#undef NM
#undef NL
#undef CM
#undef IG
#undef C1
#undef C2
#undef EF
#undef ISID1ST
#undef ISIDREST
#undef ISNUMBER
#undef ISEOF
#undef ISCOMMENT
#undef ISBORING
#undef ISCBORING
#undef ISCCBORING
#undef ISIGNORE

/*************** Text *******************************************************/

#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
#define NM 0x02 /* digit [0-9a-fA-FxX] */
#define SQ 0x04 /* squeeze these out (.,',-) */
#define EF 0x80 /* EOF */

/* Text character classes */
/* All of them index through a local pointer named `rct', which the
   scanner sets to &ctype[1].  */
#define ISID1ST(c) ((rct)[c] & (I1))
#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ))
#define ISNUMBER(c) ((rct)[c] & (NM))
#define ISEOF(c) ((rct)[c] & (EF))
#define ISBORING(c) (!((rct)[c] & (I1|NM|EF)))
#define ISIDSQUEEZE(c) ((rct)[c] & (SQ))

/* Character classes for the text scanner.  The first element carries EF
   and is followed by one element per character code 0-255, in rows of
   eight labelled with the octal code of the row's first character.
   Codes 0300-0377 are I1 except 0327 and 0367.  No character carries SQ
   in this table.  */
static unsigned char ctype_text[257] =
{
  EF,
/*        0      1      2      3      4      5      6      7   */
/*      -----  -----  -----  -----  -----  -----  -----  ----- */
/*000*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*010*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*020*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*030*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*040*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*050*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*060*/ NM,    NM,    NM,    NM,    NM,    NM,    NM,    NM,
/*070*/ NM,    NM,    0,     0,     0,     0,     0,     0,
/*100*/ 0,     I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*110*/ I1,    I1,    I1,    I1,    I1|NM, I1,    I1,    I1,
/*120*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    I1,
/*130*/ I1|NM, I1,    I1,    0,     0,     0,     0,     I1,
/*140*/ 0,     I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*150*/ I1,    I1,    I1,    I1,    I1|NM, I1,    I1,    I1,
/*160*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    I1,
/*170*/ I1|NM, I1,    I1,    0,     0,     0,     0,     0,
/*200*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*210*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*220*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*230*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*240*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*250*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*260*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*270*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*300*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    I1,
/*310*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    I1,
/*320*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    0,
/*330*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    I1,
/*340*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    I1,
/*350*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    I1,
/*360*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    0,
/*370*/ I1,    I1,    I1,    I1,    I1,    I1,    I1,    I1,
};

/* Digested options of the text scanner.  */
struct args_text
{
  unsigned char *ctype;         /* class table; ctype_text or a private clone */
};

/* Defaults, also returned when no option string is given.  */
static struct args_text args_text = { ctype_text };

static struct option const long_options_text[] =
{
  { "include", required_argument, 0, 'i' },
  { "exclude", required_argument, 0, 'x' },
  {NULL, 0, NULL, 0}
};

/* Print the option summary for the text scanner.
   NOTE(review): the `\ ' sequences inside the literal look like
   backslash-newline continuations that were flattened in this copy of
   the file; the literal is reproduced unchanged -- confirm against the
   upstream source.  */
static void
help_me_text (void)
{
  printf (_("\ Text language:\n\ -i,--include=CHAR-CLASS Treat characters of CHAR-CLASS as token constituents\n\ -x,--exclude=CHAR-CLASS Treat characters of CHAR-CLASS as token delimiters\n\ "));
}

/* Digest the options of the text scanner and return a struct args_text.

   - ARGV null, or ARGV[0] null: return the shared defaults.
   - ARGC nonzero: ARGV is a ready-made argument vector and the options
     are applied to the shared defaults.
   - ARGC zero: ARGV[0] is one option string; it is duplicated, split by
     tokenize_args_string and applied to a newly allocated struct.

   -i adds I1 to each character of its argument and -x removes it.  The
   class table is cloned before the first change.  An unknown option
   calls usage ().  */
static void *
parse_args_text (char **argv, int argc)
{
  char *tmp_string = 0;
  struct args_text *args;

  if (argv == 0 || *argv == 0)
    return &args_text;

  if (argc)
    args = &args_text;
  else
    {
      tmp_string = xstrdup (*argv);
      tokenize_args_string (tmp_string, &argc, &argv);
      args = xmalloc (sizeof *args);
      args->ctype = ctype_text;
    }

  /* Restart option scanning from the beginning of ARGV.  */
  optind = 0;
  for (;;)
    {
      int optc = getopt_long (argc, argv, "i:x:",
                              long_options_text, (int *) 0);
      if (optc < 0)
        break;
      if ((optc == 'x' || optc == 'i') && args->ctype == ctype_text)
        args->ctype = CLONE (ctype_text, unsigned char, cardinalityof (ctype_text));
      switch (optc)
        {
        case 'i':
          set_uchar_ctype (args->ctype, optarg, I1);
          break;

        case 'x':
          clear_uchar_ctype (args->ctype, optarg, I1);
          break;

        default:
          usage ();
        }
    }
  /* Only the vector and string created above are ours to release.  */
  if (tmp_string)
    {
      free (argv);
      free (tmp_string);
    }
  return args;
}

/* Grab the next identifier the text source file. This state machine is built for speed, not elegance.
*/
/* Returns the next word or number of IN_FILE, allocated on
   tokens_obstack, or 0 at end of file.  ARGS is the struct args_text
   produced by parse_args_text.  *FLAGS receives TOK_NAME or TOK_NUMBER
   combined with TOK_LITERAL.  Characters of class SQ are accepted inside
   a word but left out of the stored token.  */
static struct token *
get_token_text (FILE *in_FILE, void const *args, int *flags)
{
  struct args_text const *text_args = args;
  /* The class macros index through `rct'.  */
  unsigned char const *rct = &text_args->ctype[1];
  unsigned char *out;
  int ch;

  /* Reserve the token header; the name is grown behind it.  */
  obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME);

  for (;;)
    {
      /* Skip everything that can neither start a token nor end the
         file.  */
      do
        ch = getc (in_FILE);
      while (ISBORING (ch));

      if (ISEOF (ch))
        {
          obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
          return 0;
        }

      out = scanner_buffer;
      *out++ = ch;

      if (ISID1ST (ch))
        {
          *flags = TOK_NAME;
          for (ch = getc (in_FILE); ISIDREST (ch); ch = getc (in_FILE))
            {
              if (ISIDSQUEEZE (ch))
                continue;
              *out++ = ch;
            }
          break;
        }

      if (ISNUMBER (ch))
        {
          *flags = TOK_NUMBER;
          for (ch = getc (in_FILE); ISNUMBER (ch); ch = getc (in_FILE))
            *out++ = ch;
          break;
        }

      /* Neither a word nor a number: complain and start over with the
         next character.  */
      if (isprint (ch))
        fprintf (stderr, _("junk: `%c'"), ch);
      else
        fprintf (stderr, _("junk: `\\%03o'"), ch);
    }

  /* CH is the first character after the token.  */
  ungetc (ch, in_FILE);
  *flags |= TOK_LITERAL;
  obstack_grow0 (&tokens_obstack, scanner_buffer, out - scanner_buffer);
  return (struct token *) obstack_finish (&tokens_obstack);
}

#undef I1
#undef NM
#undef SQ
#undef EF
#undef ISID1ST
#undef ISIDREST
#undef ISNUMBER
#undef ISEOF
#undef ISBORING
#undef ISIDSQUEEZE

/*************** Perl *******************************************************/

#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
#define NM 0x02 /* digit [0-9a-fA-FxX] */
#define SQ 0x04 /* squeeze these out (.,',-) */
#define CM 0x08 /* Comment Character '#' */
#define EQ 0x10 /* Start of Documentation Character '=' */
#define Q1 0x20 /* Start of Single Quote ''' */
#define Q2 0x40 /* Start of Double Quote '"' */
#define EF 0x80 /* EOF */
#define NL 0x100 /* '\n' character */
#define ES 0x200 /* Escape '\\' character */

/* Perl character classes */
#define ISID1ST(c) ((rct)[c] & (I1))
#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ))
#define ISNUMBER(c) ((rct)[c] & (NM))
#define ISIDSQUEEZE(c) ((rct)[c] & (SQ))
#define ISCOMMENT(c) ((rct)[c] & (CM))
#define ISEQUALS(c) ((rct)[c] & (EQ))
#define ISQUOTE1(c) ((rct)[c] & (Q1))
#define ISQUOTE2(c) ((rct)[c] & (Q2))
#define ISNEWLINE(c) ((rct)[c] & (NL)) #define ISEOF(c) ((rct)[c] & (EF)) #define ISBORING(c) (!((rct)[c] & (I1|NL|NM|CM|EQ|Q1|Q2|ES|EF))) static unsigned short ctype_perl[257] = { EF, /* 0 1 2 3 4 5 6 7 */ /* ----- ----- ----- ----- ----- ----- ----- ----- */ /*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /*010*/ 0, 0, NL, 0, 0, 0, 0, 0, /*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /*040*/ 0, 0, Q2, CM, 0, 0, 0, Q1, /*050*/ 0, 0, 0, 0, 0, 0, 0, 0, /*060*/ NM, NM, NM, NM, NM, NM, NM, NM, /*070*/ NM, NM, 0, 0, 0, EQ, 0, 0, /*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1, /*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /*160*/ I1, I1, I1, I1, I1, I1, I1, I1, /*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0, /*200*/ 0, 0, 0, 0, 0, 0, 0, 0, /*210*/ 0, 0, 0, 0, 0, 0, 0, 0, /*220*/ 0, 0, 0, 0, 0, 0, 0, 0, /*230*/ 0, 0, 0, 0, 0, 0, 0, 0, /*240*/ 0, 0, 0, 0, 0, 0, 0, 0, /*250*/ 0, 0, 0, 0, 0, 0, 0, 0, /*260*/ 0, 0, 0, 0, 0, 0, 0, 0, /*270*/ 0, 0, 0, 0, 0, 0, 0, 0, /*300*/ I1, I1, I1, I1, I1, I1, I1, I1, /*310*/ I1, I1, I1, I1, I1, I1, I1, I1, /*320*/ I1, I1, I1, I1, I1, I1, I1, 0, /*330*/ I1, I1, I1, I1, I1, I1, I1, I1, /*340*/ I1, I1, I1, I1, I1, I1, I1, I1, /*350*/ I1, I1, I1, I1, I1, I1, I1, I1, /*360*/ I1, I1, I1, I1, I1, I1, I1, 0, /*370*/ I1, I1, I1, I1, I1, I1, I1, I1, }; struct args_perl { int exclude_dtags; unsigned short *ctype; }; static struct args_perl args_perl = { 1, ctype_perl }; static struct option const long_options_perl[] = { { "include", required_argument, 0, 'i' }, { "exclude", required_argument, 0, 'x' }, { "dtags", no_argument, 0, 'd' }, {NULL, 0, NULL, 0} }; static void help_me_perl (void) { printf (_("\ Perl language:\n\ -i,--include=CHAR-CLASS Treat characters of CHAR-CLASS as token constituents\n\ -x,--exclude=CHAR-CLASS Treat characters of CHAR-CLASS as token delimiters\n\ 
-d,--dtags Include documentation tags\n\ ")); } static void * parse_args_perl (char **argv, int argc) { char *tmp_string = 0; struct args_perl *args; if (argv == 0 || *argv == 0) return &args_perl; if (argc) args = &args_perl; else { tmp_string = xstrdup (*argv); tokenize_args_string (tmp_string, &argc, &argv); args = xmalloc (sizeof(struct args_perl)); args->exclude_dtags = 1; args->ctype = ctype_perl; } optind = 0; for (;;) { int optc = getopt_long (argc, argv, "i:x:d", long_options_perl, (int *) 0); if (optc < 0) break; if ((optc == 'x' || optc == 'i') && args->ctype == ctype_perl) args->ctype = CLONE (ctype_perl, unsigned short, cardinalityof (ctype_perl)); switch (optc) { case 'd': args->exclude_dtags = 0; break; case 'i': set_ushort_ctype (args->ctype, optarg, I1); break; case 'x': clear_ushort_ctype (args->ctype, optarg, I1); break; default: usage (); } } if (tmp_string) { free (argv); free (tmp_string); } return args; } /* Grab the next identifier the text source file. This state machine is built for speed, not elegance. 
*/
/* Returns the next name token read from IN_FILE, allocated on
   tokens_obstack, or 0 at end of input.  ARGS is the struct args_perl
   selected by parse_args_perl.  On success *FLAGS is
   TOK_NAME | TOK_LITERAL: numbers are consumed but never returned.

   STATE is a bit set built from the character-class flags CM (inside a
   '#' comment), Q1 / Q2 (inside single / double quotes) and EQ (inside
   a documentation section).  While any bit is set, characters are
   discarded instead of being turned into tokens.  STATE and SKIP_DOC
   are local and start at 0 on every call; only NEW_LINE is static and
   survives between calls.  */
static struct token *
get_token_perl (FILE *in_FILE, void const *args, int *flags)
{
#define ARGS ((struct args_perl const *) args)
  /* Non-zero while no significant character has been seen since the
     last newline; reset to 1 at end of file.  */
  static int new_line = 1;
  /* static char id_0[BUFSIZ]; */
  /* The class macros index RCT with the value returned by getc.  RCT
     starts at entry 1 of the table, so index -1 lands on entry 0, which
     ctype_perl initializes to EF (this assumes EOF == -1).  */
  unsigned short const *rct = &ARGS->ctype[1];
  int c, state = 0, skip_doc = 0;
  /* int comment = 0, d_quote = 0, s_quote = 0, equals = 0; */
  unsigned char *id = scanner_buffer;

  /* Reserve room for the token header in front of the name.  */
  obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME);

top:
  c = getc (in_FILE);
  while (ISBORING (c))
    c = getc (in_FILE);

  switch (c)
    {
    case '\n':
      /* A newline ends a '#' comment.  */
      state &= ~CM; /* comment = 0; */
      new_line = 1;
      goto top;
      break;

    case '\\':
      c = getc (in_FILE); /* Skip next character */
      new_line = 0;
      goto top;
      break;

    case '#':
      /* Skip the # if it is inside single quotes or double quotes */
      if (!(state & Q1) && !(state & Q2))
        state |= CM; /* comment = 1 */
      break;

    case '\'':
      /* Skip the single quote if it is inside a comment or double quotes */
      if (!skip_doc)
        if (!(state & CM) && !(state & Q2))
          state ^= Q1; /* s_quote = ((s_quote) ? 0 : 1); */
      break;

    case '\"':
      /* Skip the double quote if it is inside a comment or single quotes */
      if (!skip_doc)
        if (!(state & CM) && !(state & Q1))
          state ^= Q2; /* d_quote = ((d_quote == 1) ? 0 : 1); */
      break;

    case '=':
      /* An '=' that is the first significant character of a line starts
         a documentation directive, unless documentation tags were
         requested with -d.  Clearing EQ lets the directive word that
         follows be read as a token below.  */
      if (new_line && ARGS->exclude_dtags)
        {
          skip_doc = 1;
          state &= ~EQ; /* documentation = 0; */
        }
      break;

    default:
      if (ISEOF (c))
        {
          new_line = 1;
          /* Nothing to return: release the reserved header space.  */
          obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
          return 0;
        }
      break;
    }

  new_line = 0;

  /* Punctuation handled above never starts a token, and nothing does
     while inside a comment, a quoted string or documentation.  */
  if (ISCOMMENT(c) || ISQUOTE1(c) || ISQUOTE2(c) || ISNEWLINE(c) || ISEQUALS(c) || state)
    goto top;

  id = scanner_buffer;
  *id++ = c;
  if (ISID1ST (c))
    {
      *flags = TOK_NAME;
      /* Squeezable characters extend the name but are not stored.  */
      while (ISIDREST (c = getc (in_FILE)))
        if (!ISIDSQUEEZE (c))
          *id++ = c;
    }
  else if (ISNUMBER (c))
    {
      *flags = TOK_NUMBER;
      while (ISNUMBER (c = getc (in_FILE)))
        *id++ = c;
      ungetc (c, in_FILE);
      goto top; /* skip all numbers */
    }
  else
    {
      if (isprint (c))
        fprintf (stderr, _("junk: `%c'"), c);
      else
        fprintf (stderr, _("junk: `\\%03o'"), c);
      goto top;
    }

  /* C ended the name; give it back to the stream.  */
  ungetc (c, in_FILE);
  *id = '\0';

  if (skip_doc)
    {
      /* perl documentation 'perlpod' processing is done here */
      /* The word after the '=' is never returned.  "cut" ends the
         documentation; any other word sets EQ so that the text up to the
         next directive is discarded.  */
      if (strequ (scanner_buffer, "cut"))
        {
          skip_doc = 0;
        }
      else
        {
          state |= EQ; /* documentation = 1; */
        }
      goto top;
    }

  /* A lone underscore is not reported as a token.  */
  if (strequ (scanner_buffer, "_"))
    {
      goto top;
    }

  *flags |= TOK_LITERAL;
  obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
  return (struct token *) obstack_finish (&tokens_obstack);
#undef ARGS
}

#undef I1
#undef NM
#undef SQ
#undef CM
#undef EQ
#undef Q1
#undef Q2
#undef EF
#undef NL
#undef ES
#undef ISID1ST
#undef ISIDREST
#undef ISNUMBER
#undef ISIDSQUEEZE
#undef ISCOMMENT
#undef ISEQUALS
#undef ISQUOTE1
#undef ISQUOTE2
#undef ISNEWLINE
#undef ISEOF
#undef ISBORING

/*************** Lisp *******************************************************/

#define DG 0x01 /* [0-9] */
#define LT 0x02 /* [a-zA-Z] */
#define I1 0x04 /* LT | [!$%&*:/<=>?^_~] */
#define Id 0x08 /* LT | I1 | DG | [-+.@] */
#define RA 0x10 /* [bodxieBODXIE] */
#define NM 0x20 /* [-@#.0-9a-fA-FesfdlESFDL] */
#define EF 0x40 /* (eof) */

/* Lisp character classes; each macro expects a local table pointer
   named rct to be in scope.  */
#define is_DIGIT(c) ((rct)[c] & DG)
#define is_LETTER(c) ((rct)[c] & LT)
#define is_IDENT1(c) ((rct)[c] & (LT | I1))
#define is_IDENT(c) ((rct)[c] & (LT | I1 | DG | Id))
#define is_RADIX(c) ((rct)[c] & RA)
#define is_NUMBER(c) ((rct)[c] & (DG | RA | NM))

/* Character classes for Lisp.  Entry 0 is the end-of-file slot; the
   entry for character code C is at index C + 1.  Only codes 0..0177
   have initializers; the remaining entries are implicitly zero.  */
static unsigned char ctype_lisp[257] =
{
  EF,
/*        0      1      2      3      4      5      6      7   */
/*      -----  -----  -----  -----  -----  -----  -----  ----- */
/*000*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*010*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*020*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*030*/ 0,     0,     0,     0,     0,     0,     0,     0,
/*040*/ 0,     I1,    0,     NM,    I1,    I1,    I1,    0,
/*050*/ 0,     0,     I1,    Id,    0,     Id|NM, Id|NM, I1,
/*060*/ DG,    DG,    DG,    DG,    DG,    DG,    DG,    DG,
/*070*/ DG,    DG,    I1,    0,     I1,    I1,    I1,    I1,
/*100*/ Id|NM, LT|NM, LT|RA, LT|NM, LT|RA, LT|RA, LT|NM, LT,
/*110*/ LT,    LT|RA, LT,    LT,    LT|NM, LT,    LT,    LT|RA,
/*120*/ LT,    LT,    LT,    LT|NM, LT,    LT,    LT,    LT,
/*130*/ LT|RA, LT,    LT,    0,     0,     0,     I1,    I1,
/*140*/ 0,     LT|NM, LT|RA, LT|NM, LT|RA, LT|RA, LT|NM, LT,
/*150*/ LT,    LT|RA, LT,    LT,    LT|NM, LT,    LT,    LT|RA,
/*160*/ LT,    LT,    LT,    LT|NM, LT,    LT,    LT,    LT,
/*170*/ LT|RA, LT,    LT,    0,     0,     0,     I1,    0
  /* FIXME: latin-1 */
};

/* Print the option summary of the Lisp scanner on standard output; the
   scanner has no options, so only the heading is printed.  */
static void
help_me_lisp (void)
{
  printf (_("\
Lisp language:\n\
"));
}

/* The Lisp scanner takes no options: both parameters are ignored and
   NULL is returned.  */
static void *
parse_args_lisp (char **argv, int argc)
{
  return NULL;
}

/* Grab the next identifier from the lisp source file.  This state machine
   is built for speed, not elegance.
*/
/* Returns the next token read from IN_FILE, allocated on tokens_obstack,
   or 0 at end of input.  ARGS is unused.  *FLAGS is set to
   TOK_NAME | TOK_LITERAL for identifiers, TOK_NUMBER | TOK_LITERAL for
   numbers, and TOK_LITERAL alone for #\... and #!... constructs.
   Parentheses, brackets, quote characters, comments, strings and
   ?-character constants are skipped.

   NOTE(review): EF is defined for this section but is not among the
   #undef lines that follow this function, unlike the text and Perl
   sections -- confirm that is intended.  */
static struct token *
get_token_lisp (FILE *in_FILE, void const *args, int *flags)
{
  /* The class macros index RCT with the value returned by getc.  RCT
     starts at entry 1 of the table, so index -1 lands on entry 0, which
     ctype_lisp initializes to EF (this assumes EOF == -1).  */
  unsigned char const *rct = &ctype_lisp[1];
  unsigned char *id = scanner_buffer;
  int c;

  /* Reserve room for the token header in front of the name.  */
  obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME);

top:
  c = getc (in_FILE);
recheck:
  switch (c)
    {
    case EOF:
      /* Nothing to return: release the reserved header space.  */
      obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
      return 0;

    case '(': case ')':
    case '\'': case '`': /* quote, quasiquote */
      goto top;

    case ',': /* unquote */
      c = getc (in_FILE);
      if (c == '@') /* unquote-splicing */
        goto top;
      /* The character after the comma is classified like any other.  */
      goto recheck;

    case ';': /* comment */
      do {
        c = getc (in_FILE);
      } while ( (c != EOF) && (c != '\n'));
      goto top;

    case '"': /* string with/without ansi-C escapes */
    string:
      do {
        c = getc (in_FILE);
        if (c == '\\')
          {
            /* Drop the escaped character, then restart the loop so that
               an escaped double quote does not end the string.  */
            c = getc (in_FILE);
            goto string;
          }
      } while ( (c != EOF) && (c != '"'));
      goto top;

    case '?': /* character constant */
    cconstant:
      /* The first character after '?' is always consumed; further
         identifier characters are consumed too, and so is the character
         that ends the loop.  */
      do {
        c = getc(in_FILE);
        if (c == '\\')
          {
            c = getc(in_FILE);
            goto cconstant;
          }
      } while (c != EOF && is_IDENT(c));
      goto top;

    case '.':
    case '+': case '-':
      id = scanner_buffer;
      *id++ = c;
      c = getc (in_FILE);
      /* A digit after the sign or dot starts a number; after a sign,
         so do '.', 'i' and 'I'.  Anything else makes it an identifier.  */
      if (is_DIGIT (c) || (scanner_buffer[0] != '.' && (c == '.' || c == 'i' || c == 'I')))
        goto number;
      if (c != EOF)
        ungetc (c, in_FILE);
      goto ident;

    case '#':
      id = scanner_buffer;
      *id++ = c;
      c = getc (in_FILE);
      if (c == EOF)
        goto top;
      else if (is_RADIX (c))
        goto number;
      else if (c == '\\') /* #\... literal Character */
        {
          *id++ = c;
          c = getc (in_FILE);
          /* Input that ends right after "#\" has no character to name.
             Do not store EOF in the token; rescan so that the EOF case
             above cleans up and reports end of input.  */
          if (c == EOF)
            goto top;
          *id++ = c;
          if (is_LETTER (c))
            {
              /* Named characters: take the whole run of letters.  */
              while (is_LETTER (c = getc (in_FILE)))
                *id++ = c;
              if (c != EOF)
                ungetc (c, in_FILE);
            }
          *flags = TOK_LITERAL;
          obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
          return (struct token *) obstack_finish (&tokens_obstack);
        }
      else if (c == '(') /* # (...) vector vi%) */
        goto top;
      else if (c == '"') /* #"..." Bigloo: string with ansi-C escape */
        goto string;
      else if (c == '!') /* #!... Kawa key/eof/null/... */
        {
          /* NOTE(review): the '!' itself is not copied into the token,
             unlike the backslash in the #\ case -- confirm intended.  */
          while (is_LETTER (c = getc (in_FILE)))
            *id++ = c;
          if (c != EOF)
            ungetc (c, in_FILE);
          *flags = TOK_LITERAL;
          obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
          return (struct token *) obstack_finish (&tokens_obstack);
        }
      else if (c == '|') /* #|...|# Guile/Kawa multi-lines comment */
        {
          do {
            c = getc (in_FILE);
            if (c == '|')
              {
                /* Skip a run of bars; a '#' right after it ends the
                   comment.  */
                while ( (c = getc (in_FILE)) == '|')
                  ;
                if (c == '#')
                  break;
              }
          } while (c != EOF);
          goto top;
        }
      else if (c == '@') /* #@LENGTH ...^_ EMACS byte-code comment */
        {
          do {
            c = getc (in_FILE);
          } while ( (c != EOF) && (c != '\037'));
          goto top;
        }
      else if (c == '[') /* #[ ... ] EMACS byte-code object */
        goto top;
      /* Ignore invalid #-construct */
      goto top;

    case '[': case ']': /* EMACS vector aka # (...) in Scheme */
      /* EMACS vector object vs Kawa ident */
      /* rationale: Kawa ident could not start with [ nor with ] */
      goto top;

    default:
      if (is_IDENT1 (c))
        {
          id = scanner_buffer;
          *id++ = c;
        ident:
          /* Emacs end-of-vector vs Kawa ident: allow [] as a part of an ident. */
          /* The "[]" handling below is disabled by the constant 0
             condition, so the loop body runs exactly once.  */
          for (;;)
            {
              while (is_IDENT (c = getc (in_FILE)))
                *id++ = c;
              if (0 /* c == '[' */)
                {
                  c = getc (in_FILE);
                  if (c == ']')
                    {
                      *id++ = '[';
                      *id++ = ']';
                      continue;
                    }
                  if (c != EOF)
                    ungetc (c, in_FILE);
                  ungetc ('[', in_FILE);
                }
              break;
            }
          if (c != EOF)
            ungetc (c, in_FILE);
          *flags = TOK_NAME | TOK_LITERAL;
          obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
          return (struct token *) obstack_finish (&tokens_obstack);
        }
      else if (is_DIGIT (c))
        {
          id = scanner_buffer;
        number:
          /* Also entered from the sign/dot and '#' cases, which have
             already stored their prefix character.  */
          *id++ = c;
          while (is_NUMBER (c = getc (in_FILE)))
            *id++ = c;
          if (c != EOF)
            ungetc (c, in_FILE);
          *flags = TOK_NUMBER | TOK_LITERAL;
          obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
          return (struct token *) obstack_finish (&tokens_obstack);
        }
    }
  /* Any other character is ignored.  */
  goto top;
}

#undef DG
#undef LT
#undef I1
#undef Id
#undef RA
#undef NM
#undef is_DIGIT
#undef is_LETTER
#undef is_IDENT1
#undef is_IDENT
#undef is_RADIX
#undef is_NUMBER