summaryrefslogtreecommitdiffstats
path: root/libidu
diff options
context:
space:
mode:
author Claudio Fontana <sick_soul@users.sourceforge.net> 2006-02-15 04:19:20 +0000
committer Claudio Fontana <sick_soul@users.sourceforge.net> 2006-02-15 04:19:20 +0000
commit 043928f4182cd6aaca9ca0a47ee006e0a6f6227b (patch)
tree a7b5c7c32148a963422219a5c4a8e9b36abc4fdc /libidu
parent 9d3c22904a588362595d9719a5f7246264150df2 (diff)
downloadidutils-043928f4182cd6aaca9ca0a47ee006e0a6f6227b.tar.gz
idutils-043928f4182cd6aaca9ca0a47ee006e0a6f6227b.tar.bz2
idutils-043928f4182cd6aaca9ca0a47ee006e0a6f6227b.zip
* integrated java+lisp support patch (EXPERIMENTAL)
Diffstat (limited to 'libidu')
-rw-r--r-- libidu/id-lang.map 7
-rw-r--r-- libidu/scanners.c 293
2 files changed, 289 insertions, 11 deletions
diff --git a/libidu/id-lang.map b/libidu/id-lang.map
index 00a55a7..7edb860 100644
--- a/libidu/id-lang.map
+++ b/libidu/id-lang.map
@@ -52,6 +52,8 @@
*.cpp C++
*.cxx C++
+*.java Java
+
ChangeLog* Cdoc
*.[sS] asm --comment=;
@@ -71,7 +73,10 @@ ChangeLog* Cdoc
# portable object (i18n)
*.po po
-*.el elisp
+*.el lisp
+*.elc lisp
+*.lisp lisp
+*.scm lisp
*.am make
Makefile make
diff --git a/libidu/scanners.c b/libidu/scanners.c
index 3ede033..07255cc 100644
--- a/libidu/scanners.c
+++ b/libidu/scanners.c
@@ -55,6 +55,7 @@ static struct token *get_token_c (FILE *in_FILE, void const *args, int *flags);
static void *parse_args_c (char **argv, int argc);
static void help_me_c (void);
static void help_me_cpp (void);
+static void help_me_java (void);
static struct token *get_token_asm (FILE *in_FILE, void const *args, int *flags);
static void *parse_args_asm (char **argv, int argc);
@@ -68,14 +69,19 @@ static struct token *get_token_perl (FILE *in_FILE, void const *args, int *flags
static void *parse_args_perl (char **argv, int argc);
static void help_me_perl (void);
+static struct token *get_token_lisp (FILE *in_FILE, void const *args, int *flags);
+static void *parse_args_lisp (char **argv, int argc);
+static void help_me_lisp (void);
struct language languages_0[] =
{
{ "C", parse_args_c, get_token_c, help_me_c },
{ "C++", parse_args_c, get_token_c, help_me_cpp },
+ { "Java", parse_args_c, get_token_c, help_me_java },
{ "asm", parse_args_asm, get_token_asm, help_me_asm },
{ "text", parse_args_text, get_token_text, help_me_text },
- { "perl", parse_args_perl, get_token_perl, help_me_perl }
+ { "perl", parse_args_perl, get_token_perl, help_me_perl },
+ { "lisp", parse_args_lisp, get_token_lisp, help_me_lisp }
};
struct language const *languages_N = &languages_0[cardinalityof (languages_0)];
@@ -327,8 +333,11 @@ clear_ushort_ctype (unsigned short *ctype, char const *chars, int type)
static void
set_uchar_ctype (unsigned char *ctype, char const *chars, int type)
{
- unsigned char *rct = &ctype[1];
- unsigned char const *uc = (unsigned char const *) chars;
+ unsigned char *rct;
+ unsigned char const *uc;
+
+ rct = &ctype[1];
+ uc = (unsigned char const *) chars;
while (*uc)
rct[*uc++] |= type;
@@ -337,18 +346,21 @@ set_uchar_ctype (unsigned char *ctype, char const *chars, int type)
static void
clear_uchar_ctype (unsigned char *ctype, char const *chars, int type)
{
- unsigned char *rct = &ctype[1];
- unsigned char const *uc = (unsigned char const *) chars;
+ unsigned char *rct;
+ unsigned char const *uc;
+
+ rct = &ctype[1];
+ uc = (unsigned char const *) chars;
while (*uc)
rct[*uc++] &= ~type;
}
-/*************** C & C++ ****************************************************/
+/*************** C, C++ & Java ***********************************************/
#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */
#define DG 0x0002 /* decimal digit [0-9] */
-#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlL] */
+#define NM 0x0004 /* extra chars in a hex or long number [a-fA-FxXlLuU.] */
#define C1 0x0008 /* C comment introduction char: / */
#define C2 0x0010 /* C comment termination char: * */
#define Q1 0x0020 /* single quote: ' */
@@ -389,16 +401,16 @@ static unsigned short ctype_c[257] =
/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1,
-/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
+/*050*/ 0, 0, C2, 0, 0, 0, NM, C1,
/*060*/ DG, DG, DG, DG, DG, DG, DG, DG,
/*070*/ DG, DG, 0, 0, 0, 0, 0, 0,
/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
-/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*120*/ I1, I1, I1, I1, I1, I1|NM, I1, I1,
/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1,
/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
-/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
+/*160*/ I1, I1, I1, I1, I1, I1|NM, I1, I1,
/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
/* FIXME: latin-1 */
};
@@ -441,6 +453,17 @@ C++ language:\n\
"));
}
+static void
+help_me_java (void) /* Print the option summary shown for the "Java" language.
+ The text is passed through _() before printing (presumably the
+ translation macro -- confirm).  The languages_0 table pairs this help
+ routine with parse_args_c and get_token_c, i.e. Java is scanned by the
+ C scanner. */
+{
+ printf (_("\
+Java language:\n\
+ -k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\
+ -i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\
+ -u,--strip-underscore Strip a leading underscore from single-token strings\n\
+"));
+}
+
static void *
parse_args_c (char **argv, int argc)
{
@@ -1508,3 +1531,253 @@ top:
#undef ISNEWLINE
#undef ISEOF
#undef ISBORING
+
+/*************** Lisp *******************************************************/
+
+#define DG 0x01 /* decimal digit [0-9] */
+#define LT 0x02 /* ASCII letter [a-zA-Z] */
+#define I1 0x04 /* punctuation that may start an identifier [!$%&*:/<=>?^_~]; letters carry LT instead */
+#define Id 0x08 /* extra chars allowed after the first char of an identifier [-+.@] */
+#define RA 0x10 /* letter that starts a number when it directly follows '#' [bodxieBODXIE] */
+#define NM 0x20 /* extra number chars [-@#.acflsACFLS]; is_NUMBER also accepts DG and RA chars */
+#define EF 0x40 /* (eof) marker stored in slot 0 of the table; no is_* macro tests it */
+/* Classification tests.  Each expects a local `rct' pointing at
+ &ctype_lisp[1], so that an index of -1 reaches the EF slot
+ (assumes EOF == -1 -- TODO confirm for the target platforms). */
+#define is_DIGIT(c) ((rct)[c] & DG)
+#define is_LETTER(c) ((rct)[c] & LT)
+#define is_IDENT1(c) ((rct)[c] & (LT | I1))
+#define is_IDENT(c) ((rct)[c] & (LT | I1 | DG | Id))
+#define is_RADIX(c) ((rct)[c] & RA)
+#define is_NUMBER(c) ((rct)[c] & (DG | RA | NM))
+/* Character-class table for the Lisp scanner.  Element 0 is the EF
+ slot; elements 1..128 classify the characters 0000..0177 (octal).
+ Only 129 of the 257 elements have initializers, so the entries for
+ 0200..0377 are implicitly zero and match none of the is_* tests. */
+static unsigned char ctype_lisp[257] =
+{
+ EF,
+/* 0 1 2 3 4 5 6 7 */
+/* ----- ----- ----- ----- ----- ----- ----- ----- */
+/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*010*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*040*/ 0, I1, 0, NM, I1, I1, I1, 0,
+/*050*/ 0, 0, I1, Id, 0, Id|NM, Id|NM, I1,
+/*060*/ DG, DG, DG, DG, DG, DG, DG, DG,
+/*070*/ DG, DG, I1, 0, I1, I1, I1, I1,
+/*100*/ Id|NM, LT|NM, LT|RA, LT|NM, LT|RA, LT|RA, LT|NM, LT,
+/*110*/ LT, LT|RA, LT, LT, LT|NM, LT, LT, LT|RA,
+/*120*/ LT, LT, LT, LT|NM, LT, LT, LT, LT,
+/*130*/ LT|RA, LT, LT, 0, 0, 0, I1, I1,
+/*140*/ 0, LT|NM, LT|RA, LT|NM, LT|RA, LT|RA, LT|NM, LT,
+/*150*/ LT, LT|RA, LT, LT, LT|NM, LT, LT, LT|RA,
+/*160*/ LT, LT, LT, LT|NM, LT, LT, LT, LT,
+/*170*/ LT|RA, LT, LT, 0, 0, 0, I1, 0
+ /* FIXME: latin-1 (characters 0200..0377 are currently unclassified) */
+};
+/* Print the help text for the "lisp" language.  Only the heading is
+ printed; no options are listed. */
+static void
+help_me_lisp (void)
+{
+ printf (_("\
+Lisp language:\n\
+"));
+}
+/* Argument parser for the "lisp" language.  ARGV and ARGC are ignored
+ and NULL is always returned, so the scanner receives no option data. */
+static void *
+parse_args_lisp (char **argv, int argc)
+{
+ return NULL;
+}
+
+/* Grab the next token from the lisp source file IN_FILE.
+
+   Parentheses, brackets, quote/quasiquote/unquote characters, `;'
+   comments, `#|...|#' comments, `#@...^_' byte-code comments and
+   strings are skipped.  Three kinds of token are returned, each copied
+   from scanner_buffer onto tokens_obstack:
+
+     identifiers                  *FLAGS = TOK_NAME | TOK_LITERAL
+     numbers                      *FLAGS = TOK_NUMBER | TOK_LITERAL
+     `#\x' and `#!x' literals     *FLAGS = TOK_LITERAL
+
+   At end of file the partially built obstack object is released and
+   NULL is returned.  ARGS is not used.
+
+   NOTE(review): nothing in this function bounds the writes to
+   scanner_buffer; its size is defined elsewhere -- confirm that very
+   long tokens cannot overflow it.
+
+   This state machine is built for speed, not elegance.  */
+
+static struct token *
+get_token_lisp (FILE *in_FILE, void const *args, int *flags)
+{
+  /* Biased by one so that rct[EOF] lands on the EF slot of the table.  */
+  unsigned char const *rct = &ctype_lisp[1];
+  unsigned char *id = scanner_buffer;
+  int c;
+
+  obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME);
+
+ top:
+  c = getc (in_FILE);
+ recheck:
+  switch (c)
+    {
+    case EOF:
+      obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
+      return NULL;
+
+    case '(': case ')':
+    case '\'': case '`':        /* quote, quasiquote */
+      goto top;
+
+    case ',':                   /* unquote */
+      c = getc (in_FILE);
+      if (c == '@')             /* unquote-splicing */
+        goto top;
+      goto recheck;
+
+    case ';':                   /* comment, runs to end of line */
+      do
+        c = getc (in_FILE);
+      while (c != EOF && c != '\n');
+      goto top;
+
+    case '"':                   /* string with or without ansi-C escapes */
+    string:
+      for (;;)
+        {
+          c = getc (in_FILE);
+          if (c == '\\')
+            /* Swallow the escaped character so that an escaped double
+               quote does not terminate the string.  */
+            c = getc (in_FILE);
+          else if (c == '"')
+            break;
+          if (c == EOF)
+            break;
+        }
+      goto top;
+
+    case '.':
+    case '+': case '-':
+      id = scanner_buffer;
+      *id++ = c;
+      c = getc (in_FILE);
+      if (is_DIGIT (c)
+          || (scanner_buffer[0] != '.'
+              && (c == '.' || c == 'i' || c == 'I')))
+        goto number;
+      /* Not a number: rescan the lookahead as part of an identifier.  */
+      if (c != EOF)
+        ungetc (c, in_FILE);
+      goto ident;
+
+    case '#':
+      id = scanner_buffer;
+      *id++ = c;
+
+      c = getc (in_FILE);
+      if (c == EOF)
+        goto top;
+      else if (is_RADIX (c))
+        goto number;
+      else if (c == '\\')       /* #\... literal character */
+        {
+          *id++ = c;
+          c = getc (in_FILE);
+          if (c == EOF)
+            /* Truncated `#\' at end of file: do not store EOF in the
+               token text; `top' will see EOF again and finish.  */
+            goto top;
+          *id++ = c;
+          if (is_LETTER (c))
+            {
+              /* Named character: take the whole run of letters.  */
+              while (is_LETTER (c = getc (in_FILE)))
+                *id++ = c;
+              if (c != EOF)
+                ungetc (c, in_FILE);
+            }
+          *flags = TOK_LITERAL;
+          obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
+          return (struct token *) obstack_finish (&tokens_obstack);
+        }
+      else if (c == '(')        /* #(...) vector */
+        goto top;
+      else if (c == '"')        /* #"..." Bigloo: string with ansi-C escape */
+        goto string;
+      else if (c == '!')        /* #!... Kawa key/eof/null/... */
+        {
+          /* The '!' itself is not stored; the token is '#' + letters.  */
+          while (is_LETTER (c = getc (in_FILE)))
+            *id++ = c;
+          if (c != EOF)
+            ungetc (c, in_FILE);
+          *flags = TOK_LITERAL;
+          obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
+          return (struct token *) obstack_finish (&tokens_obstack);
+        }
+      else if (c == '|')        /* #|...|# Guile/Kawa multi-line comment */
+        {
+          do
+            {
+              c = getc (in_FILE);
+              if (c == '|')
+                {
+                  /* Skip a run of '|'; the comment ends if '#' follows.  */
+                  while ((c = getc (in_FILE)) == '|')
+                    ;
+                  if (c == '#')
+                    break;
+                }
+            }
+          while (c != EOF);
+          goto top;
+        }
+      else if (c == '@')        /* #@LENGTH ...^_ EMACS byte-code comment */
+        {
+          do
+            c = getc (in_FILE);
+          while (c != EOF && c != '\037');
+          goto top;
+        }
+      else if (c == '[')        /* #[ ... ] EMACS byte-code object */
+        goto top;
+      /* Ignore invalid #-construct.  */
+      goto top;
+
+    case '[': case ']':         /* EMACS vector aka #(...) in Scheme */
+      /* EMACS vector object vs Kawa ident.
+         Rationale: a Kawa ident cannot start with [ nor with ].  */
+      goto top;
+
+    default:
+      if (is_IDENT1 (c))
+        {
+          id = scanner_buffer;
+          *id++ = c;
+        ident:
+          /* Emacs end-of-vector vs Kawa ident: allow [] as a part of
+             an ident.  */
+          for (;;)
+            {
+              while (is_IDENT (c = getc (in_FILE)))
+                *id++ = c;
+              if (c != '[')
+                break;
+              c = getc (in_FILE);
+              if (c != ']')
+                /* Lone '[': drop it, since the top-level switch would
+                   skip it anyway, and leave only the character after
+                   it as lookahead.  This needs a single level of
+                   pushback and rescans that character exactly once.  */
+                break;
+              *id++ = '[';
+              *id++ = ']';
+            }
+          if (c != EOF)
+            ungetc (c, in_FILE);
+          *flags = TOK_NAME | TOK_LITERAL;
+          obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
+          return (struct token *) obstack_finish (&tokens_obstack);
+        }
+      else if (is_DIGIT (c))
+        {
+          id = scanner_buffer;
+        number:
+          *id++ = c;
+          while (is_NUMBER (c = getc (in_FILE)))
+            *id++ = c;
+          if (c != EOF)
+            ungetc (c, in_FILE);
+          *flags = TOK_NUMBER | TOK_LITERAL;
+          obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
+          return (struct token *) obstack_finish (&tokens_obstack);
+        }
+    }
+  /* Any other character is not part of a token: skip it.  */
+  goto top;
+}
+#undef DG
+#undef LT
+#undef I1
+#undef Id
+
+#undef RA
+#undef NM
+
+#undef is_DIGIT
+#undef is_LETTER
+#undef is_IDENT1
+#undef is_IDENT
+#undef is_RADIX
+#undef is_NUMBER