From 2f5e7a5b96039b7a00543b4056bab7ec85c8db4b Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 31 Jul 2017 17:32:19 -0700 Subject: txr-014 2009-10-05 --- extract.l | 220 ++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 149 insertions(+), 71 deletions(-) (limited to 'extract.l') diff --git a/extract.l b/extract.l index 81dc91d9..ab041bb9 100644 --- a/extract.l +++ b/extract.l @@ -40,7 +40,7 @@ #define YY_NO_UNPUT -const char *version = "013"; +const char *version = "014"; const char *progname = "txr"; const char *spec_file = "stdin"; long lineno = 1; @@ -111,6 +111,7 @@ void yybadtoken(int tok, const char *context) case SOME: problem = "\"some\""; break; case NONE: problem = "\"none\""; break; case MAYBE: problem = "\"maybe\""; break; + case CASES: problem = "\"cases\""; break; case AND: problem = "\"and\""; break; case OR: problem = "\"or\""; break; case END: problem = "\"end\""; break; @@ -124,6 +125,7 @@ void yybadtoken(int tok, const char *context) case FIRST: problem = "\"first\""; break; case LAST: problem = "\"last\""; break; case EMPTY: problem = "\"empty\""; break; + case DEFINE: problem = "\"define\""; break; case NUMBER: problem = "\"number\""; break; case REGCHAR: problem = "regular expression character"; break; } @@ -151,6 +153,8 @@ static int char_esc(int letter) case 'f': return '\f'; case 'r': return '\r'; case 'e': return 27; + case '"': return '"'; + case '\'': return '\''; } abort(); @@ -172,34 +176,40 @@ static int num_esc(char *num) %} TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+ +ID_END [^a-zA-Z0-9_] +NUM_END [^0-9] WS [\t ]* -%x SPECIAL REGEX REGCLASS +HEX [0-9A-Fa-f] +OCT [0-7] + +%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT %% -{TOK} { - long val; - char *errp; +{TOK} { + long val; + char *errp; - errno = 0; + errno = 0; - val = strtol(yytext, &errp, 10); + val = strtol(yytext, &errp, 10); - if (nesting == 0) - BEGIN(INITIAL); + if (nesting == 0) + BEGIN(INITIAL); - if (*errp != 0) { - /* not a number */ - yylval.lexeme = strdup(yytext); - return IDENT; - } + if (*errp != 0) { + /* not a number */ + yylval.lexeme = strdup(yytext); + return IDENT; + } - if ((val == LONG_MAX || val == LONG_MIN) && errno == ERANGE) - yyerror("numeric overflow in token"); + if ((val == LONG_MAX || val == LONG_MIN) + && errno == ERANGE) + yyerror("numeric overflow in token"); - yylval.num = val; - return NUMBER; - } + yylval.num = val; + return NUMBER; + } \({WS}all{WS}\) { @@ -222,12 +232,17 @@ WS [\t ]* return MAYBE; } +\({WS}cases{WS}\) { + BEGIN(INITIAL); + return CASES; + } + \({WS}and{WS}\) { BEGIN(INITIAL); return AND; } -\({WS}or{WS}\) { +\({WS}or{WS}\) { BEGIN(INITIAL); return OR; } @@ -288,54 +303,74 @@ WS [\t ]* return EMPTY; } -\{|\( { - nesting++; - if (yytext[0] == '{') - closechar = '}'; - else - closechar = ')'; - return yytext[0]; - } +\({WS}define/{ID_END} { + nesting++; + closechar = ')'; + BEGIN(NESTED); + return DEFINE; + } -\}|\) { - if (yytext[0] != closechar) { - yyerror("paren mismatch"); - BEGIN(INITIAL); - } else { - if (--nesting == 0) - BEGIN(INITIAL); - return yytext[0]; - } - } +\{|\( { + nesting++; + if (yytext[0] == '{') + closechar = '}'; + else + closechar = ')'; + BEGIN(NESTED); + return yytext[0]; + } -[\t ]+ { - /* Eat whitespace in directive */ - } +\}|\) { + if (yytext[0] != closechar) { + yyerror("paren mismatch"); + BEGIN(INITIAL); + } else { + switch (--nesting) { + case 1: + BEGIN(SPECIAL); + break; + case 0: + BEGIN(INITIAL); + break; + } + + return yytext[0]; + } + } + +[\t ]+ { /* Eat whitespace in directive */ } + +\" { + BEGIN(STRLIT); + return '"'; + } + +\' { + BEGIN(CHRLIT); + return '\''; + } @ { if (nesting == 0) { BEGIN(INITIAL); yylval.lexeme = strdup("@"); return TEXT; - } else { - yyerrorf(0, "bad character in directive: %c", yytext[0]); } } -\n { - lineno++; - yyerror("newline in directive"); - } +\n { + lineno++; + } -[/] { - BEGIN(REGEX); - return '/'; - } +[/] { + BEGIN(REGEX); + return '/'; + } -\. { - yylval.chr = '.'; - return '.'; - } +\. { + yylval.chr = '.'; + return '.'; + } [\\][abtnvfre] { char lexeme[2]; @@ -346,24 +381,25 @@ WS [\t ]* return TEXT; } -[\\](x[0-9a-fA-F]+|[0-7]+) { - char lexeme[2]; - lexeme[0] = num_esc(yytext + 1); - lexeme[1] = 0; - yylval.lexeme = strdup(lexeme); - BEGIN(INITIAL); - return TEXT; - } +[\\](x{HEX}+|{OCT}+) { + char lexeme[2]; + lexeme[0] = num_esc(yytext + 1); + lexeme[1] = 0; + yylval.lexeme = strdup(lexeme); + BEGIN(INITIAL); + return TEXT; + } -. { - yyerrorf(0, "bad character in directive: '%c'", yytext[0]); - } +. { + yyerrorf(0, "bad character in directive: '%c'", + yytext[0]); + } [/] { if (nesting == 0) BEGIN(INITIAL); else - BEGIN(SPECIAL); + BEGIN(NESTED); yylval.chr = '/'; return '/'; } @@ -374,10 +410,10 @@ WS [\t ]* return REGCHAR; } -[\\](x[0-9a-fA-F]+|[0-9]+) { - yylval.chr = num_esc(yytext + 1); - return REGCHAR; - } +[\\](x{HEX}+|{OCT}+) { + yylval.chr = num_esc(yytext + 1); + return REGCHAR; + } \n { lineno++; @@ -438,6 +474,48 @@ WS [\t ]* /* comment to end of line */ } +\" { + if (nesting == 0) + BEGIN(INITIAL); + else + BEGIN(NESTED); + return '"'; + } + +\' { + if (nesting == 0) + BEGIN(INITIAL); + else + BEGIN(NESTED); + return '\''; + } + +[\\][abtnvfre] { + yylval.chr = char_esc(yytext[1]); + return LITCHAR; + } + +[\\](x{HEX}+|{OCT}+) { + yylval.chr = num_esc(yytext + 1); + return LITCHAR; + } +\n { + yyerror("newline in string literal"); + lineno++; + yylval.chr = yytext[0]; + return LITCHAR; + } +\n { + yyerror("newline in character literal"); + lineno++; + yylval.chr = yytext[0]; + return LITCHAR; + } +. { + yylval.chr = yytext[0]; + return LITCHAR; + } + %% void help(void) -- cgit v1.2.3