From 3ae3f79a0acb709858dd2a250a8fb1cd88b5846a Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Fri, 23 May 2025 22:21:02 -0700 Subject: parser: recover stream data buffered by lexer. After parsing out of a stream, we attach a shadow stream which temporarily patches the operations to make it appear that bytes that were taken into the lexer have been pushed back. This lets us call an ordinary input operation after a parsing operation to read the data immediately following the parsed-out construct. (Almost: there is still the issue of the parser consuming one token of lookahead in some situations.) * stream.h (struct strm_base): New member shadow_obj. This is a context pointer used by the shadow stream operations. (generic_fill_buf): Declare previously internal function. * stream.c (strm_base_init): Initialize shadow_obj to null. (generic_fill_buf): Function changed to external linkage. Also, reloads the ops pointer from the stream on each loop iteration. This is because it can change; part of the buffer may be filled by shadow_get_byte, which can detach the shadow operations, so then the rest of the buffer is filled by something else like stdio_get_byte. (generic_get_line): Reload ops in in the loop, like in gneric_fill_buf, for the same reason. * parser.l: Include for ptrdiff_t. (scanner_has_buffered_bytes, scanner_get_buffered_bytes): New functions. * parser.c (SHADOW_TAB_SIZE): New preprocessor symbol. (shadow_tab): New static array. (struct shadow_context, struct shadow_ungetch): New struct types. (lisp_parse_impl): After calling parse, call parse_shadow_stream_attach to attach the shadow stream context and operations onto the stream. (shadow_detach, shadow_destroy_op, shadow_mark_op, shadow_put_string, shadow_put_char, shadow_put_byte, shadow_get_char_callback, shadow_get_char, shadow_unget_char_callback, shadow_unget_char, shadow_get_byte, shadow_unget_byte, shadow_put_buf, shadow_close, shadow_flush, shadow_seek, shadow_truncate): New static functions. (shadow_ops_template): New static structure. (customize_shad_ops): New static function. (parser_shadow_stream_attach): New function. (parser_free_all): New function. * parser.h (scanner_has_buffered_bytes, scanner_get_buffered_bytes, parser_shadow_stream_attach, parser_free_all): Declared. * txr.c (free_all): Call parser_free_all. * tests/018/streams.tl: New test cases. * lex.yy.c.shipped: Regenerated. --- lex.yy.c.shipped | 371 +++++++++++++++++++++++++++------------------------ parser.c | 279 ++++++++++++++++++++++++++++++++++++++ parser.h | 4 + parser.l | 19 +++ stream.c | 8 +- stream.h | 2 + tests/018/streams.tl | 5 + txr.c | 1 + 8 files changed, 509 insertions(+), 180 deletions(-) diff --git a/lex.yy.c.shipped b/lex.yy.c.shipped index a1d4fef5..e60e858f 100644 --- a/lex.yy.c.shipped +++ b/lex.yy.c.shipped @@ -4497,6 +4497,7 @@ goto find_rule; \ */ #line 30 "parser.l" +#include #include #include #include @@ -4719,10 +4720,10 @@ static char *remove_char(char *str, int c) return str; } -#line 4723 "lex.yy.c" +#line 4724 "lex.yy.c" #define YY_NO_INPUT 1 -#line 4726 "lex.yy.c" +#line 4727 "lex.yy.c" #define INITIAL 0 #define SPECIAL 1 @@ -5035,10 +5036,10 @@ YY_DECL } { -#line 326 "parser.l" +#line 327 "parser.l" -#line 5042 "lex.yy.c" +#line 5043 "lex.yy.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -5121,7 +5122,7 @@ do_action: /* This label is used only to access EOF actions. */ { /* beginning of action switch */ case 1: YY_RULE_SETUP -#line 328 "parser.l" +#line 329 "parser.l" { wchar_t *wtxt = utf8_dup_from(yytext); @@ -5137,7 +5138,7 @@ YY_RULE_SETUP YY_BREAK case 2: YY_RULE_SETUP -#line 341 "parser.l" +#line 342 "parser.l" { wchar_t *wtxt = utf8_dup_from(remove_char(yytext, ',')); @@ -5153,7 +5154,7 @@ YY_RULE_SETUP YY_BREAK case 3: YY_RULE_SETUP -#line 354 "parser.l" +#line 355 "parser.l" { wchar_t *wtxt = utf8_dup_from(remove_char(yytext + 2, ',')); int base; @@ -5176,7 +5177,7 @@ YY_RULE_SETUP YY_BREAK case 4: YY_RULE_SETUP -#line 374 "parser.l" +#line 375 "parser.l" { int base = 0; val str = string_own(utf8_dup_from(yytext + 2)); @@ -5201,7 +5202,7 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP -#line 396 "parser.l" +#line 397 "parser.l" { if (yy_top_state(yyscanner) == INITIAL || yy_top_state(yyscanner) == QSILIT @@ -5216,7 +5217,7 @@ YY_RULE_SETUP YY_BREAK case 6: YY_RULE_SETUP -#line 408 "parser.l" +#line 409 "parser.l" { if (yy_top_state(yyscanner) == INITIAL || yy_top_state(yyscanner) == QSILIT @@ -5232,12 +5233,12 @@ YY_RULE_SETUP } YY_BREAK case 7: -#line 424 "parser.l" -case 8: #line 425 "parser.l" +case 8: +#line 426 "parser.l" case 9: YY_RULE_SETUP -#line 425 "parser.l" +#line 426 "parser.l" { val str = string_utf8(yytext); @@ -5261,7 +5262,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 441 "parser.l" +#line 442 "parser.l" { if (yy_top_state(yyscanner) == INITIAL || yy_top_state(yyscanner) == QSILIT @@ -5275,10 +5276,10 @@ YY_RULE_SETUP } YY_BREAK case 11: -#line 454 "parser.l" +#line 455 "parser.l" case 12: YY_RULE_SETUP -#line 454 "parser.l" +#line 455 "parser.l" { wchar_t *wtxt = utf8_dup_from(yytext + 1); @@ -5292,10 +5293,10 @@ YY_RULE_SETUP } YY_BREAK case 13: -#line 467 "parser.l" +#line 468 "parser.l" case 14: YY_RULE_SETUP -#line 467 "parser.l" +#line 468 "parser.l" { wchar_t *wtxt = utf8_dup_from(yytext + 3); @@ -5309,10 +5310,10 @@ YY_RULE_SETUP } YY_BREAK case 15: -#line 480 "parser.l" +#line 481 "parser.l" case 16: YY_RULE_SETUP -#line 480 "parser.l" +#line 481 "parser.l" { wchar_t *wtxt = utf8_dup_from(yytext + 3); @@ -5327,7 +5328,7 @@ YY_RULE_SETUP YY_BREAK case 17: YY_RULE_SETUP -#line 492 "parser.l" +#line 493 "parser.l" { wchar_t *wtxt = utf8_dup_from(yytext + 3); @@ -5341,12 +5342,12 @@ YY_RULE_SETUP } YY_BREAK case 18: -#line 505 "parser.l" -case 19: #line 506 "parser.l" +case 19: +#line 507 "parser.l" case 20: YY_RULE_SETUP -#line 506 "parser.l" +#line 507 "parser.l" { yyerrorf(yyg, lit("cramped floating-point literal: " "space needed between ~a and dot."), @@ -5363,12 +5364,12 @@ YY_RULE_SETUP } YY_BREAK case 21: -#line 523 "parser.l" -case 22: #line 524 "parser.l" +case 22: +#line 525 "parser.l" case 23: YY_RULE_SETUP -#line 524 "parser.l" +#line 525 "parser.l" { if (yy_top_state(yyscanner) == INITIAL || yy_top_state(yyscanner) == QSILIT @@ -5380,10 +5381,10 @@ YY_RULE_SETUP } YY_BREAK case 24: -#line 535 "parser.l" +#line 536 "parser.l" case 25: YY_RULE_SETUP -#line 535 "parser.l" +#line 536 "parser.l" { if (yy_top_state(yyscanner) == INITIAL || yy_top_state(yyscanner) == QSILIT @@ -5399,7 +5400,7 @@ YY_RULE_SETUP YY_BREAK case 26: YY_RULE_SETUP -#line 548 "parser.l" +#line 549 "parser.l" { return directive_tok(yyg, ALL, 0); } @@ -5411,28 +5412,28 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 552 "parser.l" +#line 553 "parser.l" { return directive_tok(yyg, SOME, NESTED); } YY_BREAK case 28: YY_RULE_SETUP -#line 556 "parser.l" +#line 557 "parser.l" { return directive_tok(yyg, NONE, 0); } YY_BREAK case 29: YY_RULE_SETUP -#line 560 "parser.l" +#line 561 "parser.l" { return directive_tok(yyg, MAYBE, 0); } YY_BREAK case 30: YY_RULE_SETUP -#line 564 "parser.l" +#line 565 "parser.l" { return directive_tok(yyg, CASES, 0); } @@ -5444,7 +5445,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 568 "parser.l" +#line 569 "parser.l" { return directive_tok(yyg, BLOCK, NESTED); } @@ -5456,7 +5457,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 572 "parser.l" +#line 573 "parser.l" { return directive_tok(yyg, CHOOSE, NESTED); } @@ -5468,28 +5469,28 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 576 "parser.l" +#line 577 "parser.l" { return directive_tok(yyg, GATHER, NESTED); } YY_BREAK case 34: YY_RULE_SETUP -#line 580 "parser.l" +#line 581 "parser.l" { return directive_tok(yyg, AND, 0); } YY_BREAK case 35: YY_RULE_SETUP -#line 584 "parser.l" +#line 585 "parser.l" { return directive_tok(yyg, OR, 0); } YY_BREAK case 36: YY_RULE_SETUP -#line 588 "parser.l" +#line 589 "parser.l" { return directive_tok(yyg, END, 0); } @@ -5501,7 +5502,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 592 "parser.l" +#line 593 "parser.l" { return directive_tok(yyg, COLLECT, NESTED); } @@ -5513,7 +5514,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 596 "parser.l" +#line 597 "parser.l" { return directive_tok(yyg, COLL, NESTED); } @@ -5525,7 +5526,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 600 "parser.l" +#line 601 "parser.l" { return directive_tok(yyg, UNTIL, NESTED); } @@ -5537,7 +5538,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 604 "parser.l" +#line 605 "parser.l" { return directive_tok(yyg, OUTPUT, NESTED); } @@ -5549,7 +5550,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 608 "parser.l" +#line 609 "parser.l" { return directive_tok(yyg, REPEAT, NESTED); } @@ -5561,7 +5562,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 612 "parser.l" +#line 613 "parser.l" { return directive_tok(yyg, PUSH, NESTED); } @@ -5573,21 +5574,21 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 616 "parser.l" +#line 617 "parser.l" { return directive_tok(yyg, REP, NESTED); } YY_BREAK case 44: YY_RULE_SETUP -#line 620 "parser.l" +#line 621 "parser.l" { return directive_tok(yyg, SINGLE, 0); } YY_BREAK case 45: YY_RULE_SETUP -#line 624 "parser.l" +#line 625 "parser.l" { return directive_tok(yyg, FIRST, 0); } @@ -5599,14 +5600,14 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 628 "parser.l" +#line 629 "parser.l" { return directive_tok(yyg, LAST, NESTED); } YY_BREAK case 47: YY_RULE_SETUP -#line 632 "parser.l" +#line 633 "parser.l" { return directive_tok(yyg, EMPTY, 0); } @@ -5618,7 +5619,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 636 "parser.l" +#line 637 "parser.l" { return directive_tok(yyg, MOD, NESTED); } @@ -5630,7 +5631,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 640 "parser.l" +#line 641 "parser.l" { return directive_tok(yyg, MODLAST, NESTED); } @@ -5642,14 +5643,14 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 644 "parser.l" +#line 645 "parser.l" { return directive_tok(yyg, DEFINE, NESTED); } YY_BREAK case 51: YY_RULE_SETUP -#line 648 "parser.l" +#line 649 "parser.l" { return directive_tok(yyg, TRY, 0); } @@ -5661,14 +5662,14 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 652 "parser.l" +#line 653 "parser.l" { return directive_tok(yyg, CATCH, NESTED); } YY_BREAK case 53: YY_RULE_SETUP -#line 656 "parser.l" +#line 657 "parser.l" { return directive_tok(yyg, FINALLY, 0); } @@ -5680,7 +5681,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 660 "parser.l" +#line 661 "parser.l" { return directive_tok(yyg, IF, NESTED); } @@ -5692,21 +5693,21 @@ YY_LINENO_REWIND_TO(yy_cp - 1); yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 664 "parser.l" +#line 665 "parser.l" { return directive_tok(yyg, ELIF, NESTED); } YY_BREAK case 56: YY_RULE_SETUP -#line 668 "parser.l" +#line 669 "parser.l" { return directive_tok(yyg, ELSE, 0); } YY_BREAK case 57: YY_RULE_SETUP -#line 672 "parser.l" +#line 673 "parser.l" { yy_push_state(BRACED, yyscanner); yylval->lineno = yyextra->lineno; @@ -5715,7 +5716,7 @@ YY_RULE_SETUP YY_BREAK case 58: YY_RULE_SETUP -#line 678 "parser.l" +#line 679 "parser.l" { yy_push_state(NESTED, yyscanner); yylval->lineno = yyextra->lineno; @@ -5724,7 +5725,7 @@ YY_RULE_SETUP YY_BREAK case 59: YY_RULE_SETUP -#line 684 "parser.l" +#line 685 "parser.l" { yylval->lineno = yyextra->lineno; return (opt_compat && opt_compat <= 248) ? OLD_AT : '@'; @@ -5732,7 +5733,7 @@ YY_RULE_SETUP YY_BREAK case 60: YY_RULE_SETUP -#line 689 "parser.l" +#line 690 "parser.l" { yylval->chr = '*'; return SPLICE; @@ -5740,7 +5741,7 @@ YY_RULE_SETUP YY_BREAK case 61: YY_RULE_SETUP -#line 694 "parser.l" +#line 695 "parser.l" { yylval->chr = yytext[0]; return yytext[0]; @@ -5748,7 +5749,7 @@ YY_RULE_SETUP YY_BREAK case 62: YY_RULE_SETUP -#line 699 "parser.l" +#line 700 "parser.l" { yy_pop_state(yyscanner); if (yy_top_state(yyscanner) == INITIAL @@ -5763,7 +5764,7 @@ case 63: yyg->yy_c_buf_p = yy_cp = yy_bp + 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 708 "parser.l" +#line 709 "parser.l" { yyerrorf(yyg, lit("cramped floating-point literal: " "space or 0 needed between ~a and dot."), @@ -5780,7 +5781,7 @@ YY_RULE_SETUP YY_BREAK case 64: YY_RULE_SETUP -#line 723 "parser.l" +#line 724 "parser.l" { yy_pop_state(yyscanner); if (yy_top_state(yyscanner) == INITIAL @@ -5792,14 +5793,14 @@ YY_RULE_SETUP YY_BREAK case 65: YY_RULE_SETUP -#line 732 "parser.l" +#line 733 "parser.l" { /* Eat whitespace in directive */ } YY_BREAK case 66: YY_RULE_SETUP -#line 736 "parser.l" +#line 737 "parser.l" { yy_push_state(STRLIT, yyscanner); return '"'; @@ -5807,7 +5808,7 @@ YY_RULE_SETUP YY_BREAK case 67: YY_RULE_SETUP -#line 741 "parser.l" +#line 742 "parser.l" { yy_push_state(CHRLIT, yyscanner); yylval->lineno = yyextra->lineno; @@ -5816,7 +5817,7 @@ YY_RULE_SETUP YY_BREAK case 68: YY_RULE_SETUP -#line 747 "parser.l" +#line 748 "parser.l" { yy_push_state(BUFLIT, yyscanner); yylval->lineno = yyextra->lineno; @@ -5825,7 +5826,7 @@ YY_RULE_SETUP YY_BREAK case 69: YY_RULE_SETUP -#line 753 "parser.l" +#line 754 "parser.l" { yy_push_state(REGEX, yyscanner); yylval->lineno = yyextra->lineno; @@ -5834,7 +5835,7 @@ YY_RULE_SETUP YY_BREAK case 70: YY_RULE_SETUP -#line 759 "parser.l" +#line 760 "parser.l" { yy_push_state(QSILIT, yyscanner); return '`'; @@ -5842,7 +5843,7 @@ YY_RULE_SETUP YY_BREAK case 71: YY_RULE_SETUP -#line 764 "parser.l" +#line 765 "parser.l" { yy_push_state(WLIT, yyscanner); yylval->lineno = yyextra->lineno; @@ -5851,7 +5852,7 @@ YY_RULE_SETUP YY_BREAK case 72: YY_RULE_SETUP -#line 770 "parser.l" +#line 771 "parser.l" { yy_push_state(WLIT, yyscanner); yylval->lineno = yyextra->lineno; @@ -5860,7 +5861,7 @@ YY_RULE_SETUP YY_BREAK case 73: YY_RULE_SETUP -#line 776 "parser.l" +#line 777 "parser.l" { yy_push_state(QWLIT, yyscanner); yylval->lineno = yyextra->lineno; @@ -5869,7 +5870,7 @@ YY_RULE_SETUP YY_BREAK case 74: YY_RULE_SETUP -#line 782 "parser.l" +#line 783 "parser.l" { yy_push_state(QWLIT, yyscanner); yylval->lineno = yyextra->lineno; @@ -5878,14 +5879,14 @@ YY_RULE_SETUP YY_BREAK case 75: YY_RULE_SETUP -#line 788 "parser.l" +#line 789 "parser.l" { return '#'; } YY_BREAK case 76: YY_RULE_SETUP -#line 792 "parser.l" +#line 793 "parser.l" { yylval->lineno = yyextra->lineno; return HASH_H; @@ -5893,7 +5894,7 @@ YY_RULE_SETUP YY_BREAK case 77: YY_RULE_SETUP -#line 797 "parser.l" +#line 798 "parser.l" { yylval->lineno = yyextra->lineno; return HASH_S; @@ -5901,7 +5902,7 @@ YY_RULE_SETUP YY_BREAK case 78: YY_RULE_SETUP -#line 802 "parser.l" +#line 803 "parser.l" { yylval->lineno = yyextra->lineno; return HASH_R; @@ -5909,7 +5910,7 @@ YY_RULE_SETUP YY_BREAK case 79: YY_RULE_SETUP -#line 807 "parser.l" +#line 808 "parser.l" { yylval->lineno = yyextra->lineno; return HASH_N; @@ -5917,7 +5918,7 @@ YY_RULE_SETUP YY_BREAK case 80: YY_RULE_SETUP -#line 812 "parser.l" +#line 813 "parser.l" { yylval->lineno = yyextra->lineno; return HASH_T; @@ -5925,7 +5926,7 @@ YY_RULE_SETUP YY_BREAK case 81: YY_RULE_SETUP -#line 817 "parser.l" +#line 818 "parser.l" { yylval->lineno = yyextra->lineno; yy_push_state(JSON, yyscanner); @@ -5934,7 +5935,7 @@ YY_RULE_SETUP YY_BREAK case 82: YY_RULE_SETUP -#line 823 "parser.l" +#line 824 "parser.l" { yylval->lineno = yyextra->lineno; return HASH_SEMI; @@ -5942,7 +5943,7 @@ YY_RULE_SETUP YY_BREAK case 83: YY_RULE_SETUP -#line 828 "parser.l" +#line 829 "parser.l" { wchar_t *wtxt = utf8_dup_from(yytext + 1); yylval->val = int_str_wc(wtxt, num(10)); @@ -5952,7 +5953,7 @@ YY_RULE_SETUP YY_BREAK case 84: YY_RULE_SETUP -#line 835 "parser.l" +#line 836 "parser.l" { wchar_t *wtxt = utf8_dup_from(yytext + 1); yylval->val = int_str_wc(wtxt, num(10)); @@ -5962,7 +5963,7 @@ YY_RULE_SETUP YY_BREAK case 85: YY_RULE_SETUP -#line 842 "parser.l" +#line 843 "parser.l" { yylval->lineno = yyextra->lineno; return (opt_compat && opt_compat <= 185) ? OLD_DOTDOT : DOTDOT; @@ -5970,7 +5971,7 @@ YY_RULE_SETUP YY_BREAK case 86: YY_RULE_SETUP -#line 847 "parser.l" +#line 848 "parser.l" { yy_pop_state(yyscanner); yylval->lexeme = chk_strdup(L"@"); @@ -5980,14 +5981,14 @@ YY_RULE_SETUP case 87: /* rule 87 can match eol */ YY_RULE_SETUP -#line 853 "parser.l" +#line 854 "parser.l" { yyextra->lineno++; } YY_BREAK case 88: YY_RULE_SETUP -#line 857 "parser.l" +#line 858 "parser.l" { yy_push_state(REGEX, yyscanner); return '/'; @@ -5995,7 +5996,7 @@ YY_RULE_SETUP YY_BREAK case 89: YY_RULE_SETUP -#line 862 "parser.l" +#line 863 "parser.l" { yylval->chr = '.'; return CONSDOT; @@ -6003,7 +6004,7 @@ YY_RULE_SETUP YY_BREAK case 90: YY_RULE_SETUP -#line 867 "parser.l" +#line 868 "parser.l" { yylval->chr = '.'; return LAMBDOT; @@ -6011,7 +6012,7 @@ YY_RULE_SETUP YY_BREAK case 91: YY_RULE_SETUP -#line 872 "parser.l" +#line 873 "parser.l" { yylval->chr = '.'; return UREFDOT; @@ -6019,7 +6020,7 @@ YY_RULE_SETUP YY_BREAK case 92: YY_RULE_SETUP -#line 877 "parser.l" +#line 878 "parser.l" { yylval->chr = '.'; return '.'; @@ -6027,7 +6028,7 @@ YY_RULE_SETUP YY_BREAK case 93: YY_RULE_SETUP -#line 882 "parser.l" +#line 883 "parser.l" { yylval->chr = '.'; return OREFDOT; @@ -6035,7 +6036,7 @@ YY_RULE_SETUP YY_BREAK case 94: YY_RULE_SETUP -#line 887 "parser.l" +#line 888 "parser.l" { yylval->chr = '.'; return UOREFDOT; @@ -6044,7 +6045,7 @@ YY_RULE_SETUP case 95: /* rule 95 can match eol */ YY_RULE_SETUP -#line 892 "parser.l" +#line 893 "parser.l" { if (YYSTATE == SPECIAL) yy_pop_state(yyscanner); /* @\ continuation */ @@ -6053,7 +6054,7 @@ YY_RULE_SETUP YY_BREAK case 96: YY_RULE_SETUP -#line 898 "parser.l" +#line 899 "parser.l" { wchar_t lexeme[2]; lexeme[0] = char_esc(yytext[1]); @@ -6065,7 +6066,7 @@ YY_RULE_SETUP YY_BREAK case 97: YY_RULE_SETUP -#line 907 "parser.l" +#line 908 "parser.l" { wchar_t lexeme[2]; lexeme[0] = num_esc(yyg, yytext + 1); @@ -6084,28 +6085,28 @@ YY_RULE_SETUP YY_BREAK case 98: YY_RULE_SETUP -#line 923 "parser.l" +#line 924 "parser.l" { yyerrorf(yyg, lit("\\x escape without digits"), nao); } YY_BREAK case 99: YY_RULE_SETUP -#line 927 "parser.l" +#line 928 "parser.l" { yyerrorf(yyg, lit("unrecognized escape \\~a"), chr(yytext[1]), nao); } YY_BREAK case 100: YY_RULE_SETUP -#line 931 "parser.l" +#line 932 "parser.l" { /* comment */ } YY_BREAK case 101: YY_RULE_SETUP -#line 935 "parser.l" +#line 936 "parser.l" { val ch = chr_str(string_utf8(yytext), zero); if (chr_isspace(ch)) @@ -6125,7 +6126,7 @@ YY_RULE_SETUP YY_BREAK case 102: YY_RULE_SETUP -#line 952 "parser.l" +#line 953 "parser.l" { yyerrprepf(yyg, lit("non-UTF-8 byte #x~02x in directive"), num(convert(unsigned char, yytext[0])), nao); @@ -6134,7 +6135,7 @@ YY_RULE_SETUP YY_BREAK case 103: YY_RULE_SETUP -#line 958 "parser.l" +#line 959 "parser.l" { yylval->chr = '/'; return (YYSTATE == SREGEX) ? REGCHAR : '/'; @@ -6142,7 +6143,7 @@ YY_RULE_SETUP YY_BREAK case 104: YY_RULE_SETUP -#line 963 "parser.l" +#line 964 "parser.l" { yylval->chr = char_esc(yytext[1]); return REGCHAR; @@ -6150,7 +6151,7 @@ YY_RULE_SETUP YY_BREAK case 105: YY_RULE_SETUP -#line 968 "parser.l" +#line 969 "parser.l" { yylval->chr = num_esc(yyg, yytext + 1); return REGCHAR; @@ -6158,7 +6159,7 @@ YY_RULE_SETUP YY_BREAK case 106: YY_RULE_SETUP -#line 973 "parser.l" +#line 974 "parser.l" { yylval->chr = yytext[1]; return REGTOKEN; @@ -6167,7 +6168,7 @@ YY_RULE_SETUP case 107: /* rule 107 can match eol */ YY_RULE_SETUP -#line 978 "parser.l" +#line 979 "parser.l" { yyextra->lineno++; } @@ -6175,7 +6176,7 @@ YY_RULE_SETUP case 108: /* rule 108 can match eol */ YY_RULE_SETUP -#line 982 "parser.l" +#line 983 "parser.l" { yyextra->lineno++; yyerrprepf(yyg, lit("newline in regex"), nao); @@ -6185,7 +6186,7 @@ YY_RULE_SETUP case 109: /* rule 109 can match eol */ YY_RULE_SETUP -#line 988 "parser.l" +#line 989 "parser.l" { yyextra->lineno++; yylval->chr = yytext[0]; @@ -6194,7 +6195,7 @@ YY_RULE_SETUP YY_BREAK case 110: YY_RULE_SETUP -#line 994 "parser.l" +#line 995 "parser.l" { yylval->chr = yytext[0]; return yytext[0]; @@ -6202,7 +6203,7 @@ YY_RULE_SETUP YY_BREAK case 111: YY_RULE_SETUP -#line 999 "parser.l" +#line 1000 "parser.l" { yylval->chr = yytext[1]; return REGCHAR; @@ -6210,7 +6211,7 @@ YY_RULE_SETUP YY_BREAK case 112: YY_RULE_SETUP -#line 1004 "parser.l" +#line 1005 "parser.l" { if (opt_compat && opt_compat <= 105) { yylval->chr = yytext[1]; @@ -6226,7 +6227,7 @@ YY_RULE_SETUP YY_BREAK case 113: YY_RULE_SETUP -#line 1017 "parser.l" +#line 1018 "parser.l" { yyerrprepf(yyg, lit("dangling backslash in regex"), nao); return ERRTOK; @@ -6234,7 +6235,7 @@ YY_RULE_SETUP YY_BREAK case 114: YY_RULE_SETUP -#line 1022 "parser.l" +#line 1023 "parser.l" { wchar_t wchr[8]; if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) { @@ -6247,7 +6248,7 @@ YY_RULE_SETUP YY_BREAK case 115: YY_RULE_SETUP -#line 1032 "parser.l" +#line 1033 "parser.l" { yylval->chr = convert(unsigned char, yytext[0]) + 0xDC00; return REGCHAR; @@ -6255,7 +6256,7 @@ YY_RULE_SETUP YY_BREAK case 116: YY_RULE_SETUP -#line 1037 "parser.l" +#line 1038 "parser.l" { yylval->lexeme = utf8_dup_from(yytext); return SPACE; @@ -6263,7 +6264,7 @@ YY_RULE_SETUP YY_BREAK case 117: YY_RULE_SETUP -#line 1042 "parser.l" +#line 1043 "parser.l" { yylval->lexeme = utf8_dup_from(yytext); return TEXT; @@ -6272,7 +6273,7 @@ YY_RULE_SETUP case 118: /* rule 118 can match eol */ YY_RULE_SETUP -#line 1047 "parser.l" +#line 1048 "parser.l" { yyextra->lineno++; return '\n'; @@ -6280,7 +6281,7 @@ YY_RULE_SETUP YY_BREAK case 119: YY_RULE_SETUP -#line 1052 "parser.l" +#line 1053 "parser.l" { yy_push_state(SPECIAL, yyscanner); return '*'; @@ -6288,7 +6289,7 @@ YY_RULE_SETUP YY_BREAK case 120: YY_RULE_SETUP -#line 1057 "parser.l" +#line 1058 "parser.l" { yy_push_state(SPECIAL, yyscanner); } @@ -6296,7 +6297,7 @@ YY_RULE_SETUP case 121: /* rule 121 can match eol */ YY_RULE_SETUP -#line 1061 "parser.l" +#line 1062 "parser.l" { /* eat whole line comment */ yyextra->lineno++; @@ -6304,14 +6305,14 @@ YY_RULE_SETUP YY_BREAK case 122: YY_RULE_SETUP -#line 1066 "parser.l" +#line 1067 "parser.l" { /* comment to end of line */ } YY_BREAK case 123: YY_RULE_SETUP -#line 1070 "parser.l" +#line 1071 "parser.l" { yy_pop_state(yyscanner); return yytext[0]; @@ -6319,7 +6320,7 @@ YY_RULE_SETUP YY_BREAK case 124: YY_RULE_SETUP -#line 1075 "parser.l" +#line 1076 "parser.l" { yy_pop_state(yyscanner); return yytext[0]; @@ -6327,7 +6328,7 @@ YY_RULE_SETUP YY_BREAK case 125: YY_RULE_SETUP -#line 1080 "parser.l" +#line 1081 "parser.l" { yylval->chr = char_esc(yytext[1]); return LITCHAR; @@ -6335,7 +6336,7 @@ YY_RULE_SETUP YY_BREAK case 126: YY_RULE_SETUP -#line 1085 "parser.l" +#line 1086 "parser.l" { yylval->chr = char_esc(yytext[1]); return LITCHAR; @@ -6344,7 +6345,7 @@ YY_RULE_SETUP case 127: /* rule 127 can match eol */ YY_RULE_SETUP -#line 1090 "parser.l" +#line 1091 "parser.l" { yyextra->lineno++; } @@ -6352,7 +6353,7 @@ YY_RULE_SETUP case 128: /* rule 128 can match eol */ YY_RULE_SETUP -#line 1094 "parser.l" +#line 1095 "parser.l" { yyextra->lineno++; @@ -6362,7 +6363,7 @@ YY_RULE_SETUP YY_BREAK case 129: YY_RULE_SETUP -#line 1102 "parser.l" +#line 1103 "parser.l" { yylval->chr = num_esc(yyg, yytext+1); return LITCHAR; @@ -6370,21 +6371,21 @@ YY_RULE_SETUP YY_BREAK case 130: YY_RULE_SETUP -#line 1107 "parser.l" +#line 1108 "parser.l" { yyerrorf(yyg, lit("\\x escape without digits"), nao); } YY_BREAK case 131: YY_RULE_SETUP -#line 1111 "parser.l" +#line 1112 "parser.l" { yyerrorf(yyg, lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); } YY_BREAK case 132: YY_RULE_SETUP -#line 1115 "parser.l" +#line 1116 "parser.l" { yylval->chr = num_esc(yyg, yytext); return LITCHAR; @@ -6392,7 +6393,7 @@ YY_RULE_SETUP YY_BREAK case 133: YY_RULE_SETUP -#line 1120 "parser.l" +#line 1121 "parser.l" { yylval->lexeme = utf8_dup_from(yytext); return SYMTOK; @@ -6400,7 +6401,7 @@ YY_RULE_SETUP YY_BREAK case 134: YY_RULE_SETUP -#line 1125 "parser.l" +#line 1126 "parser.l" { yylval->lexeme = utf8_dup_from(yytext); return SYMTOK; /* hack */ @@ -6409,7 +6410,7 @@ YY_RULE_SETUP case 135: /* rule 135 can match eol */ YY_RULE_SETUP -#line 1130 "parser.l" +#line 1131 "parser.l" { yyerrprepf(yyg, lit("newline in string literal"), nao); yyextra->lineno++; @@ -6420,7 +6421,7 @@ YY_RULE_SETUP case 136: /* rule 136 can match eol */ YY_RULE_SETUP -#line 1137 "parser.l" +#line 1138 "parser.l" { yyerrprepf(yyg, lit("newline in character literal"), nao); yyextra->lineno++; @@ -6431,7 +6432,7 @@ YY_RULE_SETUP case 137: /* rule 137 can match eol */ YY_RULE_SETUP -#line 1144 "parser.l" +#line 1145 "parser.l" { yyerrprepf(yyg, lit("newline in string quasiliteral"), nao); yyextra->lineno++; @@ -6442,7 +6443,7 @@ YY_RULE_SETUP case 138: /* rule 138 can match eol */ YY_RULE_SETUP -#line 1151 "parser.l" +#line 1152 "parser.l" { yyextra->lineno++; @@ -6456,7 +6457,7 @@ YY_RULE_SETUP YY_BREAK case 139: YY_RULE_SETUP -#line 1162 "parser.l" +#line 1163 "parser.l" { yy_push_state(QSPECIAL, yyscanner); yylval->val = nil; @@ -6469,7 +6470,7 @@ YY_RULE_SETUP YY_BREAK case 140: YY_RULE_SETUP -#line 1172 "parser.l" +#line 1173 "parser.l" { yyerrprepf(yyg, lit("bad format string after @ in quasiliteral"), nao); return ERRTOK; @@ -6477,7 +6478,7 @@ YY_RULE_SETUP YY_BREAK case 141: YY_RULE_SETUP -#line 1177 "parser.l" +#line 1178 "parser.l" { yyerrprepf(yyg, lit("malformed @ expression in quasiliteral"), nao); return ERRTOK; @@ -6485,14 +6486,14 @@ YY_RULE_SETUP YY_BREAK case 142: YY_RULE_SETUP -#line 1182 "parser.l" +#line 1183 "parser.l" { return ' '; } YY_BREAK case 143: YY_RULE_SETUP -#line 1186 "parser.l" +#line 1187 "parser.l" { yy_pop_state(yyscanner); return yytext[0]; @@ -6500,7 +6501,7 @@ YY_RULE_SETUP YY_BREAK case 144: YY_RULE_SETUP -#line 1191 "parser.l" +#line 1192 "parser.l" { yylval->chr = char_esc(yytext[1]); return LITCHAR; @@ -6508,7 +6509,7 @@ YY_RULE_SETUP YY_BREAK case 145: YY_RULE_SETUP -#line 1196 "parser.l" +#line 1197 "parser.l" { wchar_t ch0, ch1; yytext[6] = 0; @@ -6520,7 +6521,7 @@ YY_RULE_SETUP YY_BREAK case 146: YY_RULE_SETUP -#line 1205 "parser.l" +#line 1206 "parser.l" { wchar_t ch = num_esc(yyg, yytext + 1); yylval->chr = if3(ch, ch, 0xDC00); @@ -6529,14 +6530,14 @@ YY_RULE_SETUP YY_BREAK case 147: YY_RULE_SETUP -#line 1211 "parser.l" +#line 1212 "parser.l" { yyerrorf(yyg, lit("JSON \\u escape needs four digits"), nao); } YY_BREAK case 148: YY_RULE_SETUP -#line 1215 "parser.l" +#line 1216 "parser.l" { yyerrorf(yyg, lit("unrecognized JSON escape: \\~a"), chr(yytext[1]), nao); } @@ -6544,7 +6545,7 @@ YY_RULE_SETUP case 149: /* rule 149 can match eol */ YY_RULE_SETUP -#line 1219 "parser.l" +#line 1220 "parser.l" { yyerrprepf(yyg, lit("newline in JSON string"), nao); yyextra->lineno++; @@ -6554,7 +6555,7 @@ YY_RULE_SETUP YY_BREAK case 150: YY_RULE_SETUP -#line 1226 "parser.l" +#line 1227 "parser.l" { wchar_t wchr[8]; if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) { @@ -6567,7 +6568,7 @@ YY_RULE_SETUP YY_BREAK case 151: YY_RULE_SETUP -#line 1236 "parser.l" +#line 1237 "parser.l" { yylval->chr = strtol(yytext, 0, 16); return LITCHAR; @@ -6575,28 +6576,28 @@ YY_RULE_SETUP YY_BREAK case 152: YY_RULE_SETUP -#line 1241 "parser.l" +#line 1242 "parser.l" { return '\''; } YY_BREAK case 153: YY_RULE_SETUP -#line 1245 "parser.l" +#line 1246 "parser.l" { } YY_BREAK case 154: /* rule 154 can match eol */ YY_RULE_SETUP -#line 1248 "parser.l" +#line 1249 "parser.l" { yyextra->lineno++; } YY_BREAK case 155: YY_RULE_SETUP -#line 1252 "parser.l" +#line 1253 "parser.l" { yyerrorf(yyg, lit("bad character ~s in buffer literal"), chr(yytext[0]), nao); @@ -6604,7 +6605,7 @@ YY_RULE_SETUP YY_BREAK case 156: YY_RULE_SETUP -#line 1257 "parser.l" +#line 1258 "parser.l" { yylval->chr = convert(unsigned char, yytext[0]) + 0xDC00; return LITCHAR; @@ -6612,7 +6613,7 @@ YY_RULE_SETUP YY_BREAK case 157: YY_RULE_SETUP -#line 1262 "parser.l" +#line 1263 "parser.l" { if (yyextra->read_json_int && !strpbrk(yytext, ".eE")) { wchar_t *wtxt = utf8_dup_from(yytext); @@ -6631,7 +6632,7 @@ YY_LINENO_REWIND_TO(yy_bp + 4); yyg->yy_c_buf_p = yy_cp = yy_bp + 4; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 1273 "parser.l" +#line 1274 "parser.l" { yylval->val = t; return JSKW; @@ -6644,7 +6645,7 @@ YY_LINENO_REWIND_TO(yy_bp + 5); yyg->yy_c_buf_p = yy_cp = yy_bp + 5; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 1278 "parser.l" +#line 1279 "parser.l" { yylval->val = nil; return JSKW; @@ -6657,7 +6658,7 @@ YY_LINENO_REWIND_TO(yy_bp + 4); yyg->yy_c_buf_p = yy_cp = yy_bp + 4; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 1283 "parser.l" +#line 1284 "parser.l" { yylval->val = null_s; return JSKW; @@ -6665,7 +6666,7 @@ YY_RULE_SETUP YY_BREAK case 161: YY_RULE_SETUP -#line 1288 "parser.l" +#line 1289 "parser.l" { if (strcmp("true", yytext) == 0) { yylval->val = t; @@ -6690,7 +6691,7 @@ YY_RULE_SETUP YY_BREAK case 162: YY_RULE_SETUP -#line 1310 "parser.l" +#line 1311 "parser.l" { yy_push_state(JLIT, yyscanner); return yytext[0]; @@ -6698,7 +6699,7 @@ YY_RULE_SETUP YY_BREAK case 163: YY_RULE_SETUP -#line 1315 "parser.l" +#line 1316 "parser.l" { yy_push_state(JMARKER, yyscanner); yy_push_state(NESTED, yyscanner); @@ -6707,7 +6708,7 @@ YY_RULE_SETUP YY_BREAK case 164: YY_RULE_SETUP -#line 1321 "parser.l" +#line 1322 "parser.l" { yy_push_state(JMARKER, yyscanner); yy_push_state(NESTED, yyscanner); @@ -6716,7 +6717,7 @@ YY_RULE_SETUP YY_BREAK case 165: YY_RULE_SETUP -#line 1327 "parser.l" +#line 1328 "parser.l" { return yytext[0]; } @@ -6724,20 +6725,20 @@ YY_RULE_SETUP case 166: /* rule 166 can match eol */ YY_RULE_SETUP -#line 1331 "parser.l" +#line 1332 "parser.l" { yyextra->lineno++; } YY_BREAK case 167: YY_RULE_SETUP -#line 1335 "parser.l" +#line 1336 "parser.l" { } YY_BREAK case 168: YY_RULE_SETUP -#line 1338 "parser.l" +#line 1339 "parser.l" { yyerrorf(yyg, lit("bad character ~s in JSON literal"), chr(yytext[0]), nao); @@ -6745,17 +6746,17 @@ YY_RULE_SETUP YY_BREAK case 169: YY_RULE_SETUP -#line 1343 "parser.l" +#line 1344 "parser.l" { internal_error("scanner processed input JMARKER state"); } YY_BREAK case 170: YY_RULE_SETUP -#line 1347 "parser.l" +#line 1348 "parser.l" ECHO; YY_BREAK -#line 6759 "lex.yy.c" +#line 6760 "lex.yy.c" case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(SPECIAL): case YY_STATE_EOF(BRACED): @@ -7968,7 +7969,7 @@ void yyfree (void * ptr , yyscan_t yyscanner) #define YYTABLES_NAME "yytables" -#line 1347 "parser.l" +#line 1348 "parser.l" static int directive_tok(scanner_t *yyscanner, int tok, int state) @@ -8179,6 +8180,24 @@ void scrub_scanner(scanner_t *yyg, int yy_char, wchar_t *lexeme) } } +int scanner_has_buffered_bytes(scanner_t *yyg) +{ + YY_BUFFER_STATE bs = YY_CURRENT_BUFFER; + return bs ? (bs->yy_ch_buf + yyg->yy_n_chars - yyg->yy_c_buf_p) > 0 : 0; +} + +unsigned char *scanner_get_buffered_bytes(scanner_t *yyg, size_t *psize) +{ + YY_BUFFER_STATE bs = YY_CURRENT_BUFFER; + ptrdiff_t size = bs->yy_ch_buf + yyg->yy_n_chars - yyg->yy_c_buf_p; + unsigned char *ret = chk_copy_obj(coerce(mem_t *, yyg->yy_c_buf_p), + convert(size_t, size)); + ret[0] = yyg->yy_hold_char; + *psize = size; + yy_flush_buffer(bs, yyg); + return ret; +} + void parser_l_init(void) { prot1(&form_to_ln_hash); diff --git a/parser.c b/parser.c index adc67af7..1faa0578 100644 --- a/parser.c +++ b/parser.c @@ -75,6 +75,8 @@ #include "txr.h" #include "linenoise/linenoise.h" +#define SHADOW_TAB_SIZE 16 + val parser_s, unique_s, circref_s; val listener_hist_len_s, listener_multi_line_p_s, listener_sel_inclusive_p_s; val listener_pprint_s, listener_greedy_eval_s, listener_auto_compound_s; @@ -90,6 +92,22 @@ static int repl_level = 0; static val stream_parser_hash, catch_all, catch_error; +static struct shadow_ops_map { + struct strm_ops *from, *to; +} shadow_tab[SHADOW_TAB_SIZE]; + +struct shadow_context { + struct strm_ops *orig_ops; + val stream; + unsigned char *buf; + size_t size, index; +}; + +struct shadow_ungetch { + unsigned char buf[8]; + unsigned char *ptr; +}; + static void yy_tok_mark(struct yy_token *tok) { gc_conservative_mark(tok->yy_lval.val); @@ -749,6 +767,7 @@ static val lisp_parse_impl(val self, enum prime_parser prime, parse(pi, if3(std_error != std_null, name, null_string), prime); mut(parser); gc_state(gc); + parser_shadow_stream_attach(self, input_stream); if (pi->syntax_tree == nao && pi->errors == 0 && !pi->eof) continue; @@ -2089,6 +2108,257 @@ static val me_json(val form, val menv) return cdr(form); } +static struct strm_ops *shadow_detach(val stream) +{ + struct strm_base *s = coerce(struct strm_base *, stream->co.handle); + struct strm_ops *ops = coerce(struct strm_ops *, stream->co.ops); + struct shadow_context *sc = coerce(struct shadow_context *, s->shadow_obj); + + if (sc) { + s->shadow_obj = 0; + stream->co.ops = coerce(struct cobj_ops *, ops = sc->orig_ops); + free (sc); + } + + return ops; +} + +static void shadow_destroy_op(val stream) +{ + struct strm_ops *ops = shadow_detach(stream); + ops->cobj_ops.destroy(stream); +} + +static void shadow_mark_op(val stream) +{ + struct strm_base *s = coerce(struct strm_base *, stream->co.handle); + struct shadow_context *sc = coerce(struct shadow_context *, s->shadow_obj); + sc->orig_ops->cobj_ops.mark(stream); +} + +static val shadow_put_string(val stream, val str) +{ + struct strm_ops *ops = shadow_detach(stream); + return ops->put_string(stream, str); +} + +static val shadow_put_char(val stream, val ch) +{ + struct strm_ops *ops = shadow_detach(stream); + return ops->put_char(stream, ch); +} + +static val shadow_put_byte(val stream, int byte) +{ + struct strm_ops *ops = shadow_detach(stream); + return ops->put_byte(stream, byte); +} + +static int shadow_get_char_callback(mem_t *ctx) +{ + struct strm_base *s = coerce(struct strm_base *, ctx); + struct shadow_context *sc = coerce(struct shadow_context *, s->shadow_obj); + if (sc->index < sc->size) { + return sc->buf[sc->index++]; + } else { + val byte = sc->orig_ops->get_byte(sc->stream); + return byte ? c_n(byte) : EOF; + } +} + +static val shadow_get_char(val stream) +{ + struct strm_base *s = coerce(struct strm_base *, stream->co.handle); + wint_t wch = utf8_decode(&s->ud, shadow_get_char_callback, + coerce(mem_t *, s)); + struct shadow_context *sc = coerce(struct shadow_context *, s->shadow_obj); + struct strm_ops *ops = sc->index < sc->size + ? coerce(struct strm_ops *, stream->co.ops) + : shadow_detach(stream); + int ch; + + if (ops->unget_byte) + while ((ch = utf8_getc(&s->ud)) != EOF) + ops->unget_byte(stream, ch); + + return (wch != WEOF) ? chr(wch) : nil; +} + +static int shadow_unget_char_callback(int ch, mem_t *ctx) +{ + struct shadow_ungetch *su = coerce(struct shadow_ungetch *, ctx); + return (su->ptr > su->buf) ? *--su->ptr = ch, 1 : 0; +} + +static val shadow_unget_char(val stream, val ch) + +{ + struct strm_base *s = coerce(struct strm_base *, stream->co.handle); + struct shadow_context *sc = coerce(struct shadow_context *, s->shadow_obj); + struct shadow_ungetch su; + unsigned char *bend = su.buf + sizeof su.buf; + + su.ptr = bend; + + (void) utf8_encode(c_chr(ch), shadow_unget_char_callback, coerce(mem_t *, &su)); + + if (convert(size_t, bend - su.ptr) > sc->index) + uw_throwf(file_error_s, + lit("unget-char: cannot push past beginning of byte stream"), + nao); + + while (su.ptr < bend) + sc->buf[--sc->index] = *su.ptr++; + + return ch; +} + +static val shadow_get_byte(val stream) +{ + struct strm_base *s = coerce(struct strm_base *, stream->co.handle); + struct shadow_context *sc = coerce(struct shadow_context *, s->shadow_obj); + + if (sc->index < sc->size) { + return num(sc->buf[sc->index++]); + } else { + struct strm_ops *ops = shadow_detach(sc->stream); + return ops->get_byte(stream); + } +} + +static val shadow_unget_byte(val stream, int byte) +{ + struct strm_base *s = coerce(struct strm_base *, stream->co.handle); + struct shadow_context *sc = coerce(struct shadow_context *, s->shadow_obj); + + if (sc->index == 0) + uw_throwf(file_error_s, + lit("unget-byte: cannot push past beginning of byte stream"), + nao); + + sc->buf[--sc->index] = byte; + return num_fast(byte); +} + +static ucnum shadow_put_buf(val stream, mem_t *ptr, ucnum len, ucnum pos) +{ + struct strm_ops *ops = shadow_detach(stream); + return ops->put_buf(stream, ptr, len, pos); +} + +static val shadow_close(val stream, val throw_on_error) +{ + struct strm_ops *ops = shadow_detach(stream); + return ops->close(stream, throw_on_error); +} + +static val shadow_flush(val stream) +{ + struct strm_ops *ops = shadow_detach(stream); + return ops->flush(stream); +} + +static val shadow_seek(val stream, val off, enum strm_whence whence) +{ + struct strm_ops *ops = shadow_detach(stream); + return ops->seek(stream, off, whence); +} + +static val shadow_truncate(val stream, val len) +{ + struct strm_ops *ops = shadow_detach(stream); + return ops->truncate(stream, len); +} + +static struct strm_ops shadow_ops_template = + strm_ops_init(cobj_ops_init(eq, + stream_print_op, + shadow_destroy_op, + shadow_mark_op, + cobj_eq_hash_op, + 0), + wli("shadow-stream"), + shadow_put_string, shadow_put_char, shadow_put_byte, + generic_get_line, shadow_get_char, shadow_get_byte, + shadow_unget_char, shadow_unget_byte, + shadow_put_buf, generic_fill_buf, + shadow_close, shadow_flush, shadow_seek, shadow_truncate, + 0, 0, + 0, 0, 0, 0); + +static void customize_shad_ops(struct strm_ops *shad_ops, + struct strm_ops *ops) +{ + void (**spfn)(void) = coerce(void (**)(void), &ops->put_string); + void (**send)(void) = coerce(void (**)(void), &ops->set_sock_peer); + void (**dpfn)(void) = coerce(void (**)(void), &shad_ops->put_string); + + while (spfn <= send) { + if (*spfn == 0) + *dpfn = 0; + else if (*dpfn == 0) + *dpfn = *spfn; + spfn++; + dpfn++; + } +} + +/* + * The purpose of this oddly named shadow stream is to claw back + * data which has been read from the stream and placed into a buffer + * inside our flex-generated scanner. The shadow stream patches + * into the target stream, replacing its operations with its own. + * When the shadow stream is done doling out the recovered bytes, + * it uninstalls itself via shadow_detach. Output and positioning + * operations also trigger a detach. + */ +void parser_shadow_stream_attach(val self, val target_stream) +{ + struct strm_base *s = coerce(struct strm_base *, + cobj_handle(self, target_stream, stream_cls)); + struct strm_ops *ops = coerce(struct strm_ops *, target_stream->co.ops); + + if (!s->shadow_obj) { + val parser = gethash(stream_parser_hash, target_stream); + parser_t *p = parser ? coerce(parser_t *, cobj_handle(self, parser, parser_cls)) : 0; + scanner_t *sc = p ? p->scanner : 0; + struct strm_ops *shad_ops = 0; + struct shadow_context *shad = 0; + int i; + + if (!p || !sc || !scanner_has_buffered_bytes(sc)) + return; + + for (i = 0; i < SHADOW_TAB_SIZE; i++) { + if (shadow_tab[i].from == 0) + break; + if (shadow_tab[i].from == ops) { + shad_ops = shadow_tab[i].to; + break; + } + } + + if (i >= SHADOW_TAB_SIZE) + internal_error("shadow_tab overflow"); + + if (shad_ops == 0) { + shad_ops = coerce(struct strm_ops *, + chk_copy_obj(coerce(mem_t *, &shadow_ops_template), + sizeof shadow_ops_template)); + customize_shad_ops(shad_ops, ops); + } + + shad = coerce(struct shadow_context *, chk_malloc(sizeof *shad)); + shad->orig_ops = ops; + shad->stream = target_stream; + shad->buf = scanner_get_buffered_bytes(sc, &shad->size); + shad->index = 0; + + s->shadow_obj = coerce(mem_t *, shad); + target_stream->co.ops = coerce(struct cobj_ops *, shad_ops); + } +} + void parse_init(void) { parser_s = intern(lit("parser"), user_package); @@ -2136,3 +2406,12 @@ void parse_init(void) reg_fun(intern(lit("repl"), system_package), func_n4(repl)); reg_mac(json_s, func_n2(me_json)); } + +void parser_free_all(void) +{ + int i; + for (i = 0; i < SHADOW_TAB_SIZE; i++) { + free(shadow_tab[i].to); + shadow_tab[i].to = 0; + } +} diff --git a/parser.h b/parser.h index c25dc6f9..4dd55cc9 100644 --- a/parser.h +++ b/parser.h @@ -115,6 +115,8 @@ void parser_resolve_circ(parser_t *); void parser_circ_def(parser_t *, val num, val expr); val parser_circ_ref(parser_t *, val num); void scrub_scanner(scanner_t *, int yy_char, wchar_t *lexeme); +int scanner_has_buffered_bytes(scanner_t *); +unsigned char *scanner_get_buffered_bytes(scanner_t *, size_t *); int parse_once(val self, val stream, val name); int parse(parser_t *parser, val name, enum prime_parser); val source_loc(val form); @@ -153,4 +155,6 @@ val ensure_parser(val stream, val name); val parser_set_lineno(val self, val stream, val lineno); val parser_errors(val parser); val parse_errors(val stream); +void parser_shadow_stream_attach(val self, val target_stream); void parse_init(void); +void parser_free_all(void); diff --git a/parser.l b/parser.l index 67481d22..588362f9 100644 --- a/parser.l +++ b/parser.l @@ -28,6 +28,7 @@ %{ +#include #include #include #include @@ -1554,6 +1555,24 @@ void scrub_scanner(scanner_t *yyg, int yy_char, wchar_t *lexeme) } } +int scanner_has_buffered_bytes(scanner_t *yyg) +{ + YY_BUFFER_STATE bs = YY_CURRENT_BUFFER; + return bs ? (bs->yy_ch_buf + yyg->yy_n_chars - yyg->yy_c_buf_p) > 0 : 0; +} + +unsigned char *scanner_get_buffered_bytes(scanner_t *yyg, size_t *psize) +{ + YY_BUFFER_STATE bs = YY_CURRENT_BUFFER; + ptrdiff_t size = bs->yy_ch_buf + yyg->yy_n_chars - yyg->yy_c_buf_p; + unsigned char *ret = chk_copy_obj(coerce(mem_t *, yyg->yy_c_buf_p), + convert(size_t, size)); + ret[0] = yyg->yy_hold_char; + *psize = size; + yy_flush_buffer(bs, yyg); + return ret; +} + void parser_l_init(void) { prot1(&form_to_ln_hash); diff --git a/stream.c b/stream.c index e19438d8..e7558c91 100644 --- a/stream.c +++ b/stream.c @@ -121,7 +121,7 @@ static val shell, shell_arg; void strm_base_init(struct strm_base *s) { static struct strm_base init = { - indent_off, 60, 10, 0, 0, 0, 0, 0, nil, 0, utf8_decoder_initializer + indent_off, 60, 10, 0, 0, 0, 0, 0, nil, 0, 0, utf8_decoder_initializer }; *s = init; } @@ -379,13 +379,13 @@ static ucnum generic_put_buf(val stream, mem_t *ptr, ucnum len, ucnum pos) return i; } -static ucnum generic_fill_buf(val stream, mem_t *ptr, ucnum len, ucnum pos) +ucnum generic_fill_buf(val stream, mem_t *ptr, ucnum len, ucnum pos) { val self = lit("fill-buf"); - struct strm_ops *ops = coerce(struct strm_ops *, stream->co.ops); ucnum i; for (i = pos; i < len; i++) { + struct strm_ops *ops = coerce(struct strm_ops *, stream->co.ops); val byte = ops->get_byte(stream); if (!byte) break; @@ -832,7 +832,6 @@ static val stdio_get_fd(val stream) val generic_get_line(val stream) { - struct strm_ops *ops = coerce(struct strm_ops *, stream->co.ops); const size_t min_size = 512; size_t size = 0; size_t fill = 0; @@ -842,6 +841,7 @@ val generic_get_line(val stream) uw_simple_catch_begin; for (;;) { + struct strm_ops *ops = coerce(struct strm_ops *, stream->co.ops); val chr = ops->get_char(stream); wint_t ch = chr ? convert(wint_t, c_chr(chr)) : WEOF; diff --git a/stream.h b/stream.h index 65f0a336..0f207c65 100644 --- a/stream.h +++ b/stream.h @@ -60,6 +60,7 @@ struct strm_base { cnum max_depth; val close_result; struct strm_ctx *ctx; + mem_t *shadow_obj; utf8_decoder_t ud; }; @@ -196,6 +197,7 @@ val normalize_mode(struct stdio_mode *m, val mode_str, val normalize_mode_no_bin(struct stdio_mode *m, val mode_str, struct stdio_mode m_dfl, val self); val set_mode_props(const struct stdio_mode m, val stream); +ucnum generic_fill_buf(val stream, mem_t *ptr, ucnum len, ucnum pos); val generic_get_line(val stream); val errno_to_string(val err); val make_null_stream(void); diff --git a/tests/018/streams.tl b/tests/018/streams.tl index 1350131f..82212f74 100644 --- a/tests/018/streams.tl +++ b/tests/018/streams.tl @@ -72,3 +72,8 @@ (get-byte s) #xe3 (get-char s) #\xdc81 (unget-char #\x3042 s) :error)) + +(with-in-string-byte-stream (s "[1][foo]") + (mtest + (get-json s) #(1.0) + (get-string s) "[foo]")) diff --git a/txr.c b/txr.c index 97fc5bdb..2b1ab1f1 100644 --- a/txr.c +++ b/txr.c @@ -419,6 +419,7 @@ static void free_all(void) regex_free_all(); gc_free_all(); arith_free_all(); + parser_free_all(); free(progname); } } -- cgit v1.2.3