From 454ae7c0f350842ab40a30ff4a2643cd76e8e277 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Mon, 27 Apr 2015 18:26:38 +0300 Subject: Fix to Jan 7 2015 regexp parsing fix. --- ChangeLog | 5 +++++ awkgram.c | 4 ++-- awkgram.y | 4 ++-- test/ChangeLog | 6 ++++++ test/Makefile.am | 5 ++++- test/Makefile.in | 10 +++++++++- test/Maketests | 5 +++++ test/regexpbrack2.awk | 2 ++ test/regexpbrack2.in | 2 ++ test/regexpbrack2.ok | 2 ++ 10 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 test/regexpbrack2.awk create mode 100644 test/regexpbrack2.in create mode 100644 test/regexpbrack2.ok diff --git a/ChangeLog b/ChangeLog index 3696d136..e5d473c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2015-04-27 Arnold D. Robbins + + * awkgram.y (yylex): Make change of Jan 7 for parsing regexps + work better. Thanks to Nelson Beebe. + 2015-04-26 Arnold D. Robbins * dfa.c: Sync with grep. diff --git a/awkgram.c b/awkgram.c index 4fd53ce0..530aa27c 100644 --- a/awkgram.c +++ b/awkgram.c @@ -5388,8 +5388,8 @@ yylex(void) pushback(); break; case ']': - if (tok[-1] == '[' - || (tok[-2] == '[' && tok[-1] == '^')) + if ((tok[-1] == '[' && tok[-2] != '\\') + || (tok[-2] == '[' && tok[-3] != '\\' && tok[-1] == '^')) /* do nothing */; else in_brack--; diff --git a/awkgram.y b/awkgram.y index fad2b963..31751e8e 100644 --- a/awkgram.y +++ b/awkgram.y @@ -3049,8 +3049,8 @@ yylex(void) pushback(); break; case ']': - if (tok[-1] == '[' - || (tok[-2] == '[' && tok[-1] == '^')) + if ((tok[-1] == '[' && tok[-2] != '\\') + || (tok[-2] == '[' && tok[-3] != '\\' && tok[-1] == '^')) /* do nothing */; else in_brack--; diff --git a/test/ChangeLog b/test/ChangeLog index 201f8a32..cde85f80 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,9 @@ +2015-04-27 Arnold D. Robbins + + * Makefile.am (regexpbrack2): New test. + * regexpbrack2.awk, regexpbrack2.in, regexpbrack2.ok: New files. + Thanks to Nelson Beebe. + 2015-04-14 Arnold D. Robbins * indirectbuiltin.awk: Add another test (gensub 3 args). diff --git a/test/Makefile.am b/test/Makefile.am index e2ccdc56..bf1d073c 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -768,6 +768,9 @@ EXTRA_DIST = \ regexpbrack.awk \ regexpbrack.in \ regexpbrack.ok \ + regexpbrack2.awk \ + regexpbrack2.in \ + regexpbrack2.ok \ regexprange.awk \ regexprange.ok \ reginttrad.awk \ @@ -1026,7 +1029,7 @@ BASIC_TESTS = \ paramdup paramres paramtyp paramuninitglobal parse1 parsefld parseme \ pcntplus posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \ prt1eval prtoeval \ - rand range1 rebt8b1 redfilnm regeq regexpbrack regexprange regrange reindops \ + rand range1 rebt8b1 redfilnm regeq regexpbrack regexpbrack2 regexprange regrange reindops \ reparse resplit rri1 rs rsnul1nl rsnulbig rsnulbig2 rstest1 rstest2 \ rstest3 rstest4 rstest5 rswhite \ scalar sclforin sclifin sortempty sortglos splitargv splitarr splitdef \ diff --git a/test/Makefile.in b/test/Makefile.in index 6ed035fd..0cd10604 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -1025,6 +1025,9 @@ EXTRA_DIST = \ regexpbrack.awk \ regexpbrack.in \ regexpbrack.ok \ + regexpbrack2.awk \ + regexpbrack2.in \ + regexpbrack2.ok \ regexprange.awk \ regexprange.ok \ reginttrad.awk \ @@ -1282,7 +1285,7 @@ BASIC_TESTS = \ paramdup paramres paramtyp paramuninitglobal parse1 parsefld parseme \ pcntplus posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \ prt1eval prtoeval \ - rand range1 rebt8b1 redfilnm regeq regexpbrack regexprange regrange reindops \ + rand range1 rebt8b1 redfilnm regeq regexpbrack regexpbrack2 regexprange regrange reindops \ reparse resplit rri1 rs rsnul1nl rsnulbig rsnulbig2 rstest1 rstest2 \ rstest3 rstest4 rstest5 rswhite \ scalar sclforin sclifin sortempty sortglos splitargv splitarr splitdef \ @@ -3212,6 +3215,11 @@ regexpbrack: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +regexpbrack2: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + regexprange: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index 5ee49061..e9c89852 100644 --- a/test/Maketests +++ b/test/Maketests @@ -712,6 +712,11 @@ regexpbrack: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +regexpbrack2: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + regexprange: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/regexpbrack2.awk b/test/regexpbrack2.awk new file mode 100644 index 00000000..81424844 --- /dev/null +++ b/test/regexpbrack2.awk @@ -0,0 +1,2 @@ +NR == 1 { gsub(/\\\\[;?!,()<>|+@%\]\[]/, " ") ; print "\"" $0 "\"" } +NR == 2 { gsub(/\\\\[;?!,()<>|+@%\]\[^]/, " ") ; print "\"" $0 "\"" } diff --git a/test/regexpbrack2.in b/test/regexpbrack2.in new file mode 100644 index 00000000..42888dd0 --- /dev/null +++ b/test/regexpbrack2.in @@ -0,0 +1,2 @@ +test: \\; \\? \\! +test: \\; \\? \\! diff --git a/test/regexpbrack2.ok b/test/regexpbrack2.ok new file mode 100644 index 00000000..9c2a2922 --- /dev/null +++ b/test/regexpbrack2.ok @@ -0,0 +1,2 @@ +"test: " +"test: " -- cgit v1.2.3 From 7bda05c66848de97a7b43aa3e37ff4336f1b3220 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Tue, 28 Apr 2015 09:28:04 +0300 Subject: Fix bracket handling. "This time for sure." --- ChangeLog | 5 +++++ awkgram.c | 49 ++++++++++++++++++++++++++++--------------------- awkgram.y | 49 ++++++++++++++++++++++++++++--------------------- 3 files changed, 61 insertions(+), 42 deletions(-) diff --git a/ChangeLog b/ChangeLog index e5d473c0..3e9764a2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2015-04-28 Arnold D. Robbins + + * awkgram.y (yylex): Rework the bracket handling from zero. + Thanks to Michal Jaegermann for yet another test case. + 2015-04-27 Arnold D. Robbins * awkgram.y (yylex): Make change of Jan 7 for parsing regexps diff --git a/awkgram.c b/awkgram.c index 530aa27c..14e29d98 100644 --- a/awkgram.c +++ b/awkgram.c @@ -5358,21 +5358,24 @@ yylex(void) thisline = NULL; if (want_regexp) { int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ + int b_index = -1; + int cur_index = 0; + /* - * Counting brackets is non-trivial. [[] is ok, - * and so is [\]], with a point being that /[/]/ as a regexp - * constant has to work. + * Here is what's ok with brackets: + * + * [[] [^[] []] [^]] [.../...] + * [...\[...] [...\]...] [...\/...] + * + * (Remember that all of the above are inside /.../) + * + * The code for \ handles \[, \] and \/. * - * Do not count [ or ] if either one is preceded by a \. - * A `[' should be counted if - * a) it is the first one so far (in_brack == 0) - * b) it is the `[' in `[:' - * A ']' should be counted if not preceded by a \, since - * it is either closing `:]' or just a plain list. - * According to POSIX, []] is how you put a ] into a set. - * Try to handle that too. + * Otherwise, track the first open [ position, and if + * an embedded [ or ] occurs, allow it to pass through + * if it's right after the first [ or after [^. * - * The code for \ handles \[ and \]. + * Whew! */ want_regexp = false; @@ -5382,17 +5385,21 @@ yylex(void) if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { case '[': - /* one day check for `.' and `=' too */ - if (nextc(false) == ':' || in_brack == 0) - in_brack++; - pushback(); - break; case ']': - if ((tok[-1] == '[' && tok[-2] != '\\') - || (tok[-2] == '[' && tok[-3] != '\\' && tok[-1] == '^')) - /* do nothing */; - else + cur_index = tok - tokstart; + if (in_brack > 0 + && (cur_index == b_index + 1 + || (cur_index == b_index + 2 && tok[-1] == '^'))) + ; /* do nothing */ + else if (c == '[') { + in_brack++; + if (in_brack == 1) + b_index = tok - tokstart; + } else { in_brack--; + if (in_brack == 0) + b_index = -1; + } break; case '\\': if ((c = nextc(false)) == END_FILE) { diff --git a/awkgram.y b/awkgram.y index 31751e8e..beb85d5a 100644 --- a/awkgram.y +++ b/awkgram.y @@ -3019,21 +3019,24 @@ yylex(void) thisline = NULL; if (want_regexp) { int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ + int b_index = -1; + int cur_index = 0; + /* - * Counting brackets is non-trivial. [[] is ok, - * and so is [\]], with a point being that /[/]/ as a regexp - * constant has to work. + * Here is what's ok with brackets: + * + * [[] [^[] []] [^]] [.../...] + * [...\[...] [...\]...] [...\/...] + * + * (Remember that all of the above are inside /.../) + * + * The code for \ handles \[, \] and \/. * - * Do not count [ or ] if either one is preceded by a \. - * A `[' should be counted if - * a) it is the first one so far (in_brack == 0) - * b) it is the `[' in `[:' - * A ']' should be counted if not preceded by a \, since - * it is either closing `:]' or just a plain list. - * According to POSIX, []] is how you put a ] into a set. - * Try to handle that too. + * Otherwise, track the first open [ position, and if + * an embedded [ or ] occurs, allow it to pass through + * if it's right after the first [ or after [^. * - * The code for \ handles \[ and \]. + * Whew! */ want_regexp = false; @@ -3043,17 +3046,21 @@ yylex(void) if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { case '[': - /* one day check for `.' and `=' too */ - if (nextc(false) == ':' || in_brack == 0) - in_brack++; - pushback(); - break; case ']': - if ((tok[-1] == '[' && tok[-2] != '\\') - || (tok[-2] == '[' && tok[-3] != '\\' && tok[-1] == '^')) - /* do nothing */; - else + cur_index = tok - tokstart; + if (in_brack > 0 + && (cur_index == b_index + 1 + || (cur_index == b_index + 2 && tok[-1] == '^'))) + ; /* do nothing */ + else if (c == '[') { + in_brack++; + if (in_brack == 1) + b_index = tok - tokstart; + } else { in_brack--; + if (in_brack == 0) + b_index = -1; + } break; case '\\': if ((c = nextc(false)) == END_FILE) { -- cgit v1.2.3 From f088a3efc8aefc47f0bfe7824732aae4283b4c15 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Tue, 28 Apr 2015 16:39:13 +0300 Subject: Make call-by-value work again for $0. --- ChangeLog | 7 +++++++ eval.c | 8 +++++++- test/ChangeLog | 5 +++++ test/Makefile.am | 5 ++++- test/Makefile.in | 10 +++++++++- test/Maketests | 5 +++++ test/inpref.awk | 9 +++++++++ test/inpref.in | 2 ++ test/inpref.ok | 2 ++ 9 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 test/inpref.awk create mode 100644 test/inpref.in create mode 100644 test/inpref.ok diff --git a/ChangeLog b/ChangeLog index 3e9764a2..448bdcaa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,13 @@ * awkgram.y (yylex): Rework the bracket handling from zero. Thanks to Michal Jaegermann for yet another test case. + Unrelated: + + * eval.c (setup_frame): Restore call-by-value for $0. This was + necessitated by the changes on 2014-11-11 for conserving + memory use. Thanks to Andrew Schorr for the report and isolating + the cause of the problem. + 2015-04-27 Arnold D. Robbins * awkgram.y (yylex): Make change of Jan 7 for parsing regexps diff --git a/eval.c b/eval.c index 12776846..dfb99a4c 100644 --- a/eval.c +++ b/eval.c @@ -1325,7 +1325,13 @@ setup_frame(INSTRUCTION *pc) if (m->type == Node_param_list) m = GET_PARAM(m->param_cnt); - + + /* $0 needs to be passed by value to a function */ + if (m == fields_arr[0]) { + DEREF(m); + m = dupnode(m); + } + switch (m->type) { case Node_var_new: case Node_var_array: diff --git a/test/ChangeLog b/test/ChangeLog index cde85f80..437760ee 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2015-04-27 Andrew J. Schorr + + * Makefile.am (inpref): New test. + * inpref.awk, inpref.in, inpref.ok: New files. + 2015-04-27 Arnold D. Robbins * Makefile.am (regexpbrack2): New test. diff --git a/test/Makefile.am b/test/Makefile.am index bf1d073c..edad8bb6 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -406,6 +406,9 @@ EXTRA_DIST = \ icasers.awk \ icasers.in \ icasers.ok \ + inpref.awk \ + inpref.in \ + inpref.ok \ id.awk \ id.ok \ igncdym.awk \ @@ -1018,7 +1021,7 @@ BASIC_TESTS = \ gsubasgn gsubtest gsubtst2 gsubtst3 gsubtst4 gsubtst5 gsubtst6 \ gsubtst7 gsubtst8 \ hex hsprint \ - inputred intest intprec iobug1 \ + inpref inputred intest intprec iobug1 \ leaddig leadnl litoct longsub longwrds \ manglprm math membug1 messages minusstr mmap8k mtchi18n \ nasty nasty2 negexp negrange nested nfldstr nfloop nfneg nfset nlfldsep \ diff --git a/test/Makefile.in b/test/Makefile.in index 0cd10604..b5492261 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -663,6 +663,9 @@ EXTRA_DIST = \ icasers.awk \ icasers.in \ icasers.ok \ + inpref.awk \ + inpref.in \ + inpref.ok \ id.awk \ id.ok \ igncdym.awk \ @@ -1274,7 +1277,7 @@ BASIC_TESTS = \ gsubasgn gsubtest gsubtst2 gsubtst3 gsubtst4 gsubtst5 gsubtst6 \ gsubtst7 gsubtst8 \ hex hsprint \ - inputred intest intprec iobug1 \ + inpref inputred intest intprec iobug1 \ leaddig leadnl litoct longsub longwrds \ manglprm math membug1 messages minusstr mmap8k mtchi18n \ nasty nasty2 negexp negrange nested nfldstr nfloop nfneg nfset nlfldsep \ @@ -2908,6 +2911,11 @@ hsprint: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +inpref: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + inputred: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index e9c89852..95d9e80a 100644 --- a/test/Maketests +++ b/test/Maketests @@ -405,6 +405,11 @@ hsprint: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +inpref: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + inputred: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/inpref.awk b/test/inpref.awk new file mode 100644 index 00000000..d64ffe7f --- /dev/null +++ b/test/inpref.awk @@ -0,0 +1,9 @@ +function test(x) { + print x + getline + print x +} + +{ + test($0) +} diff --git a/test/inpref.in b/test/inpref.in new file mode 100644 index 00000000..a32119c8 --- /dev/null +++ b/test/inpref.in @@ -0,0 +1,2 @@ +hello +goodbye diff --git a/test/inpref.ok b/test/inpref.ok new file mode 100644 index 00000000..317e9677 --- /dev/null +++ b/test/inpref.ok @@ -0,0 +1,2 @@ +hello +hello -- cgit v1.2.3