diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | builtin.c | 36 | ||||
-rw-r--r-- | pc/Makefile.tst | 7 | ||||
-rw-r--r-- | test/ChangeLog | 5 | ||||
-rw-r--r-- | test/Makefile.am | 4 | ||||
-rw-r--r-- | test/Makefile.in | 9 | ||||
-rw-r--r-- | test/Maketests | 5 | ||||
-rw-r--r-- | test/regexsub.awk | 48 | ||||
-rw-r--r-- | test/regexsub.ok | 30 |
9 files changed, 144 insertions, 7 deletions
@@ -1,3 +1,10 @@ +2021-08-13 Arnold D. Robbins <arnold@skeeve.com> + + * builtin.c (do_sub): Rationalize handling of strongly typed + regex as argument to sub/gsub, as well as rationalize the return + value from gensub to always be string. Thanks to John Naman + <jnaman2@gmail.com> for the bug report. + 2021-08-05 Andrew J. Schorr <aschorr@telemetry-investments.com> * mpfr.c (do_mpfr_func): New argument, warn_negative. If true, @@ -2934,8 +2934,6 @@ do_sub(int nargs, unsigned int flags) RESTART(rp, target->stptr) > target->stlen) goto done; - target->flags |= STRING; - text = target->stptr; textlen = target->stlen; @@ -3183,6 +3181,10 @@ done: DEREF(target); assert(buf != NULL); return make_str_node(buf, textlen, ALREADY_MALLOCED); + } else if ((target->flags & STRING) == 0) { + /* return a copy of original string */ + DEREF(target); + return make_str_node(target->stptr, target->stlen, 0); } /* return the original string */ @@ -3193,8 +3195,34 @@ done: if ((flags & LITERAL) != 0) DEREF(target); else if (matches > 0) { - unref(*lhs); - *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); + /* + * 8/2021: There's a bit of a song and dance here. If someone does + * + * x = @/abc/ + * sub(/b/, "x", x) + * + * What should the type of x be after the call? Does it get converted + * to string? Or does it remain a regexp? We've decided to let it + * remain a regexp. In that case, we have to update the compiled + * regular expression that it holds. + */ + bool is_regex = false; + NODE *target = *lhs; + + if ((target->flags & REGEX) != 0) { + is_regex = true; + + // free old regex registers + refree(target->typed_re->re_reg[0]); + if (target->typed_re->re_reg[1] != NULL) + refree(target->typed_re->re_reg[1]); + freenode(target->typed_re); + } + unref(*lhs); // nuke original value + if (is_regex) + *lhs = make_typed_regex(buf, textlen); + else + *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); } return make_number((AWKNUM) matches); diff --git a/pc/Makefile.tst b/pc/Makefile.tst index 4b99204f..b9572ba1 100644 --- a/pc/Makefile.tst +++ b/pc/Makefile.tst @@ -216,7 +216,7 @@ GAWK_EXT_TESTS = \ procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \ profile7 profile8 profile9 profile10 profile11 profile12 profile13 \ profile14 profile15 pty1 pty2 \ - rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \ + rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \ rsstart1 rsstart2 rsstart3 rstest6 \ sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \ sourcesplit split_after_fpat \ @@ -3152,6 +3152,11 @@ profile15: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +regexsub: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + regnul1: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/ChangeLog b/test/ChangeLog index c92f0188..c3dcd555 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2021-08-13 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (EXTRA_DIST): regexsub, new test. + * regexsub.awk, regexsub.ok: New files. + 2021-05-15 Eli Zaretskii <eliz@gnu.org> * iolint.ok: Reorder results to follow the order of iolint.awk. diff --git a/test/Makefile.am b/test/Makefile.am index 3f9e9308..7ee23813 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1057,6 +1057,8 @@ EXTRA_DIST = \ regexpbrack2.ok \ regexprange.awk \ regexprange.ok \ + regexsub.awk \ + regexsub.ok \ reginttrad.awk \ reginttrad.ok \ regnul1.awk \ @@ -1456,7 +1458,7 @@ GAWK_EXT_TESTS = \ procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \ profile7 profile8 profile9 profile10 profile11 profile12 profile13 \ profile14 profile15 pty1 pty2 \ - rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \ + rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \ rsstart1 rsstart2 rsstart3 rstest6 \ sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \ sourcesplit split_after_fpat \ diff --git a/test/Makefile.in b/test/Makefile.in index e73a950b..79ca9a3b 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -1320,6 +1320,8 @@ EXTRA_DIST = \ regexpbrack2.ok \ regexprange.awk \ regexprange.ok \ + regexsub.awk \ + regexsub.ok \ reginttrad.awk \ reginttrad.ok \ regnul1.awk \ @@ -1719,7 +1721,7 @@ GAWK_EXT_TESTS = \ procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \ profile7 profile8 profile9 profile10 profile11 profile12 profile13 \ profile14 profile15 pty1 pty2 \ - rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \ + rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \ rsstart1 rsstart2 rsstart3 rstest6 \ sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \ sourcesplit split_after_fpat \ @@ -4814,6 +4816,11 @@ profile15: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +regexsub: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + regnul1: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index a36ac8cf..12cc1644 100644 --- a/test/Maketests +++ b/test/Maketests @@ -1880,6 +1880,11 @@ profile15: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +regexsub: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + regnul1: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/regexsub.awk b/test/regexsub.awk new file mode 100644 index 00000000..92dede7b --- /dev/null +++ b/test/regexsub.awk @@ -0,0 +1,48 @@ +BEGIN { + print "Initialize strong regex" + rgx2 = rgx1 = @/[abc]/ + print "Test gsub on strong regex" + printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1)) + printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 2, rgx2, 2, typeof(rgx2)) + print "Test gsub() a strong regex" + gsub(/b/, "e", rgx2) + printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1)) + printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 2, rgx2, 2, typeof(rgx2)) + + print "Test value not found in regex" + gsub(/x/, "y", rgx1) # should not change + printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1)) + + print "Test gsub on numbers" + v2 = v1 = 12345 + printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1)) + printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 2, v2, 2, typeof(v2)) + gsub(/3/, "x", v2) + printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1)) + printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 2, v2, 2, typeof(v2)) + print "Test value not found in number" + gsub(/9/, "x", v1) + printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1)) + + print "Test gensub on regex" + a = b = @/abc/ + c = gensub(/b/, "x", "g", a) + printf("a = @/%s/\ttypeof(a) = '%s'\n", a, typeof(a)) + printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c)) + print "Test value not found in regex" + c = gensub(/q/, "x", "g", b) + printf("b = @/%s/\ttypeof(b) = '%s'\n", b, typeof(b)) + printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c)) + + print "Test gensub on numbers" + a = b = 12345 + c = gensub(/3/, "x", "g", a) + printf("a = \"%s\"\ttypeof(a) = '%s'\n", a, typeof(a)) + printf("b = \"%s\"\ttypeof(b) = '%s'\n", b, typeof(b)) + printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c)) + print "Test value not found in number" + c = gensub(/9/, "x", "g", b) + printf("b = \"%s\"\ttypeof(b) = '%s'\n", b, typeof(b)) + printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c)) + print typeof(c), c +} diff --git a/test/regexsub.ok b/test/regexsub.ok new file mode 100644 index 00000000..44511ebc --- /dev/null +++ b/test/regexsub.ok @@ -0,0 +1,30 @@ +Initialize strong regex +Test gsub on strong regex +rgx1 = '[abc]' typeof(rgx1) = 'regexp' +rgx2 = '[abc]' typeof(rgx2) = 'regexp' +Test gsub() a strong regex +rgx1 = '[abc]' typeof(rgx1) = 'regexp' +rgx2 = '[aec]' typeof(rgx2) = 'regexp' +Test value not found in regex +rgx1 = '[abc]' typeof(rgx1) = 'regexp' +Test gsub on numbers +v1 = '12345' typeof(v1) = 'number' +v2 = '12345' typeof(v2) = 'number' +v1 = '12345' typeof(v1) = 'number' +v2 = '12x45' typeof(v2) = 'string' +Test value not found in number +v1 = '12345' typeof(v1) = 'number' +Test gensub on regex +a = @/abc/ typeof(a) = 'regexp' +c = "axc" typeof(c) = 'string' +Test value not found in regex +b = @/abc/ typeof(b) = 'regexp' +c = "abc" typeof(c) = 'string' +Test gensub on numbers +a = "12345" typeof(a) = 'number' +b = "12345" typeof(b) = 'number' +c = "12x45" typeof(c) = 'string' +Test value not found in number +b = "12345" typeof(b) = 'number' +c = "12345" typeof(c) = 'string' +string 12345 |