about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- builtin.c 36
-rw-r--r-- pc/Makefile.tst 7
-rw-r--r-- test/ChangeLog 5
-rw-r--r-- test/Makefile.am 4
-rw-r--r-- test/Makefile.in 9
-rw-r--r-- test/Maketests 5
-rw-r--r-- test/regexsub.awk 48
-rw-r--r-- test/regexsub.ok 30
9 files changed, 144 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index c598dac5..4e82bffb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2021-08-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (do_sub): Rationalize handling of strongly typed
+ regex as argument to sub/gsub, as well as rationalize the return
+ value from gensub to always be string. Thanks to John Naman
+ <jnaman2@gmail.com> for the bug report.
+
2021-08-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
* mpfr.c (do_mpfr_func): New argument, warn_negative. If true,
diff --git a/builtin.c b/builtin.c
index 454034f4..e1ba5eb3 100644
--- a/builtin.c
+++ b/builtin.c
@@ -2934,8 +2934,6 @@ do_sub(int nargs, unsigned int flags)
RESTART(rp, target->stptr) > target->stlen)
goto done;
- target->flags |= STRING;
-
text = target->stptr;
textlen = target->stlen;
@@ -3183,6 +3181,10 @@ done:
DEREF(target);
assert(buf != NULL);
return make_str_node(buf, textlen, ALREADY_MALLOCED);
+ } else if ((target->flags & STRING) == 0) {
+ /*
+  * gensub with no match on a non-string target: return a string
+  * copy of the original text. Build the copy BEFORE DEREF, because
+  * DEREF drops our reference and may release target and its buffer;
+  * reading target->stptr afterwards would be a use-after-free.
+  */
+ NODE *copy = make_str_node(target->stptr, target->stlen, 0);
+ DEREF(target);
+ return copy;
}
/* return the original string */
@@ -3193,8 +3195,34 @@ done:
if ((flags & LITERAL) != 0)
DEREF(target);
else if (matches > 0) {
- unref(*lhs);
- *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
+ /*
+ * 8/2021: There's a bit of a song and dance here. If someone does
+ *
+ * x = @/abc/
+ * sub(/b/, "x", x)
+ *
+ * What should the type of x be after the call? Does it get converted
+ * to string? Or does it remain a regexp? We've decided to let it
+ * remain a regexp. In that case, we have to update the compiled
+ * regular expression that it holds.
+ */
+ bool is_regex = false;
+ NODE *target = *lhs;
+
+ if ((target->flags & REGEX) != 0) {
+ is_regex = true;
+
+ // free old regex registers
+ refree(target->typed_re->re_reg[0]);
+ if (target->typed_re->re_reg[1] != NULL)
+ refree(target->typed_re->re_reg[1]);
+ freenode(target->typed_re);
+ }
+ unref(*lhs); // nuke original value
+ if (is_regex)
+ *lhs = make_typed_regex(buf, textlen);
+ else
+ *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
}
return make_number((AWKNUM) matches);
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index 4b99204f..b9572ba1 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -216,7 +216,7 @@ GAWK_EXT_TESTS = \
procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \
profile7 profile8 profile9 profile10 profile11 profile12 profile13 \
profile14 profile15 pty1 pty2 \
- rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
+ rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
rsstart1 rsstart2 rsstart3 rstest6 \
sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \
sourcesplit split_after_fpat \
@@ -3152,6 +3152,11 @@ profile15:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+regexsub:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
regnul1:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/ChangeLog b/test/ChangeLog
index c92f0188..c3dcd555 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
+2021-08-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (EXTRA_DIST): regexsub, new test.
+ * regexsub.awk, regexsub.ok: New files.
+
2021-05-15 Eli Zaretskii <eliz@gnu.org>
* iolint.ok: Reorder results to follow the order of iolint.awk.
diff --git a/test/Makefile.am b/test/Makefile.am
index 3f9e9308..7ee23813 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1057,6 +1057,8 @@ EXTRA_DIST = \
regexpbrack2.ok \
regexprange.awk \
regexprange.ok \
+ regexsub.awk \
+ regexsub.ok \
reginttrad.awk \
reginttrad.ok \
regnul1.awk \
@@ -1456,7 +1458,7 @@ GAWK_EXT_TESTS = \
procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \
profile7 profile8 profile9 profile10 profile11 profile12 profile13 \
profile14 profile15 pty1 pty2 \
- rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
+ rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
rsstart1 rsstart2 rsstart3 rstest6 \
sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \
sourcesplit split_after_fpat \
diff --git a/test/Makefile.in b/test/Makefile.in
index e73a950b..79ca9a3b 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -1320,6 +1320,8 @@ EXTRA_DIST = \
regexpbrack2.ok \
regexprange.awk \
regexprange.ok \
+ regexsub.awk \
+ regexsub.ok \
reginttrad.awk \
reginttrad.ok \
regnul1.awk \
@@ -1719,7 +1721,7 @@ GAWK_EXT_TESTS = \
procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \
profile7 profile8 profile9 profile10 profile11 profile12 profile13 \
profile14 profile15 pty1 pty2 \
- rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
+ rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
rsstart1 rsstart2 rsstart3 rstest6 \
sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \
sourcesplit split_after_fpat \
@@ -4814,6 +4816,11 @@ profile15:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+regexsub:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
regnul1:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index a36ac8cf..12cc1644 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -1880,6 +1880,11 @@ profile15:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+regexsub:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
regnul1:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/regexsub.awk b/test/regexsub.awk
new file mode 100644
index 00000000..92dede7b
--- /dev/null
+++ b/test/regexsub.awk
@@ -0,0 +1,48 @@
+BEGIN {
+ print "Initialize strong regex"
+ rgx2 = rgx1 = @/[abc]/ # both variables start from the same strongly typed regexp
+ print "Test gsub on strong regex"
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1))
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 2, rgx2, 2, typeof(rgx2))
+ print "Test gsub() a strong regex"
+ gsub(/b/, "e", rgx2) # expected: only rgx2 changes, and it is still reported as regexp
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1))
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 2, rgx2, 2, typeof(rgx2))
+
+ print "Test value not found in regex"
+ gsub(/x/, "y", rgx1) # no match, so rgx1 should not change
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1))
+
+ print "Test gsub on numbers"
+ v2 = v1 = 12345 # same checks, starting from a numeric value
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1))
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 2, v2, 2, typeof(v2))
+ gsub(/3/, "x", v2) # expected: a successful substitution leaves v2 typed as string
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1))
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 2, v2, 2, typeof(v2))
+ print "Test value not found in number"
+ gsub(/9/, "x", v1) # no match: v1 is expected to remain a number
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1))
+
+ print "Test gensub on regex"
+ a = b = @/abc/
+ c = gensub(/b/, "x", "g", a) # expected: a stays a regexp, the result c is a string
+ printf("a = @/%s/\ttypeof(a) = '%s'\n", a, typeof(a))
+ printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+ print "Test value not found in regex"
+ c = gensub(/q/, "x", "g", b) # no match: the result is still expected to be a string
+ printf("b = @/%s/\ttypeof(b) = '%s'\n", b, typeof(b))
+ printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+
+ print "Test gensub on numbers"
+ a = b = 12345
+ c = gensub(/3/, "x", "g", a) # expected: a and b stay numbers, the result c is a string
+ printf("a = \"%s\"\ttypeof(a) = '%s'\n", a, typeof(a))
+ printf("b = \"%s\"\ttypeof(b) = '%s'\n", b, typeof(b))
+ printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+ print "Test value not found in number"
+ c = gensub(/9/, "x", "g", b) # no match: c is expected to be a string, b still a number
+ printf("b = \"%s\"\ttypeof(b) = '%s'\n", b, typeof(b))
+ printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+ print typeof(c), c
+}
diff --git a/test/regexsub.ok b/test/regexsub.ok
new file mode 100644
index 00000000..44511ebc
--- /dev/null
+++ b/test/regexsub.ok
@@ -0,0 +1,30 @@
+Initialize strong regex
+Test gsub on strong regex
+rgx1 = '[abc]' typeof(rgx1) = 'regexp'
+rgx2 = '[abc]' typeof(rgx2) = 'regexp'
+Test gsub() a strong regex
+rgx1 = '[abc]' typeof(rgx1) = 'regexp'
+rgx2 = '[aec]' typeof(rgx2) = 'regexp'
+Test value not found in regex
+rgx1 = '[abc]' typeof(rgx1) = 'regexp'
+Test gsub on numbers
+v1 = '12345' typeof(v1) = 'number'
+v2 = '12345' typeof(v2) = 'number'
+v1 = '12345' typeof(v1) = 'number'
+v2 = '12x45' typeof(v2) = 'string'
+Test value not found in number
+v1 = '12345' typeof(v1) = 'number'
+Test gensub on regex
+a = @/abc/ typeof(a) = 'regexp'
+c = "axc" typeof(c) = 'string'
+Test value not found in regex
+b = @/abc/ typeof(b) = 'regexp'
+c = "abc" typeof(c) = 'string'
+Test gensub on numbers
+a = "12345" typeof(a) = 'number'
+b = "12345" typeof(b) = 'number'
+c = "12x45" typeof(c) = 'string'
+Test value not found in number
+b = "12345" typeof(b) = 'number'
+c = "12345" typeof(c) = 'string'
+string 12345