aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2022-04-05 21:31:39 -0700
committerKaz Kylheku <kaz@kylheku.com>2022-04-05 21:31:39 -0700
commit9c51abd7297034b9b109eafcf01021fbd2876ba4 (patch)
treeecaf2d7eed8ea106e193c4811301df87a39d3981
parent26dba3a636e0587e621941842bf0af96afa45f97 (diff)
downloadcppawk-9c51abd7297034b9b109eafcf01021fbd2876ba4.tar.gz
cppawk-9c51abd7297034b9b109eafcf01021fbd2876ba4.tar.bz2
cppawk-9c51abd7297034b9b109eafcf01021fbd2876ba4.zip
New function equalize.
-rw-r--r--cppawk-cons.195
-rw-r--r--cppawk-include/cons-priv.h30
-rw-r--r--cppawk-include/cons.h1
-rw-r--r--testcases-cons41
4 files changed, 151 insertions, 16 deletions
diff --git a/cppawk-cons.1 b/cppawk-cons.1
index 200876b..47bb0da 100644
--- a/cppawk-cons.1
+++ b/cppawk-cons.1
@@ -69,6 +69,7 @@ cons \- Lisp-like data representation and control flow macros
sexp(x) // convert Lisp value to S-expression string
equal(x, y) // test whether two Lisp values are equal
+ equalize(x) // convert object to canonical representation
list(...) // return argument values as a Lisp list
append(...) // append list arguments; last may be atom
@@ -888,12 +889,13 @@ function favors the last of these.
(nil . 1)
.ft R
-.SS Function \fIequal\fP
+.SS Functions \fIequal\fP and \fIequalize\fP
.bk
.B Syntax:
.ft B
equal(x, y)
+ equalize(x)
.ft R
.B Description
@@ -932,9 +934,9 @@ If
.I x
and
.I y
-are both numeric, then they are compared numerically, even if
-they are character strings. While this may seem to be the same as Awk equality,
-it is not the case. This rule is applied regardless of the origin of
+are both numbers, then they are compared numerically. While this may seem to be
+the same as Awk equality, that is not the case. This rule is applied regardless
+of the origin of
.I x
and
.IR y .
@@ -964,22 +966,26 @@ will print 1 when a record with the fields
.B 1
and
.B 1.0
-is processed. This is because at input time, Awk classifies such
-inputs as being numeric strings, attaching that to their type
-information, and two numeric strings are compared as numbers.
-Loosely speaking, the
+is processed. This is because Awk classifies certain inputs, such as fields
+delimited during input scanning, as being numeric strings if they look like
+numbers. This numeric string status is attached to their type information, and
+two numeric strings are compared as numbers. Yet, strings character-for-character
+identical to these which are produced via string manipulation are not treated
+as numeric. Loosely speaking, the
.B equal
-function compares two strings as numbers if they would be numeric
+function compares two (unboxed) strings as numbers if they would be numeric
strings if they were input as Awk fields.
.IP 2.
-A box string is
+A boxed string is
.B equal
-to an unboxed string of the same content, even though their Awk
-representation is different. In implementation terms:
+to an unboxed string of the same content, but only if the unboxed
+string isn't numeric. A numeric unboxed string is considered a number,
+and thus not equal to any boxed string.
.ft B
equal("Tabc", "abc") => 1
+ equal("T123", "123") => 0
.ft R
.IP 3.
@@ -1003,6 +1009,71 @@ to
.BI cdr( y )
.PP
+The
+.B equalize
+function is semantically related to
+.BR equal .
+It computes and returns an object similar to its argument object.
+If two objects
+.I x
+and
+.I y
+are considered to be the same by the
+.B equal
+function, then the expressions
+.BI equalize( x )
+and
+.BI equalize( y )
+each return the same string.
+
+That is to say, the following relationship holds between
+.B equalize
+and
+.BR equal :
+
+.ft B
+ equal(\fIx\fP, \fIy\fP) == (equalize(\fIx\fP) == equalize(\fIy\fP))
+.ft R
+
+Comparing two objects for equality using
+.B equal
+is the same as converting them to a canonical representation with
+.B equalize
+and then comparing that representation using the
+.B ==
+operator.
+
+The
+.B equalize
+function is useful for two reasons. Firstly, comparing objects with
+.B ==
+is much cheaper than
+.BR equal ;
+therefore, an application which performs a lot of comparisons
+may be made more efficient if it equalizes the objects and then
+uses the
+.B ==
+operator instead of
+.BR equal .
+
+Secondly, when equalized objects are used as keys for an Awk associative
+array, then, effectively, that array becomes based on
+.BR equal
+equality. That is to say, for instance, if the objects
+.B "cons(""1.0"", ""2.0"")"
+and
+.B "cons(1, 2)"
+are used directly as associative array keys, they are different keys because
+their string representation is different. Yet, those two objects are
+.BR equal .
+Suppose that in some application there exists the requirement that
+.B equal
+objects must be considered to be the same array key. This requirement
+can be satisfied by passing all keys through the
+.B equalize
+function, and using the equalized images of the keys for the
+array operations.
+
.SH "SEE ALSO"
cppawk(1)
diff --git a/cppawk-include/cons-priv.h b/cppawk-include/cons-priv.h
index 8a57215..3ce5873 100644
--- a/cppawk-include/cons-priv.h
+++ b/cppawk-include/cons-priv.h
@@ -216,6 +216,7 @@ function __box_str(__str)
{
return "T" __str
}
+#define __box_str(str) ("T" (str))
function __box_sym(__name)
{
@@ -382,16 +383,37 @@ function __slow_equal(__obj1, __obj2,
}
if (__tc1 == "T")
- return __unbox(__obj1) == __obj2;
+ return __numberp(__obj2) ? 0 : (__obj1 == __box_str(__obj2))
if (__tc2 == "T")
- return __obj1 == __unbox(__obj2);
+ return __numberp(__obj1) ? 0 : (__box_str(__obj1) == __obj2)
- if (__numberp(__obj1) && __numberp(__obj2)) {
+ if (__numberp(__obj1) && __numberp(__obj2))
return __obj1 + 0 == __obj2 + 0
+
+ return 0
+}
+
+function __equalize(__obj,
+ __tc, __str)
+{
+ if (!__present(__obj))
+ return ""
+
+ __tc = __typecode(__obj)
+
+ __case (__tc) {
+ __of("C")
+ __cret (__cons(__equalize(__car(__obj)),
+ __equalize(__cdr(__obj))))
+ __of("T", "S", "U", "")
+ __cret (__obj)
}
- return 0;
+ if (__numberp(__obj))
+ return __obj + 0
+
+ return __box_str(__obj)
}
function __pack(__stk, __item)
diff --git a/cppawk-include/cons.h b/cppawk-include/cons.h
index 560784a..0087930 100644
--- a/cppawk-include/cons.h
+++ b/cppawk-include/cons.h
@@ -69,6 +69,7 @@
#define ltof __ltof
#define keys __keys
#define equal __equal
+#define equalize __equalize
#define list __list
#define li __li
#define listar __listar
diff --git a/testcases-cons b/testcases-cons
index d95dbef..b2fce7f 100644
--- a/testcases-cons
+++ b/testcases-cons
@@ -369,7 +369,48 @@ BEGIN {
0
1
brk
+0
+1
1
+0
+0
+--
+24:
+$cppawk '
+#include <cons.h>
+
+function same(left, right)
+{
+ return equalize(left) == equalize(right)
+}
+
+BEGIN {
+ print same("", undef), same(0, 0), same(-1, -1), same("", "")
+ print same("", "a"), same(0, 1), same(-1, 1), same("Tabc", "xyz")
+ print same("1", "1.0"), same(box("abc"), "abc")
+ print same(box(undef), box(undef))
+ print same(cons("1", "2"), cons("1.0", "2.0"))
+ print same(cons("1", "3"), cons("1.0", "2.0"))
+ print same(cons("3", "2"), cons("1.0", "2.0"))
+ print same(cons("1a", "2a"), cons("1b", "2b"))
+ print "brk"
+ print same(box_str(1.0), 1.0)
+ print same(box_str(1.0), box_str("1"))
+ print same(box_str(1.0), box_str(1.0))
+ print same(box_str(1.0), "1.0")
+ print same("1.0", box_str(1.0))
+}'
+:
+1 1 1 1
+0 0 0 0
+1 1
+1
+1
+0
+0
+1
+brk
+0
1
1
0