diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2022-04-05 21:31:39 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2022-04-05 21:31:39 -0700 |
commit | 9c51abd7297034b9b109eafcf01021fbd2876ba4 (patch) | |
tree | ecaf2d7eed8ea106e193c4811301df87a39d3981 | |
parent | 26dba3a636e0587e621941842bf0af96afa45f97 (diff) | |
download | cppawk-9c51abd7297034b9b109eafcf01021fbd2876ba4.tar.gz cppawk-9c51abd7297034b9b109eafcf01021fbd2876ba4.tar.bz2 cppawk-9c51abd7297034b9b109eafcf01021fbd2876ba4.zip |
New function equalize.
-rw-r--r-- | cppawk-cons.1 | 95 | ||||
-rw-r--r-- | cppawk-include/cons-priv.h | 30 | ||||
-rw-r--r-- | cppawk-include/cons.h | 1 | ||||
-rw-r--r-- | testcases-cons | 41 |
4 files changed, 151 insertions, 16 deletions
diff --git a/cppawk-cons.1 b/cppawk-cons.1 index 200876b..47bb0da 100644 --- a/cppawk-cons.1 +++ b/cppawk-cons.1 @@ -69,6 +69,7 @@ cons \- Lisp-like data representation and control flow macros sexp(x) // convert Lisp value to S-expression string equal(x, y) // test whether two Lisp values are equal + equalize(x) // convert object to canonical representation list(...) // return argument values as a Lisp list append(...) // append list arguments; last may be atom @@ -888,12 +889,13 @@ function favors the last of these. (nil . 1) .ft R -.SS Function \fIequal\fP +.SS Functions \fIequal\fP and \fIequalize\fP .bk .B Syntax: .ft B equal(x, y) + equalize(x) .ft R .B Description @@ -932,9 +934,9 @@ If .I x and .I y -are both numeric, then they are compared numerically, even if -they are character strings. While this may seem to be the same as Awk equality, -it is not the case. This rule is applied regardless of the origin of +are both numbers, then they are compared numerically. While this may seem to be +the same as Awk equality, that is not the case. This rule is applied regardless +of the origin of .I x and .IR y . @@ -964,22 +966,26 @@ will print 1 when a record with the fields .B 1 and .B 1.0 -is processed. This is because at input time, Awk classifies such -inputs as being numeric strings, attaching that to their type -information, and two numeric strings are compared as numbers. -Loosely speaking, the +is processed. This is because Awk classifies certain inputs, such as fields +delimited during input scanning, as being numeric strings if they look like +numbers. This numeric string status is attached to their type information, and +two numeric strings are compared as numbers. Yet, strings character-for-character +identical to these which are produced via string manipulation are not treated +as numeric.
Loosely speaking, the .B equal -function compares two strings as numbers if they would be numeric +function compares two (unboxed) strings as numbers if they would be numeric strings if they were input as Awk fields. .IP 2. -A box string is +A boxed string is .B equal -to an unboxed string of the same content, even though their Awk -representation is different. In implementation terms: +to an unboxed string of the same content, but only if the unboxed +string isn't numeric. A numeric unboxed string is considered a number, +and thus not equal to any boxed string. .ft B equal("Tabc", "abc") => 1 + equal("T123", "123") => 0 .ft R .IP 3. @@ -1003,6 +1009,71 @@ to .BI cdr( y ) .PP +The +.B equalize +function is semantically related to +.BR equal . +It computes and returns an object similar to its argument object. +If two objects +.I x +and +.I y +are considered to be the same by the +.B equal +function, then the expressions +.BI equalize( x ) +and +.BI equalize( y ) +each return the same string. + +That is to say, the following relationship holds between +.B equalize +and +.BR equal : + +.ft B + equal(\fIx\fP, \fIy\fP) == (equalize(\fIx\fP) == equalize(\fIy\fP)) +.ft R + +Comparing two objects for equality using +.B equal +is the same as converting them to a canonical representation with +.B equalize +and then comparing that representation using the +.B == +operator. + +The +.B equalize +function is useful for two reasons. Firstly, comparing objects with +.B == +is much cheaper than +.BR equal ; +therefore, an application which performs a lot of comparisons +may be made more efficient if it equalizes the objects and then +uses the +.B == +operator instead of +.BR equal . + +Secondly, when equalized objects are used as keys for an Awk associative +array, then, effectively, that array becomes based on +.BR equal +equality.
That is to say, for instance, if the objects +.B "cons("1.0", "2.0")" +and +.B "cons(1, 2)" +are used directly as associative array keys, they are different keys because +their string representation is different. Yet, those two objects are +.BR equal . +Suppose that in some application there exists the requirement that +.B equal +objects must be considered to be the same array key. This requirement +can be satisfied by passing all keys through the +.B equalize +function, and using the equalized images of the keys for the +array operations. + .SH "SEE ALSO" cppawk(1) diff --git a/cppawk-include/cons-priv.h b/cppawk-include/cons-priv.h index 8a57215..3ce5873 100644 --- a/cppawk-include/cons-priv.h +++ b/cppawk-include/cons-priv.h @@ -216,6 +216,7 @@ function __box_str(__str) { return "T" __str } +#define __box_str(str) ("T" (str)) function __box_sym(__name) { @@ -382,16 +383,37 @@ function __slow_equal(__obj1, __obj2, } if (__tc1 == "T") - return __unbox(__obj1) == __obj2; + return __numberp(__obj2) ? 0 : (__obj1 == __box_str(__obj2)) if (__tc2 == "T") - return __obj1 == __unbox(__obj2); + return __numberp(__obj1) ? 
0 : (__box_str(__obj1) == __obj2) - if (__numberp(__obj1) && __numberp(__obj2)) { + if (__numberp(__obj1) && __numberp(__obj2)) return __obj1 + 0 == __obj2 + 0 + + return 0 +} + +function __equalize(__obj, + __tc, __str) +{ + if (!__present(__obj)) + return "" + + __tc = __typecode(__obj) + + __case (__tc) { + __of("C") + __cret (__cons(__equalize(__car(__obj)), + __equalize(__cdr(__obj)))) + __of("T", "S", "U", "") + __cret (__obj) } - return 0; + if (__numberp(__obj)) + return __obj + 0 + + return __box_str(__obj) } function __pack(__stk, __item) diff --git a/cppawk-include/cons.h b/cppawk-include/cons.h index 560784a..0087930 100644 --- a/cppawk-include/cons.h +++ b/cppawk-include/cons.h @@ -69,6 +69,7 @@ #define ltof __ltof #define keys __keys #define equal __equal +#define equalize __equalize #define list __list #define li __li #define listar __listar diff --git a/testcases-cons b/testcases-cons index d95dbef..b2fce7f 100644 --- a/testcases-cons +++ b/testcases-cons @@ -369,7 +369,48 @@ BEGIN { 0 1 brk +0 +1 1 +0 +0 +-- +24: +$cppawk ' +#include <cons.h> + +function same(left, right) +{ + return equalize(left) == equalize(right) +} + +BEGIN { + print same("", undef), same(0, 0), same(-1, -1), same("", "") + print same("", "a"), same(0, 1), same(-1, 1), same("Tabc", "xyz") + print same("1", "1.0"), same(box("abc"), "abc") + print same(box(undef), box(undef)) + print same(cons("1", "2"), cons("1.0", "2.0")) + print same(cons("1", "3"), cons("1.0", "2.0")) + print same(cons("3", "2"), cons("1.0", "2.0")) + print same(cons("1a", "2a"), cons("1b", "2b")) + print "brk" + print same(box_str(1.0), 1.0) + print same(box_str(1.0), box_str("1")) + print same(box_str(1.0), box_str(1.0)) + print same(box_str(1.0), "1.0") + print same("1.0", box_str(1.0)) +}' +: +1 1 1 1 +0 0 0 0 +1 1 +1 +1 +0 +0 +1 +brk +0 1 1 0 |