Rework scalar comparisons to be (more) like in C.

author: Arnold D. Robbins <arnold@skeeve.com> 2020-11-02 20:21:46 +0200
committer: Arnold D. Robbins <arnold@skeeve.com> 2020-11-02 20:21:46 +0200
commit: 0c075409a9a17bbc774880fdf1110251493f5f57 (patch)
tree: 342aa6ad703d6cc8bb8b34cda04dffb277204c16
parent: ba3fd3d4ab19184a84720772b861c10ffb595cf0 (diff)
download: egawk-0c075409a9a17bbc774880fdf1110251493f5f57.tar.gz
          egawk-0c075409a9a17bbc774880fdf1110251493f5f57.tar.bz2
          egawk-0c075409a9a17bbc774880fdf1110251493f5f57.zip
7 files changed, 162 insertions, 21 deletions
diff --git a/ChangeLog b/ChangeLog
index 5caf828c..4e36e5b6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
 2020-11-02         Arnold D. Robbins     <arnold@skeeve.com>
 
+	Make gawk numeric comparisons act like C doubles.
+	MPFR differs from doubles w.r.t. NaN, not sure why yet.
+
+	* awk.h (scalar_cmp_t): New enum.
+	* builtin.c (format_nan_inf): Use mpfr_signbit, not mpfr_sgn.
+	* eval.c (cmp_doubles): New routine.
+	(cmp_scalars): Change type to bool, rework logic.
+	* interpret.h (r_interpret): Rework scalar comparisons.
+	* mpfr.c (mpg_cmp_as_numbers): New routine.
+	* node.c: Use <math.h>, not "math.h", minor comment edits.
+
+2020-11-02         Arnold D. Robbins     <arnold@skeeve.com>
+
 	* re.c (make_regexp): Cast len parameter to int to avoid
 	compiler warnings.
 
diff --git a/awk.h b/awk.h
index cf06d490..0540ead7 100644
--- a/awk.h
+++ b/awk.h
@@ -1574,6 +1574,15 @@ typedef enum {
 extern field_sep_type current_field_sep(void);
 extern const char *current_field_sep_str(void);
 
+typedef enum {	/* comparison selector passed to cmp_scalars() */
+	SCALAR_EQ,	/* == (Op_equal) */
+	SCALAR_NEQ,	/* != (Op_notequal) */
+	SCALAR_LT,	/* <  (Op_less) */
+	SCALAR_LE,	/* <= (Op_leq) */
+	SCALAR_GT,	/* >  (Op_greater) */
+	SCALAR_GE,	/* >= (Op_geq) */
+} scalar_cmp_t;
+
 /* gawkapi.c: */
 extern gawk_api_t api_impl;
 extern void init_ext_api(void);
diff --git a/builtin.c b/builtin.c
index afd866ac..caf3d3b9 100644
--- a/builtin.c
+++ b/builtin.c
@@ -4293,11 +4293,11 @@ format_nan_inf(NODE *n, char format)
 		return NULL;
 	else if (is_mpg_float(n)) {
 		if (mpfr_nan_p(n->mpg_numbr)) {
-			strcpy(buf, mpfr_sgn(n->mpg_numbr) < 0 ? "-nan" : "+nan");
+			strcpy(buf, mpfr_signbit(n->mpg_numbr) ? "-nan" : "+nan");
 
 			goto fmt;
 		} else if (mpfr_inf_p(n->mpg_numbr)) {
-			strcpy(buf, mpfr_sgn(n->mpg_numbr) < 0 ? "-inf" : "+inf");
+			strcpy(buf, mpfr_signbit(n->mpg_numbr) ? "-inf" : "+inf");
 
 			goto fmt;
 		} else
diff --git a/eval.c b/eval.c
index 558c021e..83d71dd8 100644
--- a/eval.c
+++ b/eval.c
@@ -24,10 +24,8 @@
  */
 
 #include "awk.h"
+#include <math.h>
 
-extern double pow(double x, double y);
-extern double modf(double x, double *yp);
-extern double fmod(double x, double y);
 NODE **fcall_list = NULL;
 long fcall_count = 0;
 int currule = 0;
@@ -1520,18 +1518,17 @@ eval_condition(NODE *t)
 	return boolval(t);
 }
 
-typedef enum {
-	SCALAR_EQ_NEQ,
-	SCALAR_RELATIONAL
-} scalar_cmp_t;
+static bool cmp_doubles(const NODE *t1, const NODE *t2, scalar_cmp_t comparison_type);
+extern bool mpg_cmp_as_numbers(const NODE *t1, const NODE *t2, scalar_cmp_t comparison_type);
 
 /* cmp_scalars -- compare two nodes on the stack */
 
-static inline int
+static bool
 cmp_scalars(scalar_cmp_t comparison_type)
 {
 	NODE *t1, *t2;
 	int di;
+	bool ret;
 
 	t2 = POP_SCALAR();
 	t1 = TOP();
@@ -1539,12 +1536,91 @@ cmp_scalars(scalar_cmp_t comparison_type)
 		DEREF(t2);
 		fatal(_("attempt to use array `%s' in a scalar context"), array_vname(t1));
 	}
-	di = cmp_nodes(t1, t2, comparison_type == SCALAR_EQ_NEQ);
+
+	if ((t1->flags & STRING) != 0 || (t2->flags & STRING) != 0) {
+		bool use_strcmp = (comparison_type == SCALAR_EQ || comparison_type == SCALAR_NEQ);
+		di = cmp_nodes(t1, t2, use_strcmp);
+
+		switch (comparison_type) {
+		case SCALAR_EQ:
+			ret = (di == 0);
+			break;
+		case SCALAR_NEQ:
+			ret = (di != 0);
+			break;
+		case SCALAR_LT:
+			ret = (di < 0);
+			break;
+		case SCALAR_LE:
+			ret = (di <= 0);
+			break;
+		case SCALAR_GT:
+			ret = (di > 0);
+			break;
+		case SCALAR_GE:
+			ret = (di >= 0);
+			break;
+		}
+	} else {
+		fixtype(t1);
+		fixtype(t2);
+
+#ifdef HAVE_MPFR
+		if (do_mpfr)
+			ret = mpg_cmp_as_numbers(t1, t2, comparison_type);
+		else
+#endif
+			ret = cmp_doubles(t1, t2, comparison_type);
+	}
+
 	DEREF(t1);
 	DEREF(t2);
-	return di;
+	return ret;
 }
 
+
+/* cmp_doubles --- compare two doubles */
+
+static bool
+cmp_doubles(const NODE *t1, const NODE *t2, scalar_cmp_t comparison_type)
+{
+	/*
+	 * Compare t1->numbr with t2->numbr as C does: a NaN operand makes
+	 * every test except != false.  It should NOT be used for sorting.
+	 */
+
+	bool t1_nan = isnan(t1->numbr);
+	bool t2_nan = isnan(t2->numbr);
+	bool ret = false;	/* stays false if comparison_type is not handled below */
+
+	if ((t1_nan || t2_nan) && comparison_type != SCALAR_NEQ)
+		return false;
+
+	switch (comparison_type) {
+	case SCALAR_EQ:
+		ret = (t1->numbr == t2->numbr);
+		break;
+	case SCALAR_NEQ:
+		ret = (t1->numbr != t2->numbr);
+		break;
+	case SCALAR_LT:
+		ret = (t1->numbr < t2->numbr);
+		break;
+	case SCALAR_LE:
+		ret = (t1->numbr <= t2->numbr);
+		break;
+	case SCALAR_GT:
+		ret = (t1->numbr > t2->numbr);
+		break;
+	case SCALAR_GE:
+		ret = (t1->numbr >= t2->numbr);
+		break;
+	}
+
+	return ret;
+}
+
+
 /* op_assign --- assignment operators excluding = */
 
 static void
diff --git a/interpret.h b/interpret.h
index fedf5255..40dd39d6 100644
--- a/interpret.h
+++ b/interpret.h
@@ -486,37 +486,37 @@ uninitialized_scalar:
 			break;
 
 		case Op_equal:
-			r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) == 0];
+			r = node_Boolean[cmp_scalars(SCALAR_EQ)];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_notequal:
-			r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) != 0];
+			r = node_Boolean[cmp_scalars(SCALAR_NEQ)];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_less:
-			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) < 0];
+			r = node_Boolean[cmp_scalars(SCALAR_LT)];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_greater:
-			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) > 0];
+			r = node_Boolean[cmp_scalars(SCALAR_GT)];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_leq:
-			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) <= 0];
+			r = node_Boolean[cmp_scalars(SCALAR_LE)];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_geq:
-			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) >= 0];
+			r = node_Boolean[cmp_scalars(SCALAR_GE)];
 			UPREF(r);
 			REPLACE(r);
 			break;
diff --git a/mpfr.c b/mpfr.c
index 38f38a3a..2dfff447 100644
--- a/mpfr.c
+++ b/mpfr.c
@@ -433,6 +433,50 @@ mpg_cmp(const NODE *t1, const NODE *t2)
 	return cmp_awknums(t1, t2);
 }
 
+/* mpg_cmp_as_numbers --- compare two numbers, similar to doubles */
+
+bool
+mpg_cmp_as_numbers(const NODE *t1, const NODE *t2, scalar_cmp_t comparison_type)
+{
+	/*
+	 * This routine provides numeric comparisons that should work
+	 * the same as in C.  It should NOT be used for sorting.
+	 */
+
+	/* NOTE(review): guarded as in format_nan_inf(); presumably only float nodes hold an mpfr value */
+	bool t1_nan = is_mpg_float(t1) && mpfr_nan_p(t1->mpg_numbr);
+	bool t2_nan = is_mpg_float(t2) && mpfr_nan_p(t2->mpg_numbr);
+	bool ret = false;	/* stays false if comparison_type is not handled below */
+
+	/* mpg_cmp() can defer to cmp_awknums(), which orders NaN for sorting; settle NaN here: only != is true */
+	if (t1_nan || t2_nan)
+		return comparison_type == SCALAR_NEQ;
+
+	int di = mpg_cmp(t1, t2);
+	switch (comparison_type) {
+	case SCALAR_EQ:
+		ret = (di == 0);
+		break;
+	case SCALAR_NEQ:
+		ret = (di != 0);
+		break;
+	case SCALAR_LT:
+		ret = (di < 0);
+		break;
+	case SCALAR_LE:
+		ret = (di <= 0);
+		break;
+	case SCALAR_GT:
+		ret = (di > 0);
+		break;
+	case SCALAR_GE:
+		ret = (di >= 0);
+		break;
+	}
+
+	return ret;
+}
+
 
 /*
  * mpg_update_var --- update NR or FNR.
diff --git a/node.c b/node.c
index 707d106d..772131a2 100644
--- a/node.c
+++ b/node.c
@@ -25,7 +25,7 @@
  */
 
 #include "awk.h"
-#include "math.h"
+#include <math.h>
 #include "floatmagic.h"	/* definition of isnan */
 
 static int is_ieee_magic_val(const char *val);
@@ -367,7 +367,7 @@ int
 cmp_awknums(const NODE *t1, const NODE *t2)
 {
 	/*
-	 * This routine is also used to sort numeric array indices or values.
+	 * This routine is used to sort numeric array indices or values.
 	 * For the purposes of sorting, NaN is considered greater than
 	 * any other value, and all NaN values are considered equivalent and equal.
 	 * This isn't in compliance with IEEE standard, but compliance w.r.t. NaN
@@ -387,7 +387,6 @@ cmp_awknums(const NODE *t1, const NODE *t2)
 	return 1;
 }
 
-
 /* make_str_node --- make a string node */
 
 NODE *
author	Arnold D. Robbins <arnold@skeeve.com>	2020-11-02 20:21:46 +0200
committer	Arnold D. Robbins <arnold@skeeve.com>	2020-11-02 20:21:46 +0200
commit	0c075409a9a17bbc774880fdf1110251493f5f57 (patch)
tree	342aa6ad703d6cc8bb8b34cda04dffb277204c16
parent	ba3fd3d4ab19184a84720772b861c10ffb595cf0 (diff)
download	egawk-0c075409a9a17bbc774880fdf1110251493f5f57.tar.gz egawk-0c075409a9a17bbc774880fdf1110251493f5f57.tar.bz2 egawk-0c075409a9a17bbc774880fdf1110251493f5f57.zip