infix: superfix relational operators; better code.

This commit extends infix with a post-processing step applied to the output of parse-infix which improves the code, and also implements math-like semantics for relational operators, which I'm calling superfix. Improving the code means that expressions like a + b + c, which turn into (+ (+ a b) c) are cleaned up into (+ a b c). This is done for all n-ary operators. superfix means that clumps of certain operators behave as a compound. For instance a < b <= c means (and (< a b) (<= b c)), where b is evaluated only once. Some relational operators are n-ary; for those we generate the n-ary expression, so that a = b = c < d becomes (and (= a b c) (< c d)). * stdlib/infix.tl (*ifx-env*): New special variable. We use this for communicating the macro environment down into the new finish-infix function, without having to pass a parameter through all the recursion. (eq, eql, equal, neq, neql, nequal, /=, <, >, <=, >=, less, greater, lequal, gequal): These operators become right associative, and are merged into a single precedence level. (finish-infix): New function which coalesces compounds of n-ary operations and converts the postfix chains of relational operators into the correct translation of superfix semantics. (infix-expand-hook): Call finish-infix on the output of parse-infix, taking care to bind the *ifx-env* variable to the environment we are given. * tests/012/infix.tl: New tests. * txr.1: Documented.
author: Kaz Kylheku <kaz@kylheku.com> 2025-05-01 06:41:24 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2025-05-01 06:41:24 -0700
commit: 5c0e21892703253846a83d3aed8e61050445c4ab (patch)
tree: fbf3280c91e53987a6e0e1c2b27bba8128fe3679
parent: 183a3b78908a864d6828a57d059dc4f176ef8a04 (diff)
download: txr-5c0e21892703253846a83d3aed8e61050445c4ab.tar.gz
txr-5c0e21892703253846a83d3aed8e61050445c4ab.tar.bz2
txr-5c0e21892703253846a83d3aed8e61050445c4ab.zip
3 files changed, 240 insertions, 33 deletions
diff --git a/stdlib/infix.tl b/stdlib/infix.tl
index cd6b265f..54d24ca9 100644
--- a/stdlib/infix.tl
+++ b/stdlib/infix.tl
@@ -28,6 +28,8 @@
 (defvarl ifx-ops (hash))
 (defvarl ifx-uops (hash))
 
+(defvar *ifx-env*)
+
 (defstruct (ifx-oper prec sym) nil
   sym
   lispsym
@@ -72,23 +74,23 @@
 (new (ifx-oper 13 'not) arity :prefix assoc :right)
 (new (ifx-oper 13 '!) arity :prefix assoc :right lispsym 'not)
 
-(new (ifx-oper 20 '=))
-(new (ifx-oper 20 '!=) lispsym '/=)
-(new (ifx-oper 20 'eq))
-(new (ifx-oper 20 'eql))
-(new (ifx-oper 20 'equal))
-(new (ifx-oper 20 'neq))
-(new (ifx-oper 20 'neql))
-(new (ifx-oper 20 'nequal))
-
-(new (ifx-oper 25 '<))
-(new (ifx-oper 25 '>))
-(new (ifx-oper 25 '<=))
-(new (ifx-oper 25 '>=))
-(new (ifx-oper 25 'less))
-(new (ifx-oper 25 'greater))
-(new (ifx-oper 25 'lequal))
-(new (ifx-oper 25 'gequal))
+(new (ifx-oper 20 '=) assoc :right)
+(new (ifx-oper 20 '!=) assoc :right lispsym '/=)
+(new (ifx-oper 20 'eq) assoc :right)
+(new (ifx-oper 20 'eql) assoc :right)
+(new (ifx-oper 20 'equal) assoc :right)
+(new (ifx-oper 20 'neq) assoc :right)
+(new (ifx-oper 20 'neql) assoc :right)
+(new (ifx-oper 20 'nequal) assoc :right)
+
+(new (ifx-oper 20 '<) assoc :right)
+(new (ifx-oper 20 '>) assoc :right)
+(new (ifx-oper 20 '<=) assoc :right)
+(new (ifx-oper 20 '>=) assoc :right)
+(new (ifx-oper 20 'less) assoc :right)
+(new (ifx-oper 20 'greater) assoc :right)
+(new (ifx-oper 20 'lequal) assoc :right)
+(new (ifx-oper 20 'gequal) assoc :right)
 
 (new (ifx-oper 26 '|) lispsym 'logior)
 (new (ifx-oper 26 '~) lispsym 'logxor)
@@ -227,6 +229,33 @@
       (infix-error oexp "nodestack extra entries ~s" nodestack))
     (first nodestack)))
 
+(defun-match finish-infix
+  (((@(memq @op '(and or + - * / logand logior logxor))
+     (@op @a0 @a1 . @resta) @b . @restb))
+   (finish-infix ^(,op ,a0 ,a1 ,*resta ,b ,*restb)))
+  (((@(memq @op '(expt and))
+     @a (@op @b0 @b1 . @restb)))
+   (finish-infix ^(,op ,a ,b0 ,b1 ,*restb)))
+  (((@(memq @o0 '#1=(eq eql equal
+                     neq neql nequal . #2=(= /= < > <= >=
+                                           less greater lequal gequal)))
+      @a (@(memq @o1 '#1#) @b . @restb)))
+   (cond
+     ((or (and (atom b) (not (macro-form-p b *ifx-env*)))
+          (if-match (@(meq @nil 'sys:var 'sys:expr) @(atom)) b t))
+      (finish-infix ^(and (,o0 ,a ,b) ,(finish-infix ^(,o1 ,b ,*restb)))))
+     ((with-gensyms (bg)
+        ^(let (,bg)
+          ,(finish-infix ^(and (,o0 ,a (set ,bg ,b))
+                               ,(finish-infix ^(,o1 ,bg ,*restb)))))))))
+  (((and @(sme (@(memq @op '#2#)) nil (@b) a) (@op @b . @restb) . @rest))
+   (finish-infix ^(and (,op ,*(butlast a) ,b ,*restb) ,*rest)))
+  (((and @expr))
+   expr)
+  (((@op . @args))
+   (identity (cons op [mapcar finish-infix args])))
+  ((@else) else))
+
 (defmacro funp (env sym)
  ^(or (fboundp ,sym) (lexical-fun-p ,env ,sym)))
 
@@ -259,7 +288,8 @@
   (cond
     ((eq type-sym :macro) exp)
     ((and (meq type-sym :fun nil) (detect-infix exp env))
-     (parse-infix exp))
+     (let ((*ifx-env* env))
+       (finish-infix (parse-infix exp))))
     ((match-case exp
        ([] exp)
        ([@nil] exp)
diff --git a/tests/012/infix.tl b/tests/012/infix.tl
index b93ca3bb..566b5d25 100644
--- a/tests/012/infix.tl
+++ b/tests/012/infix.tl
@@ -149,3 +149,50 @@
 
 (msstest
   (map (lop / 4) #(10 20 30)) #(2.5 5.0 7.5))
+
+(defmacro mtest-ifx (:form f . pairs)
+  (unless (evenp (len pairs))
+    (compile-error f "even number of arguments required"))
+  ^(mtest ,*(mappend (aret ^((expand '(ifx ,@1)) ,@2)) (tuples 2 pairs))))
+
+(mtest-pif
+  (a eq b) (eq a b)
+  (a nequal b) (nequal a b)
+  (a = b) (= a b)
+  (a eq b eq c) (eq a (eq b c))
+  (a nequal b nequal c) (nequal a (nequal b c))
+  (a = b = c) (= a (= b c))
+  (a eq b = c = d != e
+     = f = g < h < i < j) (eq a
+                              (= b
+                                 (= c
+                                    (/= d
+                                        (= e
+                                           (= f
+                                              (< g
+                                                 (< h
+                                                    (< i
+                                                       j))))))))))
+(mtest-ifx
+  (a eq b) (eq a b)
+  (a nequal b) (nequal a b)
+  (a = b) (= a b)
+  (a eq b eq c) (and (eq a b) (eq b c))
+  (a nequal b nequal c) (and (nequal a b) (nequal b c))
+  (a = b = c) (= a b c)
+  (a eq b = c = d != e = f = g < h < i < j) (and (eq a b) (= b c d) (/= d e)
+                                                 (= e f g) (< g h i j)))
+
+(mtest-pif
+  (a + b) (+ a b)
+  (a + b + c) (+ (+ a b) c)
+  (a + b + c + d) (+ (+ (+ a b) c) d)
+  (a * b * c + d * e + e * f * g) (+ (+ (* (* a b) c)
+                                        (* d e))
+                                     (* (* e f) g)))
+
+(mtest-ifx
+  (a + b) (+ a b)
+  (a + b + c) (+ a b c)
+  (a + b + c + d) (+ a b c d)
+  (a * b * c + d * e + e * f * g) (+ (* a b c) (* d e) (* e f g)))
diff --git a/txr.1 b/txr.1
index 5f024a89..6c44b77d 100644
--- a/txr.1
+++ b/txr.1
@@ -54432,6 +54432,7 @@ In this situation, the remaining arguments may be transformed as described
 below. In this second situation, if no argument transformation takes place,
 then the form is not recognized as "phony infix".
 .RE
+
 In either of the above two cases, additional elements after the function
 symbol (the third and subsequent form elements in case 1, or second and
 subsequent elements in case 2) are taken as an independent expression which is
@@ -54502,6 +54503,7 @@ each other or to an atom, spaces may be elided, as allowed by
 Lisp syntax, exemplified by:
 .codn "(log(a[i][j]++))" .
 
+.ne 28
 .NP* Operator Table
 
 .TS
@@ -54509,28 +54511,28 @@ tab(,);
 l l l l.
 Operators,Class,Precedence,Associativity
 mathfn \f[4]=\f[],prefix,0,right
-\f[4]:= -= += *= /=\f[],infix,1,right,
+\f[4]:= -= += *= /=\f[],infix,1,right
 \f[4]%= &&= ||= |=\f[],,,
 \f[4]~= &= >>= <<=\f[],,,
 \f[4]or ||\f[],infix,2,left
 \f[4]and &&\f[],infix,3,left
 \f[4]not !\f[],prefix,3,right
-\f[4]= eq eql equal\f[],infix,4,left
-\f[4]!= neq neql nequal\f[],,,left
-\f[4]< > <= >=\f[],infix,5,left
+\f[4]= eq eql equal\f[],,,
+\f[4]!= neq neql nequal\f[],,,
+\f[4]< > <= >=\f[],superfix,4,right
 \f[4]less greater\f[],,,
 \f[4]lequal gequal\f[],,,
-\f[4]|\f[],infix,6,left
-\f[4]~\f[],infix,7,left
-\f[4]&\f[],infix,8,left
-\f[4]<< >>\f[],infix,9,left
-\f[4]+ -\f[],infix,10,left
-\f[4]+ -\f[],prefix,11,right
-\f[4]* / %\f[],infix,12,left
-\f[4]**\f[],infix,139,right
-\f[4]++ --\f[],prefix,14,right
-\f[4]++ --\f[],postfix,15,left
-e1\f[4][\f[]e2\f[4]]\f[],postfix,16,left-to-right
+\f[4]|\f[],infix,5,left
+\f[4]~\f[],infix,6,left
+\f[4]&\f[],infix,7,left
+\f[4]<< >>\f[],infix,8,left
+\f[4]+ -\f[],infix,9,left
+\f[4]+ -\f[],prefix,10,right
+\f[4]* / %\f[],infix,11,left
+\f[4]**\f[],infix,12,right
+\f[4]++ --\f[],prefix,13,right
+\f[4]++ --\f[],postfix,14,left
+e1\f[4][\f[]e2\f[4]]\f[],postfix,15,left-to-right
 mathfn\f[4](\f[]...\f[4])\f[],,,
 .TE
 
@@ -54716,6 +54718,134 @@ first transforms to
 and then by the same rule to
 .codn "[[a (i)] (j + 1)]" .
 
+.NP* Compound Relational Expressions
+
+The relational operators at precedence level 4 are
+designated as
+.I superfix .
+These operators are parsed as right-associative infix
+operators, after which the resulting abstract syntax
+undergoes a linearizing transformation which turns
+combinations of adjacent relational operators into
+compound relations.
+
+For instance, the the element sequence
+
+.verb
+  a = b = c = d != e = f = g = i < j
+.brev
+
+first parses into the expression
+
+.verb
+  (= a (= b (= c (= d (/= e (= f (= g (< i j))))))))
+.brev
+
+after which this structure is recognized, and transformed into
+
+.verb
+  (and (= a b c d)
+       (/= d e)
+       (= e f g i)
+       (< i j))
+.brev
+
+which has a completely different, and useful meaning.
+
+Note that in this depiction of the transformation, the terms
+.codn d ,
+.code e
+and
+.code i
+appear twice. In the actual implementation, care is taken not
+to evaluate such repeated terms multiple times.
+
+Parentheses may be used to selectively override parts of this transformation.
+In the following example, all three expressions have different semantics:
+
+.verb
+  a eq b eq c    ->  (and (eq a b) (eq b c))
+  (a eq b) eq c  ->  (eq (eq a b) c)
+  a eq (b eq c)  ->  (eq a (eq b c))
+.brev
+
+The first expression tests whether all three variables
+.codn a ,
+.code b
+and
+.code c
+hold the same object. The second tests whether
+.code c
+holds a truth value corresponding to whether
+.code a
+and
+.code b
+are equal. Similarly, the third expression tests whether
+.code a
+holds a truth value corresponding to whether
+.code b
+and
+.code c
+are equal.
+
+The transformation takes advantage of some of the operators being
+variadic. For instance, since
+.code =
+is variadic,
+.code "a = b = c"
+becomes
+.codn "(= a b c)" .
+However since
+.code eq
+is not variadic,
+.code "a eq b eq c"
+becomes
+.codn "(and (eq a b) (eq b c))" .
+This makes a difference to evaluation semantics because in the expression
+.codn "(= a b c)" ,
+.code c
+is evaluated even if
+.code a
+and
+.code b
+are not equal; all three argument expressions are evaluated before the
+.code =
+function is invoked. If the coalescing were not performed, leaving the
+expressions as
+.codn "(and (= a b) (= b c))" ,
+then
+.code c
+would not be evaluated upon
+.code a
+and
+.code b
+being found unequal. Code which requires the short-circuiting behavior
+must use the expression
+.codn "a = b and b = c" .
+Short-circuiting behavior may be relied on for operators which correspond
+to strictly binary functions, and in all situations in which different
+relational operators are compounded:
+.code "a = b < c"
+relies on calls to two functions;
+.code c
+is not evaluated and
+.code <
+is not invoked if the
+.code =
+comparison fails. The following relational operators correspond to strictly
+binary functions:
+.codn eq ,
+.codn eql ,
+.codn equal ,
+.codn neq ,
+.code neql
+and
+.codn nequal ,
+such that adjacent combinations of them exhibit short-circuit behavior,
+as if they were dissimilar operators.  All others correspond to variadic
+functions, such that when compounds are produced using the same operator,
+short-circuit evaluation doesn't take place among the operands of the compound.
+
 .NP* Precedence Demotion Rule
 
 The algorithm used by \*(TL's
author	Kaz Kylheku <kaz@kylheku.com>	2025-05-01 06:41:24 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2025-05-01 06:41:24 -0700
commit	5c0e21892703253846a83d3aed8e61050445c4ab (patch)
tree	fbf3280c91e53987a6e0e1c2b27bba8128fe3679
parent	183a3b78908a864d6828a57d059dc4f176ef8a04 (diff)
download	txr-5c0e21892703253846a83d3aed8e61050445c4ab.tar.gz txr-5c0e21892703253846a83d3aed8e61050445c4ab.tar.bz2 txr-5c0e21892703253846a83d3aed8e61050445c4ab.zip