aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2022-04-04 07:43:11 -0700
committerKaz Kylheku <kaz@kylheku.com>2022-04-04 07:43:11 -0700
commitfc1c70004db5ebf8e4124ee1bc655ad0358e8a6f (patch)
tree08174787212d59fe3779a35b3bb66ca0ec948f78
parent60095ceba927559a1ed226f8923327a1bb52f5c8 (diff)
downloadcppawk-fc1c70004db5ebf8e4124ee1bc655ad0358e8a6f.tar.gz
cppawk-fc1c70004db5ebf8e4124ee1bc655ad0358e8a6f.tar.bz2
cppawk-fc1c70004db5ebf8e4124ee1bc655ad0358e8a6f.zip
Starting man page for <cons.h>.
-rw-r--r--cppawk-cons.1775
1 files changed, 775 insertions, 0 deletions
diff --git a/cppawk-cons.1 b/cppawk-cons.1
new file mode 100644
index 0000000..5cd6c82
--- /dev/null
+++ b/cppawk-cons.1
@@ -0,0 +1,775 @@
+.\" cppawk: C preprocessor wrapper around awk
+.\" Kaz Kylheku <kaz@kylheku.com>
+.\"
+.\" BSD-2 License
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright notice,
+.\" this list of conditions and the following disclaimer.
+.\"
+.\" 2. Redistributions in binary form must reproduce the above copyright notice,
+.\" this list of conditions and the following disclaimer in the documentation
+.\" and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+.\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+.\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.de bk
+.IP " "
+.PP
+..
+.TH CPPAWK-CONS 1 "29 March 2022" "cppawk Libraries" "Cons Cells"
+
+.SH NAME
+cons \- Lisp-like data representation and control flow macros
+
+.SH SYNOPSIS
+
+.ft B
+ #include <cons.h>
+
+ // Basic control-flow macros
+
+ progn(...) // eval multiple expressions, yield last
+ prog(...) // eval multiple expressions, yield 1
+ and(...) // short-circuit and: yields first false expr or last
+ or(...) // short-circuit or: yields first true expr
+
+ // Lisp-like data structuring
+
+ nil // empty list; Boolean false.
+ consp(x) // is x a cons cell?
+ atom(x) // is x an atom?
+ null(x) // is x the nil object?
+ endp(x) // true if x is nil, false if cons, else error
+
+ numberp(x) // true if x is a number
+ stringp(x) // true if x is a boxed string
+ symbolp(x) // true if x is a symbol
+
+ box(av) // convert Awk number or string to Lisp value.
+ unbox(lv) // convert Lisp value to Awk number or string.
+ box_sym(av) // create Lisp symbol named av
+
+ cons(a, d) // create cons cell with car = a and cdr = d.
+ car(x) // retrieve car of cons cell x.
+ cdr(x) // retrieve cdr of cons cell x.
+
+ sexp(x) // convert Lisp value to S-expression string
+
+ equal(x, y) // test whether two Lisp values are equal
+
+ list(...) // return argument values as a Lisp list
+ append(...) // append list arguments; last may be atom
+ li(...) // inline macro version of list
+ listar(...) // Lisp's list*, implemented as a macro
+
+ member(y, x) // first suffix of list x starting with y
+
+ position(y, x) // zero-based position of y in list x
+
+ nth(i, x) // zero-based i-th item from list x
+
+ nthcdr(i, x) // suffix of x starting at i-th item
+
+ reverse(x) // reverse list x
+
+ iota(x, y[, d]) // numbers from x to y, incrementing by d
+
+ uniq(x) // list x deduplicated
+
+ mapcar(f, x) // map list through function f
+
+ mappend(f, x) // map list through f, append results
+
+ // array -> list conversion
+
+ atol(x) // convert values of Awk array x to list
+ keys(x) // return list of keys of Awk array x
+
+ // field <-> list conversion
+
+ ftol(x) // convert Awk positional fields to list
+ ltof(x) // set Awk positional fields from list x
+
+ // list iteration
+
+ dolist(item, list)
+ statement
+
+ dolisti(item, index, list)
+ statement
+
+ // push and pop
+
+ push(y, x) // push item y onto x, updating location x
+ pop(x) // pop item from list x, updating x
+
+ // procedural list construction
+
+ bag = list_create()
+ bag = list_add(bag, item1)
+ bag = list_add(bag, item2)
+ list = list_end(bag)
+
+ // bags macro: collect into multiple bags that become lists
+
+ bags (b1, b2, ...) { bag(b1, value) ... }
+.ft R
+
+.SH OVERVIEW
+
+Due to the data structuring limitations of the Awk language, the
+.B cppawk
+representation of Lisp-like data structures is only a sham built
+on character strings. The term
+.I "mock Lisp"
+is sometimes given to this kind of phony, but functional, imitation of Lisp.
+The term is due to James Gosling, who in the early 1980's implemented a
+language actually called "Mock Lisp" in support of a text editor. Mock Lisp
+treated character strings containing words and parentheses as if they were
+nested lists.
+
+.BR cppawk 's
+mock Lisp data structures do not internally use parentheses but
+.B are
+nevertheless implemented using the string data type. Each mock Lisp value
+is an Awk character string. The exact specification for how this works is
+given in the BOXED VS. UNBOXED section below.
+
+Rationale: the character string is used as the basis because it is
+the only aggregate data structure that Awk can pass into functions as an
+argument, and return out of functions. The only two other aggregate structures
+in Awk are the associative array, and the positional fields. The positional
+fields are a kind of global array that exists as a single instance accessed
+by the
+.B $
+operator together with a numeric argument. Even if this somehow were useful
+to an implementor of Lisp data structures, the plan would be foiled by
+the requirement that the Awk application has full control and use of the
+positional fields. The associative array seems more useful, but although
+arrays can be passed
+.B into
+functions, they cannot be returned. Moreover, arrays are never anonymous
+in Awk; they are always stored in a named variable.
+
+Other Lisp data structuring imitations in Awk have been written, which
+typically use a global array to simulate a Lisp heap, with reference semantics,
+garbage collection and all. The goal of
+.BR cppawk 's
+.B cons
+library is not to create a Lisp interpreter within Awk (and there isn't one),
+but to enhance Awk programming with Lisp-inspired list processing which
+seamlessly integrates with existing Awk programming idioms.
+
+Given what it is, and how it is implemented, the library provides Lisp-like
+list processing of decent fidelity. It replicates the cons cell abstraction:
+it features lists made of cons cells, terminated by a
+.B nil
+symbol.
+
+.SH BOXED VS. UNBOXED
+
+The
+.B cons
+library flexibly handles two kinds of data:
+.I boxed
+values ("Lisp objects")
+and
+.I unboxed
+values ("Awk values").
+
+Certain kinds of values only exist in the boxed representation. Awk has no
+native cons data type, or symbol type; so these only exist as boxed
+representations.
+
+Numbers exist only in the unboxed representation; nothing special is done
+with Awk numbers to incorporate them into a Lisp structure such as a list;
+their character string image is stored. Awk numbers already have a string
+nature, so packing them as strings into a larger string is natural to Awk.
+
+In the boxed representation, every object is a string whose first character
+is a type code. The rest of the string has a meaning which depends on the
+type code.
+
+There are currently four type codes:
+.IP T
+The type code letter
+.B T
+stands for "text": it denotes a character string. The characters after
+the T specify the string data.
+.IP S
+The type code
+.B S
+denotes a symbol; the characters after the type code are the symbol name.
+.IP C
+The type code letter
+.B C
+denotes a cons cell. This has a more complicated structure than
+.B T
+or
+.BR S .
+The
+.B C
+is immediately followed by a header consisting of four items:
+a non-negative decimal integer, a comma, another non-negative decimal
+integer, and a colon. More data may follow after the colon.
+The first integer gives the length, in characters, of the cons cell's
+.I car
+object. The second integer gives the length, in characters, of the
+cons cell's
+.I cdr
+object. Thus, it is clear, that a "cons cell" in
+.B cppawk
+is not actually a heap-allocated node with pointers to other objects,
+but a string which entirely contains the objects. The list
+.BR "(1 2 3)" ,
+for instance, gets represented by the character string
+.BR "C1,12:1C1,6:2C1,0:3" .
+The string fully describes it; there is no part of the list stored
+elsewhere. Three
+.BR C 's
+appear in the string, because the list has three items and thus three cons
+cells.
+.B "C1,12"
+means that the first
+.I car
+is one character long, and the rest of the list is
+12 characters long. That one-character-long
+.I car
+is the
+.B 1
+that immediately follows the colon after the length 12. The rest of the
+list,
+.BR "(2 3)" ,
+is then the
+.B "C1,6:2C1,0:3"
+part. Here, again, there is a one-character-long
+.I car
+which is
+.B 2
+and then the six-character rest of the list
+.BR C1,0:3 .
+Here is where things get interesting. The
+.I car
+of the last cell is 3. Curiously, the length of the
+.I cdr
+is zero, and nothing appears after the 3. The reason for this is that the
+list is terminated by the
+.B nil
+object. The
+.B nil
+object has zero length because in
+.BR cppawk ,
+.B nil
+is represented by the empty string.
+.IP U
+The
+.B U
+type code represents the boxed version of the Awk undefined value,
+such as the value of an undefined variable. Application code which needs
+to reliably preserve undefinedness of a value through Lisp operations
+should
+.B box
+and
+.B unbox
+it.
+.PP
+It should be obvious that because the cons cell representation uses
+a length + data encoding, a cons cell can store any pair of Awk values,
+whether they are boxed or unboxed. For instance,
+
+.ft B
+ cons("C3,5:d", 4)
+.ft R
+
+works perfectly well; and if the
+.B car
+function is applied to the result, it will yield the string
+.BR "\(dqC3,5:d\(dq" .
+Note that this string also looks like a corrupt cons cell: it has the
+.B C
+type code followed by length fields, but the data portion is insufficiently
+long. This will only be a problem if the application expects that
+the
+.I car
+of the cell is a boxed Lisp object, and treats it as such: for instance
+by trying to perform some list operation on it. It's up to the application to
+put a boxed value into a cons cell, if it expects to retrieve one.
+
+.SH TREATMENT OF BOOLEAN VALUES
+In Lisp, the way Boolean truth works is that the
+.B nil
+object is false, and every other object is true.
+Recall that
+.B nil
+also serves as the empty list; so empty lists are "falsy", and non-empty lists
+"truthy".
+
+In the
+.B cppawk
+mock Lisp system, this is adjusted to fit Awk semantics.
+
+In Awk, three possible values are false:
+.IP 1.
+The undefined value, such as the value of a variable that has never
+been assigned, or a function parameter that was never passed.
+.IP 2.
+The empty string.
+.IP 3.
+The number zero.
+.PP
+The mock Lisp system adopts these same conventions in order to integrate
+with Awk. One of these values is chosen as the symbol
+.B nil
+and that is the empty string. This is defined as a macro:
+
+.ft B
+ #define nil ""
+.ft R
+
+By empty string, we here mean the empty Awk string. The empty Lisp
+string is represented as the one-character-long Awk string
+.BR \(dqT\(dq ,
+which is not false.
+
+Note that the boxed undefined value tests true, not false.
+
+.SH CONTROL FLOW PRIMITIVES
+
+The control flow primitives are macros patterned after similar
+macros found in some Lisp dialects.
+
+.SS Macros \fIprog\fP and \fIprogn\fP
+.bk
+.B Syntax:
+
+.ft B
+ prog(expr1, expr2, ...)
+ progn(expr1, expr2, ...)
+.ft R
+
+.B Description:
+
+The
+.B prog
+and
+.B progn
+macros evaluate all their argument forms from left to right.
+
+The
+.B prog
+macro evaluates one or more expressions
+.IR expr1 ,
+.IR expr2 ,
+\... and yields the value 1 as its result.
+
+The
+.B progn
+macro evaluates one or more expressions
+.IR expr1 ,
+.IR expr2 ,
+\... and yields the value of the last one as the result.
+
+.B Example:
+
+.ft B
+ // simulate missing comma operator in Awk
+
+ for (prog(i = 0, j = 0);
+ i < N;
+ prog(i++, j += i))
+ {
+ }
+
+ // Write a macro swap() that can be used anywhere
+ // where an expression can be used, and returns the
+ // prior value of a.
+
+ #define swap(a, b, temp) (progn(temp = a, a = b, b = temp))
+.ft R
+
+.SS Macros \fIand\fP and \fIor\fP
+.bk
+.B Syntax:
+
+.ft B
+ and(expr1, expr2, ...)
+ or(expr1, expr2, ...)
+.ft R
+
+.B Description:
+
+The
+.B and
+and
+.B or
+macros evaluate their argument expressions from left to right.
+
+The
+.B and
+macro stops evaluating when one of the expressions yields a
+false value, and yields that value. If all expressions yield
+a true value, then
+.B and
+yields the value of the last expression.
+
+The
+.B or
+macro stops evaluating when one of the expressions yields a true value,
+and yields that value. The remaining expressions are not evaluated.
+If
+.B or
+reaches the last expression, then it yields that expression's value.
+
+.B Examples:
+
+.ft B
+ BEGIN { print or(0, "", nil, 3, 4) } # output is 3
+
+ BEGIN { print and(1, 2, 3, 4) } # output is 4
+
+ BEGIN { print and(0, 2, 3, 4) } # output is 0
+
+ BEGIN { print and(1, "", 3, 4) } # output same as print ""
+.ft R
+
+.SH DATA REPRESENTATION LIBRARY
+
+In the following descriptions, the notations
+.IB X => Y
+and
+.IB X -> Y
+denote that the expression
+.I X
+returns the value
+.IR Y .
+
+The
+.B =>
+notation indicates that
+.I Y
+is being given as a native Awk value.
+
+The
+.B ->
+notation indicates that
+.I Y
+is a boxed Lisp value being shown in Lisp syntax:
+
+.B Examples:
+
+.ft B
+ cons(1, 2) -> (1 . 2)
+
+ cons(1, 2) => "C1,1:12"
+.ft R
+
+.SS Macro \fInil\fP
+.bk
+.B Syntax:
+
+.ft B
+ nil
+.ft R
+
+.B Description:
+
+The
+.B nil
+macro expands to the empty string
+.BR \(dq\(dq .
+It is the representation of the empty list, and behaves as a Boolean
+false, along with zero.
+
+.SS Functions \fIconsp\fP and \fIatom\fP
+.bk
+.B Syntax:
+
+.ft B
+ consp(x)
+ atom(x)
+.ft R
+
+.B Description:
+The
+.B consp
+function returns 1 if
+.I x
+is a cons cell, otherwise 0.
+
+The
+.B atom
+function is the negation of
+.BR consp :
+it returns 0 if its argument is a cons, otherwise 1.
+Any object that is not a cons is classified as an atom.
+
+.SS Functions \fInull\fP and \fIendp\fP
+.bk
+.B Syntax:
+
+.ft B
+ null(x)
+ endp(x)
+.ft R
+
+.B Description:
+The
+.B null
+function returns 1 if, and only if,
+.I x
+is the
+.B nil
+object (which is the empty string).
+Otherwise it returns 0.
+
+The
+.B endp
+function returns 1 if
+.I x
+is the
+.B nil
+object. If
+.I x
+is a cons, then it returns zero.
+If
+.I x
+is any other object (and thus, an atom other than
+.BR nil )
+the function prints a diagnostic and terminates.
+
+The purpose of
+.B endp
+is to provide a termination test for code that iterates
+over lists, with error checking that detects improper
+lists. Improper lists are lists that end in an atom
+other than the empty list
+.BR nil .
+
+.SS Functions \fInumberp\fP, \fIstringp\fP and \fIsymbolp\fP
+.bk
+.B Syntax:
+
+.ft B
+ numberp(x)
+ stringp(x)
+ symbolp(x)
+.ft R
+
+.B Description:
+
+These functions test, respectively, whether the object
+.B x
+is a number, string or symbol, returning 1 to indicate true,
+0 to indicate false.
+
+An object is a string if, and only if, it is a boxed string. See the
+.B box
+function.
+Thus,
+.BR stringp( \(dqabc\(dq )
+returns zero. Code not working with boxed objects shouldn't rely on this
+function and instead use
+.B numberp
+to distinguish numbers from non-numbers.
+
+.B Examples:
+
+.ft B
+ numberp(3) -> 1
+ numberp(0) -> 1
+ numberp("") -> 0
+ numberp("abc") -> 0
+ numberp(cons(1, 2)) -> 0
+
+ stringp("") -> 0 // "" is the object nil
+ stringp("abc") -> 0 // not a boxed string
+ stringp(box("abc")) -> 1
+ stringp("Tabc") -> 1 // manually boxed "abc"
+
+ symbolp(nil) -> 1 // nil is a symbol
+ symbolp("") -> 1 // indistinguishable from nil
+ symbolp(3) -> 0 // numbers are not symbols
+ symbolp("abc") -> 0 // not a symbol
+ symbolp("Sabc") -> 1 // manually produced symbol abc
+.ft R
+
+.SS Functions \fIbox\fP, \fIunbox\fP and \fIbox_sym\fP
+.bk
+.B Syntax:
+
+.ft B
+ box(av)
+ unbox(lv)
+ box_sym(av)
+.ft R
+
+.B Description:
+
+The
+.B box
+function creates a Lisp object from a native Awk value
+.IR av .
+If
+.I av
+is a number, then
+.B box
+returns
+.IR av .
+If
+.I av
+is the Awk undefined value, such as the value of a variable that
+has never been assigned, then
+.B box
+returns a boxed representation of the undefined value.
+Otherwise
+.B box
+returns a boxed string representation of
+.IR av .
+
+The
+.B unbox
+function recovers the Awk value from the Lisp object
+.IR lv .
+If
+.I lv
+is a number, then
+.B unbox
+returns
+.IR lv .
+If
+.I lv
+is a boxed string, then
+.B unbox
+returns the plain Awk string.
+If
+.I lv
+is a symbol, then
+.B unbox
+returns its name.
+
+For any other value,
+.B unbox
+prints a diagnostic message and terminates the process.
+
+The
+.B box_sym
+function boxes an Awk value
+.I av
+as a symbol. The string representation of
+.I av
+becomes the symbol's name. The string \fB"nil"\fP boxes as the
+.B nil
+symbol, and not as \fB"Snil"\fP.
+
+.B Examples:
+
+.ft B
+ box(0.707) => 0.707
+ box("") => "T"
+ box("abc") => "Tabc"
+ box(undefined_var) => "U"
+
+ unbox(nil) => "nil" // name of symbol nil is "nil"
+ unbox(box("abc")) => "abc"
+ unbox(3.14) -> 3.14
+ unbox(box_sym("abc")) => "abc"
+ unbox("xyz") => ;; error
+ unbox("Txyz") => "xyz" // T type code indicates boxed string
+
+ box_sym("") => "S" // symbol with empty string name
+ box_sym(3.14) => "S3.14" // the symbol 3.14 (not a number)
+ box_sym("abc") => "Sabc" // the symbol abc
+ box_sym("nil") => "" -> nil // "nil" is the symbol nil
+.ft R
+
+.SS Functions \fIcons\fP, \fIcar\fP and \fIcdr\fP
+.bk
+.B Syntax:
+
+.ft B
+ cons(a, d)
+ car(c)
+ cdr(c)
+.ft R
+
+.B Description:
+
+The
+.B cons
+function constructs and returns a binary pair object called a
+.I "cons cell"
+or just
+.IR "cons" .
+The cons holds the two argument values in two fields called
+.I car
+and
+.IR cdr .
+
+The arguments may be any values: any
+combination of boxed or unboxed objects.
+
+The
+.B car
+function returns the
+.I car
+field of its cons cell argument.
+
+Likewise, the
+.B cdr
+function returns the
+.I cdr
+field of its cons cell argument.
+
+The
+.B car
+and
+.B cdr
+functions may be given the
+.B nil
+symbol as an argument instead of a cons, in which case they return
+.BR nil .
+
+.B Examples:
+
+.ft B
+ cons(1, 2) => "C1,1:12" -> (1 . 2)
+
+ car(cons(1, 2)) -> 1
+ cdr(cons(1, "abc")) => "abc"
+
+ // Below, abc and def are assumed to be unassigned.
+
+ // Without boxing, undefined gets treated as nil.
+
+ cons(abc, def) => "C0,0:" -> (nil . nil)
+ car(cons(abc, def)) => "" -> nil
+
+ // Boxing passes through and recovers Awk undefined value
+
+ cons(box(abc), box(def)) => "C1,1:UU" -> (#U . #U)
+ car(cons(box(abc), box(def))) => ;; Awk undefined value
+.ft R
+
+.SH "SEE ALSO"
+
+cppawk(1)
+
+.SH BUGS
+
+.SH AUTHOR
+Kaz Kylheku <kaz@kylheku.com>
+
+.SH COPYRIGHT
+Copyright 2022, BSD2 License.