From 5275930bb59eaaf60143ef83e4c2f17bc19b25ed Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 22 Sep 2016 06:45:36 -0700 Subject: Regexes now callable as functions. * lib.c (generic_funcall): Add case for regexes. Handle arguments in such a way that the string is always rightmost, with a view to convenient partial application. * txr.1: Documented in multiple places. --- lib.c | 16 +++++++++++ txr.1 | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 110 insertions(+), 6 deletions(-) diff --git a/lib.c b/lib.c index 20ffd069..245f49c5 100644 --- a/lib.c +++ b/lib.c @@ -5357,6 +5357,22 @@ val generic_funcall(val fun, struct args *args_in) default: callerror(fun, lit("too many arguments")); } + } else if (fun->co.cls == regex_s) { + bug_unless (args->argc >= ARGS_MIN); + args_normalize(args, 3); + + switch (args->fill) { + case 0: + callerror(fun, lit("missing required arguments")); + case 1: + return search_regst(z(args->arg[0]), fun, nil, nil); + case 2: + return search_regst(z(args->arg[1]), fun, z(args->arg[0]), nil); + case 3: + return search_regst(z(args->arg[2]), fun, z(args->arg[0]), z(args->arg[1])); + default: + callerror(fun, lit("too many arguments")); + } } else if (structp(fun)) { fun = method(fun, lambda_s); break; diff --git a/txr.1 b/txr.1 index 0256d039..c7c9b8df 100644 --- a/txr.1 +++ b/txr.1 @@ -10759,12 +10759,13 @@ they produce lazy lists. .SS* Callable Objects -In \*(TL, sequences (strings, vectors and lists) and hashes can be used -as functions everywhere, not just with the DWIM brackets. Sequences work -as one or two-argument functions. With a single argument, an element is -selected by position and returned. With two arguments, a range is extracted and -returned. Hashes also work as one or two argument functions, corresponding -to the arguments of the gethash function. 
+In \*(TL, sequences (strings, vectors and lists) as well as hashes and +regular expressions can be used as functions everywhere, not just with the DWIM +brackets. + +Sequences work as one- or two-argument functions. With a single argument, an +element is selected by position and returned. With two arguments, a range is +extracted and returned. Moreover, when a sequence is used as a function of one argument, and the argument is a range object rather than an integer, then the call is equivalent @@ -10773,6 +10774,14 @@ to the two-argument form. This is the basis for array slice syntax like ["abc" 0..1] . .cble +Hashes also work as one- or two-argument functions, corresponding to the +arguments of the gethash function. + +A regular expression behaves as a one-, two-, or three-argument function, which +operates on a string argument. +It returns the leftmost matching substring, or else +.codn nil . + .B Example 1: .cblk @@ -10823,6 +10832,18 @@ select function, as if .code "(select '(1 2 3 4) '(0 2))" were called. +.B Example 4: + +.cblk + (call #/b./ "abcd") -> "bc" +.cble + +Here, the regular expression, called as a function, finds the matching +substring +.str bc +within the argument +.strn abcd . + .SS* Special Variables Similarly to Common Lisp, \*(TL is lexically scoped by default, but also has dynamically scoped (a.k.a "special") variables. @@ -12525,6 +12546,33 @@ if there is no such entry. The expression .meta alt is always evaluated, whether or not its value is used. +.meIP >> [ regex >> [ start <> [ from-end ]] < string ] +Determine whether regular expression +.meta regex +matches +.metn string , +and in that case return the +(possibly empty) leftmost matching substring. +Otherwise, return +.codn nil . + +If +.meta start +is specified, it gives the starting position where +the search begins, and if +.meta from-end +is given, and has a value other than +.codn nil , +it specifies a search from right to left.
These optional +arguments have the same conventions and semantics as +their equivalents in the +.code search-regst +function. + +Note that +.meta string +is always required, and is always the rightmost argument. + .RE .PP @@ -31676,6 +31724,46 @@ non-deterministically: .cble .SS* Regular Expression Library +.NP* Regular Expressions as Functions +.synb +.mets >> [ regex >> [ start <> [ from-end ]] < string ] +.syne +.desc +A regular expression is callable as a function in \*(TL. +When used this way, it requires a string argument. It searches +the string for the leftmost match for itself, and returns +the matching substring, which could be empty. If no match is +found, it returns +.codn nil . + +A regex takes one, two, or three arguments. The required +.meta string +is always the rightmost argument. This allows for convenient +partial application of the optional arguments using +macros in the +.code op +family, and macros in which the +.code op +syntax is implicit. + +The optional arguments +.meta start +and +.meta from-end +are treated exactly as their like-named counterparts in the +.code search-regst +function. + +.TP* Example: +Keep those elements from a list of strings which match +the regular expression +.codn #/a.*b/ : + +.cblk + (keep-if #/a.*b/ '#"abracadabra zebra hat adlib adobe deer") + --> ("abracadabra" "adlib" "adobe") +.cble + .coNP Functions @ search-regex and @ range-regex .synb .mets (search-regex < string < regex >> [ start <> [ from-end ]]) -- cgit v1.2.3