diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2025-03-09 18:14:47 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2025-03-09 18:14:47 -0700 |
commit | e8d08d737e213f3a3a85002220762b512afbda92 (patch) | |
tree | f34b447f2b2f41732ea69c1f00cbd11c3a6e4eb6 | |
parent | 365ce154f09ee833ab208ea7cf50b25e5705b4ce (diff) | |
download | txr-e8d08d737e213f3a3a85002220762b512afbda92.tar.gz txr-e8d08d737e213f3a3a85002220762b512afbda92.tar.bz2 txr-e8d08d737e213f3a3a85002220762b512afbda92.zip |
Expose brace expansion bexp function.
* autoload.c (glob_set_entries): Remove autoload on
sys:brace-expand. Add usr:bexp.
* stdlib/glob.tl (brace-expand): Renamed to usr:bexp.
(glob*): Call bexp rather than brace-expand.
* tests/018/glob.tl: Rename references to
sys:brace-expand to bexp.
* txr.1: Add section describing the bexp function.
Move brace expansion documentation from glob* to this
new section, adjusting the wording a little bit, mainly
to avoid referring to "patterns". Point glob* documentation
to bexp, which also in turn references glob*.
-rw-r--r-- | autoload.c | 7 | ||||
-rw-r--r-- | stdlib/glob.tl | 4 | ||||
-rw-r--r-- | tests/018/glob.tl | 74 | ||||
-rw-r--r-- | txr.1 | 319 |
4 files changed, 229 insertions, 175 deletions
@@ -985,15 +985,10 @@ static val csort_instantiate(void) static val glob_set_entries(val fun) { - val sys_name[] = { - lit("brace-expand"), - nil - }; val name[] = { - lit("glob*"), + lit("bexp"), lit("glob*"), nil }; - autoload_sys_set(al_fun, sys_name, fun); autoload_set(al_fun, name, fun); return nil; } diff --git a/stdlib/glob.tl b/stdlib/glob.tl index 4b132107..75e6afa3 100644 --- a/stdlib/glob.tl +++ b/stdlib/glob.tl @@ -25,7 +25,7 @@ ;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ;; POSSIBILITY OF SUCH DAMAGE. -(defun brace-expand (str) +(defun usr:bexp (str) (bexp-expand (bexp-parse str))) (defstruct bexp-parse-ctx () @@ -122,6 +122,6 @@ (if (or (logtest flags glob-xnobrace) (null (find-if (op find #\{) patterns))) (glob patterns xflags) - (let ((xpatterns [mappend brace-expand patterns])) + (let ((xpatterns [mappend bexp patterns])) (append-each ((p xpatterns)) (glob p xflags)))))) diff --git a/tests/018/glob.tl b/tests/018/glob.tl index e08179f4..1a61b775 100644 --- a/tests/018/glob.tl +++ b/tests/018/glob.tl @@ -5,92 +5,92 @@ (exit)) (mtest - (sys:brace-expand "~/{Downloads,Pictures}/*.{jpg,gif,png}") + (bexp "~/{Downloads,Pictures}/*.{jpg,gif,png}") #"~/Downloads/*.jpg ~/Downloads/*.gif ~/Downloads/*.png \ ~/Pictures/*.jpg ~/Pictures/*.gif ~/Pictures/*.png" - (sys:brace-expand "It{{em,alic}iz,erat}e{d,}, please.") + (bexp "It{{em,alic}iz,erat}e{d,}, please.") ("Itemized, please." "Itemize, please." "Italicized, please." "Italicize, please." "Iterated, please." "Iterate, please.") - (sys:brace-expand "{,{,gotta have{ ,\\, again\\, }}more }cowbell!") + (bexp "{,{,gotta have{ ,\\, again\\, }}more }cowbell!") ("cowbell!" "more cowbell!" "gotta have more cowbell!" 
"gotta have\\, again\\, more cowbell!") - (sys:brace-expand "{}} some }{,{\\\\{ edge, edge} \\,}{ cases, {here} \\\\\\\\\\}") + (bexp "{}} some }{,{\\\\{ edge, edge} \\,}{ cases, {here} \\\\\\\\\\}") ("{}} some }{,{\\\\ edge \\,}{ cases, {here} \\\\\\\\\\}" "{}} some }{,{\\\\ edge \\,}{ cases, {here} \\\\\\\\\\}")) (mtest - (sys:brace-expand "{..}") + (bexp "{..}") ("") - (sys:brace-expand "{..a}") + (bexp "{..a}") ("{..a}") - (sys:brace-expand "{a..}") + (bexp "{a..}") ("{a..}") - (sys:brace-expand "{a..b}") + (bexp "{a..b}") #"a b" - (sys:brace-expand "{aa..cc}") + (bexp "{aa..cc}") #"aa ab ac ba bb bc ca cb cc" - (sys:brace-expand "x{aa..cc}") + (bexp "x{aa..cc}") #"xaa xab xac xba xbb xbc xca xcb xcc" - (sys:brace-expand "{aa..cc}y") + (bexp "{aa..cc}y") #"aay aby acy bay bby bcy cay cby ccy" - (sys:brace-expand "x{aa..cc}y") + (bexp "x{aa..cc}y") #"xaay xaby xacy xbay xbby xbcy xcay xcby xccy" - (sys:brace-expand "x{aa..ccc}y") + (bexp "x{aa..ccc}y") #"x{aa..ccc}y" - (sys:brace-expand "x{a..b..c}y") + (bexp "x{a..b..c}y") #"x{a..b..c}y" - (sys:brace-expand "x{a..b..0}y") + (bexp "x{a..b..0}y") #"x{a..b..0}y" - (sys:brace-expand "x{a..b..1}y") + (bexp "x{a..b..1}y") #"xay xby" - (sys:brace-expand "x{aa..cc..2}y") + (bexp "x{aa..cc..2}y") #"xaay xacy xbby xcay xccy" - (sys:brace-expand "x{aa..cc..2..}y") + (bexp "x{aa..cc..2..}y") #"x{aa..cc..2..}y" - (sys:brace-expand "x{aa..cc..{2,3}..}y") + (bexp "x{aa..cc..{2,3}..}y") #"x{aa..cc..2..}y x{aa..cc..3..}y") (mtest - (sys:brace-expand "{0..0}") + (bexp "{0..0}") #"0" - (sys:brace-expand "{0..1}") + (bexp "{0..1}") #"0 1" - (sys:brace-expand "{9..11}") + (bexp "{9..11}") #"9 10 11" - (sys:brace-expand "{9..11..2}") + (bexp "{9..11..2}") #"9 11" - (sys:brace-expand "{9..11..3}") + (bexp "{9..11..3}") #"9" - (sys:brace-expand "{09..11}") + (bexp "{09..11}") #"09 10 11" - (sys:brace-expand "{997..1001}") + (bexp "{997..1001}") #"997 998 999 1000 1001" - (sys:brace-expand "{0997..1001}") + (bexp "{0997..1001}") #"0997 
0998 0999 1000 1001" - (sys:brace-expand "{1..0}") + (bexp "{1..0}") #"1 0" - (sys:brace-expand "{11..9}") + (bexp "{11..9}") #"11 10 9" - (sys:brace-expand "{11..9..2}") + (bexp "{11..9..2}") #"11 9" - (sys:brace-expand "{11..9..3}") + (bexp "{11..9..3}") #"11" - (sys:brace-expand "{11..09}") + (bexp "{11..09}") #"11 10 09" - (sys:brace-expand "{1001..997}") + (bexp "{1001..997}") #"1001 1000 999 998 997" - (sys:brace-expand "{1001..0997}") + (bexp "{1001..0997}") #"1001 1000 0999 0998 0997") (mtest - (sys:brace-expand "{a..c}{-,+}{1..2}") + (bexp "{a..c}{-,+}{1..2}") #"a-1 a-2 a+1 a+2 b-1 b-2 b+1 b+2 c-1 c-2 c+1 c+2" - (sys:brace-expand "x{a..c}y{-,+}z{1..2}w") + (bexp "x{a..c}y{-,+}z{1..2}w") #"xay-z1w xay-z2w xay+z1w xay+z2w xby-z1w xby-z2w \ xby+z1w xby+z2w xcy-z1w xcy-z2w xcy+z1w xcy+z2w") (test - (sys:brace-expand "x{a,b,{1..3}}y") + (bexp "x{a,b,{1..3}}y") #"xay xby x1y x2y x3y") (mtest @@ -27535,6 +27535,184 @@ is returned. (break-str "abc,def.ghi" ":") -> nil .brev +.coNP Function @ bexp +.synb +.mets (bexp << string ) +.syne +.desc +The +.code bexp +function implements an operation which belongs to a family of algorithms +informally known as +.I "brace expansion" . +Brace expansion is associated with the generation of file +names in certain Unix shells such as GNU Bash. There isn't a single, +precisely-specified brace expansion algorithm. The +.code bexp +function implements a brace expansion which is broadly compatible +with other implementations, but has its own unique features. + +Brace expansion is so named because it scans its input looking +for certain syntax enclosed in braces. Brace expansion produces +a list of one or more strings based on the input +.metn string . + +The +.code bexp +function is the basis for brace expansion support in \*(TX's +.code glob* +function. 
Since +.code bexp +is a purely string processing function which performs no file system access, +and which can have applications outside of file-name-generating contexts, it is +documented as a string processing function. + +The +.code bexp +function scans the input +.meta string +looking for instances of brace expansion syntax. If no brace expansion +syntax occurs, then a list is returned which contains one element: +that element is a string identical to +.metn string . + +When a single brace expansion appears in +.metn string , +that string turns +into a list of strings, which contains as many elements as are +generated by the brace expansion. + +Brace expansion syntax takes on two forms. If one or more commas +occur in the braces, that is classic element expansion, denoting substitution +of successive comma-separated elements into the surrounding string. +If no commas occur, but the character sequence +.code .. +(dot dot) occurs, then the brace syntax potentially denotes +a range expansion. In order for the +.code .. +syntax to denote range expansion, it has to meet certain conditions. +If these conditions are not met, then the syntax loses its brace +expansion meaning and just denotes a literal character sequence. +Range expansion is inspired by the sequence expansion feature in +GNU Bash, but is somewhat more capable. + +For instance, the element expansion +.str x{a,b}y +denotes the list of strings +.codn "(\(dqxay\(dq \(dqxby\(dq)" . +There are two elements in the list because the braces contain +two elements. The first string replaces +.str {a,b} +with +.str a +and the second replaces it with +.strn b . + +When multiple braces occur in a string, then all combinations +(Cartesian product) of the braces are produced. + +Element expansion may also nest. When the element of a brace expansion itself +uses braces, then that element is subject to brace expansion.
The elements +which emerge then become items of the enclosing brace, as if they were +comma-separated elements. +For instance +.str x{a,{b,c}y}z +is equivalent to +.str x{a,by,cy}z +which then expands to the three strings +.strn xaz , +.str xbyz +and +.strn xcyz . +Range expansions may be nested within element expansions; i.e. +appear as an element. The converse is not true; range expansions +do not contain nested brace syntax. For example +.str x{a,{1..3},b}y +produces the same expansion as +.strn x{a,1,2,3,b}y . + +Range expansions have two syntactic forms and, independently of that, +two semantic forms. The two syntactic forms are +.mono +.mets {from..to} +.onom +and +.codn {from..to..step} . +If there are more than three elements separated by +.code .. +then the syntax isn't considered brace expansion. +If +.code step +is present, it must be a sequence of decimal digits denoting +a positive integer. This value determines the increment step +through the sequence. For instance if the value is +.code 2 +then every other element is taken. This works similarly to the +skip range feature described under the +.code iter-begin +function. + +If both +.code from +and +.code to +are sequences of decimal digits, they denote an integer +range, providing an expansion of integers starting from the value +.code from +and ending with the value +.codn to . +The decimal integers expanded by integer range expansion each +have a minimum number of digits, which is met with the help of +leading zeros, if necessary. The minimum number of digits +is the smaller of the two lengths of +.code from +and +.codn to . +For instance, if +.code from +is three digits wide, and +.code to +is four digits wide, then the generated decimal integers will +be three digits wide.
Thus, the range expansion +.code {01..999} +will produce the decimal strings +.codn 01 , +.codn 02 , +\&..., +.codn 99 , +.codn 100 , +\&..., +.code 998 +and +.codn 999 , +since the minimum number of digits is 2, the smaller of the +number of digits in +.code 01 +and +.codn 999 . + +If either +.code from +or +.code to +does not consist entirely of decimal digits, then the range +expansion denotes a string range. This is the case only if +.code from +and +.code to +are of equal length, otherwise the syntax is not considered +brace expansion. A brace expansion string range generates +elements in exactly the same way as the string range described under the +.code iter-begin +function. + +Braces, commas and the +.code .. +(dot dot) sequence may be escaped with backslashes to disable their special +meaning. Likewise, the commas may be escaped by a backslash to preserve their +special meaning. Brace expansion preserves these backslashes; they appear in +the output. + .SS* Lazy Strings Lazy strings are objects that were developed for the \*(TX pattern-matching language, and are exposed via \*(TL. Lazy strings behave much like strings, @@ -78223,7 +78401,12 @@ supports brace expansion. Brace expansion is enabled by default in .code glob* and can be disabled using the .code glob-xnobrace -flag. Brace expansion is described in detail below. +flag. The brace expansion supported by +.code glob* +is that provided by the +.code bexp +function, and is described in detail in the documentation of +that function. Lastly, the .code glob* @@ -78326,135 +78509,11 @@ function in the platform's C library. The brace expansion supported by .code glob* is a string generation mechanism driven by a syntax which specifies -comma-separated elements enclosed in braces. - -When a single brace expansion appears in a pattern, that pattern turns -into a list of patterns, which contains as many elements as are -generated by the brace expansion syntax. - -Brace expansion syntax comes in two forms.
If one or more commas -occur in the braces, that is classic element expansion, denoting substitution -of successive comma-separated elements into the surrounding pattern. -If no commas occur, but the character sequence -.code .. -(dot dot) occurs, then the brace syntax potentially denotes -a range expansion. In order for the -.code .. -syntax to denote range expansion, it has to meet certain conditions. -If these conditions are not met, then the syntax loses its brace -expansion meaning and just denotes a literal character sequence. -Range expansion is inspired by the sequence expansion feature in -GNU Bash, but is somewhat more capable. - -For instance, the element expansion -.str x{a,b}y -denotes the list of strings -.codn "(\(dqxay\(dq \(dqxby\(dq)" . -The there are two elements in the list because the braces contain -two elements. The first string replaces -.str {a,b} -with -.str a -and the second replaces it with -.strn b . - -When multiple braces occur in a pattern, then all combinations -(Cartesian product) of the braces is produced. - -Element expansion may also nest. When the element of a brace expansion itself -uses braces, then that element is subject to brace expansion. The elements -which emerge then become items of the enclosing brace, as if they were -comma-separated elements. -For instance -.str x{a,{b,c}y}z -is equivalent to -.str x{a,by,cy}z -which then expands to the three strings -.strn xaz , -.str xbyz -and -.strn xcyz . -Range expansions may be nested within element expansions; i.e. -appear as an element. The converse is not true; range expansions -do not contain nested brace syntax. For example -.str x{a,{1..3},b}y -produces the same expansion as -.strn x{a,1,2,3,b}y - -Range expansions have two syntactic forms and, independently of that, -two semantic forms. The two syntactic forms are -.mono -.mets {from..to} -.onom -and -.codn {from..to..step} . -If there are more than three elements separated by -.code .. 
-then the syntax isn't considered brace expansion. -If -.code step -is present, it must be a sequence of decimal digits denoting -a positive integer. This value determines the increment step -through he sequence. For instance if the value is -.code 2 -then every other element is taken. This works similarly to -skip range feature described under the -.code iter-begin -function. - -If both -.code from -and -.code to -are sequences of decimal digits, they denote an integer -range, providing an expansion of integers starting from the value -.code from -and ending with the value -.codn to . -The decimal integers expanded by integer range expansion each -have a minimum number of digits, which is met with the help of -leading zeros, if necessary. The minimum number of digits -is the smaller of the two lengths of -.code from -and -.codn to . -For instance, if -.code from -is three digits wide, and -.code to -is four digits wide, then the generated decimal integers will -be three digits wide. Thus, the range expansion -.code {01..999} -will produce the decimal strings -.codn 01 , -.codn 02 , -\&..., -.codn 99 , -.codn 100 , -\&..., -.code 998 -and -.codn 999 , -since the minimum number digits is 2, the smaller of the -number of digits in -.code 01 -and -.codn 999 . - -If either -.code from -or -.code to -does not consist entirely of decimal digits, then the range -expansion denotes a string range. This is the case only if -.code from -and -.code to -are of equal length, otherwise the syntax is not considered -brace expansion. A brace expansion string range generates -elements in exactly the same way as the string range described under the -.code iter-begin -function. +comma-separated elements enclosed in braces. This is described +in the documentation of the +.code bexp +function, which provides access to brace expansion independently of +.codn glob* . Braces may be escaped by a backslash to disable their special meaning. 
Likewise, the commas may be escaped by a backslash to preserve their special |