diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2025-03-08 08:33:57 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2025-03-08 08:33:57 -0800 |
commit | 365ce154f09ee833ab208ea7cf50b25e5705b4ce (patch) | |
tree | 92b2b483358d54a87515271b1443879ad6b3c81e | |
parent | d92391abf99df1f6fccbb93b5879daf888e906ce (diff) | |
download | txr-365ce154f09ee833ab208ea7cf50b25e5705b4ce.tar.gz txr-365ce154f09ee833ab208ea7cf50b25e5705b4ce.tar.bz2 txr-365ce154f09ee833ab208ea7cf50b25e5705b4ce.zip |
glob*: add string and integer ranges to brace expansion.
* stdlib/glob.tl (bexp-parse): Recognize .. as a token.
(bexp-parse-brace): If a brace expansion doesn't contain
commas, then check whether it contains .. and that its elements
are all strings. In that case it is a possible range expansion
and we thus transform it to a (- ...) node, subject to
more validation in bexp-expand.
(bexp-expand): Add cases to handle range expansion,
taking care that invalid forms translate to verbatim
syntax.
* tests/018/glob.tl: New tests.
* txr.1: Documented.
-rw-r--r-- | stdlib/glob.tl | 56 | ||||
-rw-r--r-- | tests/018/glob.tl | 77 | ||||
-rw-r--r-- | txr.1 | 115 |
3 files changed, 229 insertions, 19 deletions
diff --git a/stdlib/glob.tl b/stdlib/glob.tl index ae6d9322..4b132107 100644 --- a/stdlib/glob.tl +++ b/stdlib/glob.tl @@ -35,7 +35,7 @@ (defun bexp-parse (str) (let ((ctx (new bexp-parse-ctx str str - toks (remqual "" (tok #/([{},]|{}|\\\\|\\.)/ t str))))) + toks (remqual "" (tok #/([{},]|{}|\.\.|\\\\|\\.)/ t str))))) (build (whilet ((next (pop ctx.toks))) (add @@ -51,15 +51,21 @@ ("}" (return :ok)) (t (add next)))) (:ok - (cond - ((memqual "," (get)) - (flow (get) - (split* @1 (op where (op equal ","))) - (cons '/))) - (t - (add* "{") - (add "}") - (get)))) + (let ((toks (get))) + (cond + ((memqual "," toks) + (flow toks + (split* @1 (op where (op equal ","))) + (cons '/))) + ((and (memqual ".." toks) + [all toks stringp]) + (flow toks + (split* @1 (op where (op equal ".."))) + (cons '-))) + (t + (add* "{") + (add "}") + (get))))) (nil (add* "{") (get))))) @@ -73,13 +79,41 @@ (each ((elem alt)) path.(oust saved-path) (pend (bexp-expand (cons elem rest) path))))) + (@(or ((- @from @to) . @rest) + ((- @from @to (`@{skip #/\d*[1-9]\d*/}`)) . @rest)) + (let ((saved-path path.(get)) + (fj (join from)) + (tj (join to)) + (sk (if skip (toint skip) 1))) + (cond + ((and (plusp (len fj)) + (plusp (len tj)) + [all fj chr-isdigit] + [all tj chr-isdigit]) + (let ((fn (toint fj)) + (tn (toint tj)) + (wid (min (len fj) (len tj)))) + (if (<= fn tn) + (inc tn) + (inc fn)) + (each ((elem fn..tn..sk)) + path.(oust saved-path) + (pend (bexp-expand (cons (fmt "~,0*d" wid elem) rest) path))))) + ((eql (len fj) (len tj)) + (each ((elem fj..tj..sk)) + path.(oust saved-path) + (pend (bexp-expand (cons elem rest) path)))) + (t path.(add `{@fj..@tj@(if skip `..@skip`)}`) + (pend (bexp-expand rest path)))))) + (((- . @elem) . @rest) + path.(add `{@(join-with ".." [mapcar join elem])}`) + (pend (bexp-expand rest path))) ((@(consp @succ) . @rest) (pend (bexp-expand (append succ rest) path))) ((@head . 
@rest) path.(add head) (pend (bexp-expand rest path)))))) - (defun glob* (pattern-or-patterns : (flags 0)) (let ((xflags (logior flags sys:glob-xstar)) (patterns (if (listp pattern-or-patterns) diff --git a/tests/018/glob.tl b/tests/018/glob.tl index 438e4c65..e08179f4 100644 --- a/tests/018/glob.tl +++ b/tests/018/glob.tl @@ -10,7 +10,7 @@ ~/Pictures/*.jpg ~/Pictures/*.gif ~/Pictures/*.png" (sys:brace-expand "It{{em,alic}iz,erat}e{d,}, please.") ("Itemized, please." "Itemize, please." "Italicized, please." - "Italicize, please." "Iterated, please." "Iterate, please.") + "Italicize, please." "Iterated, please." "Iterate, please.") (sys:brace-expand "{,{,gotta have{ ,\\, again\\, }}more }cowbell!") ("cowbell!" "more cowbell!" "gotta have more cowbell!" "gotta have\\, again\\, more cowbell!") @@ -19,6 +19,81 @@ "{}} some }{,{\\\\ edge \\,}{ cases, {here} \\\\\\\\\\}")) (mtest + (sys:brace-expand "{..}") + ("") + (sys:brace-expand "{..a}") + ("{..a}") + (sys:brace-expand "{a..}") + ("{a..}") + (sys:brace-expand "{a..b}") + #"a b" + (sys:brace-expand "{aa..cc}") + #"aa ab ac ba bb bc ca cb cc" + (sys:brace-expand "x{aa..cc}") + #"xaa xab xac xba xbb xbc xca xcb xcc" + (sys:brace-expand "{aa..cc}y") + #"aay aby acy bay bby bcy cay cby ccy" + (sys:brace-expand "x{aa..cc}y") + #"xaay xaby xacy xbay xbby xbcy xcay xcby xccy" + (sys:brace-expand "x{aa..ccc}y") + #"x{aa..ccc}y" + (sys:brace-expand "x{a..b..c}y") + #"x{a..b..c}y" + (sys:brace-expand "x{a..b..0}y") + #"x{a..b..0}y" + (sys:brace-expand "x{a..b..1}y") + #"xay xby" + (sys:brace-expand "x{aa..cc..2}y") + #"xaay xacy xbby xcay xccy" + (sys:brace-expand "x{aa..cc..2..}y") + #"x{aa..cc..2..}y" + (sys:brace-expand "x{aa..cc..{2,3}..}y") + #"x{aa..cc..2..}y x{aa..cc..3..}y") + +(mtest + (sys:brace-expand "{0..0}") + #"0" + (sys:brace-expand "{0..1}") + #"0 1" + (sys:brace-expand "{9..11}") + #"9 10 11" + (sys:brace-expand "{9..11..2}") + #"9 11" + (sys:brace-expand "{9..11..3}") + #"9" + (sys:brace-expand "{09..11}") + 
#"09 10 11" + (sys:brace-expand "{997..1001}") + #"997 998 999 1000 1001" + (sys:brace-expand "{0997..1001}") + #"0997 0998 0999 1000 1001" + (sys:brace-expand "{1..0}") + #"1 0" + (sys:brace-expand "{11..9}") + #"11 10 9" + (sys:brace-expand "{11..9..2}") + #"11 9" + (sys:brace-expand "{11..9..3}") + #"11" + (sys:brace-expand "{11..09}") + #"11 10 09" + (sys:brace-expand "{1001..997}") + #"1001 1000 999 998 997" + (sys:brace-expand "{1001..0997}") + #"1001 1000 0999 0998 0997") + +(mtest + (sys:brace-expand "{a..c}{-,+}{1..2}") + #"a-1 a-2 a+1 a+2 b-1 b-2 b+1 b+2 c-1 c-2 c+1 c+2" + (sys:brace-expand "x{a..c}y{-,+}z{1..2}w") + #"xay-z1w xay-z2w xay+z1w xay+z2w xby-z1w xby-z2w \ + xby+z1w xby+z2w xcy-z1w xcy-z2w xcy+z1w xcy+z2w") + +(test + (sys:brace-expand "x{a,b,{1..3}}y") + #"xay xby x1y x2y x3y") + +(mtest (glob* "tests/**/002") ("tests/002") (glob* "tests/**/{003,004}") ("tests/003" "tests/004")) @@ -78317,17 +78317,36 @@ function does not rely on .code glob for brace expansion, even if it is available. +The platform-specific brace expansion that may be available in +.code glob +is not described in this manual; refer to the documentation for the +.code glob +function in the platform's C library. + The brace expansion supported by .code glob* is a string generation mechanism driven by a syntax which specifies comma-separated elements enclosed in braces. When a single brace expansion appears in a pattern, that pattern turns -into a list of patterns. There are as many elements in the list -as there are elements between the braces. Each element replaces the -braces with a different element from between the braces. +into a list of patterns, which contains as many elements as are +generated by the brace expansion syntax. -For instance, +Brace expansion syntax comes in two forms. If one or more commas +occur in the braces, that is classic element expansion, denoting substitution +of successive comma-separated elements into the surrounding pattern. 
+If no commas occur, but the character sequence +.code .. +(dot dot) occurs, then the brace syntax potentially denotes +a range expansion. In order for the +.code .. +syntax to denote range expansion, it has to meet certain conditions. +If these conditions are not met, then the syntax loses its brace +expansion meaning and just denotes a literal character sequence. +Range expansion is inspired by the sequence expansion feature in +GNU Bash, but is somewhat more capable. + +For instance, the element expansion .str x{a,b}y denotes the list of strings .codn "(\(dqxay\(dq \(dqxby\(dq)" . @@ -78342,9 +78361,10 @@ and the second replaces it with When multiple braces occur in a pattern, then all combinations (Cartesian product) of the braces is produced. -Braces may also nest. When the element of a brace itself uses braces, then that -element is subject to brace expansion. The elements which emerge then become -items of the enclosing brace, as if they were comma-separated elements. +Element expansion may also nest. When the element of a brace expansion itself +uses braces, then that element is subject to brace expansion. The elements +which emerge then become items of the enclosing brace, as if they were +comma-separated elements. For instance .str x{a,{b,c}y}z is equivalent to @@ -78354,6 +78374,87 @@ which then expands to the three strings .str xbyz and .strn xcyz . +Range expansions may be nested within element expansions; i.e. +appear as an element. The converse is not true; range expansions +do not contain nested brace syntax. For example +.str x{a,{1..3},b}y +produces the same expansion as +.strn x{a,1,2,3,b}y + +Range expansions have two syntactic forms and, independently of that, +two semantic forms. The two syntactic forms are +.mono +.mets {from..to} +.onom +and +.codn {from..to..step} . +If there are more than three elements separated by +.code .. +then the syntax isn't considered brace expansion. 
+If +.code step +is present, it must be a sequence of decimal digits denoting +a positive integer. This value determines the increment step +through he sequence. For instance if the value is +.code 2 +then every other element is taken. This works similarly to +skip range feature described under the +.code iter-begin +function. + +If both +.code from +and +.code to +are sequences of decimal digits, they denote an integer +range, providing an expansion of integers starting from the value +.code from +and ending with the value +.codn to . +The decimal integers expanded by integer range expansion each +have a minimum number of digits, which is met with the help of +leading zeros, if necessary. The minimum number of digits +is the smaller of the two lengths of +.code from +and +.codn to . +For instance, if +.code from +is three digits wide, and +.code to +is four digits wide, then the generated decimal integers will +be three digits wide. Thus, the range expansion +.code {01..999} +will produce the decimal strings +.codn 01 , +.codn 02 , +\&..., +.codn 99 , +.codn 100 , +\&..., +.code 998 +and +.codn 999 , +since the minimum number digits is 2, the smaller of the +number of digits in +.code 01 +and +.codn 999 . + +If either +.code from +or +.code to +does not consist entirely of decimal digits, then the range +expansion denotes a string range. This is the case only if +.code from +and +.code to +are of equal length, otherwise the syntax is not considered +brace expansion. A brace expansion string range generates +elements in exactly the same way as the string range described under the +.code iter-begin +function. Braces may be escaped by a backslash to disable their special meaning. Likewise, the commas may be escaped by a backslash to preserve their special |