summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2025-03-08 08:33:57 -0800
committerKaz Kylheku <kaz@kylheku.com>2025-03-08 08:33:57 -0800
commit365ce154f09ee833ab208ea7cf50b25e5705b4ce (patch)
tree92b2b483358d54a87515271b1443879ad6b3c81e
parentd92391abf99df1f6fccbb93b5879daf888e906ce (diff)
downloadtxr-365ce154f09ee833ab208ea7cf50b25e5705b4ce.tar.gz
txr-365ce154f09ee833ab208ea7cf50b25e5705b4ce.tar.bz2
txr-365ce154f09ee833ab208ea7cf50b25e5705b4ce.zip
glob*: add string and integer ranges to brace expansion.
* stdlib/glob.tl (bexp-parse): Recognize .. as a token. (bexp-parse-brace): If a brace expansion doesn't contain commas, then check whether it contains .. and that its elements are all strings. In that case it is a possible range expansion and we thus transform it to a (- ...) node, subject to more validation in bexp-expand. (bexp-expand): Add cases to handle range expansion, taking care that invalid forms translate to verbatim syntax. * tests/018/glob.tl: New tests. * txr.1: Documented.
-rw-r--r--stdlib/glob.tl56
-rw-r--r--tests/018/glob.tl77
-rw-r--r--txr.1115
3 files changed, 229 insertions, 19 deletions
diff --git a/stdlib/glob.tl b/stdlib/glob.tl
index ae6d9322..4b132107 100644
--- a/stdlib/glob.tl
+++ b/stdlib/glob.tl
@@ -35,7 +35,7 @@
(defun bexp-parse (str)
(let ((ctx (new bexp-parse-ctx
str str
- toks (remqual "" (tok #/([{},]|{}|\\\\|\\.)/ t str)))))
+ toks (remqual "" (tok #/([{},]|{}|\.\.|\\\\|\\.)/ t str)))))
(build
(whilet ((next (pop ctx.toks)))
(add
@@ -51,15 +51,21 @@
("}" (return :ok))
(t (add next))))
(:ok
- (cond
- ((memqual "," (get))
- (flow (get)
- (split* @1 (op where (op equal ",")))
- (cons '/)))
- (t
- (add* "{")
- (add "}")
- (get))))
+ (let ((toks (get)))
+ (cond
+ ((memqual "," toks)
+ (flow toks
+ (split* @1 (op where (op equal ",")))
+ (cons '/)))
+ ((and (memqual ".." toks)
+ [all toks stringp])
+ (flow toks
+ (split* @1 (op where (op equal "..")))
+ (cons '-)))
+ (t
+ (add* "{")
+ (add "}")
+ (get)))))
(nil
(add* "{")
(get)))))
@@ -73,13 +79,41 @@
(each ((elem alt))
path.(oust saved-path)
(pend (bexp-expand (cons elem rest) path)))))
+ (@(or ((- @from @to) . @rest)
+ ((- @from @to (`@{skip #/\d*[1-9]\d*/}`)) . @rest))
+ (let ((saved-path path.(get))
+ (fj (join from))
+ (tj (join to))
+ (sk (if skip (toint skip) 1)))
+ (cond
+ ((and (plusp (len fj))
+ (plusp (len tj))
+ [all fj chr-isdigit]
+ [all tj chr-isdigit])
+ (let ((fn (toint fj))
+ (tn (toint tj))
+ (wid (min (len fj) (len tj))))
+ (if (<= fn tn)
+ (inc tn)
+ (inc fn))
+ (each ((elem fn..tn..sk))
+ path.(oust saved-path)
+ (pend (bexp-expand (cons (fmt "~,0*d" wid elem) rest) path)))))
+ ((eql (len fj) (len tj))
+ (each ((elem fj..tj..sk))
+ path.(oust saved-path)
+ (pend (bexp-expand (cons elem rest) path))))
+ (t path.(add `{@fj..@tj@(if skip `..@skip`)}`)
+ (pend (bexp-expand rest path))))))
+ (((- . @elem) . @rest)
+ path.(add `{@(join-with ".." [mapcar join elem])}`)
+ (pend (bexp-expand rest path)))
((@(consp @succ) . @rest)
(pend (bexp-expand (append succ rest) path)))
((@head . @rest)
path.(add head)
(pend (bexp-expand rest path))))))
-
(defun glob* (pattern-or-patterns : (flags 0))
(let ((xflags (logior flags sys:glob-xstar))
(patterns (if (listp pattern-or-patterns)
diff --git a/tests/018/glob.tl b/tests/018/glob.tl
index 438e4c65..e08179f4 100644
--- a/tests/018/glob.tl
+++ b/tests/018/glob.tl
@@ -10,7 +10,7 @@
~/Pictures/*.jpg ~/Pictures/*.gif ~/Pictures/*.png"
(sys:brace-expand "It{{em,alic}iz,erat}e{d,}, please.")
("Itemized, please." "Itemize, please." "Italicized, please."
- "Italicize, please." "Iterated, please." "Iterate, please.")
+ "Italicize, please." "Iterated, please." "Iterate, please.")
(sys:brace-expand "{,{,gotta have{ ,\\, again\\, }}more }cowbell!")
("cowbell!" "more cowbell!" "gotta have more cowbell!"
"gotta have\\, again\\, more cowbell!")
@@ -19,6 +19,81 @@
"{}} some }{,{\\\\ edge \\,}{ cases, {here} \\\\\\\\\\}"))
(mtest
+ (sys:brace-expand "{..}")
+ ("")
+ (sys:brace-expand "{..a}")
+ ("{..a}")
+ (sys:brace-expand "{a..}")
+ ("{a..}")
+ (sys:brace-expand "{a..b}")
+ #"a b"
+ (sys:brace-expand "{aa..cc}")
+ #"aa ab ac ba bb bc ca cb cc"
+ (sys:brace-expand "x{aa..cc}")
+ #"xaa xab xac xba xbb xbc xca xcb xcc"
+ (sys:brace-expand "{aa..cc}y")
+ #"aay aby acy bay bby bcy cay cby ccy"
+ (sys:brace-expand "x{aa..cc}y")
+ #"xaay xaby xacy xbay xbby xbcy xcay xcby xccy"
+ (sys:brace-expand "x{aa..ccc}y")
+ #"x{aa..ccc}y"
+ (sys:brace-expand "x{a..b..c}y")
+ #"x{a..b..c}y"
+ (sys:brace-expand "x{a..b..0}y")
+ #"x{a..b..0}y"
+ (sys:brace-expand "x{a..b..1}y")
+ #"xay xby"
+ (sys:brace-expand "x{aa..cc..2}y")
+ #"xaay xacy xbby xcay xccy"
+ (sys:brace-expand "x{aa..cc..2..}y")
+ #"x{aa..cc..2..}y"
+ (sys:brace-expand "x{aa..cc..{2,3}..}y")
+ #"x{aa..cc..2..}y x{aa..cc..3..}y")
+
+(mtest
+ (sys:brace-expand "{0..0}")
+ #"0"
+ (sys:brace-expand "{0..1}")
+ #"0 1"
+ (sys:brace-expand "{9..11}")
+ #"9 10 11"
+ (sys:brace-expand "{9..11..2}")
+ #"9 11"
+ (sys:brace-expand "{9..11..3}")
+ #"9"
+ (sys:brace-expand "{09..11}")
+ #"09 10 11"
+ (sys:brace-expand "{997..1001}")
+ #"997 998 999 1000 1001"
+ (sys:brace-expand "{0997..1001}")
+ #"0997 0998 0999 1000 1001"
+ (sys:brace-expand "{1..0}")
+ #"1 0"
+ (sys:brace-expand "{11..9}")
+ #"11 10 9"
+ (sys:brace-expand "{11..9..2}")
+ #"11 9"
+ (sys:brace-expand "{11..9..3}")
+ #"11"
+ (sys:brace-expand "{11..09}")
+ #"11 10 09"
+ (sys:brace-expand "{1001..997}")
+ #"1001 1000 999 998 997"
+ (sys:brace-expand "{1001..0997}")
+ #"1001 1000 0999 0998 0997")
+
+(mtest
+ (sys:brace-expand "{a..c}{-,+}{1..2}")
+ #"a-1 a-2 a+1 a+2 b-1 b-2 b+1 b+2 c-1 c-2 c+1 c+2"
+ (sys:brace-expand "x{a..c}y{-,+}z{1..2}w")
+ #"xay-z1w xay-z2w xay+z1w xay+z2w xby-z1w xby-z2w \
+ xby+z1w xby+z2w xcy-z1w xcy-z2w xcy+z1w xcy+z2w")
+
+(test
+ (sys:brace-expand "x{a,b,{1..3}}y")
+ #"xay xby x1y x2y x3y")
+
+(mtest
(glob* "tests/**/002") ("tests/002")
(glob* "tests/**/{003,004}") ("tests/003" "tests/004"))
diff --git a/txr.1 b/txr.1
index 48c9002e..753b3600 100644
--- a/txr.1
+++ b/txr.1
@@ -78317,17 +78317,36 @@ function does not rely on
.code glob
for brace expansion, even if it is available.
+The platform-specific brace expansion that may be available in
+.code glob
+is not described in this manual; refer to the documentation for the
+.code glob
+function in the platform's C library.
+
The brace expansion supported by
.code glob*
is a string generation mechanism driven by a syntax which specifies
comma-separated elements enclosed in braces.
When a single brace expansion appears in a pattern, that pattern turns
-into a list of patterns. There are as many elements in the list
-as there are elements between the braces. Each element replaces the
-braces with a different element from between the braces.
+into a list of patterns, which contains as many elements as are
+generated by the brace expansion syntax.
-For instance,
+Brace expansion syntax comes in two forms. If one or more commas
+occur in the braces, that is classic element expansion, denoting substitution
+of successive comma-separated elements into the surrounding pattern.
+If no commas occur, but the character sequence
+.code ..
+(dot dot) occurs, then the brace syntax potentially denotes
+a range expansion. In order for the
+.code ..
+syntax to denote range expansion, it has to meet certain conditions.
+If these conditions are not met, then the syntax loses its brace
+expansion meaning and just denotes a literal character sequence.
+Range expansion is inspired by the sequence expansion feature in
+GNU Bash, but is somewhat more capable.
+
+For instance, the element expansion
.str x{a,b}y
denotes the list of strings
.codn "(\(dqxay\(dq \(dqxby\(dq)" .
@@ -78342,9 +78361,10 @@ and the second replaces it with
When multiple braces occur in a pattern, then all combinations
(Cartesian product) of the braces are produced.
-Braces may also nest. When the element of a brace itself uses braces, then that
-element is subject to brace expansion. The elements which emerge then become
-items of the enclosing brace, as if they were comma-separated elements.
+Element expansion may also nest. When the element of a brace expansion itself
+uses braces, then that element is subject to brace expansion. The elements
+which emerge then become items of the enclosing brace, as if they were
+comma-separated elements.
For instance
.str x{a,{b,c}y}z
is equivalent to
@@ -78354,6 +78374,87 @@ which then expands to the three strings
.str xbyz
and
.strn xcyz .
+Range expansions may be nested within element expansions; i.e.
+appear as an element. The converse is not true; range expansions
+do not contain nested brace syntax. For example
+.str x{a,{1..3},b}y
+produces the same expansion as
+.strn x{a,1,2,3,b}y .
+
+Range expansions have two syntactic forms and, independently of that,
+two semantic forms. The two syntactic forms are
+.mono
+.mets {from..to}
+.onom
+and
+.codn {from..to..step} .
+If there are more than three elements separated by
+.code ..
+then the syntax isn't considered brace expansion.
+If
+.code step
+is present, it must be a sequence of decimal digits denoting
+a positive integer. This value determines the increment step
+through the sequence. For instance, if the value is
+.code 2
+then every other element is taken. This works similarly to the
+skip range feature described under the
+.code iter-begin
+function.
+
+If both
+.code from
+and
+.code to
+are sequences of decimal digits, they denote an integer
+range, providing an expansion of integers starting from the value
+.code from
+and ending with the value
+.codn to .
+The decimal integers expanded by integer range expansion each
+have a minimum number of digits, which is met with the help of
+leading zeros, if necessary. The minimum number of digits
+is the smaller of the two lengths of
+.code from
+and
+.codn to .
+For instance, if
+.code from
+is three digits wide, and
+.code to
+is four digits wide, then the generated decimal integers will
+be at least three digits wide. Thus, the range expansion
+.code {01..999}
+will produce the decimal strings
+.codn 01 ,
+.codn 02 ,
+\&...,
+.codn 99 ,
+.codn 100 ,
+\&...,
+.code 998
+and
+.codn 999 ,
+since the minimum number of digits is 2, the smaller of the
+number of digits in
+.code 01
+and
+.codn 999 .
+
+If either
+.code from
+or
+.code to
+does not consist entirely of decimal digits, then the range
+expansion denotes a string range. This is the case only if
+.code from
+and
+.code to
+are of equal length; otherwise the syntax is not considered
+brace expansion. A brace expansion string range generates
+elements in exactly the same way as the string range described under the
+.code iter-begin
+function.
Braces may be escaped by a backslash to disable their special meaning.
Likewise, the commas may be escaped by a backslash to preserve their special