diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2025-03-21 20:37:25 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2025-03-21 20:37:25 -0700 |
commit | 8999e74125d1ca2f526d25f437848861cdc19024 (patch) | |
tree | b5ed8485658fa81321a12eb7bd2cfd40b2268fdb | |
parent | 7fcd263dbcf065646caf031b087f06b6a2517390 (diff) | |
download | txr-8999e74125d1ca2f526d25f437848861cdc19024.tar.gz txr-8999e74125d1ca2f526d25f437848861cdc19024.tar.bz2 txr-8999e74125d1ca2f526d25f437848861cdc19024.zip |
rand: use PRNG bits more economically for small moduli.
The rand function always calls rand32 at least one for
each call, even for small moduli that need only a few
pseudo-random bits. For instance when the modulus
is 2, the function requires only one pseudo-random
bit from the PRNG, yet it takes 32 and throws away 31.
With this commit, that changes. For moduli of 65536
or smaller, the bits are used more efficiently;
and for a modulus of 2, the function can satisfy
32 calls using the bits of a single rand32_t word:
one stepping of the WELL512a PRNG.
* rand.c (struct rand_state): New member, shift. Holds the
shift register for rand/random to take bits from, replenished
from rand32() when it runs out. The shift register detects
when it runs out of bits in a clever way, without any
additional variable. The register is regarded as being
33 bits wide, with a top bit that is always 1. When
the register is empty, a 32 bit word is taken from the
PRNG. The required random bits are taken from the word, and
it is then shifted to the right. (We take only power-of-two
amounts out of the shift register: 1, 2, 4, 8 or 16 bits).
Even the smallest shift produces enough room that the
33rd bit can be added to the word, into its shifted position.
After that, the shift register is considered to have enough
bits for a given modulus if its value is less than equal
to the mask. I.e if we were to take bits from it, we would
be including the unconditional signaling bit. At that
point we clobber the shift register with a new set of 32 bits
from the PRNG, take the random bits we need, shift it to
the right and add the signaling bit.
(opt_noshift): New static variable; indicates whether
we are in compatibility mode, requiring the shift register
optimization to be defeated.
(make_random_state): Initialize shift register to 0
in several places.
(random): Implement various small modulus cases. There are
specific cases for moduli that are exactly 65536, 256, 16, 4,
3 and 2. The in-between cases are handled by shifting the
bits in the same amounts as the next higher power of two from
this list of sizes: 16, 8, or 4 bits. For these cases, we
calculate the smallest Mersenne modulus which covers the bits
of the actual moduls and use that for rejecting potential
values, just as we do in the general large modulus case. For
instance if the modulus is 60 (range 0 to 59), that lands into
the 8 bit shift range: we pull 8 bits at a time from the shift
register. But the modulus 60 is covered by the six bit mask
63. We mask each 8 bit value with 63, and if it is in the
required range 0 to 59, we accept it, otherwise draw
another 8 bits.
(rand_compat_fixup): Initialize opt_noshift to 1 if
the requested compat version is 299 or less.
* tests/012/sort.tl: Fix one test case involving shuffled
data. The shufle function uses rand with small moduli,
so its behavior changes for the same PRNG sequence.
* tests/013/maze.expected: Likewise, the generated
pseudo-random maze in the maze test case is different now;
we must update to the new expected output.
* txr.1: Document that a value of 299 or less of the
compatibility -C option has an effect on rand.
-rw-r--r-- | rand.c | 150 | ||||
-rw-r--r-- | tests/012/sort.tl | 2 | ||||
-rw-r--r-- | tests/013/maze.expected | 118 | ||||
-rw-r--r-- | txr.1 | 16 |
4 files changed, 225 insertions, 61 deletions
@@ -63,12 +63,15 @@ typedef unsigned long rand32_t; struct rand_state { rand32_t state[16]; unsigned cur; + rand32_t shift; }; val random_state_s, random_state_var_s, random_warmup_s; struct cobj_class *random_state_cls; +static int opt_noshift; + static val random_state_clone(val rand_state) { return make_random_state(rand_state, nil); @@ -195,6 +198,7 @@ val make_random_state(val seed, val warmup) r->state[i] = c_unum(seed->v.vec[i], self); r->cur = c_num(seed->v.vec[i], self); + r->shift = 0; return rs; } else if (bufp(seed)) { ucnum len = c_unum(seed->b.len, self); @@ -246,6 +250,7 @@ val make_random_state(val seed, val warmup) r->state[i] = rand_tab[i]; r->cur = 0; + r->shift = 0; { uses_or2; @@ -411,7 +416,7 @@ val random(val state, val modulus) cnum m = c_num(modulus, self); if (m == 1) { return zero; - } else if (m > 1) { + } else if (m > 65536 || (opt_noshift && m > 1)) { unsigned bits = highest_bit(m - 1); #if CHAR_BIT * SIZEOF_PTR >= 64 ucnum rands_needed = (bits + 32 - 1) / 32; @@ -437,6 +442,146 @@ val random(val state, val modulus) continue; return num(out); } + } else if (m == 65536) { + rand32_t s = r->shift; + + if (s <= 65535) { + s = rand32(r); + r->shift = s >> 16 | 0x00010000U; + } else { + r->shift = s >> 16; + } + + return num_fast(s & 65535); + } else if (m > 256) { + rand32_t u = r->shift; + rand32_t mask = 65535; + + while (mask >> 1 > convert(rand32_t, m)) + mask >>= 1; + + for (;;) { + rand32_t s = u; + if (s <= mask) { + s = rand32(r); + u = s >> 16 | 0x00010000U; + } else { + u = s >> 16; + } + if ((s & mask) < convert(rand32_t, m)) { + r->shift = u; + return num_fast(s & mask); + } + } + } else if (m == 256) { + rand32_t s = r->shift; + + if (s <= 255) { + s = rand32(r); + r->shift = s >> 8 | 0x01000000U; + } else { + r->shift = s >> 8; + } + + return num_fast(s & 255); + } else if (m > 16) { + rand32_t u = r->shift; + rand32_t mask = 255; + + while (mask >> 1 > convert(rand32_t, m)) + mask >>= 1; + + for (;;) { + rand32_t s = u; + if (s <= mask) { + s = rand32(r); + u = s >> 8 | 0x01000000U; + } else { + u = s >> 8; + } + if ((s & mask) < convert(rand32_t, m)) { + r->shift = u; + return num_fast(s & mask); + } + } + } else if (m == 16) { + rand32_t s = r->shift; + + if (s <= 15) { + s = rand32(r); + r->shift = s >> 4 | 0x10000000U; + } else { + r->shift = s >> 4; + } + + return num_fast(s & 15); + } else if (m > 4) { + rand32_t u = r->shift; + rand32_t mask = 15; + + while (mask >> 1 > convert(rand32_t, m)) + mask >>= 1; + + for (;;) { + rand32_t s = u; + if (s <= 15) { + s = rand32(r); + u = s >> 4 | 0x10000000U; + } else { + u = s >> 4; + } + if ((s & mask) < convert(rand32_t, m)) { + r->shift = u; + return num_fast(s & mask); + } + } + } else switch (m) { + case 4: + { + rand32_t s = r->shift; + + if (s <= 3) { + s = rand32(r); + r->shift = s >> 2 | 0x40000000U; + } else { + r->shift = s >> 2; + } + + return num_fast(s & 3); + } + case 3: + { + rand32_t u = r->shift; + + for (;;) { + rand32_t s = u; + if (s <= 3) { + s = rand32(r); + u = s >> 2 | 0x40000000U; + } else { + u = s >> 2; + } + if ((s & 3) < 3) { + r->shift = u; + return num_fast(s & 3); + } + } + } + case 2: + { + rand32_t s = r->shift; + + if (s <= 1) { + s = rand32(r); + r->shift = s >> 1 | 0x80000000U; + } else { + r->shift = s >> 1; + } + + return s & 1 ? one : zero; + } + default: + break; } } @@ -490,6 +635,9 @@ val random_buf(val size, val state) void rand_compat_fixup(int compat_ver) { + if (compat_ver <= 299) + opt_noshift = 1; + if (compat_ver <= 243) { loc l = lookup_var_l(nil, random_state_var_s); if (compat_ver <= 139) { diff --git a/tests/012/sort.tl b/tests/012/sort.tl index d08bce3a..8529160e 100644 --- a/tests/012/sort.tl +++ b/tests/012/sort.tl @@ -98,4 +98,4 @@ [hist-sort-by upcase-str '("a" "b" "c" "a" "b" "a" "b" "a")] (("A" . 4) ("B" . 3) ("C" . 1))) (let ((*random-state* (make-random-state 0))) - (test (shuffle 1..10) #(4 1 7 6 2 8 3 5 9))) + (test (shuffle 1..10) #(9 5 3 1 2 6 4 8 7))) diff --git a/tests/013/maze.expected b/tests/013/maze.expected index 8cf588e4..d44a5ff2 100644 --- a/tests/013/maze.expected +++ b/tests/013/maze.expected @@ -1,61 +1,61 @@ + +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+ -| | | | | | | | -| | | | | | | | -+ +----+----+ + +----+ + +----+ +----+----+ +----+ +----+----+ +----+ + -| | | | | | | | | | | -| | | | | | | | | | | -+ + +----+----+----+ +----+----+ +----+ + +----+----+----+ +----+----+ + + -| | | | | | | | | | | -| | | | | | | | | | | -+ +----+ + +----+ + +----+----+----+----+ + + + +----+ + +----+ + -| | | | | | | | | | -| | | | | | | | | | -+----+----+----+----+ + +----+ +----+ +----+----+ + +----+ + +----+----+ + -| | | | | | | | | | | -| | | | | | | | | | | -+ + + +----+----+ + +----+ + +----+ + +----+ +----+ +----+----+ + -| | | | | | | | | | | | -| | | | | | | | | | | | -+----+ +----+ +----+----+ +----+----+ + +----+ +----+----+----+----+----+ +----+ -| | | | | | | | | -| | | | | | | | | -+ +----+ +----+ + + +----+ + +----+----+----+----+ +----+ +----+ +----+ -| | | | | | | | | | | | | -| | | | | | | | | | | | | -+ + +----+ + + +----+----+ +----+ + +----+ + + +----+----+----+ + -| | | | | | | | | | | | -| | | | | | | | | | | | -+ + +----+----+ + +----+----+----+ + + + +----+ +----+ + +----+ + -| | | | | | | | | | | | -| | | | | | | | | | | | -+ +----+----+----+ + +----+ +----+ + + + +----+----+ + + +----+----+ -| | | | | | | | | | | | | -| | | | | | | | | | | | | -+ + + + + +----+----+----+ + +----+ +----+ + +----+ +----+----+ + -| | | | | | | | | | | | -| | | | | | | | | | | | -+ +----+ +----+----+ + +----+----+ + + + +----+ + +----+ +----+ + -| | | | | | | | | | | | | | -| | | | | | | | | | | | | | -+----+ +----+ + + +----+----+ +----+ + + + + + + + + + + -| | | | | | | | | | | | | | | | | | -| | | | | | | | | | | | | | | | | | -+ +----+ +----+ +----+ + +----+ + + + + + + + + + +----+ -| | | | | | | | | | | | | | -| | | | | | | | | | | | | | -+ + +----+ +----+ +----+ + +----+----+----+----+ + +----+ + +----+ + -| | | | | | | | | | | | -| | | | | | | | | | | | -+ + + +----+ + + + + +----+----+----+----+----+----+ + +----+----+ + -| | | | | | | | | | -| | | | | | | | | | -+----+----+ + +----+----+ + +----+----+----+----+----+ + +----+----+ + +----+ -| | | | | | | | | | | -| | | | | | | | | | | -+ +----+ + +----+----+----+ + +----+ + + + +----+ + +----+----+ + -| | | | | | | | | | | | | -| | | | | | | | | | | | | -+ +----+----+----+ + + +----+ +----+----+ +----+ + +----+ + + + + -| | | | | | | -| | | | | | | +| | | | | | +| | | | | | ++ +----+----+----+----+ +----+----+ + +----+ + + + +----+----+ +----+ + +| | | | | | | | | | | | +| | | | | | | | | | | | ++----+ + + + +----+----+----+ + +----+ + + + + +----+----+ +----+ +| | | | | | | | | | | | | | | +| | | | | | | | | | | | | | | ++ + + + + +----+----+----+----+----+ + +----+ +----+----+ + + + + +| | | | | | | | | | | | | +| | | | | | | | | | | | | ++ +----+----+ +----+ + + +----+----+ + + +----+----+ +----+----+ + + +| | | | | | | | | | +| | | | | | | | | | ++ + +----+----+----+----+ +----+ + +----+ + +----+----+----+ + +----+ + +| | | | | | | | | | +| | | | | | | | | | ++ + + +----+----+----+----+ +----+----+----+ + + + +----+----+----+----+ + +| | | | | | | | | | +| | | | | | | | | | ++ + +----+----+----+----+ +----+ + +----+----+----+ +----+ + +----+ + + +| | | | | | | | | | | +| | | | | | | | | | | ++ + + +----+----+ +----+----+----+ +----+----+----+----+ +----+----+ + +----+ +| | | | | | | | | | | | +| | | | | | | | | | | | ++ +----+ + + +----+ + + +----+ + + +----+----+ + + +----+ + +| | | | | | | | | | | | +| | | | | | | | | | | | ++ + +----+ +----+ +----+----+ +----+ +----+----+----+----+ +----+----+ +----+ +| | | | | | | | | | | | +| | | | | | | | | | | | ++ +----+ + +----+ +----+ + + +----+ + + + + + +----+----+ + +| | | | | | | | | | | | | | +| | | | | | | | | | | | | | ++ +----+ + + +----+ +----+ + + +----+----+ + +----+----+ + + + +| | | | | | | | | | | | +| | | | | | | | | | | | ++ +----+----+----+ + + + +----+ + + +----+----+----+ + +----+----+ + +| | | | | | | | | | | | | +| | | | | | | | | | | | | ++ + +----+ + +----+----+----+ +----+----+----+ + +----+----+ + + + + +| | | | | | | | | | | | | +| | | | | | | | | | | | | ++ + + +----+----+ +----+ + +----+ +----+----+ + +----+ + + + + +| | | | | | | | | | | | | +| | | | | | | | | | | | | ++----+ + + +----+----+ + + + +----+ +----+----+----+ + +----+----+ + +| | | | | | | | | | +| | | | | | | | | | ++ +----+ +----+ +----+----+----+ +----+ +----+----+----+----+----+ +----+ +----+ +| | | | | | | | | | +| | | | | | | | | | ++ +----+ + +----+----+----+ +----+ +----+ + +----+----+ +----+ + + + +| | | | | | | | | | | +| | | | | | | | | | | ++----+ +----+----+----+----+ + +----+----+ +----+ +----+ +----+----+----+----+ + +| | | | +| | | | +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+ + @@ -95902,6 +95902,22 @@ of these version values, the described behaviors are provided if is given an argument which is equal or lower. For instance .code "-C 103" selects the behaviors described below for version 105, but not those for 102. +.IP 299 +Starting with \*(TX 300, the +.code rand +function produces different values in some cases, due to being better +optimized for small moduli. Internally, the function obtains pseudo-random bits +from the PRNG in blocks of 32; i.e. 32 bit words. The previous +.code rand +implementation always obtained at least one new 32 bit word from the PRNG for +each call, even for small moduli. The new implementation of the function, +for moduli less than or equal to 65536, can satisfy multiple calls by taking +bits from a 32-bit shift register, maintained in the random state object. +The register only has to be refilled by a new 32 bit word from the PRNG +when its bits are exhausted. +A compatibility value of 299 or lower will disable this optimization, causing +.code rand +to return the original sequence of values, for a given random state. .IP 298 Until \*(TX 298, the .code lop |