summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2025-03-21 20:37:25 -0700
committerKaz Kylheku <kaz@kylheku.com>2025-03-21 20:37:25 -0700
commit8999e74125d1ca2f526d25f437848861cdc19024 (patch)
treeb5ed8485658fa81321a12eb7bd2cfd40b2268fdb
parent7fcd263dbcf065646caf031b087f06b6a2517390 (diff)
downloadtxr-8999e74125d1ca2f526d25f437848861cdc19024.tar.gz
txr-8999e74125d1ca2f526d25f437848861cdc19024.tar.bz2
txr-8999e74125d1ca2f526d25f437848861cdc19024.zip
rand: use PRNG bits more economically for small moduli.
The rand function always calls rand32 at least one for each call, even for small moduli that need only a few pseudo-random bits. For instance when the modulus is 2, the function requires only one pseudo-random bit from the PRNG, yet it takes 32 and throws away 31. With this commit, that changes. For moduli of 65536 or smaller, the bits are used more efficiently; and for a modulus of 2, the function can satisfy 32 calls using the bits of a single rand32_t word: one stepping of the WELL512a PRNG. * rand.c (struct rand_state): New member, shift. Holds the shift register for rand/random to take bits from, replenished from rand32() when it runs out. The shift register detects when it runs out of bits in a clever way, without any additional variable. The register is regarded as being 33 bits wide, with a top bit that is always 1. When the register is empty, a 32 bit word is taken from the PRNG. The required random bits are taken from the word, and it is then shifted to the right. (We take only power-of-two amounts out of the shift register: 1, 2, 4, 8 or 16 bits). Even the smallest shift produces enough room that the 33rd bit can be added to the word, into its shifted position. After that, the shift register is considered to have enough bits for a given modulus if its value is less than equal to the mask. I.e if we were to take bits from it, we would be including the unconditional signaling bit. At that point we clobber the shift register with a new set of 32 bits from the PRNG, take the random bits we need, shift it to the right and add the signaling bit. (opt_noshift): New static variable; indicates whether we are in compatibility mode, requiring the shift register optimization to be defeated. (make_random_state): Initialize shift register to 0 in several places. (random): Implement various small modulus cases. There are specific cases for moduli that are exactly 65536, 256, 16, 4, 3 and 2. The in-between cases are handled by shifting the bits in the same amounts as the next higher power of two from this list of sizes: 16, 8, or 4 bits. For these cases, we calculate the smallest Mersenne modulus which covers the bits of the actual moduls and use that for rejecting potential values, just as we do in the general large modulus case. For instance if the modulus is 60 (range 0 to 59), that lands into the 8 bit shift range: we pull 8 bits at a time from the shift register. But the modulus 60 is covered by the six bit mask 63. We mask each 8 bit value with 63, and if it is in the required range 0 to 59, we accept it, otherwise draw another 8 bits. (rand_compat_fixup): Initialize opt_noshift to 1 if the requested compat version is 299 or less. * tests/012/sort.tl: Fix one test case involving shuffled data. The shufle function uses rand with small moduli, so its behavior changes for the same PRNG sequence. * tests/013/maze.expected: Likewise, the generated pseudo-random maze in the maze test case is different now; we must update to the new expected output. * txr.1: Document that a value of 299 or less of the compatibility -C option has an effect on rand.
-rw-r--r--rand.c150
-rw-r--r--tests/012/sort.tl2
-rw-r--r--tests/013/maze.expected118
-rw-r--r--txr.116
4 files changed, 225 insertions, 61 deletions
diff --git a/rand.c b/rand.c
index 4d068826..4610d8f7 100644
--- a/rand.c
+++ b/rand.c
@@ -63,12 +63,15 @@ typedef unsigned long rand32_t;
struct rand_state {
rand32_t state[16];
unsigned cur;
+ rand32_t shift;
};
val random_state_s, random_state_var_s, random_warmup_s;
struct cobj_class *random_state_cls;
+static int opt_noshift;
+
static val random_state_clone(val rand_state)
{
return make_random_state(rand_state, nil);
@@ -195,6 +198,7 @@ val make_random_state(val seed, val warmup)
r->state[i] = c_unum(seed->v.vec[i], self);
r->cur = c_num(seed->v.vec[i], self);
+ r->shift = 0;
return rs;
} else if (bufp(seed)) {
ucnum len = c_unum(seed->b.len, self);
@@ -246,6 +250,7 @@ val make_random_state(val seed, val warmup)
r->state[i] = rand_tab[i];
r->cur = 0;
+ r->shift = 0;
{
uses_or2;
@@ -411,7 +416,7 @@ val random(val state, val modulus)
cnum m = c_num(modulus, self);
if (m == 1) {
return zero;
- } else if (m > 1) {
+ } else if (m > 65536 || (opt_noshift && m > 1)) {
unsigned bits = highest_bit(m - 1);
#if CHAR_BIT * SIZEOF_PTR >= 64
ucnum rands_needed = (bits + 32 - 1) / 32;
@@ -437,6 +442,146 @@ val random(val state, val modulus)
continue;
return num(out);
}
+ } else if (m == 65536) {
+ rand32_t s = r->shift;
+
+ if (s <= 65535) {
+ s = rand32(r);
+ r->shift = s >> 16 | 0x00010000U;
+ } else {
+ r->shift = s >> 16;
+ }
+
+ return num_fast(s & 65535);
+ } else if (m > 256) {
+ rand32_t u = r->shift;
+ rand32_t mask = 65535;
+
+ while (mask >> 1 > convert(rand32_t, m))
+ mask >>= 1;
+
+ for (;;) {
+ rand32_t s = u;
+ if (s <= mask) {
+ s = rand32(r);
+ u = s >> 16 | 0x00010000U;
+ } else {
+ u = s >> 16;
+ }
+ if ((s & mask) < convert(rand32_t, m)) {
+ r->shift = u;
+ return num_fast(s & mask);
+ }
+ }
+ } else if (m == 256) {
+ rand32_t s = r->shift;
+
+ if (s <= 255) {
+ s = rand32(r);
+ r->shift = s >> 8 | 0x01000000U;
+ } else {
+ r->shift = s >> 8;
+ }
+
+ return num_fast(s & 255);
+ } else if (m > 16) {
+ rand32_t u = r->shift;
+ rand32_t mask = 255;
+
+ while (mask >> 1 > convert(rand32_t, m))
+ mask >>= 1;
+
+ for (;;) {
+ rand32_t s = u;
+ if (s <= mask) {
+ s = rand32(r);
+ u = s >> 8 | 0x01000000U;
+ } else {
+ u = s >> 8;
+ }
+ if ((s & mask) < convert(rand32_t, m)) {
+ r->shift = u;
+ return num_fast(s & mask);
+ }
+ }
+ } else if (m == 16) {
+ rand32_t s = r->shift;
+
+ if (s <= 15) {
+ s = rand32(r);
+ r->shift = s >> 4 | 0x10000000U;
+ } else {
+ r->shift = s >> 4;
+ }
+
+ return num_fast(s & 15);
+ } else if (m > 4) {
+ rand32_t u = r->shift;
+ rand32_t mask = 15;
+
+ while (mask >> 1 > convert(rand32_t, m))
+ mask >>= 1;
+
+ for (;;) {
+ rand32_t s = u;
+ if (s <= 15) {
+ s = rand32(r);
+ u = s >> 4 | 0x10000000U;
+ } else {
+ u = s >> 4;
+ }
+ if ((s & mask) < convert(rand32_t, m)) {
+ r->shift = u;
+ return num_fast(s & mask);
+ }
+ }
+ } else switch (m) {
+ case 4:
+ {
+ rand32_t s = r->shift;
+
+ if (s <= 3) {
+ s = rand32(r);
+ r->shift = s >> 2 | 0x40000000U;
+ } else {
+ r->shift = s >> 2;
+ }
+
+ return num_fast(s & 3);
+ }
+ case 3:
+ {
+ rand32_t u = r->shift;
+
+ for (;;) {
+ rand32_t s = u;
+ if (s <= 3) {
+ s = rand32(r);
+ u = s >> 2 | 0x40000000U;
+ } else {
+ u = s >> 2;
+ }
+ if ((s & 3) < 3) {
+ r->shift = u;
+ return num_fast(s & 3);
+ }
+ }
+ }
+ case 2:
+ {
+ rand32_t s = r->shift;
+
+ if (s <= 1) {
+ s = rand32(r);
+ r->shift = s >> 1 | 0x80000000U;
+ } else {
+ r->shift = s >> 1;
+ }
+
+ return s & 1 ? one : zero;
+ }
+ default:
+ break;
}
}
@@ -490,6 +635,9 @@ val random_buf(val size, val state)
void rand_compat_fixup(int compat_ver)
{
+ if (compat_ver <= 299)
+ opt_noshift = 1;
+
if (compat_ver <= 243) {
loc l = lookup_var_l(nil, random_state_var_s);
if (compat_ver <= 139) {
diff --git a/tests/012/sort.tl b/tests/012/sort.tl
index d08bce3a..8529160e 100644
--- a/tests/012/sort.tl
+++ b/tests/012/sort.tl
@@ -98,4 +98,4 @@
[hist-sort-by upcase-str '("a" "b" "c" "a" "b" "a" "b" "a")] (("A" . 4) ("B" . 3) ("C" . 1)))
(let ((*random-state* (make-random-state 0)))
- (test (shuffle 1..10) #(4 1 7 6 2 8 3 5 9)))
+ (test (shuffle 1..10) #(9 5 3 1 2 6 4 8 7)))
diff --git a/tests/013/maze.expected b/tests/013/maze.expected
index 8cf588e4..d44a5ff2 100644
--- a/tests/013/maze.expected
+++ b/tests/013/maze.expected
@@ -1,61 +1,61 @@
+ +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
-| | | | | | | |
-| | | | | | | |
-+ +----+----+ + +----+ + +----+ +----+----+ +----+ +----+----+ +----+ +
-| | | | | | | | | | |
-| | | | | | | | | | |
-+ + +----+----+----+ +----+----+ +----+ + +----+----+----+ +----+----+ + +
-| | | | | | | | | | |
-| | | | | | | | | | |
-+ +----+ + +----+ + +----+----+----+----+ + + + +----+ + +----+ +
-| | | | | | | | | |
-| | | | | | | | | |
-+----+----+----+----+ + +----+ +----+ +----+----+ + +----+ + +----+----+ +
-| | | | | | | | | | |
-| | | | | | | | | | |
-+ + + +----+----+ + +----+ + +----+ + +----+ +----+ +----+----+ +
-| | | | | | | | | | | |
-| | | | | | | | | | | |
-+----+ +----+ +----+----+ +----+----+ + +----+ +----+----+----+----+----+ +----+
-| | | | | | | | |
-| | | | | | | | |
-+ +----+ +----+ + + +----+ + +----+----+----+----+ +----+ +----+ +----+
-| | | | | | | | | | | | |
-| | | | | | | | | | | | |
-+ + +----+ + + +----+----+ +----+ + +----+ + + +----+----+----+ +
-| | | | | | | | | | | |
-| | | | | | | | | | | |
-+ + +----+----+ + +----+----+----+ + + + +----+ +----+ + +----+ +
-| | | | | | | | | | | |
-| | | | | | | | | | | |
-+ +----+----+----+ + +----+ +----+ + + + +----+----+ + + +----+----+
-| | | | | | | | | | | | |
-| | | | | | | | | | | | |
-+ + + + + +----+----+----+ + +----+ +----+ + +----+ +----+----+ +
-| | | | | | | | | | | |
-| | | | | | | | | | | |
-+ +----+ +----+----+ + +----+----+ + + + +----+ + +----+ +----+ +
-| | | | | | | | | | | | | |
-| | | | | | | | | | | | | |
-+----+ +----+ + + +----+----+ +----+ + + + + + + + + + +
-| | | | | | | | | | | | | | | | | |
-| | | | | | | | | | | | | | | | | |
-+ +----+ +----+ +----+ + +----+ + + + + + + + + + +----+
-| | | | | | | | | | | | | |
-| | | | | | | | | | | | | |
-+ + +----+ +----+ +----+ + +----+----+----+----+ + +----+ + +----+ +
-| | | | | | | | | | | |
-| | | | | | | | | | | |
-+ + + +----+ + + + + +----+----+----+----+----+----+ + +----+----+ +
-| | | | | | | | | |
-| | | | | | | | | |
-+----+----+ + +----+----+ + +----+----+----+----+----+ + +----+----+ + +----+
-| | | | | | | | | | |
-| | | | | | | | | | |
-+ +----+ + +----+----+----+ + +----+ + + + +----+ + +----+----+ +
-| | | | | | | | | | | | |
-| | | | | | | | | | | | |
-+ +----+----+----+ + + +----+ +----+----+ +----+ + +----+ + + + +
-| | | | | | |
-| | | | | | |
+| | | | | |
+| | | | | |
++ +----+----+----+----+ +----+----+ + +----+ + + + +----+----+ +----+ +
+| | | | | | | | | | | |
+| | | | | | | | | | | |
++----+ + + + +----+----+----+ + +----+ + + + + +----+----+ +----+
+| | | | | | | | | | | | | | |
+| | | | | | | | | | | | | | |
++ + + + + +----+----+----+----+----+ + +----+ +----+----+ + + + +
+| | | | | | | | | | | | |
+| | | | | | | | | | | | |
++ +----+----+ +----+ + + +----+----+ + + +----+----+ +----+----+ + +
+| | | | | | | | | |
+| | | | | | | | | |
++ + +----+----+----+----+ +----+ + +----+ + +----+----+----+ + +----+ +
+| | | | | | | | | |
+| | | | | | | | | |
++ + + +----+----+----+----+ +----+----+----+ + + + +----+----+----+----+ +
+| | | | | | | | | |
+| | | | | | | | | |
++ + +----+----+----+----+ +----+ + +----+----+----+ +----+ + +----+ + +
+| | | | | | | | | | |
+| | | | | | | | | | |
++ + + +----+----+ +----+----+----+ +----+----+----+----+ +----+----+ + +----+
+| | | | | | | | | | | |
+| | | | | | | | | | | |
++ +----+ + + +----+ + + +----+ + + +----+----+ + + +----+ +
+| | | | | | | | | | | |
+| | | | | | | | | | | |
++ + +----+ +----+ +----+----+ +----+ +----+----+----+----+ +----+----+ +----+
+| | | | | | | | | | | |
+| | | | | | | | | | | |
++ +----+ + +----+ +----+ + + +----+ + + + + + +----+----+ +
+| | | | | | | | | | | | | |
+| | | | | | | | | | | | | |
++ +----+ + + +----+ +----+ + + +----+----+ + +----+----+ + + +
+| | | | | | | | | | | |
+| | | | | | | | | | | |
++ +----+----+----+ + + + +----+ + + +----+----+----+ + +----+----+ +
+| | | | | | | | | | | | |
+| | | | | | | | | | | | |
++ + +----+ + +----+----+----+ +----+----+----+ + +----+----+ + + + +
+| | | | | | | | | | | | |
+| | | | | | | | | | | | |
++ + + +----+----+ +----+ + +----+ +----+----+ + +----+ + + + +
+| | | | | | | | | | | | |
+| | | | | | | | | | | | |
++----+ + + +----+----+ + + + +----+ +----+----+----+ + +----+----+ +
+| | | | | | | | | |
+| | | | | | | | | |
++ +----+ +----+ +----+----+----+ +----+ +----+----+----+----+----+ +----+ +----+
+| | | | | | | | | |
+| | | | | | | | | |
++ +----+ + +----+----+----+ +----+ +----+ + +----+----+ +----+ + + +
+| | | | | | | | | | |
+| | | | | | | | | | |
++----+ +----+----+----+----+ + +----+----+ +----+ +----+ +----+----+----+----+ +
+| | | |
+| | | |
+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+ +
diff --git a/txr.1 b/txr.1
index 599638bc..358c3ca8 100644
--- a/txr.1
+++ b/txr.1
@@ -95902,6 +95902,22 @@ of these version values, the described behaviors are provided if
is given an argument which is equal or lower. For instance
.code "-C 103"
selects the behaviors described below for version 105, but not those for 102.
+.IP 299
+Starting with \*(TX 300, the
+.code rand
+function produces different values in some cases, due to being better
+optimized for small moduli. Internally, the function obtains pseudo-random bits
+from the PRNG in blocks of 32; i.e. 32 bit words. The previous
+.code rand
+implementation always obtained at least one new 32 bit word from the PRNG for
+each call, even for small moduli. The new implementation of the function,
+for moduli less than or equal to 65536, can satisfy multiple calls by taking
+bits from a 32-bit shift register, maintained in the random state object.
+The register only has to be refilled by a new 32 bit word from the PRNG
+when its bits are exhausted.
+A compatibility value of 299 or lower will disable this optimization, causing
+.code rand
+to return the original sequence of values, for a given random state.
.IP 298
Until \*(TX 298, the
.code lop