From e7d52c9b22f569c4c02d59c4c958f7750af2e264 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Fri, 9 May 2025 21:22:56 -0700 Subject: buf-count-ones: optimize for common cases. * buf.c (buf_count_ones): Short-circuit loop iteration if word or byte is zero. If the word/byte is a power of two (1 bit set) or if it has all bits set, then skip the bit trick. --- buf.c | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/buf.c b/buf.c index 906a800a..daf00202 100644 --- a/buf.c +++ b/buf.c @@ -1770,22 +1770,31 @@ val buf_count_ones(val buf) for (i = 0; i < l / sizeof (ucnum); i++) { ucnum d = ucdata[i]; + + if (d == 0) { + continue; + } else if (d == convert(ucnum, -1)) { + d = sizeof(ucnum) * 8; + } else if ((d & (d - 1)) == 0) { + d = 1; + } else { #if SIZEOF_PTR == 8 - d = ((d & 0xAAAAAAAAAAAAAAAA) >> 1) + (d & 0x5555555555555555); - d = ((d & 0xCCCCCCCCCCCCCCCC) >> 2) + (d & 0x3333333333333333); - d = ((d & 0xF0F0F0F0F0F0F0F0) >> 4) + (d & 0x0F0F0F0F0F0F0F0F); - d = ((d & 0xFF00FF00FF00FF00) >> 8) + (d & 0x00FF00FF00FF00FF); - d = ((d & 0xFFFF0000FFFF0000) >> 16) + (d & 0x0000FFFF0000FFFF); - d = ((d & 0xFFFFFFFF00000000) >> 32) + (d & 0x00000000FFFFFFFF); + d = ((d & 0xAAAAAAAAAAAAAAAA) >> 1) + (d & 0x5555555555555555); + d = ((d & 0xCCCCCCCCCCCCCCCC) >> 2) + (d & 0x3333333333333333); + d = ((d & 0xF0F0F0F0F0F0F0F0) >> 4) + (d & 0x0F0F0F0F0F0F0F0F); + d = ((d & 0xFF00FF00FF00FF00) >> 8) + (d & 0x00FF00FF00FF00FF); + d = ((d & 0xFFFF0000FFFF0000) >> 16) + (d & 0x0000FFFF0000FFFF); + d = ((d & 0xFFFFFFFF00000000) >> 32) + (d & 0x00000000FFFFFFFF); #elif SIZEOF_PTR == 4 - d = ((d & 0xAAAAAAAA) >> 1) + (d & 0x55555555); - d = ((d & 0xCCCCCCCC) >> 2) + (d & 0x33333333); - d = ((d & 0xF0F0F0F0) >> 4) + (d & 0x0F0F0F0F); - d = ((d & 0xFF00FF00) >> 8) + (d & 0x00FF00FF); - d = ((d & 0xFFFF0000) >> 16) + (d & 0x0000FFFF); + d = ((d & 0xAAAAAAAA) >> 1) + (d & 0x55555555); + d = ((d & 0xCCCCCCCC) >> 2) + (d & 0x33333333); + d = ((d & 0xF0F0F0F0) >> 4) + (d & 0x0F0F0F0F); + d = ((d & 0xFF00FF00) >> 8) + (d & 0x00FF00FF); + d = ((d & 0xFFFF0000) >> 16) + (d & 0x0000FFFF); #else #error portme #endif + } total[1] += d; if (total[1] < d) total[0]++; @@ -1794,9 +1803,17 @@ val buf_count_ones(val buf) for (i = l / sizeof (ucnum) * sizeof (ucnum); i < l; i++) { unsigned d = b->data[i]; - d = ((d & 0xAA) >> 1) + (d & 0x55); - d = ((d & 0xCC) >> 2) + (d & 0x33); - d = ((d & 0xF0) >> 4) + (d & 0x0F); + if (!d) { + continue; + } else if (d == 255) { + d = 8; + } else if ((d & (d - 1)) == 0) { + d = 1; + } else { + d = ((d & 0xAA) >> 1) + (d & 0x55); + d = ((d & 0xCC) >> 2) + (d & 0x33); + d = ((d & 0xF0) >> 4) + (d & 0x0F); + } total[1] += d; if (total[1] < d) -- cgit v1.2.3