From 150cfb92b3c4ff72eb9e4e68014d7b70a6b3d30f Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 6 Oct 2022 20:43:58 -0700 Subject: strings: take advantage of malloc_usable_size On platforms which have the malloc_usable_size function, we don't have to store the allocated size of an object; malloc provides us the allocated size (which may be larger than we requested). Here we take advantage of this for strings. And since we don't have to store the string allocated size any more, we use that field for something else: storing the hash code (for seed zero). This can speed up some hashing operations. * configure (have_malloc_usable_size): New variable. Configure test for malloc_usable_size. We have to try several header files, too. We set the configure variable HAVE_MALLOC_USABLE_SIZE, and possibly HAVE_MALLOC_H or HAVE_MALLOC_NP_H. * lib.h (struct string): If HAVE_MALLOC_USABLE_SIZE is true, we define a member called hash instead of alloc. Also, we change alloc to cnum. * lib.c: Include <malloc_np.h> if HAVE_MALLOC_NP_H is defined. (string_own, string, string_utf8, mkstring, mkustring, init_str, string_extend, string_finish, string_set_code, string_get_code, length_str, replace_str, chr_str_set): Fix code for both cases. On platforms with malloc_usable_size, we have the allocated size from malloc, so we don't have to retrieve it from the object or store it. Any operations which mutate the string must reset the hash field to zero; zero means "hash has not been calculated". * hash.c (equal_hash): Just retrieve a string's hash value if it is nonzero; otherwise calculate it, cache it and return it. * gc.c (mark_obj): The alloc member of struct string is a machine integer now; no need to mark it. 
--- configure | 29 +++++++++++++++++++++++ gc.c | 3 +-- hash.c | 7 ++++++ lib.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------- lib.h | 6 ++++- 5 files changed, 109 insertions(+), 16 deletions(-) diff --git a/configure b/configure index 203400a1..4d2ae494 100755 --- a/configure +++ b/configure @@ -214,6 +214,7 @@ have_termios= have_winsize= termios_define= have_pkgconfig= +have_malloc_usable_size= libffi_cflags= darwin_target= solaris_target= @@ -3335,6 +3336,34 @@ int main(void) break done +printf "Checking for malloc_usable_size ..." + +for header in stdlib malloc malloc_np ; do + cat > conftest.c <<! +#include <$header.h> + +int main(int argc, char **argv) +{ + void *p = malloc(42); + size_t s = malloc_usable_size(p); + return 0; +} +! + + if conftest ; then + printf "yes (<%s.h>)\n" $header + printf "#define HAVE_MALLOC_USABLE_SIZE 1\n" >> config.h + if [ $header != stdlib ] ; then + header=$(printf "%s" $header | tr '[a-z]' '[A-Z]') + printf "#define HAVE_%s_H 1\n" $header >> config.h + fi + have_malloc_usable_size=y + break + fi +done + +[ "$have_malloc_usable_size" ] || printf "no\n" + printf "Checking for termios ... 
" cat > conftest.c <c.car); mark_obj_tail(obj->c.cdr); case STR: - mark_obj(obj->st.len); - mark_obj_tail(obj->st.alloc); + mark_obj_tail(obj->st.len); case SYM: mark_obj(obj->s.name); mark_obj_tail(obj->s.package); diff --git a/hash.c b/hash.c index 4982c8b5..1a55e0c8 100644 --- a/hash.c +++ b/hash.c @@ -315,6 +315,13 @@ ucnum equal_hash(val obj, int *count, ucnum seed) return equal_hash(obj->c.car, count, seed) + equal_hash(obj->c.cdr, count, seed + (CONS << 8)); case STR: +#if HAVE_MALLOC_USABLE_SIZE + if (seed == 0) { + return if3(obj->st.hash != 0, + obj->st.hash, + obj->st.hash = hash_c_str(obj->st.str, 0, count)); + } +#endif return hash_c_str(obj->st.str, seed, count); case CHR: return c_ch(obj); diff --git a/lib.c b/lib.c index 7bc1a837..1bca83ef 100644 --- a/lib.c +++ b/lib.c @@ -48,6 +48,9 @@ #if HAVE_MALLOC_H #include <malloc.h> #endif +#if HAVE_MALLOC_NP_H +#include <malloc_np.h> +#endif #include "lib.h" #include "gc.h" #include "arith.h" @@ -4856,7 +4859,11 @@ val string_own(wchar_t *str) obj->st.type = STR; obj->st.str = str; obj->st.len = nil; - obj->st.alloc = nil; +#if HAVE_MALLOC_USABLE_SIZE + obj->st.hash = 0; +#else + obj->st.alloc = 0; +#endif return obj; } @@ -4866,7 +4873,11 @@ val string(const wchar_t *str) obj->st.type = STR; obj->st.str = chk_strdup(str); obj->st.len = nil; - obj->st.alloc = nil; +#if HAVE_MALLOC_USABLE_SIZE + obj->st.hash = 0; +#else + obj->st.alloc = 0; +#endif return obj; } @@ -4876,7 +4887,11 @@ val string_utf8(const char *str) obj->st.type = STR; obj->st.str = utf8_dup_from(str); obj->st.len = nil; - obj->st.alloc = nil; +#if HAVE_MALLOC_USABLE_SIZE + obj->st.hash = 0; +#else + obj->st.alloc = 0; +#endif return obj; } @@ -4912,7 +4927,9 @@ val mkstring(val len, val ch_in) wmemset(str, c_chr(ch), l); str[l] = 0; s->st.len = len; - s->st.alloc = plus(len, one); +#if !HAVE_MALLOC_USABLE_SIZE + s->st.alloc = c_num(len, self) + 1; +#endif return s; } @@ -4927,13 +4944,18 @@ val mkustring(val len) val s = string_own(str); str[l] = 0; s->st.len = 
len; - s->st.alloc = plus(len, one); +#if !HAVE_MALLOC_USABLE_SIZE + s->st.alloc = c_num(len, self) + 1; +#endif return s; } val init_str(val str, const wchar_t *data, val self) { wmemcpy(str->st.str, data, c_num(str->st.len, self)); +#if HAVE_MALLOC_USABLE_SIZE + str->st.hash = 0; +#endif return str; } @@ -5016,8 +5038,12 @@ val string_extend(val str, val tail, val finish_in) { val finish = default_null_arg(finish_in); cnum len = c_fixnum(length_str(str), self); - cnum oalloc = c_fixnum(str->st.alloc, self), alloc = oalloc; - cnum delta, needed; +#if HAVE_MALLOC_USABLE_SIZE + cnum oalloc = malloc_usable_size(str->st.str) / sizeof str->st.str[0]; +#else + cnum oalloc = str->st.alloc; +#endif + cnum alloc = oalloc, delta, needed; if (stringp(tail)) delta = c_fixnum(length_str(tail), self); @@ -5043,7 +5069,9 @@ val string_extend(val str, val tail, val finish_in) if (alloc != oalloc) { str->st.str = chk_wrealloc(str->st.str, alloc); - set(mkloc(str->st.alloc, str), num(alloc)); +#if !HAVE_MALLOC_USABLE_SIZE + str->st.alloc = alloc; +#endif } } @@ -5055,6 +5083,9 @@ val string_extend(val str, val tail, val finish_in) str->st.str[len] = c_chr(tail); str->st.str[len + 1] = 0; } +#if HAVE_MALLOC_USABLE_SIZE + str->st.hash = 0; +#endif } return str; @@ -5067,12 +5098,18 @@ val string_finish(val str) { cnum len = c_fixnum(length_str(str), self); - cnum alloc = c_fixnum(str->st.alloc, self); +#if HAVE_MALLOC_USABLE_SIZE + cnum alloc = malloc_usable_size(str->st.str) / sizeof str->st.str[0]; +#else + cnum alloc = str->st.alloc; +#endif if (alloc > len + 1) { alloc = len + 1; str->st.str = chk_wrealloc(str->st.str, alloc); +#if !HAVE_MALLOC_USABLE_SIZE - set(mkloc(str->st.alloc, str), num(alloc)); + str->st.alloc = alloc; /* alloc is a plain cnum now: store directly, as string_extend does, not a boxed val via set/mkloc */ +#endif } } @@ -5086,11 +5123,14 @@ val string_set_code(val str, val code) { cnum len = c_fixnum(length_str(str), self); - cnum alloc = c_fixnum(str->st.alloc, self); +#if HAVE_MALLOC_USABLE_SIZE + cnum alloc = malloc_usable_size(str->st.str) / sizeof str->st.str[0]; +#else + cnum 
alloc = str->st.alloc; +#endif if (alloc < len + 2) { string_extend(str, one, t); - alloc = c_fixnum(str->st.alloc, self); set(mkloc(str->st.len, str), num(len)); } @@ -5109,7 +5149,11 @@ val string_get_code(val str) { cnum len = c_fixnum(length_str(str), self); - cnum alloc = c_fixnum(str->st.alloc, self); +#if HAVE_MALLOC_USABLE_SIZE + cnum alloc = malloc_usable_size(str->st.str) / sizeof str->st.str[0]; +#else + cnum alloc = str->st.alloc; +#endif if (alloc >= len + 2) return num(str->st.str[len + 1]); @@ -5155,7 +5199,9 @@ val length_str(val str) case STR: if (!str->st.len) { set(mkloc(str->st.len, str), num(wcslen(str->st.str))); - set(mkloc(str->st.alloc, str), plus(str->st.len, one)); +#if !HAVE_MALLOC_USABLE_SIZE + str->st.alloc = c_num(str->st.len, self) + 1; +#endif } return str->st.len; default: @@ -5551,6 +5597,10 @@ val replace_str(val str_in, val items, val from, val to) to = max2(zero, min2(to, len)); +#if HAVE_MALLOC_USABLE_SIZE + str_in->st.hash = 0; +#endif + { val len_rep = minus(to, from); val len_it = length(items); @@ -6835,6 +6885,10 @@ val chr_str_set(val str, val ind, val chr) self, str, nao); } +#if HAVE_MALLOC_USABLE_SIZE + str->st.hash = 0; +#endif + if (index < 0) { ind = plus(length_str(str), ind); index = c_num(ind, self); diff --git a/lib.h b/lib.h index dc12c355..48320174 100644 --- a/lib.h +++ b/lib.h @@ -154,7 +154,11 @@ struct string { obj_common; wchar_t *str; val len; - val alloc; +#if HAVE_MALLOC_USABLE_SIZE + ucnum hash; +#else + cnum alloc; +#endif }; typedef struct { -- cgit v1.2.3