author     Kaz Kylheku <kaz@kylheku.com>  2023-06-20 20:14:19 -0700
committer  Kaz Kylheku <kaz@kylheku.com>  2023-06-20 20:14:19 -0700
commit     82141a5d9e0114fbc138b3b6c4797e3c220de243
tree       8678e9b5e516b3ba1d1ed31e1d5fe16fe162ebb4
parent     96ea20112aee6ba8d4b5aa5d6a8a0d6b5d3dd2e0
hash: support existing mutation+iteration semantics.
The original chained hashing scheme makes certain
guarantees in situations when a hash table that is
being iterated is also undergoing insertions or
deletions. The original scheme meets these
requirements simply, by freezing hash table growth
while there are outstanding iterations. Chained
hashing gracefully handles load factors above 1.
Load factors above 1 are not possible under open
addressing (and we don't even want to approach 1),
but we would like to preserve the requirements.
The requirements are:
1. If an iterator has already visited an item, it
will not see that item again, regardless of insertions
which cause the table to be reorganized.
2. It is not specified/required that an iterator will
visit newly inserted items. It may visit some of those
items, but not others.
3. If an iterator has not yet visited an item, and
that item is deleted, it will not see that item,
regardless of any insertions that reorganize the table.
In this commit, we implement a "table stack" scheme.
1. When a table is resized due to insertions while
it is being iterated (h->usecount > 0), the existing
small table is pushed onto a stack, the h->tblstack
(table stack).
2. Iterators capture a hash table's current table
and its size, and work with that snapshot of the
table. If the original hash table grows, existing
iterators keep working with the original table as
it existed just before the reorganization, and so
they do not see any subsequent insertions.
3. Whenever the delete operation (hash_remove) finds
the item and removes it from the current table,
it also walks the table stack, searching for the
item in every table in the stack and nulling it out.
This search does not stop at nil entries; it is a
blind search that goes around the table starting at
the first probe position, looking for the identical
cons cell to take out. This ensures that iterators
will not see a deleted item, unless of course they
already visited it before the deletion. (A
standalone sketch of the whole scheme follows
this list.)
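To make the scheme concrete, here is a minimal, self-contained C
sketch of the same idea. It is an illustration only, not TXR's code
(the real implementation is in the diff below): the toy_* names, the
plain int keys, and the malloc-based memory handling are invented for
this example, and details such as re-packing the probe cluster on
deletion, weak tables, and the interaction with the generational GC
(setcheck, marking) are omitted.

#include <stdio.h>
#include <stdlib.h>

struct entry { int key, val; };          /* stands in for TXR's hash cell (a cons) */
struct snapshot { struct entry **vec; size_t mask; struct snapshot *next; };

struct toy_hash {
  struct entry **vec; size_t mask, count; /* current table; its size is mask + 1 */
  int usecount;                           /* number of live iterators */
  struct snapshot *tblstack;              /* outgrown tables kept while iterated */
};

struct toy_iter { struct toy_hash *h; struct entry **vec; size_t mask, index; };

static size_t home(int key, size_t mask) { return (size_t) key & mask; }

/* Point 1: growth pushes the old table onto the stack if iterators are live. */
static void toy_grow(struct toy_hash *h)
{
  size_t i, j, nmask = (h->mask << 1) | 1;
  struct entry **ovec = h->vec, **nvec = calloc(nmask + 1, sizeof *nvec);

  for (i = 0; i <= h->mask; i++)          /* reinsert the same entry pointers */
    if (ovec[i]) {
      for (j = home(ovec[i]->key, nmask); nvec[j]; j = (j + 1) & nmask) ;
      nvec[j] = ovec[i];
    }
  if (h->usecount > 0) {
    struct snapshot *s = malloc(sizeof *s);
    s->vec = ovec; s->mask = h->mask; s->next = h->tblstack;
    h->tblstack = s;
  } else {
    free(ovec);                           /* no iterators: the old table can go */
  }
  h->vec = nvec; h->mask = nmask;
}

static void toy_insert(struct toy_hash *h, int key, int val) /* key assumed absent */
{
  size_t i; struct entry *e = malloc(sizeof *e);
  e->key = key; e->val = val;
  if (++h->count > h->mask >> 1)          /* grow before the table gets crowded */
    toy_grow(h);
  for (i = home(key, h->mask); h->vec[i]; i = (i + 1) & h->mask) ;
  h->vec[i] = e;
}

/* Point 3: removal also nulls the victim's entry pointer in every stacked table. */
static void toy_remove(struct toy_hash *h, int key)
{
  size_t i = home(key, h->mask), start = i;
  struct entry *victim = 0; struct snapshot *s;
  do {                                    /* find the victim in the current table */
    if (h->vec[i] && h->vec[i]->key == key) {
      victim = h->vec[i]; h->vec[i] = 0;  /* the real code also re-packs the cluster */
      break;
    }
  } while ((i = (i + 1) & h->mask) != start);
  for (s = victim ? h->tblstack : 0; s; s = s->next) {
    size_t j = home(key, s->mask), stop = (j + s->mask) & s->mask;
    for (; j != stop; j = (j + 1) & s->mask)   /* blind probe around the old table */
      if (s->vec[j] == victim) { s->vec[j] = 0; break; }
  }
  free(victim);
}

/* Point 2: an iterator captures the table and mask and walks that snapshot. */
static void toy_iter_begin(struct toy_iter *it, struct toy_hash *h)
{
  it->h = h; it->vec = h->vec; it->mask = h->mask; it->index = 0;
  h->usecount++;
}

static struct entry *toy_iter_next(struct toy_iter *it)
{
  if (!it->h) return 0;
  while (it->index <= it->mask) {
    struct entry *e = it->vec[it->index++];
    if (e) return e;
  }
  if (--it->h->usecount <= 0)             /* last iterator done: drop the stack */
    it->h->tblstack = 0;                  /* (snapshot vectors just leak in this toy) */
  it->h = 0; return 0;
}

int main(void)
{
  struct toy_hash h = { calloc(8, sizeof (struct entry *)), 7, 0, 0, 0 };
  struct toy_iter it; struct entry *e; int k, saw5 = 0;
  for (k = 0; k < 6; k++) toy_insert(&h, k, k);
  toy_iter_begin(&it, &h);
  toy_iter_next(&it);                     /* visit one item, then mutate heavily */
  for (k = 6; k < 100; k++) toy_insert(&h, k, k);
  toy_remove(&h, 5);                      /* delete an item the iterator has not seen */
  while ((e = toy_iter_next(&it)) != 0)
    if (e->key == 5) saw5 = 1;
  printf("iterator saw deleted key: %s\n", saw5 ? "yes (bug)" : "no");
  return 0;
}

Run, the main() driver exercises guarantee 3: it visits one item,
forces several resizes (each outgrown table, including the iterator's
snapshot, is pushed onto the stack), deletes a key the iterator has
not yet seen, and verifies that the iterator never yields it.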
* hash.h (struct hash_iter): New members table and mask.
* hash.c (struct hash): New member, tblstack.
(hash_grow): We drop the vec argument and recreate it
locally (not essential to this commit).
If we find that the usecount is positive, we push the
existing table onto the table stack. Otherwise,
we take the opportunity to obliterate the table stack.
(hash_insert): Drop the restriction that hash_grow is
only called when the use count is zero. Adjust calls
to hash_grow to drop the vec argument.
(hash_remove): When an item is found and deleted, and
the table is in use by iterators, walk the table stack
and delete it from each previous table. Otherwise,
if the table is not in use by iterators, obliterate
the table stack.
(hash_mark): Exit early also if there is a table stack,
and mark that stack.
(do_make_hash, make_similar_hash, copy_hash): Initialize
table stack in new hash.
(hash_iter_mark): Mark the iterator's table. This is
likely not necessary, since we also mark the hash
table, which should have a pointer to that same
table; relying on that wouldn't be defensive
programming, though.
(hash_iter_init, us_hash_iter_init): Initialize table and mask.
(hash_iter_next_impl, hash_iter_peek): These functions
have to walk the table snapshot taken by the iterator,
using the captured mask, and not the current table.
(hash_reset): If the target table's use count drops to zero,
obliterate its table stack. We also add a missing setcheck here;
this operation potentially stores a different hash into
an existing iterator, which was not previously being done
safely with regard to generational GC.
* tests/010/hash.tl: New tests.
-rw-r--r--   hash.c             76
-rw-r--r--   hash.h              2
-rw-r--r--   tests/010/hash.tl   33
3 files changed, 96 insertions(+), 15 deletions(-)
diff --git a/hash.c b/hash.c
--- a/hash.c
+++ b/hash.c
@@ -75,6 +75,7 @@ struct hash {
   ucnum count;
   val userdata;
   int usecount;
+  val tblstack;
   struct hash_ops *hops;
 };

@@ -498,15 +499,24 @@ static val hash_lookup(struct hash *h, val key, ucnum hcode)
   return nil;
 }

-static void hash_grow(val hash, struct hash *h, val *vec, ucnum mask)
+static void hash_grow(val hash, struct hash *h, ucnum mask)
 {
   ucnum j, nmask = (mask << 1) | 1;
   val ntable;
+  val table = h->table;
+  val *vec = h->table->v.vec;
   val *nvec;

   if (nmask > NUM_MAX - 1)
     uw_throwf(error_s, lit("hash table overflow"), nao);

+  if (h->usecount > 0) {
+    push(table, &h->tblstack);
+    setcheck(hash, h->tblstack);
+  } else {
+    h->tblstack = nil;
+  }
+
   ntable = vector(num_fast(nmask + 1), nil);
   nvec = ntable->v.vec;

@@ -549,8 +559,8 @@ static val hash_insert(val hash, struct hash *h, val key, ucnum hcode, loc new_p
       setcheck(table, ncell);
       if (!nullocp(new_p))
         deref(new_p) = t;
-      if (++h->count > h->mask >> 1 && h->usecount == 0)
-        hash_grow(hash, h, vec, mask);
+      if (++h->count > h->mask >> 1)
+        hash_grow(hash, h, mask);
       return ncell;
     }

@@ -563,7 +573,7 @@ static val hash_insert(val hash, struct hash *h, val key, ucnum hcode, loc new_p
     i = (i + 1) & h->mask;
   } while (i != start);

-  hash_grow(hash, h, vec, mask);
+  hash_grow(hash, h, mask);
   return hash_insert(hash, h, key, hcode, new_p);
 }

@@ -575,6 +585,7 @@ static val hash_remove(struct hash *h, ucnum victim)
   ucnum start = victim, i = start, mask = h->mask;
   val cell = vec[i];
   val ret = nil;
+  val vicentry = vec[victim];

   if (cell == nil)
     return ret;
@@ -604,6 +615,27 @@ static val hash_remove(struct hash *h, ucnum victim)
   bug_unless (h->count > 0);
   h->count--;

+  if (h->usecount) {
+    val tblit;
+
+    for (tblit = h->tblstack; tblit; tblit = us_cdr(tblit)) {
+      val stbl = us_car(tblit);
+      val *svec = stbl->v.vec;
+      ucnum smask = c_unum(svec[vec_length], nil) - 1;
+      ucnum start = victim & smask;
+      ucnum end = (victim + smask) & smask;
+
+      for (i = start; i != end; i = (i + 1) & smask) {
+        if (svec[i] == vicentry) {
+          svec[i] = nil;
+          break;
+        }
+      }
+    }
+  } else if (h->tblstack) {
+    h->tblstack = nil;
+  }
+
   return ret;
 }

@@ -865,8 +897,9 @@ static void hash_mark(val hash)

   gc_mark(h->userdata);

-  if (h->count == 0) {
+  if (h->count == 0 || h->tblstack) {
     gc_mark(table);
+    gc_mark(h->tblstack);
     return;
   }

@@ -995,6 +1028,7 @@ static val do_make_hash(hash_weak_opt_t wkopt, hash_type_t type, val seed)

   h->userdata = nil;
   h->usecount = 0;
+  h->tblstack = nil;

   switch (type) {
   case hash_type_eq:
@@ -1069,6 +1103,7 @@ val make_similar_hash(val existing)
   h->seed = ex->seed;
   h->wkopt = ex->wkopt;
   h->usecount = 0;
+  h->tblstack = 0;
   h->hops = ex->hops;

   return hash;
@@ -1094,6 +1129,7 @@ val copy_hash(val existing)
   h->seed = ex->seed;
   h->wkopt = ex->wkopt;
   h->usecount = 0;
+  h->tblstack = 0;
   h->hops = ex->hops;

   for (i = 0; i <= h->mask; i++) {
@@ -1230,8 +1266,8 @@ val hashp(val obj)
 static void hash_iter_mark(val hash_iter)
 {
   struct hash_iter *hi = coerce(struct hash_iter *, hash_iter->co.handle);
-  if (hi->hash)
-    gc_mark(hi->hash);
+  gc_mark(hi->hash);
+  gc_mark(hi->table);
   hi->next = reachable_iters;
   reachable_iters = hi;
 }
@@ -1247,6 +1283,8 @@ void hash_iter_init(struct hash_iter *hi, val hash, val self)
   struct hash *h = coerce(struct hash *, cobj_handle(self, hash, hash_cls));
   hi->next = 0;
   hi->hash = hash;
+  hi->table = h->table;
+  hi->mask = h->mask;
   hi->index = 0;
   h->usecount++;
 }
@@ -1256,6 +1294,8 @@ void us_hash_iter_init(struct hash_iter *hi, val hash)
   struct hash *h = coerce(struct hash *, hash->co.handle);
   hi->next = 0;
   hi->hash = hash;
+  hi->table = h->table;
+  hi->mask = h->mask;
   hi->index = 0;
   h->usecount++;
 }
@@ -1264,19 +1304,21 @@ static val hash_iter_next_impl(struct hash_iter *hi)
 {
   val hash = hi->hash;
   struct hash *h = hash ? coerce(struct hash *, hash->co.handle) : 0;
-  ucnum mask = h->mask;
+  ucnum mask = hi->mask;

   if (!h)
     return nil;

   while (hi->index <= mask) {
-    val cell = h->table->v.vec[hi->index++];
+    val cell = hi->table->v.vec[hi->index++];
     if (cell)
       return cell;
   }

   hi->hash = nil;
-  h->usecount--;
+  if (--h->usecount <= 0)
+    h->tblstack = nil;
+
   return nil;
 }

@@ -1289,13 +1331,13 @@ val hash_iter_peek(struct hash_iter *hi)
 {
   val hash = hi->hash;
   struct hash *h = hash ? coerce(struct hash *, hash->co.handle) : 0;
-  ucnum mask = h->mask, index = hi->index;
+  ucnum mask = hi->mask, index = hi->index;

   if (!h)
     return nil;

   do {
-    val cell = h->table->v.vec[index++];
+    val cell = hi->table->v.vec[index++];
     if (cell)
       return cell;
   } while (index <= mask);
@@ -1339,13 +1381,17 @@ val hash_reset(val iter, val hash)

   if (hi->hash) {
     struct hash *h = coerce(struct hash *, hash->co.handle);
-    h->usecount--;
+    if (--h->usecount <= 0)
+      h->tblstack = nil;
   }

-  if (hash)
+  if (hash) {
     hash_iter_init(hi, hash, self);
-  else
+    if (hi->table)
+      setcheck(iter, hash);
+  } else {
     memset(hi, 0, sizeof *hi);
+  }

   return iter;
 }
diff --git a/hash.h b/hash.h
--- a/hash.h
+++ b/hash.h
@@ -37,6 +37,8 @@ typedef enum hash_weak_opt {

 struct hash_iter {
   struct hash_iter *next;
   val hash;
+  val table;
+  ucnum mask;
   ucnum index;
 };
diff --git a/tests/010/hash.tl b/tests/010/hash.tl
index 558688af..5af8b167 100644
--- a/tests/010/hash.tl
+++ b/tests/010/hash.tl
@@ -27,3 +27,36 @@
   (hash-props 1 2 'a 'b) #H(() (1 2) (a b))
   (hash-props 1) :error
   (hash-props 1 2 'a) :error)
+
+;; Test that growing a hash table works while iterators
+;; are referencing it.
+(let ((h (hash-list (range 0 199))))
+  (let ((i (hash-begin h)))
+    (each ((x 200..1000))
+      (set [h x] x))
+    (each ((x 0..1000))
+      (vtest [h x] x))))
+
+;; Test that when an iterator is created which references
+;; a table which is then resized, and from which all
+;; entries are subsequently deleted, when the iterator
+;; then marches, it will not see the deleted entries.
+(let ((h (hash-list (range 0 199))))
+  (let ((i (hash-begin h)))
+    (each ((x 200..1000))
+      (set [h x] x))
+    (each ((x 0..1000))
+      (del [h x]))
+    (test (hash-next i) nil)))
+
+;; Test that when an iterator is created which references
+;; a table which is then resized, and from which values
+;; are never deleted, the iterator will visit all the
+;; original items that existed when it was created.
+(let ((h (hash-list (range 0 199))))
+  (let ((i (hash-begin h)))
+    (each ((x 200..1000))
+      (set [h x] x))
+    (let ((items (build (whilet ((cell (hash-next i)))
+                          (add (car cell))))))
+      (test (diff 0..200 items) nil))))