author    Kaz Kylheku <kaz@kylheku.com>   2023-06-20 20:14:19 -0700
committer Kaz Kylheku <kaz@kylheku.com>   2023-06-20 20:14:19 -0700
commit    ca6b26810adb42e35c32d3f961982fec638d0d4c (patch)
tree      8678e9b5e516b3ba1d1ed31e1d5fe16fe162ebb4
parent    5fca2149d681753b11c97a88b8f2a9584d7817f6 (diff)
hash: support existing mutation+iteration semantics.
The original chained hashing scheme makes certain guarantees in situations where a hash table that is being iterated is also undergoing insertions or deletions. The original scheme meets these requirements simply, by putting a freeze against hash table growth while there are outstanding iterations; chained hashing gracefully handles load factors above 1. Load factors above 1 are not possible under open addressing (and we don't even want to approach 1), but we would like to preserve the requirements, which are:

1. If an iterator has already visited an item, it will not see that item again, regardless of insertions which cause the table to be reorganized.

2. It is not specified/required that an iterator will visit newly inserted items. It may visit some of those items, but not others.

3. If an iterator has not yet visited an item, and that item is deleted, it will not see that item, regardless of any insertions that reorganize the table.

In this commit, we implement a "table stack" scheme:

1. When a table is resized due to insertions while it is being iterated (h->usecount > 0), the existing small table is pushed onto a stack, the h->tblstack (table stack).

2. Iterators capture a hash table's current table and its size, and work with that snapshot. If the original hash table grows, existing iterators keep working with the table as it existed just before the reorganization, and so do not see any new insertions.

3. Whenever the delete operation (hash_remove) finds the item and removes it from the current table, it also walks the table stack, searching for the item in every table in the stack and nulling it out. This search is oblivious to nil: it is a blind search that goes around the table starting at the first probe position, looking for the identical cons cell to take out. This ensures that iterators will not see a deleted item, unless, of course, they already visited it before the deletion.

* hash.h (struct hash_iter): New members table and mask.

* hash.c (struct hash): New member tblstack.
(hash_grow): Drop the vec argument and recreate it locally (not essential to this commit). If the usecount is positive, push the existing table onto the table stack; otherwise, take the opportunity to obliterate the table stack.
(hash_insert): Drop the restriction that hash_grow is only called when the use count is zero. Adjust calls to hash_grow to drop the vec argument.
(hash_remove): When an item is found and deleted, and the table is in use by iterators, walk the table stack and delete the item from each previous table. Otherwise, if the table is not in use by iterators, obliterate the table stack.
(hash_mark): Exit early also if there is a table stack, and mark that stack.
(do_make_hash, make_similar_hash, copy_hash): Initialize the table stack in the new hash.
(hash_iter_mark): Mark the iterator's table. This is likely not necessary, since we also mark the hash table, which should have a pointer to that same table; but omitting it wouldn't be defensive programming.
(hash_iter_init, us_hash_iter_init): Initialize table and mask.
(hash_iter_next_impl, hash_iter_peek): These functions have to walk the table snapshot taken by the iterator, using the captured mask, not the current table.
(hash_reset): If the target hash's use count drops to zero, obliterate its table stack. Also add a missing setcheck here: this operation potentially stores a different hash into an existing iterator, which was not being done safely with regard to generational GC.

* tests/010/hash.tl: New tests.
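
To make the table-stack scheme concrete, what follows is a minimal, self-contained C sketch of the snapshot-and-stack idea, written under simplifying assumptions (integer keys standing in for hash codes, no GC, no removal from the current table in the demo). Every name in it (struct table, grow, stack_delete, iter_next, ...) is hypothetical; this is an illustration, not TXR's actual code.

/* Sketch of the table-stack scheme: an iterator snapshots the table
 * and mask; growth retires the old table to a stack while iterators
 * are outstanding; deletion blindly scrubs the stacked tables. */
#include <stdio.h>
#include <stdlib.h>

struct cell { int key, val; };

struct table {
    struct cell **v;     /* open-addressed vector of cell pointers */
    size_t mask;         /* size - 1, where size is a power of two */
    struct table *next;  /* link for the table stack */
};

struct hash {
    struct table *tbl;       /* current table */
    struct table *tblstack;  /* retired tables pinned by iterators */
    int usecount;            /* number of outstanding iterators */
};

struct iter {                /* snapshot of tbl and mask (point 2) */
    struct table *tbl;
    size_t mask, index;
};

static struct table *table_new(size_t mask)
{
    struct table *t = malloc(sizeof *t);
    t->v = calloc(mask + 1, sizeof *t->v);
    t->mask = mask;
    t->next = 0;
    return t;
}

static void put(struct table *t, struct cell *c)
{
    size_t i = (size_t) c->key & t->mask;   /* key doubles as hash */
    while (t->v[i])
        i = (i + 1) & t->mask;
    t->v[i] = c;
}

/* Growth (point 1): when iterators exist, retire the old table onto
 * the stack so their snapshots stay valid; otherwise just free it. */
static void grow(struct hash *h)
{
    struct table *old = h->tbl, *nt = table_new((old->mask << 1) | 1);
    for (size_t i = 0; i <= old->mask; i++)
        if (old->v[i])
            put(nt, old->v[i]);   /* same cell object is re-homed */
    h->tbl = nt;
    if (h->usecount > 0) {
        old->next = h->tblstack;
        h->tblstack = old;
    } else {
        free(old->v);
        free(old);
    }
}

/* Deletion (point 3): null out the identical cell in every stacked
 * table, walking the probe circle blindly, not stopping at nil. */
static void stack_delete(struct hash *h, struct cell *victim)
{
    for (struct table *t = h->tblstack; t; t = t->next) {
        size_t i = (size_t) victim->key & t->mask;
        for (size_t n = 0; n <= t->mask; n++, i = (i + 1) & t->mask)
            if (t->v[i] == victim) {
                t->v[i] = 0;
                break;
            }
    }
}

/* Iteration works purely off the snapshot: insertions that grow the
 * current table are invisible; deletions are hidden by stack_delete. */
static struct cell *iter_next(struct iter *it)
{
    while (it->index <= it->mask) {
        struct cell *c = it->tbl->v[it->index++];
        if (c)
            return c;
    }
    return 0;
}

int main(void)
{
    struct hash h = { table_new(7), 0, 0 };
    struct cell a = { 1, 10 }, b = { 2, 20 };
    put(h.tbl, &a);
    put(h.tbl, &b);

    struct iter it = { h.tbl, h.tbl->mask, 0 };
    h.usecount++;             /* iterator outstanding */
    grow(&h);                 /* old table goes onto the stack */
    stack_delete(&h, &b);     /* delete b; current-table removal elided */

    for (struct cell *c; (c = iter_next(&it)) != 0; )
        printf("visited key %d\n", c->key);   /* prints only key 1 */
    return 0;
}

The point of the demo: after grow, the iterator's snapshot is the retired table on the stack; stack_delete reaches into that snapshot, so the iterator visits key 1 but never the deleted key 2, matching the requirements above.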
 hash.c            | 76
 hash.h            |  2
 tests/010/hash.tl | 33
 3 files changed, 96 insertions(+), 15 deletions(-)
diff --git a/hash.c b/hash.c
index 706c716f..dddca0d7 100644
--- a/hash.c
+++ b/hash.c
@@ -75,6 +75,7 @@ struct hash {
ucnum count;
val userdata;
int usecount;
+ val tblstack;
struct hash_ops *hops;
};
@@ -498,15 +499,24 @@ static val hash_lookup(struct hash *h, val key, ucnum hcode)
return nil;
}
-static void hash_grow(val hash, struct hash *h, val *vec, ucnum mask)
+static void hash_grow(val hash, struct hash *h, ucnum mask)
{
ucnum j, nmask = (mask << 1) | 1;
val ntable;
+ val table = h->table;
+ val *vec = h->table->v.vec;
val *nvec;
if (nmask > NUM_MAX - 1)
uw_throwf(error_s, lit("hash table overflow"), nao);
+ if (h->usecount > 0) {
+ push(table, &h->tblstack);
+ setcheck(hash, h->tblstack);
+ } else {
+ h->tblstack = nil;
+ }
+
ntable = vector(num_fast(nmask + 1), nil);
nvec = ntable->v.vec;
@@ -549,8 +559,8 @@ static val hash_insert(val hash, struct hash *h, val key, ucnum hcode, loc new_p
setcheck(table, ncell);
if (!nullocp(new_p))
deref(new_p) = t;
- if (++h->count > h->mask >> 1 && h->usecount == 0)
- hash_grow(hash, h, vec, mask);
+ if (++h->count > h->mask >> 1)
+ hash_grow(hash, h, mask);
return ncell;
}
@@ -563,7 +573,7 @@ static val hash_insert(val hash, struct hash *h, val key, ucnum hcode, loc new_p
i = (i + 1) & h->mask;
} while (i != start);
- hash_grow(hash, h, vec, mask);
+ hash_grow(hash, h, mask);
return hash_insert(hash, h, key, hcode, new_p);
}
@@ -575,6 +585,7 @@ static val hash_remove(struct hash *h, ucnum victim)
ucnum start = victim, i = start, mask = h->mask;
val cell = vec[i];
val ret = nil;
+ val vicentry = vec[victim];
if (cell == nil)
return ret;
@@ -604,6 +615,27 @@ static val hash_remove(struct hash *h, ucnum victim)
bug_unless (h->count > 0);
h->count--;
+ if (h->usecount) {
+ val tblit;
+
+ for (tblit = h->tblstack; tblit; tblit = us_cdr(tblit)) {
+ val stbl = us_car(tblit);
+ val *svec = stbl->v.vec;
+ ucnum smask = c_unum(svec[vec_length], nil) - 1;
+ ucnum start = victim & smask;
+ ucnum end = (victim + smask) & smask;
+
+ for (i = start; i != end; i = (i + 1) & smask) {
+ if (svec[i] == vicentry) {
+ svec[i] = nil;
+ break;
+ }
+ }
+ }
+ } else if (h->tblstack) {
+ h->tblstack = nil;
+ }
+
return ret;
}
@@ -865,8 +897,9 @@ static void hash_mark(val hash)
gc_mark(h->userdata);
- if (h->count == 0) {
+ if (h->count == 0 || h->tblstack) {
gc_mark(table);
+ gc_mark(h->tblstack);
return;
}
@@ -995,6 +1028,7 @@ static val do_make_hash(hash_weak_opt_t wkopt, hash_type_t type, val seed)
h->userdata = nil;
h->usecount = 0;
+ h->tblstack = nil;
switch (type) {
case hash_type_eq:
@@ -1069,6 +1103,7 @@ val make_similar_hash(val existing)
h->seed = ex->seed;
h->wkopt = ex->wkopt;
h->usecount = 0;
+ h->tblstack = 0;
h->hops = ex->hops;
return hash;
@@ -1094,6 +1129,7 @@ val copy_hash(val existing)
h->seed = ex->seed;
h->wkopt = ex->wkopt;
h->usecount = 0;
+ h->tblstack = 0;
h->hops = ex->hops;
for (i = 0; i <= h->mask; i++) {
@@ -1230,8 +1266,8 @@ val hashp(val obj)
static void hash_iter_mark(val hash_iter)
{
struct hash_iter *hi = coerce(struct hash_iter *, hash_iter->co.handle);
- if (hi->hash)
- gc_mark(hi->hash);
+ gc_mark(hi->hash);
+ gc_mark(hi->table);
hi->next = reachable_iters;
reachable_iters = hi;
}
@@ -1247,6 +1283,8 @@ void hash_iter_init(struct hash_iter *hi, val hash, val self)
struct hash *h = coerce(struct hash *, cobj_handle(self, hash, hash_cls));
hi->next = 0;
hi->hash = hash;
+ hi->table = h->table;
+ hi->mask = h->mask;
hi->index = 0;
h->usecount++;
}
@@ -1256,6 +1294,8 @@ void us_hash_iter_init(struct hash_iter *hi, val hash)
struct hash *h = coerce(struct hash *, hash->co.handle);
hi->next = 0;
hi->hash = hash;
+ hi->table = h->table;
+ hi->mask = h->mask;
hi->index = 0;
h->usecount++;
}
@@ -1264,19 +1304,21 @@ static val hash_iter_next_impl(struct hash_iter *hi)
{
val hash = hi->hash;
struct hash *h = hash ? coerce(struct hash *, hash->co.handle) : 0;
- ucnum mask = h->mask;
+ ucnum mask = hi->mask;
if (!h)
return nil;
while (hi->index <= mask) {
- val cell = h->table->v.vec[hi->index++];
+ val cell = hi->table->v.vec[hi->index++];
if (cell)
return cell;
}
hi->hash = nil;
- h->usecount--;
+ if (--h->usecount <= 0)
+ h->tblstack = nil;
+
return nil;
}
@@ -1289,13 +1331,13 @@ val hash_iter_peek(struct hash_iter *hi)
{
val hash = hi->hash;
struct hash *h = hash ? coerce(struct hash *, hash->co.handle) : 0;
- ucnum mask = h->mask, index = hi->index;
+ ucnum mask = hi->mask, index = hi->index;
if (!h)
return nil;
do {
- val cell = h->table->v.vec[index++];
+ val cell = hi->table->v.vec[index++];
if (cell)
return cell;
} while (index <= mask);
@@ -1339,13 +1381,17 @@ val hash_reset(val iter, val hash)
if (hi->hash) {
struct hash *h = coerce(struct hash *, hash->co.handle);
- h->usecount--;
+ if (--h->usecount <= 0)
+ h->tblstack = nil;
}
- if (hash)
+ if (hash) {
hash_iter_init(hi, hash, self);
- else
+ if (hi->table)
+ setcheck(iter, hash);
+ } else {
memset(hi, 0, sizeof *hi);
+ }
return iter;
}
diff --git a/hash.h b/hash.h
index ec994647..e4a67086 100644
--- a/hash.h
+++ b/hash.h
@@ -37,6 +37,8 @@ typedef enum hash_weak_opt {
struct hash_iter {
struct hash_iter *next;
val hash;
+ val table;
+ ucnum mask;
ucnum index;
};
diff --git a/tests/010/hash.tl b/tests/010/hash.tl
index 558688af..5af8b167 100644
--- a/tests/010/hash.tl
+++ b/tests/010/hash.tl
@@ -27,3 +27,36 @@
(hash-props 1 2 'a 'b) #H(() (1 2) (a b))
(hash-props 1) :error
(hash-props 1 2 'a) :error)
+
+;; Test that growing a hash table works while iterators
+;; are referencing it.
+(let ((h (hash-list (range 0 199))))
+ (let ((i (hash-begin h)))
+ (each ((x 200..1000))
+ (set [h x] x))
+ (each ((x 0..1000))
+ (vtest [h x] x))))
+
+;; Test that when an iterator is created which references
+;; a table which is then resized, and from which all
+;; entries are subsequently deleted, when the iterator
+;; then marches, it will not see the deleted entries.
+(let ((h (hash-list (range 0 199))))
+ (let ((i (hash-begin h)))
+ (each ((x 200..1000))
+ (set [h x] x))
+ (each ((x 0..1000))
+ (del [h x]))
+ (test (hash-next i) nil)))
+
+;; Test that when an iterator is created which references
+;; a table which is then resized, and from which values
+;; are never deleted, the iterator will visit all the
+;; original items that existed when it was created.
+(let ((h (hash-list (range 0 199))))
+ (let ((i (hash-begin h)))
+ (each ((x 200..1000))
+ (set [h x] x))
+ (let ((items (build (whilet ((cell (hash-next i)))
+ (add (car cell))))))
+ (test (diff 0..200 items) nil))))
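+
+;; One more user-level illustration, not part of the patch: requirement 2
+;; leaves unspecified whether an iterator sees items inserted after it was
+;; created. A sketch like the following (using the same hash-begin/hash-next
+;; API as the tests above) exercises exactly that latitude.
+;;
+;; (let* ((h (hash-list (range 0 9)))
+;;        (i (hash-begin h)))
+;;   (set [h 100] 100)             ;; inserted after the iterator was made
+;;   (whilet ((cell (hash-next i)))
+;;     (prinl cell)))              ;; may or may not print (100 . 100)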