aboutsummaryrefslogtreecommitdiffstats
path: root/cint_array.c
diff options
context:
space:
mode:
Diffstat (limited to 'cint_array.c')
-rw-r--r--cint_array.c1237
1 files changed, 1237 insertions, 0 deletions
diff --git a/cint_array.c b/cint_array.c
new file mode 100644
index 00000000..8ec09239
--- /dev/null
+++ b/cint_array.c
@@ -0,0 +1,1237 @@
+/*
+ * cint_array.c - routines for arrays of (mostly) consecutive positive integer indices.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-2011 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "awk.h"
+
+extern FILE *output_fp;
+extern void indent(int indent_level);
+extern NODE **is_integer(NODE *symbol, NODE *subs);
+
+/*
+ * NHAT --- maximum size of a leaf array (2^NHAT).
+ * THRESHOLD --- Maximum capacity waste; THRESHOLD >= 2^(NHAT + 1).
+ */
+
+static int NHAT = 10;
+static long THRESHOLD;
+
+/* What is the optimium NHAT ? timing results suggest that 10 is a good choice,
+ * although differences aren't that significant for > 10.
+ */
+
+
+static NODE **cint_array_init(NODE *symbol, NODE *subs);
+static NODE **is_uinteger(NODE *symbol, NODE *subs);
+static NODE **cint_lookup(NODE *symbol, NODE *subs);
+static NODE **cint_exists(NODE *symbol, NODE *subs);
+static NODE **cint_clear(NODE *symbol, NODE *subs);
+static NODE **cint_remove(NODE *symbol, NODE *subs);
+static NODE **cint_list(NODE *symbol, NODE *t);
+static NODE **cint_copy(NODE *symbol, NODE *newsymb);
+static NODE **cint_dump(NODE *symbol, NODE *ndump);
+#ifdef ARRAYDEBUG
+static NODE **cint_option(NODE *opt, NODE *val);
+static void cint_print(NODE *symbol);
+#endif
+
+array_ptr cint_array_func[] = {
+ cint_array_init,
+ is_uinteger,
+ cint_lookup,
+ cint_exists,
+ cint_clear,
+ cint_remove,
+ cint_list,
+ cint_copy,
+ cint_dump,
+#ifdef ARRAYDEBUG
+ cint_option,
+#endif
+};
+
+static inline int cint_hash(long k);
+static inline NODE **cint_find(NODE *symbol, long k, int h1);
+
+static inline NODE *make_node(NODETYPE type);
+
+static NODE **tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base);
+static NODE **tree_exists(NODE *tree, long k);
+static void tree_clear(NODE *tree);
+static int tree_remove(NODE *symbol, NODE *tree, long k);
+static void tree_copy(NODE *newsymb, NODE *tree, NODE *newtree);
+static long tree_list(NODE *tree, NODE **list, unsigned int flags);
+static inline NODE **tree_find(NODE *tree, long k, int i);
+static void tree_info(NODE *tree, NODE *ndump, const char *aname);
+static size_t tree_kilobytes(NODE *tree);
+#ifdef ARRAYDEBUG
+static void tree_print(NODE *tree, size_t bi, int indent_level);
+#endif
+
+static inline NODE **leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base);
+static inline NODE **leaf_exists(NODE *array, long k);
+static void leaf_clear(NODE *array);
+static int leaf_remove(NODE *symbol, NODE *array, long k);
+static void leaf_copy(NODE *newsymb, NODE *array, NODE *newarray);
+static long leaf_list(NODE *array, NODE **list, unsigned int flags);
+static void leaf_info(NODE *array, NODE *ndump, const char *aname);
+#ifdef ARRAYDEBUG
+static void leaf_print(NODE *array, size_t bi, int indent_level);
+#endif
+
+/* powers of 2 table upto 2^30 */
+static const long power_two_table[] = {
+ 1, 2, 4, 8, 16, 32, 64,
+ 128, 256, 512, 1024, 2048, 4096,
+ 8192, 16384, 32768, 65536, 131072, 262144,
+ 524288, 1048576, 2097152, 4194304, 8388608, 16777216,
+ 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824
+};
+
+
+#define ISUINT(a, s) ((((s)->flags & NUMINT) != 0 || is_integer(a, s) != NULL) \
+ && (s)->numbr >= 0)
+
+/*
+ * To store 2^n integers, allocate top-level array of size n, elements
+ * of which are 1-Dimensional (leaf-array) of geometrically increasing
+ * size (power of 2).
+ *
+ * [0] --> [ 0 ]
+ * [1] --> [ 1 ]
+ * |2| --> [ 2 | 3 ]
+ * |3| --> [ 4 | 5 | 6 | 7 ]
+ * |.|
+ * |k| --> [ 2^(k - 1)| ... | 2^k - 1 ]
+ * ...
+ *
+ * For a given integer n (> 0), the leaf-array is at 1 + floor(log2(n)).
+ *
+ * The idea for the geometrically increasing array sizes is from:
+ * Fast Functional Lists, Hash-Lists, Deques and Variable Length Arrays.
+ * Bagwell, Phil (2002).
+ * http://infoscience.epfl.ch/record/64410/files/techlists.pdf
+ *
+ * Disadvantage:
+ * Worst case memory waste > 99% and will happen when each of the
+ * leaf arrays contains only a single element. Even with consecutive
+ * integers, memory waste can be as high as 50%.
+ *
+ * Solution: Hashed Array Trees (HATs).
+ *
+ */
+
+/* cint_array_init --- check relevant environment variables */
+
+static NODE **
+cint_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
+{
+ long newval;
+
+ if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT)
+ NHAT = newval;
+ THRESHOLD = power_two_table[NHAT + 1];
+ return (NODE **) ! NULL;
+}
+
+
+/* is_uinteger --- test if the subscript is an integer >= 0 */
+
+NODE **
+is_uinteger(NODE *symbol, NODE *subs)
+{
+ if (is_integer(symbol, subs) != NULL && subs->numbr >= 0)
+ return (NODE **) ! NULL;
+ return NULL;
+}
+
+
+/* cint_lookup --- Find the subscript in the array; Install it if it isn't there. */
+
+static NODE **
+cint_lookup(NODE *symbol, NODE *subs)
+{
+ NODE **lhs;
+ long k;
+ int h1 = -1, m, li;
+ NODE *tn, *xn;
+ long cint_size, capacity;
+
+ k = -1;
+ if (ISUINT(symbol, subs)) {
+ k = subs->numbr; /* k >= 0 */
+ h1 = cint_hash(k); /* h1 >= NHAT */
+ if ((lhs = cint_find(symbol, k, h1)) != NULL)
+ return lhs;
+ }
+ xn = symbol->xarray;
+ if (xn != NULL && (lhs = xn->aexists(xn, subs)) != NULL)
+ return lhs;
+
+ /* It's not there, install it */
+
+ if (k < 0)
+ goto xinstall;
+
+ m = h1 - 1; /* m >= (NHAT- 1) */
+
+ /* Estimate capacity upper bound.
+ * capacity upper bound = current capacity + leaf array size.
+ */
+ li = m > NHAT ? m : NHAT;
+ while (li >= NHAT) {
+ /* leaf-array of a HAT */
+ li = (li + 1) / 2;
+ }
+ capacity = symbol->array_capacity + power_two_table[li];
+
+ cint_size = (xn == NULL) ? symbol->table_size
+ : (symbol->table_size - xn->table_size);
+ assert(cint_size >= 0);
+ if ((capacity - cint_size) > THRESHOLD)
+ goto xinstall;
+
+ if (symbol->nodes == NULL) {
+ symbol->array_capacity = 0;
+ assert(symbol->table_size == 0);
+
+ /* nodes[0] .. nodes[NHAT- 1] not used */
+ emalloc(symbol->nodes, NODE **, INT32_BIT * sizeof(NODE *), "cint_lookup");
+ memset(symbol->nodes, '\0', INT32_BIT * sizeof(NODE *));
+ }
+
+ symbol->table_size++; /* one more element in array */
+
+ tn = symbol->nodes[h1];
+ if (tn == NULL) {
+ tn = make_node(Node_array_tree);
+ symbol->nodes[h1] = tn;
+ }
+
+ if (m < NHAT)
+ return tree_lookup(symbol, tn, k, NHAT, 0);
+ return tree_lookup(symbol, tn, k, m, power_two_table[m]);
+
+xinstall:
+
+ symbol->table_size++;
+ if (xn == NULL) {
+ extern array_ptr int_array_func[];
+ extern array_ptr str_array_func[];
+
+ xn = symbol->xarray = make_array();
+ xn->vname = symbol->vname; /* shallow copy */
+
+ /* Avoid using assoc_lookup(xn, subs) which may lead
+ * to infinite recursion.
+ */
+
+ if (is_integer(xn, subs))
+ xn->array_funcs = int_array_func;
+ else
+ xn->array_funcs = str_array_func;
+ xn->flags |= XARRAY;
+ }
+ return xn->alookup(xn, subs);
+}
+
+
+/* cint_exists --- test whether an index is in the array or not. */
+
+static NODE **
+cint_exists(NODE *symbol, NODE *subs)
+{
+ NODE *xn;
+
+ if (ISUINT(symbol, subs)) {
+ long k = subs->numbr;
+ NODE **lhs;
+ if ((lhs = cint_find(symbol, k, cint_hash(k))) != NULL)
+ return lhs;
+ }
+ if ((xn = symbol->xarray) == NULL)
+ return NULL;
+ return xn->aexists(xn, subs);
+}
+
+
+/* cint_clear --- flush all the values in symbol[] */
+
+static NODE **
+cint_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
+{
+ size_t i;
+ NODE *tn;
+
+ assert(symbol->nodes != NULL);
+
+ if (symbol->xarray != NULL) {
+ NODE *xn = symbol->xarray;
+ assoc_clear(xn);
+ freenode(xn);
+ symbol->xarray = NULL;
+ }
+
+ for (i = NHAT; i < INT32_BIT; i++) {
+ tn = symbol->nodes[i];
+ if (tn != NULL) {
+ tree_clear(tn);
+ freenode(tn);
+ }
+ }
+
+ efree(symbol->nodes);
+ init_array(symbol); /* re-initialize symbol */
+ return NULL;
+}
+
+
+/* cint_remove --- remove an index from the array */
+
+static NODE **
+cint_remove(NODE *symbol, NODE *subs)
+{
+ long k;
+ int h1;
+ NODE *tn, *xn = symbol->xarray;
+
+ if (symbol->table_size == 0)
+ return NULL;
+
+ if (! ISUINT(symbol, subs))
+ goto xremove;
+
+ assert(symbol->nodes != NULL);
+
+ k = subs->numbr;
+ h1 = cint_hash(k);
+ tn = symbol->nodes[h1];
+ if (tn == NULL || ! tree_remove(symbol, tn, k))
+ goto xremove;
+
+ if (tn->table_size == 0) {
+ freenode(tn);
+ symbol->nodes[h1] = NULL;
+ }
+
+ symbol->table_size--;
+
+ if (xn == NULL && symbol->table_size == 0) {
+ efree(symbol->nodes);
+ init_array(symbol); /* re-initialize array 'symbol' */
+ } else if(xn != NULL && symbol->table_size == xn->table_size) {
+ /* promote xn to symbol */
+
+ xn->flags &= ~XARRAY;
+ xn->parent_array = symbol->parent_array;
+ efree(symbol->nodes);
+ *symbol = *xn;
+ freenode(xn);
+ }
+
+ return (NODE **) ! NULL;
+
+xremove:
+ xn = symbol->xarray;
+ if (xn == NULL || xn->aremove(xn, subs) == NULL)
+ return NULL;
+ if (xn->table_size == 0) {
+ freenode(xn);
+ symbol->xarray = NULL;
+ }
+ symbol->table_size--;
+ assert(symbol->table_size > 0);
+
+ return (NODE **) ! NULL;
+}
+
+
+/* cint_copy --- duplicate input array "symbol" */
+
+static NODE **
+cint_copy(NODE *symbol, NODE *newsymb)
+{
+ NODE **old, **new;
+ size_t i;
+
+ assert(symbol->nodes != NULL);
+
+ /* allocate new table */
+ emalloc(new, NODE **, INT32_BIT * sizeof(NODE *), "cint_copy");
+ memset(new, '\0', INT32_BIT * sizeof(NODE *));
+
+ old = symbol->nodes;
+ for (i = NHAT; i < INT32_BIT; i++) {
+ if (old[i] == NULL)
+ continue;
+ new[i] = make_node(Node_array_tree);
+ tree_copy(newsymb, old[i], new[i]);
+ }
+
+ if (symbol->xarray != NULL) {
+ NODE *xn, *n;
+ xn = symbol->xarray;
+ n = make_array();
+ n->vname = newsymb->vname;
+ (void) xn->acopy(xn, n);
+ newsymb->xarray = n;
+ } else
+ newsymb->xarray = NULL;
+
+ newsymb->nodes = new;
+ newsymb->table_size = symbol->table_size;
+ newsymb->array_capacity = symbol->array_capacity;
+ newsymb->flags = symbol->flags;
+
+ return NULL;
+}
+
+
+/* cint_list --- return a list of items */
+
+static NODE**
+cint_list(NODE *symbol, NODE *t)
+{
+ NODE **list = NULL;
+ NODE *tn, *xn;
+ unsigned long k = 0, num_elems, list_size;
+ size_t j, ja, jd;
+ int elem_size = 1;
+
+ num_elems = symbol->table_size;
+ if (num_elems == 0)
+ return NULL;
+
+ if ((t->flags & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
+ num_elems = 1;
+
+ if ((t->flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
+ elem_size = 2;
+ list_size = num_elems * elem_size;
+
+ if (symbol->xarray != NULL) {
+ xn = symbol->xarray;
+ list = xn->alist(xn, t);
+ assert(list != NULL);
+ t->flags &= ~(AASC|ADESC);
+ if (num_elems == 1 || num_elems == xn->table_size)
+ return list;
+ erealloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
+ k = elem_size * xn->table_size;
+ } else
+ emalloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
+
+
+ if ((t->flags & AINUM) == 0) /* not sorting by "index num" */
+ t->flags &= ~(AASC|ADESC);
+
+ /* populate it with index in ascending or descending order */
+
+ for (ja = NHAT, jd = INT32_BIT - 1; ja < INT32_BIT && jd >= NHAT; ) {
+ j = (t->flags & ADESC) ? jd-- : ja++;
+ tn = symbol->nodes[j];
+ if (tn == NULL)
+ continue;
+ k += tree_list(tn, list + k, t->flags);
+ if (k >= list_size)
+ return list;
+ }
+ return list;
+}
+
+
+/* cint_dump --- dump array info */
+
+static NODE **
+cint_dump(NODE *symbol, NODE *ndump)
+{
+ NODE *tn, *xn = NULL;
+ int indent_level;
+ size_t i;
+ long cint_size = 0, xsize = 0;
+ AWKNUM kb = 0;
+ extern AWKNUM int_kilobytes(NODE *symbol);
+ extern AWKNUM str_kilobytes(NODE *symbol);
+ extern array_ptr int_array_func[];
+
+ indent_level = ndump->alevel;
+
+ if (symbol->xarray != NULL) {
+ xn = symbol->xarray;
+ xsize = xn->table_size;
+ }
+ cint_size = symbol->table_size - xsize;
+
+ if ((symbol->flags & XARRAY) == 0)
+ fprintf(output_fp, "%s `%s'\n",
+ (symbol->parent_array == NULL) ? "array" : "sub-array",
+ array_vname(symbol));
+ indent_level++;
+ indent(indent_level);
+ fprintf(output_fp, "array_func: cint_array_func\n");
+ if (symbol->flags != 0) {
+ indent(indent_level);
+ fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
+ }
+ indent(indent_level);
+ fprintf(output_fp, "NHAT: %d\n", NHAT);
+ indent(indent_level);
+ fprintf(output_fp, "THRESHOLD: %ld\n", THRESHOLD);
+ indent(indent_level);
+ fprintf(output_fp, "table_size: %ld (total), %ld (cint), %ld (int + str)\n",
+ symbol->table_size, cint_size, xsize);
+ indent(indent_level);
+ fprintf(output_fp, "array_capacity: %lu\n", (unsigned long) symbol->array_capacity);
+ indent(indent_level);
+ fprintf(output_fp, "Load Factor: %.2g\n", (AWKNUM) cint_size / symbol->array_capacity);
+
+ for (i = NHAT; i < INT32_BIT; i++) {
+ tn = symbol->nodes[i];
+ if (tn == NULL)
+ continue;
+ /* Node_array_tree + HAT */
+ kb += (sizeof(NODE) + tree_kilobytes(tn)) / 1024.0;
+ }
+ kb += (INT32_BIT * sizeof(NODE *)) / 1024.0; /* symbol->nodes */
+ kb += (symbol->array_capacity * sizeof(NODE *)) / 1024.0; /* value nodes in Node_array_leaf(s) */
+ if (xn != NULL) {
+ if (xn->array_funcs == int_array_func)
+ kb += int_kilobytes(xn);
+ else
+ kb += str_kilobytes(xn);
+ }
+
+ indent(indent_level);
+ fprintf(output_fp, "memory: %.2g kB (total)\n", kb);
+
+ /* dump elements */
+
+ if (ndump->adepth >= 0) {
+ const char *aname;
+
+ fprintf(output_fp, "\n");
+ aname = make_aname(symbol);
+ for (i = NHAT; i < INT32_BIT; i++) {
+ tn = symbol->nodes[i];
+ if (tn != NULL)
+ tree_info(tn, ndump, aname);
+ }
+ }
+
+ if (xn != NULL) {
+ fprintf(output_fp, "\n");
+ xn->adump(xn, ndump);
+ }
+
+#ifdef ARRAYDEBUG
+ if (ndump->adepth < -999)
+ cint_print(symbol);
+#endif
+
+ return NULL;
+}
+
+
+/* cint_hash --- locate the HAT for a given number 'k' */
+
+static inline int
+cint_hash(long k)
+{
+ uint32_t num, r, shift;
+
+ assert(k >= 0);
+ if (k == 0)
+ return NHAT;
+ num = k;
+
+ /* Find the Floor(log base 2 of 32-bit integer) */
+
+ /* Warren Jr., Henry S. (2002). Hacker's Delight.
+ * Addison Wesley. pp. pp. 215. ISBN 978-0201914658.
+ *
+ * r = 0;
+ * if (num >= 1<<16) { num >>= 16; r += 16; }
+ * if (num >= 1<< 8) { num >>= 8; r += 8; }
+ * if (num >= 1<< 4) { num >>= 4; r += 4; }
+ * if (num >= 1<< 2) { num >>= 2; r += 2; }
+ * if (num >= 1<< 1) { r += 1; }
+ */
+
+
+ /* Slightly different code copied from:
+ *
+ * http://www-graphics.stanford.edu/~seander/bithacks.html
+ * Bit Twiddling Hacks
+ * By Sean Eron Anderson
+ * seander@cs.stanford.edu
+ * Individually, the code snippets here are in the public domain
+ * (unless otherwise noted) — feel free to use them however you please.
+ * The aggregate collection and descriptions are © 1997-2005
+ * Sean Eron Anderson. The code and descriptions are distributed in the
+ * hope that they will be useful, but WITHOUT ANY WARRANTY and without
+ * even the implied warranty of merchantability or fitness for a particular
+ * purpose.
+ *
+ */
+
+ r = (num > 0xFFFF) << 4; num >>= r;
+ shift = (num > 0xFF) << 3; num >>= shift; r |= shift;
+ shift = (num > 0x0F) << 2; num >>= shift; r |= shift;
+ shift = (num > 0x03) << 1; num >>= shift; r |= shift;
+ r |= (num >> 1);
+
+ /* We use a single HAT for 0 <= num < 2^NHAT */
+ if (r < NHAT)
+ return NHAT;
+
+ return (1 + r);
+}
+
+
+/* cint_find --- locate the integer subscript */
+
+static inline NODE **
+cint_find(NODE *symbol, long k, int h1)
+{
+ NODE *tn;
+
+ if (symbol->nodes == NULL || (tn = symbol->nodes[h1]) == NULL)
+ return NULL;
+ return tree_exists(tn, k);
+}
+
+
+#ifdef ARRAYDEBUG
+
+static NODE **
+cint_option(NODE *opt, NODE *val)
+{
+ NODE *tmp;
+ NODE **ret = (NODE **) ! NULL;
+
+ tmp = force_string(opt);
+ (void) force_number(val);
+ if (strcmp(tmp->stptr, "NHAT") == 0)
+ NHAT = (int) val->numbr;
+ else
+ ret = NULL;
+ return ret;
+}
+
+
+/* cint_print --- print structural info */
+
+static void
+cint_print(NODE *symbol)
+{
+ NODE *tn;
+ size_t i;
+
+ fprintf(output_fp, "I[%4lu:%-4lu]\n", (unsigned long) INT32_BIT,
+ (unsigned long) symbol->table_size);
+ for (i = NHAT; i < INT32_BIT; i++) {
+ tn = symbol->nodes[i];
+ if (tn == NULL)
+ continue;
+ tree_print(tn, i, 1);
+ }
+}
+
+#endif
+
+
+/*------------------------ Hashed Array Trees -----------------------------*/
+
+/*
+ * HATs: Hashed Array Trees
+ * Fast variable-length arrays
+ * Edward Sitarski
+ * http://www.drdobbs.com/architecture-and-design/184409965
+ *
+ * HAT has a top-level array containing a power of two
+ * number of leaf arrays. All leaf arrays are the same size as the
+ * top-level array. A full HAT can hold n^2 elements,
+ * where n (some power of 2) is the size of each leaf array.
+ * [i/n][i & (n - 1)] locates the `i th' element in a HAT.
+ *
+ */
+
+/*
+ * A half HAT is defined here as a HAT with a top-level array of size n^2/2
+ * and holds the first n^2/2 elements.
+ *
+ * 1. 2^8 elements can be stored in a full HAT of size 2^4.
+ * 2. 2^9 elements can be stored in a half HAT of size 2^5.
+ * 3. When the number of elements is some power of 2, it
+ * can be stored in a full or a half HAT.
+ * 4. When the number of elements is some power of 2, it
+ * can be stored in a HAT (full or half) with HATs as leaf elements
+ * (full or half), and so on (e.g. 2^8 elements in a HAT of size 2^4 (top-level
+ * array dimension) with each leaf array being a HAT of size 2^2).
+ *
+ * IMPLEMENTATION DETAILS:
+ * 1. A HAT of 2^12 elements needs 2^6 house-keeping NODEs
+ * of Node_array_leaf.
+ *
+ * 2. A HAT of HATS of 2^12 elements needs
+ * 2^6 * (1 Node_array_tree + 2^3 Node_array_leaf)
+ * ~ 2^9 house-keeping NODEs.
+ *
+ * 3. When a leaf array (or leaf HAT) becomes empty, the memory
+ * is deallocated, and when there is no leaf array (or leaf HAT) left,
+ * the HAT is deleted.
+ *
+ * 4. A HAT stores the base (first) element, and locates the leaf array/HAT
+ * for the `i th' element using integer division
+ * (i - base)/n where n is the size of the top-level array.
+ *
+ */
+
+/* make_node --- initialize a NODE */
+
+static inline NODE *
+make_node(NODETYPE type)
+{
+ NODE *n;
+ getnode(n);
+ memset(n, '\0', sizeof(NODE));
+ n->type = type;
+ return n;
+}
+
+
+/* tree_lookup --- Find an integer subscript in a HAT; Install it if it isn't there */
+
+static NODE **
+tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base)
+{
+ NODE **lhs;
+ NODE *tn;
+ int i, n;
+ size_t size;
+ long num = k;
+
+ /*
+ * HAT size (size of Top & Leaf array) = 2^n
+ * where n = Floor ((m + 1)/2). For an odd value of m,
+ * only the first half of the HAT is needed.
+ */
+
+ n = (m + 1) / 2;
+
+ if (tree->table_size == 0) {
+ size_t actual_size;
+ NODE **table;
+
+ assert(tree->nodes == NULL);
+
+ /* initialize top-level array */
+ size = actual_size = power_two_table[n];
+ tree->array_base = base;
+ tree->array_size = size;
+ tree->table_size = 0; /* # of elements in the array */
+ if (n > m/2) {
+ /* only first half of the array used */
+ actual_size /= 2;
+ tree->flags |= HALFHAT;
+ }
+ emalloc(table, NODE **, actual_size * sizeof(NODE *), "tree_lookup");
+ memset(table, '\0', actual_size * sizeof(NODE *));
+ tree->nodes = table;
+ } else
+ size = tree->array_size;
+
+ num -= tree->array_base;
+ i = num / size; /* top-level array index */
+ assert(i >= 0);
+
+ if ((lhs = tree_find(tree, k, i)) != NULL)
+ return lhs;
+
+ /* It's not there, install it */
+
+ tree->table_size++;
+ base += (size * i);
+ tn = tree->nodes[i];
+ if (n > NHAT) {
+ if (tn == NULL)
+ tn = tree->nodes[i] = make_node(Node_array_tree);
+ return tree_lookup(symbol, tn, k, n, base);
+ } else {
+ if (tn == NULL)
+ tn = tree->nodes[i] = make_node(Node_array_leaf);
+ return leaf_lookup(symbol, tn, k, size, base);
+ }
+}
+
+
+/* tree_exists --- test whether integer subscript `k' exists or not */
+
+static NODE **
+tree_exists(NODE *tree, long k)
+{
+ int i;
+ NODE *tn;
+
+ i = (k - tree->array_base) / tree->array_size;
+ assert(i >= 0);
+ tn = tree->nodes[i];
+ if (tn == NULL)
+ return NULL;
+ if (tn->type == Node_array_tree)
+ return tree_exists(tn, k);
+ return leaf_exists(tn, k);
+}
+
+/* tree_clear --- flush all the values */
+
+static void
+tree_clear(NODE *tree)
+{
+ NODE *tn;
+ size_t j, hsize;
+
+ hsize = tree->array_size;
+ if ((tree->flags & HALFHAT) != 0)
+ hsize /= 2;
+
+ for (j = 0; j < hsize; j++) {
+ tn = tree->nodes[j];
+ if (tn == NULL)
+ continue;
+ if (tn->type == Node_array_tree)
+ tree_clear(tn);
+ else
+ leaf_clear(tn);
+ freenode(tn);
+ }
+
+ efree(tree->nodes);
+ memset(tree, '\0', sizeof(NODE));
+ tree->type = Node_array_tree;
+}
+
+
+/* tree_remove --- If the integer subscript is in the HAT, remove it */
+
+static int
+tree_remove(NODE *symbol, NODE *tree, long k)
+{
+ int i;
+ NODE *tn;
+
+ i = (k - tree->array_base) / tree->array_size;
+ assert(i >= 0);
+ tn = tree->nodes[i];
+ if (tn == NULL)
+ return FALSE;
+
+ if (tn->type == Node_array_tree
+ && ! tree_remove(symbol, tn, k))
+ return FALSE;
+ else if (tn->type == Node_array_leaf
+ && ! leaf_remove(symbol, tn, k))
+ return FALSE;
+
+ if (tn->table_size == 0) {
+ freenode(tn);
+ tree->nodes[i] = NULL;
+ }
+
+ /* one less item in array */
+ if (--tree->table_size == 0) {
+ efree(tree->nodes);
+ memset(tree, '\0', sizeof(NODE));
+ tree->type = Node_array_tree;
+ }
+ return TRUE;
+}
+
+
+/* tree_find --- locate an interger subscript in the HAT */
+
+static inline NODE **
+tree_find(NODE *tree, long k, int i)
+{
+ NODE *tn;
+
+ assert(tree->nodes != NULL);
+ tn = tree->nodes[i];
+ if (tn != NULL) {
+ if (tn->type == Node_array_tree)
+ return tree_exists(tn, k);
+ return leaf_exists(tn, k);
+ }
+ return NULL;
+}
+
+
+/* tree_list --- return a list of items in the HAT */
+
+static long
+tree_list(NODE *tree, NODE **list, unsigned int flags)
+{
+ NODE *tn;
+ size_t j, cj, hsize;
+ long k = 0;
+
+ assert(list != NULL);
+
+ hsize = tree->array_size;
+ if ((tree->flags & HALFHAT) != 0)
+ hsize /= 2;
+
+ for (j = 0; j < hsize; j++) {
+ cj = (flags & ADESC) ? (hsize - 1 - j) : j;
+ tn = tree->nodes[cj];
+ if (tn == NULL)
+ continue;
+ if (tn->type == Node_array_tree)
+ k += tree_list(tn, list + k, flags);
+ else
+ k += leaf_list(tn, list + k, flags);
+ if ((flags & ADELETE) != 0 && k >= 1)
+ return k;
+ }
+ return k;
+}
+
+
+/* tree_copy --- duplicate a HAT */
+
+static void
+tree_copy(NODE *newsymb, NODE *tree, NODE *newtree)
+{
+ NODE **old, **new;
+ size_t j, hsize;
+
+ hsize = tree->array_size;
+ if ((tree->flags & HALFHAT) != 0)
+ hsize /= 2;
+
+ emalloc(new, NODE **, hsize * sizeof(NODE *), "tree_copy");
+ memset(new, '\0', hsize * sizeof(NODE *));
+ newtree->nodes = new;
+ newtree->array_base = tree->array_base;
+ newtree->array_size = tree->array_size;
+ newtree->table_size = tree->table_size;
+ newtree->flags = tree->flags;
+
+ old = tree->nodes;
+ for (j = 0; j < hsize; j++) {
+ if (old[j] == NULL)
+ continue;
+ if (old[j]->type == Node_array_tree) {
+ new[j] = make_node(Node_array_tree);
+ tree_copy(newsymb, old[j], new[j]);
+ } else {
+ new[j] = make_node(Node_array_leaf);
+ leaf_copy(newsymb, old[j], new[j]);
+ }
+ }
+}
+
+
+/* tree_info --- print index, value info */
+
+static void
+tree_info(NODE *tree, NODE *ndump, const char *aname)
+{
+ NODE *tn;
+ size_t j, hsize;
+
+ hsize = tree->array_size;
+ if ((tree->flags & HALFHAT) != 0)
+ hsize /= 2;
+
+ for (j = 0; j < hsize; j++) {
+ tn = tree->nodes[j];
+ if (tn == NULL)
+ continue;
+ if (tn->type == Node_array_tree)
+ tree_info(tn, ndump, aname);
+ else
+ leaf_info(tn, ndump, aname);
+ }
+}
+
+
+/* tree_kilobytes --- calculate memory consumption of a HAT */
+
+static size_t
+tree_kilobytes(NODE *tree)
+{
+ NODE *tn;
+ size_t j, hsize;
+ size_t sz = 0;
+
+ hsize = tree->array_size;
+ if ((tree->flags & HALFHAT) != 0)
+ hsize /= 2;
+ for (j = 0; j < hsize; j++) {
+ tn = tree->nodes[j];
+ if (tn == NULL)
+ continue;
+ sz += sizeof(NODE); /* Node_array_tree or Node_array_leaf */
+ if (tn->type == Node_array_tree)
+ sz += tree_kilobytes(tn);
+ }
+ sz += hsize * sizeof(NODE *); /* tree->nodes */
+ return sz;
+}
+
+#ifdef ARRAYDEBUG
+
+/* tree_print --- print the HAT structures */
+
+static void
+tree_print(NODE *tree, size_t bi, int indent_level)
+{
+ NODE *tn;
+ size_t j, hsize;
+
+ indent(indent_level);
+
+ hsize = tree->array_size;
+ if ((tree->flags & HALFHAT) != 0)
+ hsize /= 2;
+ fprintf(output_fp, "%4lu:%s[%4lu:%-4lu]\n", bi,
+ (tree->flags & HALFHAT) ? "HH" : "H",
+ (unsigned long) hsize, (unsigned long) tree->table_size);
+
+ for (j = 0; j < hsize; j++) {
+ tn = tree->nodes[j];
+ if (tn == NULL)
+ continue;
+ if (tn->type == Node_array_tree)
+ tree_print(tn, j, indent_level + 1);
+ else
+ leaf_print(tn, j, indent_level + 1);
+ }
+}
+#endif
+
+/*--------------------- leaf (linear 1-D) array --------------------*/
+
+/* leaf_lookup --- find an integer subscript in the array; Install it if
+ it isn't there.
+*/
+
+static inline NODE **
+leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base)
+{
+ NODE **lhs;
+
+ if (array->nodes == NULL) {
+ array->table_size = 0; /* sanity */
+ array->array_size = size;
+ array->array_base = base;
+ emalloc(array->nodes, NODE **, size * sizeof(NODE *), "leaf_lookup");
+ memset(array->nodes, '\0', size * sizeof(NODE *));
+ symbol->array_capacity += size;
+ }
+
+ lhs = array->nodes + (k - base); /* leaf element */
+ if (*lhs == NULL) {
+ array->table_size++; /* one more element in leaf array */
+ *lhs = dupnode(Nnull_string);
+ }
+ return lhs;
+}
+
+
+/* leaf_exists --- check if the array contains an integer subscript */
+
+static inline NODE **
+leaf_exists(NODE *array, long k)
+{
+ NODE **lhs;
+ lhs = array->nodes + (k - array->array_base);
+ return (*lhs != NULL) ? lhs : NULL;
+}
+
+
+/* leaf_clear --- flush all values in the array */
+
+static void
+leaf_clear(NODE *array)
+{
+ long i, size = array->array_size;
+ NODE *r;
+
+ for (i = 0; i < size; i++) {
+ r = array->nodes[i];
+ if (r == NULL)
+ continue;
+ if (r->type == Node_var_array) {
+ assoc_clear(r); /* recursively clear all sub-arrays */
+ efree(r->vname);
+ freenode(r);
+ } else
+ unref(r);
+ }
+ efree(array->nodes);
+ array->nodes = NULL;
+ array->array_size = array->table_size = 0;
+}
+
+
+/* leaf_remove --- remove an integer subscript from the array */
+
+static int
+leaf_remove(NODE *symbol, NODE *array, long k)
+{
+ NODE **lhs;
+
+ lhs = array->nodes + (k - array->array_base);
+ if (*lhs == NULL)
+ return FALSE;
+ *lhs = NULL;
+ if (--array->table_size == 0) {
+ efree(array->nodes);
+ array->nodes = NULL;
+ symbol->array_capacity -= array->array_size;
+ array->array_size = 0; /* sanity */
+ }
+ return TRUE;
+}
+
+
+/* leaf_copy --- duplicate a leaf array */
+
+static void
+leaf_copy(NODE *newsymb, NODE *array, NODE *newarray)
+{
+ NODE **old, **new;
+ long size, i;
+
+ size = array->array_size;
+ emalloc(new, NODE **, size * sizeof(NODE *), "leaf_copy");
+ memset(new, '\0', size * sizeof(NODE *));
+ newarray->nodes = new;
+ newarray->array_size = size;
+ newarray->array_base = array->array_base;
+ newarray->flags = array->flags;
+ newarray->table_size = array->table_size;
+
+ old = array->nodes;
+ for (i = 0; i < size; i++) {
+ if (old[i] == NULL)
+ continue;
+ if (old[i]->type == Node_val)
+ new[i] = dupnode(old[i]);
+ else {
+ NODE *r;
+ r = make_array();
+ r->vname = estrdup(old[i]->vname, strlen(old[i]->vname));
+ r->parent_array = newsymb;
+ new[i] = assoc_copy(old[i], r);
+ }
+ }
+}
+
+
+/* leaf_list --- return a list of items */
+
+static long
+leaf_list(NODE *array, NODE **list, unsigned int flags)
+{
+ NODE *r, *subs;
+ long num, i, ci, k = 0;
+ long size = array->array_size;
+ static char buf[100];
+
+ for (i = 0; i < size; i++) {
+ ci = (flags & ADESC) ? (size - 1 - i) : i;
+ r = array->nodes[ci];
+ if (r == NULL)
+ continue;
+
+ /* index */
+ num = array->array_base + ci;
+ if (flags & AISTR) {
+ sprintf(buf, "%ld", num);
+ subs = make_string(buf, strlen(buf));
+ subs->numbr = num;
+ subs->flags |= (NUMCUR|NUMINT);
+ } else {
+ subs = make_number((AWKNUM) num);
+ subs->flags |= (INTIND|NUMINT);
+ }
+ list[k++] = subs;
+
+ /* value */
+ if (flags & AVALUE) {
+ if (r->type == Node_val) {
+ if ((flags & AVNUM) != 0)
+ (void) force_number(r);
+ else if ((flags & AVSTR) != 0)
+ r = force_string(r);
+ }
+ list[k++] = r;
+ }
+ if ((flags & ADELETE) != 0 && k >= 1)
+ return k;
+ }
+
+ return k;
+}
+
+
+/* leaf_info --- print index, value info */
+
+static void
+leaf_info(NODE *array, NODE *ndump, const char *aname)
+{
+ NODE *subs, *val;
+ size_t i, size;
+
+ size = array->array_size;
+
+ subs = make_number((AWKNUM) 0.0);
+ subs->flags |= (INTIND|NUMINT);
+ for (i = 0; i < size; i++) {
+ val = array->nodes[i];
+ if (val == NULL)
+ continue;
+ subs->numbr = array->array_base + i;
+ assoc_info(subs, val, ndump, aname);
+ }
+ unref(subs);
+}
+
+#ifdef ARRAYDEBUG
+
+/* leaf_print --- print the leaf-array structure */
+
+
+static void
+leaf_print(NODE *array, size_t bi, int indent_level)
+{
+ indent(indent_level);
+ fprintf(output_fp, "%4lu:L[%4lu:%-4lu]\n", bi,
+ (unsigned long) array->array_size,
+ (unsigned long) array->table_size);
+}
+#endif