summaryrefslogtreecommitdiffstats
path: root/mkid.c
diff options
context:
space:
mode:
Diffstat (limited to 'mkid.c')
-rw-r--r--mkid.c999
1 files changed, 0 insertions, 999 deletions
diff --git a/mkid.c b/mkid.c
deleted file mode 100644
index daee166..0000000
--- a/mkid.c
+++ /dev/null
@@ -1,999 +0,0 @@
-/* mkid.c -- build an identifer database
- Copyright (C) 1986, 1995 Greg McGary
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <unistd.h>
-#include <limits.h>
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <errno.h>
-
-#include <config.h>
-#include "strxtra.h"
-#include "alloc.h"
-#include "idfile.h"
-#include "token.h"
-#include "bitops.h"
-#include "misc.h"
-#include "filenames.h"
-#include "hash.h"
-#include "scanners.h"
-
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#endif
-
-struct summary
-{
- struct token **sum_tokens;
- unsigned char const *sum_hits;
- struct summary *sum_parent;
- union {
- struct summary *u_kids[8]; /* when sum_level > 0 */
- struct idarg *u_files[8]; /* when sum_level == 0 */
- } sum_u;
-#define sum_kids sum_u.u_kids
-#define sum_files sum_u.u_files
- unsigned long sum_tokens_size;
- unsigned long sum_hits_count;
- int sum_free_index;
- int sum_level;
-};
-
-#define MAX_LEVELS 5 /* log_8 of the max # of files: log_8(32768) == 5 */
-
-struct token
-{
- unsigned short tok_count;
- unsigned char tok_flags;
- unsigned char tok_hits[MAX_LEVELS];
- char tok_name[1];
-};
-
-char *bitsset __P((char *s1, char const *s2, int n));
-char *bitsclr __P((char *s1, char const *s2, int n));
-char *bitsand __P((char *s1, char const *s2, int n));
-char *bitsxor __P((char *s1, char const *s2, int n));
-int bitstst __P((char const *s1, char const *s2, int n));
-int bitsany __P((char const *s, int n));
-struct token *make_token __P((char const *name, int));
-void scan_1_file __P((char const *(*get_token) (FILE*, int*), FILE *source_FILE));
-struct idarg *parse_idargs __P((int argc, char **argv));
-struct idarg *parse_idargs_from_FILE __P((FILE *arg_FILE, struct idarg *idarg));
-void scan_files __P((struct idarg *idarg));
-void report_statistics __P((void));
-
-unsigned long token_hash_1 __P((void const *key));
-unsigned long token_hash_2 __P((void const *key));
-int token_hash_cmp __P((void const *x, void const *y));
-
-void write_idfile __P((char const *id_file, struct idarg *idargs));
-void bump_current_hits_signature __P((void));
-void init_hits_signature __P((int i));
-int bit_to_index __P((int bit));
-int token_qsort_cmp __P((void const *x, void const *y));
-void free_summary_tokens __P((void));
-void summarize __P((void));
-void assert_hits __P((struct summary *summary));
-void write_hits __P((FILE *fp, struct summary *summary, unsigned char const *tail_hits));
-void sign_token __P((struct token *token));
-void add_token_to_summary __P((struct summary *summary, struct token *token));
-void init_summary __P((void));
-struct summary *make_sibling_summary __P((struct summary *summary));
-int count_vec_size __P((struct summary *summary, unsigned char const *tail_hits));
-int count_buf_size __P((struct summary *summary, unsigned char const *tail_hits));
-void usage __P((void));
-
-struct hash_table token_table;
-struct hash_table file_table;
-
-/* Miscellaneous statistics */
-long input_chars;
-long name_tokens;
-long number_tokens;
-long string_tokens;
-long literal_tokens;
-long comment_tokens;
-long occurrences;
-long heap_size;
-long hits_length = 0;
-long tokens_length = 0;
-long output_length = 0;
-
-int verbose_flag = 0;
-int statistics_flag = 1;
-
-int args_count = 0; /* # of args to save */
-int scan_count = 0; /* # of files to scan */
-int file_name_count = 0; /* # of files in database */
-int levels = 0; /* ceil(log(8)) of file_name_count */
-
-unsigned char current_hits_signature[MAX_LEVELS];
-#define INIT_TOKENS_SIZE(level) (1 << ((level) + 13))
-struct summary *summary_root;
-struct summary *summary_leaf;
-
-char PWD_buf[BUFSIZ]; /* The current working directory */
-char absolute_idfile_name[BUFSIZ]; /* The absolute name of the database */
-char const *id_file_name = IDFILE;
-
-char const *program_name;
-
-void
-usage (void)
-{
- fprintf (stderr, "\
-Usage: %s [-v] [-f<idfile>] [(+|-)l[<lang>]] [(+|-)S<scanarg>] [-a<argfile>] [-] [files...]\n\
- -v Verbose: print reports of progress\n\
- -a<file> Open file for arguments\n\
- - Read newline-separated args from stdin\n\
- -l<lang> Force files to be scanned as <lang> until +l<lang>\n\
- -S<lang>-<arg> Pass arg to <lang> scanner\n\
- -S.<suffix>=<lang> Scan files with .<suffix> as <lang>\n\
- -S<lang>? Print usage documentation for <lang>\n\
-\n\
-Version %s",
- program_name, VERSION);
-#ifdef __DATE__
- fprintf (stderr, "; Made %s %s", __DATE__, __TIME__);
-#endif
- fputc ('\n', stderr);
- exit (1);
-}
-
-void *sbrk ();
-
-int
-main (int argc, char **argv)
-{
- struct idarg *idarg_0;
- char const *sbrk0;
-
- program_name = basename ((argc--, *argv++));
- init_scanners ();
-
- idarg_0 = parse_idargs (argc, argv);
- if (idarg_0 == NULL)
- {
- fprintf (stderr, "Nothing to do...\n");
- return 0;
- }
-
- sbrk0 = (char const *) sbrk (0);
- hash_init (&token_table, scan_count * 64, token_hash_1, token_hash_2, token_hash_cmp);
-
- get_PWD (PWD_buf);
- strcpy (absolute_idfile_name, span_file_name (PWD_buf, id_file_name));
- if (access (id_file_name, 06) < 0
- && (errno != ENOENT || access (dirname (id_file_name), 06) < 0))
- {
- filerr ("modify", id_file_name);
- return 1;
- }
-
- init_hits_signature (0);
- init_summary ();
-
- scan_files (idarg_0);
-
- if (token_table.ht_fill == 0)
- return 0;
-
- free_summary_tokens ();
- free (token_table.ht_vec);
-
- write_idfile (id_file_name, idarg_0);
- heap_size = (char const *) sbrk (0) - sbrk0;
-
- if (statistics_flag)
- report_statistics ();
- return 0;
-}
-
-void
-scan_files (struct idarg *idarg)
-{
- int keep_lang = 0;
-
- for ( ; idarg->ida_next; idarg = idarg->ida_next)
- {
- char const *(*scanner) __P((FILE*, int*));
- FILE *source_FILE;
- char *arg = idarg->ida_arg;
- char const *lang_name = NULL;
- char const *suff;
- char const *filter;
-
- if (idarg->ida_index < 0)
- {
- int op = *arg++;
- switch (*arg++)
- {
- case 'l':
- if (*arg == '\0')
- {
- keep_lang = 0;
- lang_name = NULL;
- break;
- }
- if (op == '+')
- keep_lang = 1;
- lang_name = arg;
- break;
- case 'S':
- set_scan_args (op, strdup (arg));
- break;
- default:
- usage ();
- }
- continue;
- }
- if (!(idarg->ida_flags & IDA_SCAN_ME))
- goto skip;
-
- suff = strrchr (arg, '.');
- if (lang_name == NULL)
- {
- if (suff == NULL)
- suff = "";
- lang_name = get_lang_name (suff);
- if (lang_name == NULL)
- lang_name = get_lang_name ("");
- if (lang_name == NULL)
- {
- fprintf (stderr, "%s: No language assigned to suffix: `%s'\n", program_name, suff);
- goto skip;
- }
- }
- scanner = get_scanner (lang_name);
- if (scanner == NULL)
- {
- fprintf (stderr, "%s: No scanner for language: `%s'\n", program_name, lang_name);
- goto skip;
- }
- filter = get_filter (suff);
- source_FILE = open_source_FILE (arg, filter);
- if (source_FILE == NULL)
- goto skip;
- if (verbose_flag)
- {
- printf ("%s: ", lang_name);
- printf (filter ? filter : "%s", arg);
- fflush (stdout);
- }
- scan_1_file (scanner, source_FILE);
- if (verbose_flag)
- putchar ('\n');
- close_source_FILE (source_FILE, filter);
- skip:
- if (!keep_lang)
- lang_name = NULL;
- if (idarg->ida_index < file_name_count)
- {
- if (current_hits_signature[0] & 0x80)
- summarize ();
- bump_current_hits_signature ();
- }
- }
-}
-
-void
-report_statistics (void)
-{
- printf ("Name=%ld, ", name_tokens);
- printf ("Number=%ld, ", number_tokens);
- printf ("String=%ld, ", string_tokens);
- printf ("Literal=%ld, ", literal_tokens);
- printf ("Comment=%ld\n", comment_tokens);
-
- printf ("Files=%d, ", scan_count);
- printf ("Tokens=%ld, ", occurrences);
- printf ("Bytes=%ld Kb, ", input_chars / 1024);
- printf ("Heap=%ld Kb, ", heap_size / 1024);
- printf ("Output=%ld (%ld tok, %ld hit)\n", output_length, tokens_length, hits_length);
-
- printf ("Load=%ld/%ld=%.2f, ", token_table.ht_fill, token_table.ht_size,
- (double) token_table.ht_fill / (double) token_table.ht_size);
- printf ("Rehash=%d, ", token_table.ht_rehashes);
- printf ("Probes=%ld/%ld=%.2f, ", token_table.ht_probes, token_table.ht_lookups,
- (double) token_table.ht_probes / (double) token_table.ht_lookups);
- printf ("Freq=%ld/%ld=%.2f\n", occurrences, token_table.ht_fill,
- (double) occurrences / (double) token_table.ht_fill);
-}
-
-struct idarg *
-parse_idargs (int argc, char **argv)
-{
- struct idarg *idarg;
- struct idarg *idarg_0;
- char *arg;
- int op;
- FILE *arg_FILE = NULL;
- int args_from = 0;
- enum {
- AF_CMDLINE = 0x1, /* file args came on command line */
- AF_FILE = 0x2, /* file args came from a file (-f<file>) */
- AF_USAGE = 0x8
- }; /* no file args necessary: usage query */
-
- idarg = idarg_0 = CALLOC (struct idarg, 1);
-
- /* Process some arguments, and snarf-up some others for processing
- later. */
- while (argc)
- {
- arg = (argc--, *argv++);
- if (*arg != '-' && *arg != '+')
- {
- /* arguments are from command line (not pipe) */
- args_from |= AF_CMDLINE;
- idarg->ida_arg = arg;
- idarg->ida_flags = IDA_SCAN_ME;
- idarg->ida_index = file_name_count++;
- scan_count++;
- idarg = (idarg->ida_next = CALLOC (struct idarg, 1));
-
- continue;
- }
- op = *arg++;
- switch (*arg++)
- {
- case '\0':
- args_from |= AF_FILE;
- idarg = parse_idargs_from_FILE (stdin, idarg);
- break;
- case 'a':
- arg_FILE = fopen (arg, "r");
- if (arg_FILE == NULL)
- filerr ("open", arg);
- else
- {
- args_from |= AF_FILE;
- idarg = parse_idargs_from_FILE (arg_FILE, idarg);
- }
- break;
- case 'f':
- id_file_name = arg;
- break;
- case 'v':
- verbose_flag = 1;
- break;
- case 'S':
- if (strchr (&arg[-2], '?'))
- {
- set_scan_args (op, arg);
- args_from |= AF_USAGE;
- }
- /*FALLTHROUGH */
- case 'l':
- idarg->ida_arg = &arg[-2];
- idarg->ida_index = -1;
- idarg = (idarg->ida_next = CALLOC (struct idarg, 1));
-
- args_count++;
- break;
- default:
- usage ();
- }
- }
-
- if (args_from & AF_USAGE)
- exit (0);
- /* File args should only come from one place. Ding the user if
- arguments came from multiple places, or if none were supplied at
- all. */
- switch (args_from)
- {
- case AF_CMDLINE:
- case AF_FILE:
- if (file_name_count > 0)
- break;
- /*FALLTHROUGH */
- case 0:
- fprintf (stderr, "%s: Use -u, -f<file>, or cmd-line for file args!\n", program_name);
- usage ();
- default:
- fprintf (stderr, "%s: Use only one of: -u, -f<file>, or cmd-line for file args!\n", program_name);
- usage ();
- }
-
- if (scan_count == 0)
- return NULL;
-
- return idarg_0;
-}
-
-
-/* Cons up a list of idarg as supplied in a file. */
-struct idarg *
-parse_idargs_from_FILE (FILE *arg_FILE, struct idarg *idarg)
-{
- int file_count;
- char buf[BUFSIZ];
- char *arg;
-
- file_count = 0;
- while (fgets (buf, sizeof (buf), arg_FILE))
- {
- idarg->ida_arg = arg = strndup (buf, strlen (buf) - 1);
- if (*arg == '+' || *arg == '-')
- idarg->ida_index = -1;
- else
- {
- idarg->ida_flags = IDA_SCAN_ME;
- idarg->ida_index = file_name_count++;
- scan_count++;
- }
- idarg = idarg->ida_next = CALLOC (struct idarg, 1);
- }
- return idarg;
-}
-
-void
-scan_1_file (get_token_t get_token, FILE *source_FILE)
-{
- struct stat stat_buf;
- struct token **slot;
- char const *key;
- int bytes = 0;
- int total_tokens = 0;
- int new_tokens = 0;
- int distinct_tokens = 0;
- int flags;
- struct token *token;
-
- if (fstat (fileno (source_FILE), &stat_buf) == 0)
- {
- bytes = stat_buf.st_size;
- input_chars += bytes;
- }
-
- while ((key = (*get_token) (source_FILE, &flags)) != NULL)
- {
- if (*key == '\0')
- continue;
- total_tokens++;
- slot = (struct token **) hash_lookup (&token_table, key - offsetof (struct token, tok_name));
- token = *slot;
- if (token)
- {
- token->tok_flags |= flags;
- if (token->tok_count < USHRT_MAX)
- token->tok_count++;
- if (!(token->tok_hits[0] & current_hits_signature[0]))
- {
- sign_token (token);
- distinct_tokens++;
- }
- } else {
- *slot = token = make_token (key, flags);
- sign_token (token);
- distinct_tokens++;
- new_tokens++;
- if (token_table.ht_fill++ >= token_table.ht_capacity)
- rehash (&token_table);
- }
- }
- if (verbose_flag)
- {
- printf (" uniq=%d/%d", distinct_tokens, total_tokens);
- if (total_tokens != 0)
- printf ("=%.2f", (double) distinct_tokens / (double) total_tokens);
- printf (", new=%d/%d", new_tokens, distinct_tokens);
- if (distinct_tokens != 0)
- printf ("=%.2f", (double) new_tokens / (double) distinct_tokens);
- }
-}
-
-/* As the database is written, may need to adjust the file names. If
- we are generating the ID file in a remote directory, then adjust
- the file names to be relative to the location of the ID database.
-
- (This would be a common useage if you want to make a database for a
- directory which you have no write access to, so you cannot create
- the ID file.) */
-void
-write_idfile (char const *file_name, struct idarg *idarg)
-{
- struct token **tokens;
- int i;
- FILE *id_FILE;
- struct idhead idh;
- int fixup_names;
- char *lsl;
- int buf_size;
- int vec_size;
- int tok_size;
- int max_buf_size = 0;
- int max_vec_size = 0;
-
- if (verbose_flag)
- printf ("Sorting tokens...\n");
- assert (summary_root->sum_hits_count == token_table.ht_fill);
- tokens = REALLOC (summary_root->sum_tokens, struct token *, token_table.ht_fill);
- qsort (tokens, token_table.ht_fill, sizeof (struct token *), token_qsort_cmp);
-
- if (verbose_flag)
- printf ("Writing `%s'...\n", file_name);
- lsl = strrchr (relative_file_name (PWD_buf, absolute_idfile_name), '/');
- if (lsl == NULL)
- {
- /* The database is in the cwd, don't adjust the names */
- fixup_names = 0;
- }
- else
- {
- /* The database is not in cwd, adjust names so they are relative
- to the location of the database, make absolute_idfile_name just be the
- directory path to ID. */
- fixup_names = 1;
- *(lsl + 1) = '\0';
- }
- id_FILE = fopen (file_name, "w+b");
- if (id_FILE == NULL)
- {
- filerr ("create", file_name);
- exit (1);
- }
- idh.idh_magic[0] = IDH_MAGIC_0;
- idh.idh_magic[1] = IDH_MAGIC_1;
- idh.idh_version = IDH_VERSION;
- idh.idh_flags = IDH_COUNTS;
-
- /* write out the list of pathnames */
- fseek (id_FILE, sizeof_idhead (), 0);
- idh.idh_args_offset = ftell (id_FILE);
- for ( ; idarg->ida_next; idarg = idarg->ida_next)
- {
- if (*idarg->ida_arg != '-' && fixup_names)
- fputs (relative_file_name (absolute_idfile_name, span_file_name (PWD_buf, idarg->ida_arg)), id_FILE);
- else
- fputs (idarg->ida_arg, id_FILE);
- putc ('\0', id_FILE);
- }
- idh.idh_files = file_name_count;
-
- /* write out the list of identifiers */
-
- putc ('\0', id_FILE);
- putc ('\0', id_FILE);
- idh.idh_tokens_offset = ftell (id_FILE);
-
- for (i = 0; i < token_table.ht_fill; i++, tokens++)
- {
- struct token *token = *tokens;
- occurrences += token->tok_count;
- if (token->tok_flags & TOK_NUMBER)
- number_tokens++;
- if (token->tok_flags & TOK_NAME)
- name_tokens++;
- if (token->tok_flags & TOK_STRING)
- string_tokens++;
- if (token->tok_flags & TOK_LITERAL)
- literal_tokens++;
- if (token->tok_flags & TOK_COMMENT)
- comment_tokens++;
-
- fputs (token->tok_name, id_FILE);
- putc ('\0', id_FILE);
- if (token->tok_count > 0xff)
- token->tok_flags |= TOK_SHORT_COUNT;
- putc (token->tok_flags, id_FILE);
- putc (token->tok_count & 0xff, id_FILE);
- if (token->tok_flags & TOK_SHORT_COUNT)
- putc (token->tok_count >> 8, id_FILE);
-
- vec_size = count_vec_size (summary_root, token->tok_hits + levels);
- buf_size = count_buf_size (summary_root, token->tok_hits + levels);
- hits_length += buf_size;
- tok_size = strlen (token->tok_name) + 1;
- tokens_length += tok_size;
- buf_size += tok_size + sizeof (token->tok_flags) + sizeof (token->tok_count) + 2;
- if (buf_size > max_buf_size)
- max_buf_size = buf_size;
- if (vec_size > max_vec_size)
- max_vec_size = vec_size;
-
- write_hits (id_FILE, summary_root, token->tok_hits + levels);
- putc ('\0', id_FILE);
- putc ('\0', id_FILE);
- }
- assert_hits (summary_root);
- idh.idh_tokens = token_table.ht_fill;
- output_length = ftell (id_FILE);
- idh.idh_end_offset = output_length - 2;
- idh.idh_buf_size = max_buf_size;
- idh.idh_vec_size = max_vec_size;
-
- write_idhead (id_FILE, &idh);
- fclose (id_FILE);
-}
-
-unsigned long
-token_hash_1 (void const *key)
-{
- return_STRING_HASH_1 (((struct token const *) key)->tok_name);
-}
-
-unsigned long
-token_hash_2 (void const *key)
-{
- return_STRING_HASH_2 (((struct token const *) key)->tok_name);
-}
-
-int
-token_hash_cmp (void const *x, void const *y)
-{
- return_STRING_COMPARE (((struct token const *) x)->tok_name,
- ((struct token const *) y)->tok_name);
-}
-
-int
-token_qsort_cmp (void const *x, void const *y)
-{
- return_STRING_COMPARE ((*(struct token const *const *) x)->tok_name,
- (*(struct token const *const *) y)->tok_name);
-}
-
-struct token *
-make_token (char const *name, int flags)
-{
- struct token *token = (struct token *) malloc (sizeof (struct token) + strlen (name));
-
- if (!token)
- {
- fprintf (stderr, "malloc failure! \n");
- exit (1);
- }
- token->tok_count = 1;
- token->tok_flags = flags;
- memset (token->tok_hits, 0, sizeof (token->tok_hits));
- strcpy (token->tok_name, name);
-
- return token;
-}
-
-/* ///////////// summary stuff //////////////////////////////////////////// */
-
-void
-bump_current_hits_signature (void)
-{
- unsigned char *hits = current_hits_signature;
- while (*hits & 0x80)
- *hits++ = 1;
- *hits <<= 1;
-}
-
-void
-init_hits_signature (int i)
-{
- unsigned char *hits = current_hits_signature;
- unsigned char const *end = &current_hits_signature[MAX_LEVELS];
- while (hits < end)
- {
- *hits = 1 << (i & 7);
- i >>= 3;
- hits++;
- }
-}
-
-int
-bit_to_index (int bit)
-{
- int i = 0;
- while (bit >>= 1)
- i++;
- return i;
-}
-
-void
-free_summary_tokens (void)
-{
- struct summary *summary = summary_leaf;
- while (summary != summary_root)
- {
- free (summary->sum_tokens);
- summary = summary->sum_parent;
- }
-}
-
-void
-summarize (void)
-{
- unsigned char const *hits_sig = current_hits_signature;
- struct summary *summary = summary_leaf;
-
- do
- {
- unsigned long count = summary->sum_hits_count;
- unsigned char *hits = MALLOC (unsigned char, count + 1);
- unsigned int level = summary->sum_level;
- struct token **tokens = summary->sum_tokens;
- unsigned long init_size = INIT_TOKENS_SIZE (summary->sum_level);
-
- if (verbose_flag)
- {
- char const *fmt;
- if (count < init_size / 2)
- fmt = "level %d: %ld < %ld/2\n";
- else if (count > init_size * 2)
- fmt = "level %d: %ld > %ld*2\n";
- else if (count < init_size)
- fmt = "level %d: %ld < %ld\n";
- else if (count > init_size)
- fmt = "level %d: %ld > %ld\n";
- else
- fmt = "level %d: %ld == %ld\n";
- printf (fmt, summary->sum_level, count, init_size);
- }
-
- qsort (tokens, count, sizeof (struct token *), token_qsort_cmp);
- summary->sum_hits = hits;
- while (count--)
- {
- unsigned char *hit = &(*tokens++)->tok_hits[level];
- *hits++ = *hit;
- *hit = 0;
- }
- *hits++ = 0;
- if (summary->sum_parent)
- {
- free (summary->sum_tokens);
- summary->sum_tokens = 0;
- }
- summary = summary->sum_parent;
- }
- while (*++hits_sig & 0x80);
- summary_leaf = make_sibling_summary (summary_leaf);
-}
-
-void
-init_summary (void)
-{
- unsigned long size = INIT_TOKENS_SIZE (0);
- summary_root = summary_leaf = CALLOC (struct summary, 1);
- summary_root->sum_tokens_size = size;
- summary_root->sum_tokens = MALLOC (struct token *, size);
-}
-
-struct summary *
-make_sibling_summary (struct summary *summary)
-{
- struct summary *parent = summary->sum_parent;
- unsigned long size;
-
- if (parent == NULL)
- {
- levels++;
- summary_root = summary->sum_parent = parent = CALLOC (struct summary, 1);
- parent->sum_level = levels;
- parent->sum_kids[0] = summary;
- parent->sum_hits_count = summary->sum_hits_count;
- parent->sum_free_index = 1;
- size = INIT_TOKENS_SIZE (levels);
- if (summary->sum_tokens_size >= size)
- {
- parent->sum_tokens_size = summary->sum_tokens_size;
- parent->sum_tokens = summary->sum_tokens;
- }
- else
- {
- parent->sum_tokens_size = size;
- parent->sum_tokens = REALLOC (summary->sum_tokens, struct token *, size);
- }
- summary->sum_tokens = 0;
- }
- if (parent->sum_free_index == 8)
- parent = make_sibling_summary (parent);
- summary = CALLOC (struct summary, 1);
- summary->sum_level = parent->sum_level - 1;
- parent->sum_kids[parent->sum_free_index++] = summary;
- summary->sum_parent = parent;
- size = INIT_TOKENS_SIZE (summary->sum_level);
- summary->sum_tokens_size = size;
- summary->sum_tokens = MALLOC (struct token *, size);
- return summary;
-}
-
-int
-count_vec_size (struct summary *summary, unsigned char const *tail_hits)
-{
- struct summary **kids;
- unsigned int hits = (summary->sum_hits ? *summary->sum_hits : *tail_hits);
-
- kids = summary->sum_kids;
- if (*kids == NULL)
- {
- static char bits_per_nybble[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 };
- return bits_per_nybble[hits & 0xf] + bits_per_nybble[hits >> 4];
- }
- else
- {
- int bit;
- int count = 0;
- --tail_hits;
- for (bit = 1; bit & 0xff; bit <<= 1, ++kids)
- if (bit & hits)
- count += count_vec_size (*kids, tail_hits);
- return count;
- }
-}
-
-int
-count_buf_size (struct summary *summary, unsigned char const *tail_hits)
-{
- struct summary **kids;
- unsigned int hits = (summary->sum_hits ? *summary->sum_hits : *tail_hits);
-
- kids = summary->sum_kids;
- if (*kids == NULL)
- return 1;
- else
- {
- int bit;
- int count = 1;
- --tail_hits;
- for (bit = 1; bit & 0xff; bit <<= 1, ++kids)
- if (bit & hits)
- count += count_buf_size (*kids, tail_hits);
- return count;
- }
-}
-
-void
-assert_hits (struct summary* summary)
-{
- struct summary **kids = summary->sum_kids;
- struct summary **end = &kids[8];
-
- assert (summary->sum_hits == NULL || *summary->sum_hits == 0);
-
- if (end[-1] == 0)
- while (*--end == 0)
- ;
- while (kids < end)
- assert_hits (*kids++);
-}
-
-void
-write_hits (FILE *fp, struct summary *summary, unsigned char const *tail_hits)
-{
- struct summary **kids;
- unsigned int hits = (summary->sum_hits ? *summary->sum_hits++ : *tail_hits);
-
- assert (hits);
- putc (hits, fp);
-
- kids = summary->sum_kids;
- if (*kids)
- {
- int bit;
- --tail_hits;
- for (bit = 1; (bit & 0xff) && *kids; bit <<= 1, ++kids)
- if (bit & hits)
- write_hits (fp, *kids, tail_hits);
- }
-}
-
-void
-sign_token (struct token *token)
-{
- unsigned char *tok_hits = token->tok_hits;
- unsigned char *hits_sig = current_hits_signature;
- unsigned char *end = &current_hits_signature[MAX_LEVELS];
- struct summary *summary = summary_leaf;
-
- while (summary)
- {
- if (*tok_hits == 0)
- add_token_to_summary (summary, token);
- if (*tok_hits & *hits_sig)
- break;
- *tok_hits |= *hits_sig;
- summary = summary->sum_parent;
- tok_hits++;
- hits_sig++;
- }
- while (hits_sig < end)
- {
- if (*tok_hits & *hits_sig)
- break;
- *tok_hits |= *hits_sig;
- tok_hits++;
- hits_sig++;
- }
-}
-
-void
-add_token_to_summary (struct summary *summary, struct token *token)
-{
- unsigned long size = summary->sum_tokens_size;
-
- if (summary->sum_hits_count >= size)
- {
- size *= 2;
- summary->sum_tokens = REALLOC (summary->sum_tokens, struct token *, size);
- summary->sum_tokens_size = size;
- }
- summary->sum_tokens[summary->sum_hits_count++] = token;
-}
-
-int
-bitsany (char const *s, int n)
-{
- while (n--)
- if (*s++)
- return 1;
-
- return 0;
-}
-
-char *
-bitsset (char *s1, char const *s2, int n)
-{
- while (n--)
- *s1++ |= *s2++;
-
- return s1;
-}
-
-char *
-bitsclr (char *s1, char const *s2, int n)
-{
- while (n--)
- *s1++ &= ~*s2++;
-
- return s1;
-}
-
-#if 0
-
-char *
-bitsand (char *s1, char const *s2, int n)
-{
- while (n--)
- *s1++ &= *s2++;
-
- return s1;
-}
-
-char *
-bitsxor (char *s1, char const *s2, int n)
-{
- while (n--)
- *s1++ ^= *s2++;
-
- return s1;
-}
-
-int
-bitstst (char const *s1, char const *s2, int n)
-{
- while (n--)
- if (*s1++ & *s2++)
- return 1;
-
- return 0;
-}
-
-#endif