summaryrefslogtreecommitdiffstats
path: root/mkid.c
diff options
context:
space:
mode:
authorGreg McGary <greg@mcgary.org>1997-04-18 06:37:07 +0000
committerGreg McGary <greg@mcgary.org>1997-04-18 06:37:07 +0000
commitc0e228864da1b2fbfc85637b5524c4c90ab62fce (patch)
treec30081f0af75ba51194bd672180690d6524b0448 /mkid.c
parentad89a5d46547cb45b918926269ca2883eccf5e23 (diff)
downloadidutils-c0e228864da1b2fbfc85637b5524c4c90ab62fce.tar.gz
idutils-c0e228864da1b2fbfc85637b5524c4c90ab62fce.tar.bz2
idutils-c0e228864da1b2fbfc85637b5524c4c90ab62fce.zip
imported from mkid-3.0.7r3_0_7
Diffstat (limited to 'mkid.c')
-rw-r--r--mkid.c52
1 files changed, 31 insertions, 21 deletions
diff --git a/mkid.c b/mkid.c
index 836a2fb..d9d6078 100644
--- a/mkid.c
+++ b/mkid.c
@@ -286,6 +286,7 @@ scan_files (struct idarg *idarg)
{
printf ("%s: ", lang_name);
printf (filter ? filter : "%s", arg);
+ fflush (stdout);
}
scan_1_file (scanner, source_FILE);
if (verbose_flag)
@@ -476,6 +477,7 @@ scan_1_file (char const *(*get_token) (FILE*, int*), FILE *source_FILE)
int new_tokens = 0;
int distinct_tokens = 0;
int flags;
+ struct token *token;
if (fstat (fileno (source_FILE), &stat_buf) == 0)
{
@@ -485,8 +487,9 @@ scan_1_file (char const *(*get_token) (FILE*, int*), FILE *source_FILE)
while ((key = (*get_token) (source_FILE, &flags)) != NULL)
{
- struct token *token = *(slot = hash_lookup (key));
-
+ if (*key == '\0')
+ continue;
+ token = *(slot = hash_lookup (key));
total_tokens++;
if (token)
{
@@ -508,11 +511,14 @@ scan_1_file (char const *(*get_token) (FILE*, int*), FILE *source_FILE)
}
}
if (verbose_flag)
- printf (" uniq=%d/%d=%.2f, new=%d/%d=%.2f",
- distinct_tokens, total_tokens,
- (double) distinct_tokens / (double) total_tokens,
- new_tokens, distinct_tokens,
- (double) new_tokens / (double) distinct_tokens);
+ {
+ printf (" uniq=%d/%d", distinct_tokens, total_tokens);
+ if (total_tokens != 0)
+ printf ("=%.2f", (double) distinct_tokens / (double) total_tokens);
+ printf (", new=%d/%d", new_tokens, distinct_tokens);
+ if (distinct_tokens != 0)
+ printf ("=%.2f", (double) new_tokens / (double) distinct_tokens);
+ }
}
/* As the database is written, may need to adjust the file names. If
@@ -539,6 +545,12 @@ write_idfile (char const *file_name, struct idarg *idarg)
int max_vec_size = 0;
if (verbose_flag)
+ printf ("Sorting tokens...\n");
+ assert (summary_root->sum_hits_count == hash_fill);
+ tokens = REALLOC (summary_root->sum_tokens, struct token *, hash_fill);
+ qsort (tokens, hash_fill, sizeof (struct token *), compare_tokens);
+
+ if (verbose_flag)
printf ("Writing `%s'...\n", file_name);
lsl = strrchr (relative_file_name (PWD_name, absolute_idfile_name), '/');
if (lsl == NULL)
@@ -593,19 +605,9 @@ write_idfile (char const *file_name, struct idarg *idarg)
putc ('\0', id_FILE);
idh.idh_tokens_offset = ftell (id_FILE);
- assert (summary_root->sum_hits_count == hash_fill);
- tokens = REALLOC (summary_root->sum_tokens, struct token *, hash_fill);
- qsort (tokens, hash_fill, sizeof (struct token *), compare_tokens);
for (i = 0; i < hash_fill; i++, tokens++)
{
struct token *token = *tokens;
- if (*token->tok_name == '\0')
- {
- fprintf (stderr, "Blank token!\n");
- hash_fill--;
- i--;
- continue;
- }
occurrences += token->tok_count;
if (token->tok_flags & TOK_NUMBER)
number_tokens++;
@@ -732,7 +734,7 @@ rehash (void)
hash_size *= 2;
if (verbose_flag)
- printf ("Rehashing... (doubling size to %ld)\n", hash_size);
+ printf ("\n\tRehashing... (doubling size to %ld)\n", hash_size);
hash_rehashes++;
hash_capacity = hash_size - (hash_size >> 4);
hash_table = CALLOC (struct token *, hash_size);
@@ -905,14 +907,22 @@ make_sibling_summary (struct summary *summary)
if (parent == NULL)
{
levels++;
- size = INIT_TOKENS_SIZE (levels);
summary_root = summary->sum_parent = parent = CALLOC (struct summary, 1);
parent->sum_level = levels;
parent->sum_kids[0] = summary;
parent->sum_hits_count = summary->sum_hits_count;
parent->sum_free_index = 1;
- parent->sum_tokens_size = size;
- parent->sum_tokens = REALLOC (summary->sum_tokens, struct token *, size);
+ size = INIT_TOKENS_SIZE (levels);
+ if (summary->sum_tokens_size >= size)
+ {
+ parent->sum_tokens_size = summary->sum_tokens_size;
+ parent->sum_tokens = summary->sum_tokens;
+ }
+ else
+ {
+ parent->sum_tokens_size = size;
+ parent->sum_tokens = REALLOC (summary->sum_tokens, struct token *, size);
+ }
summary->sum_tokens = 0;
}
if (parent->sum_free_index == 8)