/* bucket.c - The routines for playing with hash buckets. */

/* This file is part of GDBM, the GNU data base manager, by Philip A. Nelson.
   Copyright (C) 1990, 1991 Free Software Foundation, Inc.

   GDBM is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 1, or (at your option)
   any later version.

   GDBM is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GDBM; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

   You may contact the author by:
       e-mail:  phil@cs.wwu.edu
      us-mail:  Philip A. Nelson
                Computer Science Department
                Western Washington University
                Bellingham, WA 98226
       phone:   (206) 676-3035

*************************************************************************/


#include "gdbmdefs.h"

/* Initializing a new hash bucket sets all bucket entries to a hash value
   of -1. */
void
_gdbm_new_bucket (dbf, bucket, bits)
     gdbm_file_info *dbf;
     hash_bucket *bucket;
     int bits;
{
  int index;

  /* Initialize the avail block. */
  bucket->av_count = 0;

  /* Set the information fields first. */
  bucket->bucket_bits = bits;
  bucket->count = 0;

  /* Initialize all bucket elements. */
  for (index = 0; index < dbf->header->bucket_elems; index++)
    bucket->h_table[index].hash_value = -1;
}


/* Find a bucket for DBF that is pointed to by the bucket directory from
   location DIR_INDEX.  The bucket cache is first checked to see if it
   is already in memory.  If not, a cached bucket may be tossed out to make
   room for the new one.  In any case, the requested bucket is made the
   "current" bucket and dbf->bucket points to the correct bucket. */
void
_gdbm_get_bucket (dbf, dir_index)
     gdbm_file_info *dbf;
     long dir_index;
{
  long bucket_adr;      /* The address of the correct hash bucket. */
  int  num_bytes;       /* The number of bytes read. */
  long file_pos;        /* The return value for lseek. */
  int  index;           /* Loop index. */

  /* Initial set up. */
  dbf->bucket_dir = dir_index;
  bucket_adr = dbf->dir [dir_index];

  /* If that one is not already current, we must find it. */
  if (dbf->cache_entry->ca_adr != bucket_adr)
    {
      /* Look in the cache. */
      for (index = 0; index < CACHE_SIZE; index++)
        if (dbf->bucket_cache[index].ca_adr == bucket_adr)
          {
            dbf->bucket = dbf->bucket_cache[index].ca_bucket;
            dbf->cache_entry = &dbf->bucket_cache[index];
            return;
          }

      /* It is not in the cache, read it from the disk. */
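      /* Claim the next cache slot in simple round-robin order and point
         dbf->bucket at that slot's in-memory buffer before reading. */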
      dbf->last_read = (dbf->last_read + 1) % CACHE_SIZE;
      dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr;
      dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket;
      dbf->cache_entry = &dbf->bucket_cache[dbf->last_read];
      dbf->cache_entry->ca_data.elem_loc = -1;
      dbf->cache_entry->ca_changed = FALSE;

      /* Read the bucket. */
      file_pos = lseek (dbf->desc, bucket_adr, L_SET);
      if (file_pos != bucket_adr)
        _gdbm_fatal (dbf, "lseek error");

      num_bytes = read (dbf->desc, dbf->bucket, dbf->header->bucket_size);
      if (num_bytes != dbf->header->bucket_size)
        _gdbm_fatal (dbf, "read error");
    }

  return;
}


/* Split the current bucket.  This includes moving all items in the bucket to
   a new bucket.  This doesn't require any disk reads because all hash values
   are stored in the buckets.  Splitting the current bucket may require
   doubling the size of the hash directory. */
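/* For example, a full bucket using 2 bits of the hash splits into two
   buckets using 3 bits each: every element moves to new bucket 0 or 1
   according to the third most significant bit of its 31-bit hash value.
   If the directory itself only resolves 2 bits, it is doubled first so
   that two adjacent directory entries can point to the two new buckets. */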
void
_gdbm_split_bucket (dbf, next_insert)
     gdbm_file_info *dbf;
     long next_insert;
{
  hash_bucket *bucket[2];       /* Pointers to the new buckets. */

  int new_bits;                 /* The number of bits for the new buckets. */
  int cache_0;                  /* Location in the cache for the buckets. */
  int cache_1;
  long adr_0;                   /* File address of the new bucket 0. */
  long adr_1;                   /* File address of the new bucket 1. */
  avail_elem old_bucket;        /* Avail struct for the old bucket. */

  long dir_start0;              /* Used in updating the directory. */
  long dir_start1;
  long dir_end;

  long *new_dir;                /* Pointer to the new directory. */
  long dir_adr;                 /* Address of the new directory. */
  long dir_size;                /* Size of the new directory. */
  long old_adr[31];             /* Addresses of the old directories. */
  long old_size[31];            /* Sizes of the old directories. */
  int old_count;                /* Number of old directories. */

  int index;                    /* Used in array indexing. */
  int index1;                   /* Used in array indexing. */
  int elem_loc;                 /* Location in new bucket to put element. */
  bucket_element *old_el;       /* Pointer into the old bucket. */
  int select;                   /* Used to index bucket during movement. */


  /* No directories are yet old. */
  old_count = 0;

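  /* Keep splitting while the bucket that will receive the next insert is
     still full.  Each pass may double the directory; every superseded
     directory is freed at the end of this routine. */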
  while (dbf->bucket->count == dbf->header->bucket_elems)
    {
      /* Initialize the "new" buckets in the cache. */
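      /* Pick two cache slots to hold the new buckets: advance last_read
         past any slot whose buffer is the bucket being split, and flush a
         slot's previous contents if they were modified. */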
      do
        {
          dbf->last_read = (dbf->last_read + 1) % CACHE_SIZE;
          cache_0 = dbf->last_read;
        }
      while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
      bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
      if (dbf->bucket_cache[cache_0].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
      do
        {
          dbf->last_read = (dbf->last_read + 1) % CACHE_SIZE;
          cache_1 = dbf->last_read;
        }
      while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
      bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
      if (dbf->bucket_cache[cache_1].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
      new_bits = dbf->bucket->bucket_bits + 1;
      _gdbm_new_bucket (dbf, bucket[0], new_bits);
      _gdbm_new_bucket (dbf, bucket[1], new_bits);
      adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_0].ca_adr = adr_0;
      adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_1].ca_adr = adr_1;


      /* Double the directory size if necessary. */
      if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
        {
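          /* Each old directory entry is duplicated into an adjacent pair in
             the new directory, so every pair still points at the same
             bucket; the pair for the bucket being split is updated below. */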
          dir_size = dbf->header->dir_size * 2;
          dir_adr = _gdbm_alloc (dbf, dir_size);
          new_dir = (long *) malloc (dir_size);
          if (new_dir == NULL) _gdbm_fatal (dbf, "malloc error");
          for (index = 0; index < dbf->header->dir_size / sizeof (long); index++)
            {
              new_dir[2*index]   = dbf->dir[index];
              new_dir[2*index+1] = dbf->dir[index];
            }

          /* Update header. */
          old_adr[old_count] = dbf->header->dir;
          dbf->header->dir = dir_adr;
          old_size[old_count] = dbf->header->dir_size;
          dbf->header->dir_size = dir_size;
          dbf->header->dir_bits = new_bits;
          old_count++;

          /* Now update dbf. */
          dbf->header_changed = TRUE;
          dbf->bucket_dir *= 2;
          free (dbf->dir);
          dbf->dir = new_dir;
        }

      /* Copy all elements in dbf->bucket into the new buckets. */
      for (index = 0; index < dbf->header->bucket_elems; index++)
        {
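          /* The newly significant hash bit selects the destination bucket;
             the element's home slot is its hash value modulo bucket_elems,
             with linear probing past occupied slots. */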
          old_el = & (dbf->bucket->h_table[index]);
          select = (int) (old_el->hash_value >> (31-new_bits)) & 1;
          elem_loc = (int) (old_el->hash_value % dbf->header->bucket_elems);
          while (bucket[select]->h_table[elem_loc].hash_value != -1)
            elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
          bucket[select]->h_table[elem_loc] = *old_el;
          bucket[select]->count += 1;
        }

      /* Allocate avail space for the bucket[1]. */
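      /* bucket[1] starts with one fresh block of free space; bucket[0]
         inherits the old bucket's avail list in the loop below. */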
      bucket[1]->bucket_avail[0].av_adr
        = _gdbm_alloc (dbf, dbf->header->block_size);
      bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
      bucket[1]->av_count = 1;

      /* Copy the avail elements in dbf->bucket to bucket[0]. */
      bucket[0]->av_count = dbf->bucket->av_count;
      index = 0;
      index1 = 0;
      if (bucket[0]->av_count == BUCKET_AVAIL)
        {
          /* The avail is full, move the first one to bucket[1]. */
          _gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
                             bucket[1]->bucket_avail,
                             &bucket[1]->av_count);
          index = 1;
          bucket[0]->av_count--;
        }
      for (; index < dbf->bucket->av_count; index++)
        {
          bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
        }

      /* Update the directory.  We have new file addresses for both buckets. */
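      /* Within the block of directory slots that pointed to the old bucket,
         dir_start1 is the first slot whose index has the new bit set: slots
         [dir_start0, dir_start1) get bucket 0's address and slots
         [dir_start1, dir_end) get bucket 1's. */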
      dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1;
      dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
      dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
      dir_start0 = dir_start1 - (dir_end - dir_start1);
      for (index = dir_start0; index < dir_start1; index++)
        dbf->dir[index] = adr_0;
      for (index = dir_start1; index < dir_end; index++)
        dbf->dir[index] = adr_1;


      /* Set changed flags. */
      dbf->bucket_cache[cache_0].ca_changed = TRUE;
      dbf->bucket_cache[cache_1].ca_changed = TRUE;
      dbf->bucket_changed = TRUE;
      dbf->directory_changed = TRUE;
      dbf->second_changed = TRUE;

      /* Update the cache! */
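      /* NEXT_INSERT is treated as the hash value of the pending insert:
         recompute the directory slot it maps to so that dbf->bucket ends
         up pointing at the bucket that will receive it. */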
      dbf->bucket_dir = (int) (next_insert >> (31-dbf->header->dir_bits));

      /* Invalidate old cache entry. */
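      /* Remember the old bucket's disk address and size so that its space
         can be returned to one of the new buckets' avail lists below. */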
      old_bucket.av_adr = dbf->cache_entry->ca_adr;
      old_bucket.av_size = dbf->header->bucket_size;
      dbf->cache_entry->ca_adr = 0;
      dbf->cache_entry->ca_changed = FALSE;

      /* Set dbf->bucket to the proper bucket. */
      if (dbf->dir[dbf->bucket_dir] == adr_0)
        {
          dbf->bucket = bucket[0];
          dbf->cache_entry = &dbf->bucket_cache[cache_0];
          _gdbm_put_av_elem (old_bucket,
                             bucket[1]->bucket_avail,
                             &bucket[1]->av_count);
        }
      else
        {
          dbf->bucket = bucket[1];
          dbf->cache_entry = &dbf->bucket_cache[cache_1];
          _gdbm_put_av_elem (old_bucket,
                             bucket[0]->bucket_avail,
                             &bucket[0]->av_count);
        }

    }

  /* Get rid of old directories. */
  for (index = 0; index < old_count; index++)
    _gdbm_free (dbf, old_adr[index], old_size[index]);
}


/* The only place where a bucket is written.  CA_ENTRY is the
   cache entry containing the bucket to be written. */
void
_gdbm_write_bucket (dbf, ca_entry)
     gdbm_file_info *dbf;
     cache_elem *ca_entry;
{
  int  num_bytes;       /* The return value for write. */
  long file_pos;        /* The return value for lseek. */

  file_pos = lseek (dbf->desc, ca_entry->ca_adr, L_SET);
  if (file_pos != ca_entry->ca_adr)
    _gdbm_fatal (dbf, "lseek error");
  num_bytes = write (dbf->desc, ca_entry->ca_bucket, dbf->header->bucket_size);
  if (num_bytes != dbf->header->bucket_size)
    _gdbm_fatal (dbf, "write error");
  ca_entry->ca_changed = FALSE;
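  /* Clear the cached find information for this slot. */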
  ca_entry->ca_data.hash_val = -1;
  ca_entry->ca_data.elem_loc = -1;

}