home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: alt.sources
- From: chris@mimsy.umd.edu (Chris Torek)
- Subject: [comp.lang.c] Re: hash function for text in C
- Message-ID: <1990Oct18.184451.18734@math.lsa.umich.edu>
- Date: Thu, 18 Oct 90 18:44:51 GMT
-
- Archive-name: hash/18-Oct-90
- Original-posting-by: chris@mimsy.umd.edu (Chris Torek)
- Original-subject: Re: hash function for text in C
- Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti)
-
- [Reposted from comp.lang.c.
- Comments on this service to emv@math.lsa.umich.edu (Edward Vielmetti).]
-
- The following implements expanding chained hash tables for strings and
- numbers. This is unmodified from a special-purpose program (having to
- do with reading a large number of files with name=id pairs and doing
- various operations on them). Chain lengths are not monitored; instead,
- the table is invariably expanded whenever it becomes 2/3 full. Entries
- may be neither removed nor modified.
-
- An essentially-unrelated function called `string' maintains a string
- pool such that string(x) == string(y) iff strcmp(x,y)==0 (the program
- typically stores each name several times, and often needs to test for
- name equality, so this helps there).
-
- : Run this shell script with "sh" not "csh"
- PATH=/bin:/usr/bin:/usr/ucb:/etc:$PATH
- export PATH
- all=false
- if [ x$1 = x-a ]; then
- all=true
- fi
- echo Extracting hash.h
- sed 's/^X//' <<'//go.sysin dd *' >hash.h
- X/*
- X * $Id: hash.h,v 1.1 90/09/24 23:58:38 chris Exp $
- X *
- X * Hash table entries keep track of name (or id) = value pairs.
- X * Values are simply pointers. Hash tables themselves are named
- X * (for debugging).
- X */
- X
- Xstruct hashtab;
- X
- X/*
- X * The `ins' functions return nonzero if the old value existed,
- X * without changing the value.
- X * The iterate functions calls a given function with name,value
- X * or id,value pairs.
- X */
- Xstruct hashtab *ht_new(); /* given name, create new hash table */
- Xchar *ht_nget(); /* given table and name, get value */
- Xchar *ht_iget(); /* given table and ID, get value */
- Xint ht_nins(); /* given table and name, insert new value */
- Xint ht_iins(); /* given table and id, insert new value */
- Xvoid ht_niterate(); /* given table and function, iterate */
- Xvoid ht_iiterate(); /* given table and function, iterate */
- X
- X/*
- X * Some things that ought not to be here, but are anyway.
- X * goodnumber() takes a number of objects and a size and returns
- X * a new number of objects, such that malloc(goodnumber(n,size)*size)
- X * calls malloc with a `good' size value (resulting in less wasted
- X * memory). emalloc is malloc with program-abort on out-of-memory.
- X * string() makes a `read-only' copy of a string, reusing the previous
- X * copy if any.
- X */
- Xint goodnumber(); /* given n & sizeof, return new n */
- Xchar *emalloc(); /* malloc, exit on error */
- Xchar *string(); /* make an `ideal' copy of a string */
- //go.sysin dd *
- if [ `wc -c < hash.h` != 1415 ]; then
- made=false
- echo error transmitting hash.h --
- echo length should be 1415, not `wc -c < hash.h`
- else
- made=true
- fi
- if $all; then
- echo ' Changing owner to bin'
- chown bin hash.h
- else
- echo ' Original owner was bin'
- fi
- if $made; then
- chmod 444 hash.h
- echo -n ' '; ls -ld hash.h
- fi
- echo Extracting hash.c
- sed 's/^X//' <<'//go.sysin dd *' >hash.c
- X#ifndef lint
- Xstatic char rcsid[] = "$Id: hash.c,v 1.2 90/10/02 13:32:23 chris Exp $";
- X#endif
- X
- X/*
- X * Hash table routines.
- X */
- X
- X#include <stdio.h>
- X#include <stdlib.h>
- X#include <string.h>
- X#include "hash.h"
- X
- X/*
- X * Hash table entries keep track of name=value pairs.
- X * The names may be numeric IDs instead (by having a null name).
- X */
- Xstruct hashent {
- X struct hashent *h_next;/* next in chain */
- X int h_hash; /* hash value or ID */
- X char *h_name; /* name (null if from numeric ID) */
- X char *h_value; /* value */
- X};
- X
- Xstruct hashtab {
- X int ht_size; /* size (power of 2) */
- X int ht_mask; /* == ht_size - 1 */
- X int ht_used; /* number of entries used */
- X int ht_lim; /* when to expand */
- X struct hashent **ht_tab;/* base of table */
- X char ht_name[1]; /* table name; actually larger */
- X};
- X
- Xextern char *progname;
- X
- Xchar *
- Xemalloc(n)
- X size_t n;
- X{
- X register char *p = malloc(n);
- X
- X if (p == NULL) {
- X (void) fprintf(stderr, "%s: out of memory\n", progname);
- X exit(1);
- X }
- X return (p);
- X}
- X
- X/* round up to next multiple of y, where y is a power of 2 */
- X#define ROUND(x, y) (((x) + (y) - 1) & ~((y) - 1))
- X
- X/* compute a `good' number of objects to allocate via malloc */
- Xint
- Xgoodnumber(n, s)
- X int n;
- X size_t s;
- X{
- X
- X /* 16384 is a guess at a good page size for malloc */
- X /* 32 is a guess at malloc's overhead */
- X return ((int)((ROUND(n * s + 32, 16384) - 32) / s));
- X}
- X
- X/*
- X * Make a new hash table.
- X */
- Xstruct hashtab *
- Xht_new(name)
- X char *name;
- X{
- X register struct hashtab *ht;
- X register struct hashent **h;
- X register int n;
- X
- X ht = (struct hashtab *)emalloc(sizeof *ht + strlen(name));
- X ht->ht_tab = h = (struct hashent **)emalloc(128 * sizeof *h);
- X ht->ht_size = 128;
- X ht->ht_mask = 127;
- X for (n = 128; --n >= 0;)
- X *h++ = NULL;
- X ht->ht_used = 0;
- X ht->ht_lim = (128 * 2) / 3;
- X (void) strcpy(ht->ht_name, name);
- X return (ht);
- X}
- X
- X/*
- X * Expand an existing hash table.
- X */
- Xstatic void
- Xht_expand(ht)
- X register struct hashtab *ht;
- X{
- X register int n = ht->ht_size * 2, i;
- X register struct hashent *p, **h, **oldh, *q;
- X
- X h = (struct hashent **)emalloc(n * sizeof *h);
- X for (i = n; --i >= 0;)
- X *h++ = NULL;
- X h -= n;
- X oldh = ht->ht_tab;
- X n--;
- X for (i = ht->ht_size; --i >= 0;) {
- X for (p = *oldh++; p != NULL; p = q) {
- X q = p->h_next;
- X p->h_next = h[p->h_hash & n];
- X h[p->h_hash & n] = p;
- X }
- X }
- X free((char *)ht->ht_tab);
- X ht->ht_tab = h;
- X ht->ht_mask = n;
- X ht->ht_size = ++n;
- X ht->ht_lim = (n * 2) / 3;
- X}
- X
- X/*
- X * Make a new hash entry. Its h_next will be NULL.
- X */
- Xstatic struct hashent *
- Xnewhashent(hash, name, value)
- X int hash;
- X char *name, *value;
- X{
- X static struct hashent *hfree;
- X register struct hashent *h;
- X register int n, nalloc;
- X
- X if ((h = hfree) != NULL)
- X hfree = h->h_next;
- X else {
- X nalloc = goodnumber(2, sizeof *h); /* need at least 2 */
- X hfree = h = (struct hashent *)emalloc(nalloc * sizeof *h) + 1;
- X for (n = nalloc - 2; --n >= 0; h++)
- X h->h_next = h + 1;
- X h->h_next = NULL;
- X h -= nalloc - 1;
- X }
- X h->h_next = NULL;
- X h->h_hash = hash;
- X h->h_name = name;
- X h->h_value = value;
- X return (h);
- X}
- X
- X#define HASH(str, h, p) \
- X for (p = str, h = 0; *p;) h = (h << 5) - h + *p++
- X
- X/*
- X * Look up a name=value.
- X */
- Xchar *
- Xht_nget(ht, name)
- X register struct hashtab *ht;
- X char *name;
- X{
- X register char *p;
- X register int hash;
- X register struct hashent *h;
- X
- X HASH(name, hash, p);
- X p = name;
- X for (h = ht->ht_tab[hash & ht->ht_mask]; h != NULL; h = h->h_next)
- X if (h->h_hash == hash && h->h_name != NULL &&
- X strcmp(h->h_name, p) == 0)
- X return (h->h_value);
- X return (NULL);
- X}
- X
- X/*
- X * Look up an ID=value.
- X */
- Xchar *
- Xht_iget(ht, id)
- X register struct hashtab *ht;
- X register int id;
- X{
- X register struct hashent *h;
- X
- X for (h = ht->ht_tab[id & ht->ht_mask]; h != NULL; h = h->h_next)
- X if (h->h_hash == id && h->h_name == NULL)
- X return (h->h_value);
- X return (NULL);
- X}
- X
- X/*
- X * Insert (do not clobber) a name=value.
- X * Return zero on success.
- X */
- Xint
- Xht_nins(ht, name, value)
- X register struct hashtab *ht;
- X char *name, *value;
- X{
- X register char *p;
- X register int hash;
- X register struct hashent *h, **hp;
- X
- X HASH(name, hash, p);
- X p = name;
- X for (hp = &ht->ht_tab[hash & ht->ht_mask]; (h = *hp) != NULL;
- X hp = &h->h_next)
- X if (h->h_hash == hash && h->h_name != NULL &&
- X strcmp(h->h_name, p) == 0)
- X return (-1);
- X *hp = newhashent(hash, name, value);
- X if (++ht->ht_used > ht->ht_lim)
- X ht_expand(ht);
- X return (0);
- X}
- X
- X/*
- X * Insert (do not clobber) an ID=value.
- X * Return zero on success.
- X */
- Xint
- Xht_iins(ht, id, value)
- X register struct hashtab *ht;
- X register int id;
- X char *value;
- X{
- X register struct hashent *h, **hp;
- X
- X for (hp = &ht->ht_tab[id & ht->ht_mask]; (h = *hp) != NULL;
- X hp = &h->h_next)
- X if (h->h_hash == id && h->h_name == NULL)
- X return (-1);
- X *hp = newhashent(id, (char *)NULL, value);
- X if (++ht->ht_used > ht->ht_lim)
- X ht_expand(ht);
- X return (0);
- X}
- X
- X/*
- X * Stash a copy of a string away; it will never be freed.
- X */
- Xstatic char *
- Xpoolstr(s)
- X char *s;
- X{
- X register char *p;
- X register size_t l = strlen(s) + 1;
- X static char *poolp;
- X static size_t nleft;
- X
- X if (nleft < l)
- X poolp = emalloc(nleft = goodnumber(l, 1));
- X bcopy(s, p = poolp, l);
- X poolp += l;
- X return (p);
- X}
- X
- X/*
- X * Generate a single unique copy of the given string.
- X */
- Xchar *
- Xstring(s)
- X char *s;
- X{
- X register char *p;
- X register int hash;
- X register struct hashent *h, **hp;
- X static struct hashtab *ht;
- X
- X if (ht == NULL)
- X ht = ht_new("strings");
- X HASH(s, hash, p);
- X p = s;
- X for (hp = &ht->ht_tab[hash & ht->ht_mask]; (h = *hp) != NULL;
- X hp = &h->h_next)
- X if (h->h_hash == hash && strcmp(h->h_name, p) == 0)
- X return (h->h_name);
- X *hp = h = newhashent(hash, poolstr(s), (char *)NULL);
- X if (++ht->ht_used > ht->ht_lim)
- X ht_expand(ht);
- X return (h->h_name);
- X}
- X
- X/*
- X * Call fn on all the name=value pairs.
- X */
- Xvoid
- Xht_niterate(ht, fn)
- X struct hashtab *ht;
- X register void (*fn)();
- X{
- X register struct hashent *h, **hp;
- X register int n;
- X
- X for (n = ht->ht_size, hp = ht->ht_tab; --n >= 0;)
- X for (h = *hp++; h != NULL; h = h->h_next)
- X if (h->h_name != NULL)
- X (*fn)(h->h_name, h->h_value);
- X}
- X
- X/*
- X * Call fn on all the id=value pairs.
- X */
- Xvoid
- Xht_iiterate(ht, fn)
- X struct hashtab *ht;
- X register void (*fn)();
- X{
- X register struct hashent *h, **hp;
- X register int n;
- X
- X for (n = ht->ht_size, hp = ht->ht_tab; --n >= 0;)
- X for (h = *hp++; h != NULL; h = h->h_next)
- X if (h->h_name == NULL)
- X (*fn)(h->h_hash, h->h_value);
- X}
- //go.sysin dd *
- if [ `wc -c < hash.c` != 6291 ]; then
- made=false
- echo error transmitting hash.c --
- echo length should be 6291, not `wc -c < hash.c`
- else
- made=true
- fi
- if $all; then
- echo ' Changing owner to bin'
- chown bin hash.c
- else
- echo ' Original owner was bin'
- fi
- if $made; then
- chmod 444 hash.c
- echo -n ' '; ls -ld hash.c
- fi
- --
- In-Real-Life: Chris Torek, Univ of MD Comp Sci Dept (+1 301 405 2750)
- Domain: chris@cs.umd.edu Path: uunet!mimsy!chris
-