home *** CD-ROM | disk | FTP | other *** search
- /* WIDE AREA INFORMATION SERVER SOFTWARE:
- No guarantees or restrictions. See the readme file for the full standard
- disclaimer.
-
- Brewster@think.com
- */
-
- /* Include file for the irhash.c file.
- Implements the building functions in irext.h */
-
- #ifndef IRHASH_H
- #define IRHASH_H
-
- #include "cdialect.h"
- #include "cutil.h"
- #include "irlex.h"
- #include "hash.h"
- #include "irlex.h" /* for MAX_WORD_LENGTH */
-
- /* The amount of memory for word occurrences (bytes). Judging by the
-    name, this is presumably the initial block size -- confirm in irhash.c. */
- #define WORD_MEMORY_INIT_BLOCK_SIZE 10
-
- /* This is the maximum number of occurrences that will be stored in the
- * disk table. The number of occurrences will reflect the total number in
- * all files. The theory is that if a word is very common, then it
- * is not very useful in discriminating between files. Also, if it
- * is very common, then it takes up a lot of space.
- * Maybe this should be dependent on the number of documents indexed.
- * Therefore if a word is in every document, then it probably does not mean
- * much.
- * If this is increased, the inverted file may not keep all the references,
- * because the max length of an index block is governed
- * by a size that can be represented in INDEX_BLOCK_SIZE_SIZE bytes.
- */
- #define MAX_OCCURANCES 20000L
-
- /* This is a flag to be put in the number_of_occurances field of a
- word_entry so that it is always greater than the limit and no words will be
- collected. The value 0x40000000 is far larger than MAX_OCCURANCES (20000L),
- which is presumably the limit meant here -- confirm in irhash.c. */
- #define STOP_WORD_FLAG 0x40000000
-
- #ifdef __cplusplus
- /* declare these as C style functions */
- extern "C"
- {
- #endif /* def __cplusplus */
- /* Takes a size and returns a pointer to unsigned char. Presumably this
-  * creates a word occurrence block of the requested size; the units and
-  * the failure behaviour are not visible in this header -- confirm in
-  * irhash.c.
-  * NOTE(review): this name is spelled "occurrance", unlike the "occurance"
-  * spelling used by the other declarations here; callers must match it.
-  */
- unsigned char *make_word_occurrance_block _AP((long size));
- /* Takes a block pointer and returns nothing. Presumably releases a block
-  * obtained from make_word_occurrance_block -- confirm in irhash.c.
-  */
- void free_word_occurance_block _AP((unsigned char *block));
- /* Takes no arguments and returns nothing. Presumably flushes the word
-  * occurrence buffers; what flushing involves is defined in irhash.c.
-  */
- void flush_word_occurance_buffers _AP((void));
- /* Takes a pointer to the word memory hashtable and returns nothing.
-  * Presumably garbage-collects the word occurrence buffers referenced from
-  * that table -- confirm in irhash.c.
-  */
- void gc_word_occurance_buffers _AP((hashtable * the_word_memory_hashtable));
- /* Takes a pointer to the word memory hashtable and returns nothing.
-  * Presumably enters the stop words into that table (likely marking them
-  * with STOP_WORD_FLAG) -- confirm in irhash.c.
-  */
- void add_stop_words _AP((hashtable *the_word_memory_hashtable));
- /* Takes a value, a size and a destination pointer, and returns a long.
-  * Presumably stores value as size bytes at ptr; the byte order and the
-  * meaning of the return value are not visible here -- confirm in irhash.c.
-  */
- long write_bytes_to_memory _AP((long value,long size,unsigned char* ptr));
-
- #ifdef __cplusplus
- }
- #endif /* def __cplusplus */
-
- #endif /* ndef IRHASH_H */
-